/*
    Copyright (c) 2005-2024 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_enumerable_thread_specific_H
#define __TBB_enumerable_thread_specific_H

#include "detail/_config.h"
#include "detail/_namespace_injection.h"
#include "detail/_assert.h"
#include "detail/_template_helpers.h"
#include "detail/_aligned_space.h"

#include "concurrent_vector.h"
#include "tbb_allocator.h"
#include "cache_aligned_allocator.h"
#include "profiling.h"

#include <atomic>
#include <thread>
#include <cstring> // std::memset
#include <cstddef> // std::ptrdiff_t

#include "task.h" // for task::suspend_point

#if _WIN32 || _WIN64
#ifndef NOMINMAX
#define NOMINMAX
#define __TBB_DEFINED_NOMINMAX 1
#endif
#include <windows.h>
#if __TBB_DEFINED_NOMINMAX
#undef NOMINMAX
#undef __TBB_DEFINED_NOMINMAX
#endif
#else
#include <pthread.h>
#endif

namespace tbb {
namespace detail {
namespace d1 {

//! enum for selecting between single key and key-per-instance versions
enum ets_key_usage_type {
    ets_key_per_instance
    , ets_no_key
#if __TBB_RESUMABLE_TASKS
    , ets_suspend_aware
#endif
};
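// For example, the default enumerable_thread_specific<T> uses ets_no_key (a hash-table
// lookup keyed by the current thread id), while ets_key_per_instance gives each
// container instance its own native TLS key (see the specialization below).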
64 
65 // Forward declaration to use in internal classes
66 template <typename T, typename Allocator, ets_key_usage_type ETS_key_type>
67 class enumerable_thread_specific;
68 
69 template <std::size_t ThreadIDSize>
70 struct internal_ets_key_selector {
71     using key_type = std::thread::id;
current_keyinternal_ets_key_selector72     static key_type current_key() {
73         return std::this_thread::get_id();
74     }
75 };
76 
// The Intel Compiler on OS X cannot create atomic objects instantiated from non-fundamental types
#if __INTEL_COMPILER && __APPLE__
template<>
struct internal_ets_key_selector<sizeof(std::size_t)> {
    using key_type = std::size_t;
    static key_type current_key() {
        auto id = std::this_thread::get_id();
        return reinterpret_cast<key_type&>(id);
    }
};
#endif

template <ets_key_usage_type ETS_key_type>
struct ets_key_selector : internal_ets_key_selector<sizeof(std::thread::id)> {};

#if __TBB_RESUMABLE_TASKS
template <>
struct ets_key_selector<ets_suspend_aware> {
    using key_type = suspend_point;
    static key_type current_key() {
        return r1::current_suspend_point();
    }
};
#endif

template<ets_key_usage_type ETS_key_type>
class ets_base : detail::no_copy {
protected:
    using key_type = typename ets_key_selector<ETS_key_type>::key_type;

public:
    struct slot;
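    // Hash-table segment header. The slot entries live in the same allocation,
    // immediately after this header (see at(), which indexes past this+1).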
    struct array {
        array* next;
        std::size_t lg_size;
        slot& at( std::size_t k ) {
            return (reinterpret_cast<slot*>(reinterpret_cast<void*>(this+1)))[k];
        }
        std::size_t size() const { return std::size_t(1) << lg_size; }
        std::size_t mask() const { return size() - 1; }
        std::size_t start( std::size_t h ) const {
            return h >> (8 * sizeof(std::size_t) - lg_size);
        }
    };
    struct slot {
        std::atomic<key_type> key;
        void* ptr;
        bool empty() const { return key.load(std::memory_order_relaxed) == key_type(); }
        bool match( key_type k ) const { return key.load(std::memory_order_relaxed) == k; }
        bool claim( key_type k ) {
            // TODO: maybe claim ptr, because key_type is not guaranteed to fit into word size
            key_type expected = key_type();
            return key.compare_exchange_strong(expected, k);
        }
    };

protected:
    //! Root of linked list of arrays of decreasing size.
    /** nullptr if and only if my_count==0.
        Each array in the list is half the size of its predecessor. */
    std::atomic<array*> my_root;
    std::atomic<std::size_t> my_count;

    virtual void* create_local() = 0;
    virtual void* create_array(std::size_t _size) = 0;  // _size in bytes
    virtual void free_array(void* ptr, std::size_t _size) = 0; // _size in bytes

    array* allocate( std::size_t lg_size ) {
        std::size_t n = std::size_t(1) << lg_size;
        array* a = static_cast<array*>(create_array(sizeof(array) + n * sizeof(slot)));
        a->lg_size = lg_size;
        std::memset( a + 1, 0, n * sizeof(slot) );
        return a;
    }
    void deallocate(array* a) {
        std::size_t n = std::size_t(1) << (a->lg_size);
        free_array( static_cast<void*>(a), std::size_t(sizeof(array) + n * sizeof(slot)) );
    }

    ets_base() : my_root{nullptr}, my_count{0} {}
    virtual ~ets_base();  // g++ complains if this is not virtual

    void* table_lookup( bool& exists );
    void  table_clear();
    // The following functions are not used in concurrent context,
    // so we don't need synchronization and ITT annotations there.
    template <ets_key_usage_type E2>
    void table_elementwise_copy( const ets_base& other,
                                 void*(*add_element)(ets_base<E2>&, void*) ) {
        __TBB_ASSERT(!my_root.load(std::memory_order_relaxed), nullptr);
        __TBB_ASSERT(!my_count.load(std::memory_order_relaxed), nullptr);
        if( !other.my_root.load(std::memory_order_relaxed) ) return;
        array* root = allocate(other.my_root.load(std::memory_order_relaxed)->lg_size);
        my_root.store(root, std::memory_order_relaxed);
        root->next = nullptr;
        my_count.store(other.my_count.load(std::memory_order_relaxed), std::memory_order_relaxed);
        std::size_t mask = root->mask();
        for( array* r = other.my_root.load(std::memory_order_relaxed); r; r = r->next ) {
            for( std::size_t i = 0; i < r->size(); ++i ) {
                slot& s1 = r->at(i);
                if( !s1.empty() ) {
                    for( std::size_t j = root->start(std::hash<key_type>{}(s1.key.load(std::memory_order_relaxed))); ; j = (j+1)&mask ) {
                        slot& s2 = root->at(j);
                        if( s2.empty() ) {
                            s2.ptr = add_element(static_cast<ets_base<E2>&>(*this), s1.ptr);
                            s2.key.store(s1.key.load(std::memory_order_relaxed), std::memory_order_relaxed);
                            break;
                        }
                        else if( s2.match(s1.key.load(std::memory_order_relaxed)) )
                            break;
                    }
                }
            }
        }
    }
    void table_swap( ets_base& other ) {
       __TBB_ASSERT(this!=&other, "Don't swap an instance with itself");
       swap_atomics_relaxed(my_root, other.my_root);
       swap_atomics_relaxed(my_count, other.my_count);
    }
};

template<ets_key_usage_type ETS_key_type>
ets_base<ETS_key_type>::~ets_base() {
    __TBB_ASSERT(!my_root.load(std::memory_order_relaxed), nullptr);
}

template<ets_key_usage_type ETS_key_type>
void ets_base<ETS_key_type>::table_clear() {
    while ( array* r = my_root.load(std::memory_order_relaxed) ) {
        my_root.store(r->next, std::memory_order_relaxed);
        deallocate(r);
    }
    my_count.store(0, std::memory_order_relaxed);
}

template<ets_key_usage_type ETS_key_type>
void* ets_base<ETS_key_type>::table_lookup( bool& exists ) {
    const key_type k = ets_key_selector<ETS_key_type>::current_key();

    __TBB_ASSERT(k != key_type(), nullptr);
    void* found;
    std::size_t h = std::hash<key_type>{}(k);
    for( array* r = my_root.load(std::memory_order_acquire); r; r = r->next ) {
        call_itt_notify(acquired,r);
        std::size_t mask=r->mask();
        for(std::size_t i = r->start(h); ;i=(i+1)&mask) {
            slot& s = r->at(i);
            if( s.empty() ) break;
            if( s.match(k) ) {
                if( r == my_root.load(std::memory_order_acquire) ) {
                    // Success at top level
                    exists = true;
                    return s.ptr;
                } else {
                    // Success at some other level.  Need to insert at top level.
                    exists = true;
                    found = s.ptr;
                    goto insert;
                }
            }
        }
    }
    // Key does not yet exist. The density of slots in the table never exceeds 0.5:
    // if an insertion would cross that threshold, a new table of twice the current
    // size is allocated and swapped in as the new root. So an empty slot is guaranteed.
    exists = false;
    found = create_local();
    {
        std::size_t c = ++my_count;
        array* r = my_root.load(std::memory_order_acquire);
        call_itt_notify(acquired,r);
        if( !r || c > r->size()/2 ) {
            std::size_t s = r ? r->lg_size : 2;
            while( c > std::size_t(1)<<(s-1) ) ++s;
            array* a = allocate(s);
            for(;;) {
                a->next = r;
                call_itt_notify(releasing,a);
                array* new_r = r;
                if( my_root.compare_exchange_strong(new_r, a) ) break;
                call_itt_notify(acquired, new_r);
                __TBB_ASSERT(new_r != nullptr, nullptr);
                if( new_r->lg_size >= s ) {
                    // Another thread inserted an equal-sized or bigger array, so our array is superfluous.
                    deallocate(a);
                    break;
                }
                r = new_r;
            }
        }
    }
    insert:
    // Whether the value was found in an older table or newly created above, it has
    // already been accounted for in my_count. The top-level table is guaranteed to
    // have room for it and not to contain it yet, so find an empty slot and claim it.
    array* ir = my_root.load(std::memory_order_acquire);
    call_itt_notify(acquired, ir);
    std::size_t mask = ir->mask();
    for(std::size_t i = ir->start(h);; i = (i+1)&mask) {
        slot& s = ir->at(i);
        if( s.empty() ) {
            if( s.claim(k) ) {
                s.ptr = found;
                return found;
            }
        }
    }
}

//! Specialization that exploits native TLS
/** The native TLS slot caches the pointer to the calling thread's local copy,
    so repeated lookups by the same thread bypass the hash table entirely. */
template <>
class ets_base<ets_key_per_instance>: public ets_base<ets_no_key> {
    using super = ets_base<ets_no_key>;
#if _WIN32||_WIN64
#if __TBB_WIN8UI_SUPPORT
    using tls_key_t = DWORD;
    void create_key() { my_key = FlsAlloc(nullptr); }
    void destroy_key() { FlsFree(my_key); }
    void set_tls(void * value) { FlsSetValue(my_key, (LPVOID)value); }
    void* get_tls() { return (void *)FlsGetValue(my_key); }
#else
    using tls_key_t = DWORD;
    void create_key() { my_key = TlsAlloc(); }
    void destroy_key() { TlsFree(my_key); }
    void set_tls(void * value) { TlsSetValue(my_key, (LPVOID)value); }
    void* get_tls() { return (void *)TlsGetValue(my_key); }
#endif
#else
    using tls_key_t = pthread_key_t;
    void create_key() { pthread_key_create(&my_key, nullptr); }
    void destroy_key() { pthread_key_delete(my_key); }
    void set_tls( void * value ) const { pthread_setspecific(my_key, value); }
    void* get_tls() const { return pthread_getspecific(my_key); }
#endif
    tls_key_t my_key;
    virtual void* create_local() override = 0;
    virtual void* create_array(std::size_t _size) override = 0;  // _size in bytes
    virtual void free_array(void* ptr, std::size_t _size) override = 0; // _size in bytes
protected:
    ets_base() {create_key();}
    ~ets_base() {destroy_key();}
    void* table_lookup( bool& exists ) {
        void* found = get_tls();
        if( found ) {
            exists=true;
        } else {
            found = super::table_lookup(exists);
            set_tls(found);
        }
        return found;
    }
    void table_clear() {
        destroy_key();
        create_key();
        super::table_clear();
    }
    void table_swap( ets_base& other ) {
       using std::swap;
       __TBB_ASSERT(this!=&other, "Don't swap an instance with itself");
       swap(my_key, other.my_key);
       super::table_swap(other);
    }
};

//! Random access iterator for traversing the thread local copies.
template< typename Container, typename Value >
class enumerable_thread_specific_iterator
{
    //! current position in the concurrent_vector
    Container *my_container;
    typename Container::size_type my_index;
    mutable Value *my_value;

    template<typename C, typename T, typename U>
    friend bool operator==( const enumerable_thread_specific_iterator<C, T>& i,
                     const enumerable_thread_specific_iterator<C, U>& j );

    template<typename C, typename T, typename U>
    friend bool operator<( const enumerable_thread_specific_iterator<C,T>& i,
                           const enumerable_thread_specific_iterator<C,U>& j );

    template<typename C, typename T, typename U>
    friend std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<C,T>& i,
                                const enumerable_thread_specific_iterator<C,U>& j );

    template<typename C, typename U>
    friend class enumerable_thread_specific_iterator;

public:
    //! STL support
    using difference_type = std::ptrdiff_t;
    using value_type = Value;
    using pointer = Value*;
    using reference = Value&;
    using iterator_category = std::random_access_iterator_tag;

    enumerable_thread_specific_iterator( const Container &container, typename Container::size_type index ) :
        my_container(&const_cast<Container &>(container)), my_index(index), my_value(nullptr) {}

    //! Default constructor
    enumerable_thread_specific_iterator() : my_container(nullptr), my_index(0), my_value(nullptr) {}

    template<typename U>
    enumerable_thread_specific_iterator( const enumerable_thread_specific_iterator<Container, U>& other ) :
            my_container( other.my_container ), my_index( other.my_index), my_value( const_cast<Value *>(other.my_value) ) {}

    enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset ) const {
        return enumerable_thread_specific_iterator(*my_container, my_index + offset);
    }

    friend enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset, enumerable_thread_specific_iterator v ) {
        return enumerable_thread_specific_iterator(*v.my_container, v.my_index + offset);
    }

    enumerable_thread_specific_iterator &operator+=( std::ptrdiff_t offset ) {
        my_index += offset;
        my_value = nullptr;
        return *this;
    }

    enumerable_thread_specific_iterator operator-( std::ptrdiff_t offset ) const {
        return enumerable_thread_specific_iterator( *my_container, my_index-offset );
    }

    enumerable_thread_specific_iterator &operator-=( std::ptrdiff_t offset ) {
        my_index -= offset;
        my_value = nullptr;
        return *this;
    }

    Value& operator*() const {
        Value* value = my_value;
        if( !value ) {
            value = my_value = (*my_container)[my_index].value();
        }
        __TBB_ASSERT( value==(*my_container)[my_index].value(), "corrupt cache" );
        return *value;
    }

    Value& operator[]( std::ptrdiff_t k ) const {
       return *(*my_container)[my_index + k].value();
    }

    Value* operator->() const {return &operator*();}

    enumerable_thread_specific_iterator& operator++() {
        ++my_index;
        my_value = nullptr;
        return *this;
    }

    enumerable_thread_specific_iterator& operator--() {
        --my_index;
        my_value = nullptr;
        return *this;
    }

    //! Post increment
    enumerable_thread_specific_iterator operator++(int) {
        enumerable_thread_specific_iterator result = *this;
        ++my_index;
        my_value = nullptr;
        return result;
    }

    //! Post decrement
    enumerable_thread_specific_iterator operator--(int) {
        enumerable_thread_specific_iterator result = *this;
        --my_index;
        my_value = nullptr;
        return result;
    }
};

template<typename Container, typename T, typename U>
bool operator==( const enumerable_thread_specific_iterator<Container, T>& i,
                 const enumerable_thread_specific_iterator<Container, U>& j ) {
    return i.my_index == j.my_index && i.my_container == j.my_container;
}

template<typename Container, typename T, typename U>
bool operator!=( const enumerable_thread_specific_iterator<Container,T>& i,
                 const enumerable_thread_specific_iterator<Container,U>& j ) {
    return !(i==j);
}

template<typename Container, typename T, typename U>
bool operator<( const enumerable_thread_specific_iterator<Container,T>& i,
                const enumerable_thread_specific_iterator<Container,U>& j ) {
    return i.my_index<j.my_index;
}

template<typename Container, typename T, typename U>
bool operator>( const enumerable_thread_specific_iterator<Container,T>& i,
                const enumerable_thread_specific_iterator<Container,U>& j ) {
    return j<i;
}

template<typename Container, typename T, typename U>
bool operator>=( const enumerable_thread_specific_iterator<Container,T>& i,
                 const enumerable_thread_specific_iterator<Container,U>& j ) {
    return !(i<j);
}

template<typename Container, typename T, typename U>
bool operator<=( const enumerable_thread_specific_iterator<Container,T>& i,
                 const enumerable_thread_specific_iterator<Container,U>& j ) {
    return !(j<i);
}

template<typename Container, typename T, typename U>
std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<Container,T>& i,
                     const enumerable_thread_specific_iterator<Container,U>& j ) {
    return i.my_index-j.my_index;
}

template<typename SegmentedContainer, typename Value >
class segmented_iterator
{
    template<typename C, typename T, typename U>
    friend bool operator==(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j);

    template<typename C, typename T, typename U>
    friend bool operator!=(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j);

    template<typename C, typename U>
    friend class segmented_iterator;

public:
    segmented_iterator() {my_segcont = nullptr;}

    segmented_iterator( const SegmentedContainer& _segmented_container ) :
        my_segcont(const_cast<SegmentedContainer*>(&_segmented_container)),
        outer_iter(my_segcont->end()) { }

    ~segmented_iterator() {}

    using InnerContainer = typename SegmentedContainer::value_type;
    using inner_iterator = typename InnerContainer::iterator;
    using outer_iterator = typename SegmentedContainer::iterator;

    // STL support
    // TODO: inherit all types from segmented container?
    using difference_type = std::ptrdiff_t;
    using value_type = Value;
    using size_type = typename SegmentedContainer::size_type;
    using pointer = Value*;
    using reference = Value&;
    using iterator_category = std::input_iterator_tag;

    // Copy Constructor
    template<typename U>
    segmented_iterator(const segmented_iterator<SegmentedContainer, U>& other) :
        my_segcont(other.my_segcont),
        outer_iter(other.outer_iter),
        // can we assign a default-constructed iterator to inner if we're at the end?
        inner_iter(other.inner_iter)
    {}

    // assignment
    template<typename U>
    segmented_iterator& operator=( const segmented_iterator<SegmentedContainer, U>& other) {
        my_segcont = other.my_segcont;
        outer_iter = other.outer_iter;
        if(outer_iter != my_segcont->end()) inner_iter = other.inner_iter;
        return *this;
    }

    // allow assignment of outer iterator to segmented iterator.  Once it is
    // assigned, move forward until a non-empty inner container is found or
    // the end of the outer container is reached.
    segmented_iterator& operator=(const outer_iterator& new_outer_iter) {
        __TBB_ASSERT(my_segcont != nullptr, nullptr);
        // check that this iterator points to something inside the segmented container
        for(outer_iter = new_outer_iter ;outer_iter!=my_segcont->end(); ++outer_iter) {
            if( !outer_iter->empty() ) {
                inner_iter = outer_iter->begin();
                break;
            }
        }
        return *this;
    }

    // pre-increment
    segmented_iterator& operator++() {
        advance_me();
        return *this;
    }

    // post-increment
    segmented_iterator operator++(int) {
        segmented_iterator tmp = *this;
        operator++();
        return tmp;
    }

    bool operator==(const outer_iterator& other_outer) const {
        __TBB_ASSERT(my_segcont != nullptr, nullptr);
        return (outer_iter == other_outer &&
                (outer_iter == my_segcont->end() || inner_iter == outer_iter->begin()));
    }

    bool operator!=(const outer_iterator& other_outer) const {
        return !operator==(other_outer);
    }

    // *i (dereference, used as an rvalue)
    reference operator*() const {
        __TBB_ASSERT(my_segcont != nullptr, nullptr);
        __TBB_ASSERT(outer_iter != my_segcont->end(), "Dereferencing a pointer at end of container");
        __TBB_ASSERT(inner_iter != outer_iter->end(), nullptr); // should never happen
        return *inner_iter;
    }

    // i->
    pointer operator->() const { return &operator*();}

private:
    SegmentedContainer* my_segcont;
    outer_iterator outer_iter;
    inner_iterator inner_iter;

    void advance_me() {
        __TBB_ASSERT(my_segcont != nullptr, nullptr);
        __TBB_ASSERT(outer_iter != my_segcont->end(), nullptr); // not true if there are no inner containers
        __TBB_ASSERT(inner_iter != outer_iter->end(), nullptr); // not true if the inner containers are all empty.
        ++inner_iter;
        while(inner_iter == outer_iter->end() && ++outer_iter != my_segcont->end()) {
            inner_iter = outer_iter->begin();
        }
    }
};    // segmented_iterator

template<typename SegmentedContainer, typename T, typename U>
bool operator==( const segmented_iterator<SegmentedContainer,T>& i,
                 const segmented_iterator<SegmentedContainer,U>& j ) {
    if(i.my_segcont != j.my_segcont) return false;
    if(i.my_segcont == nullptr) return true;
    if(i.outer_iter != j.outer_iter) return false;
    if(i.outer_iter == i.my_segcont->end()) return true;
    return i.inner_iter == j.inner_iter;
}

// !=
template<typename SegmentedContainer, typename T, typename U>
bool operator!=( const segmented_iterator<SegmentedContainer,T>& i,
                 const segmented_iterator<SegmentedContainer,U>& j ) {
    return !(i==j);
}

template<typename T>
struct construct_by_default: no_assign {
    void construct(void* where) {new(where) T();} // C++ note: the () in T() ensures value initialization (zero for built-in types).
    construct_by_default( int ) {}
};

template<typename T>
struct construct_by_exemplar: no_assign {
    const T exemplar;
    void construct(void* where) {new(where) T(exemplar);}
    construct_by_exemplar( const T& t ) : exemplar(t) {}
    construct_by_exemplar( T&& t ) : exemplar(std::move(t)) {}
};

template<typename T, typename Finit>
struct construct_by_finit: no_assign {
    Finit f;
    void construct(void* where) {new(where) T(f());}
    construct_by_finit( Finit&& f_ ) : f(std::move(f_)) {}
};

template<typename T, typename... P>
struct construct_by_args: no_assign {
    stored_pack<P...> pack;
    void construct(void* where) {
        call( [where](const typename std::decay<P>::type&... args ){
           new(where) T(args...);
        }, pack );
    }
    construct_by_args( P&& ... args ) : pack(std::forward<P>(args)...) {}
};

// Polymorphic storage for the per-element initialization callback
// TODO: consider removing the template parameter T here and in callback_leaf
class callback_base {
public:
    // Clone *this
    virtual callback_base* clone() const = 0;
    // Destruct and free *this
    virtual void destroy() = 0;
    // Need virtual destructor to satisfy GCC compiler warning
    virtual ~callback_base() { }
    // Construct T at where
    virtual void construct(void* where) = 0;
};

template <typename Constructor>
class callback_leaf: public callback_base, Constructor {
    template<typename... P> callback_leaf( P&& ... params ) : Constructor(std::forward<P>(params)...) {}
    // TODO: make the construction/destruction consistent (use allocator.construct/destroy)
    using my_allocator_type = typename tbb::tbb_allocator<callback_leaf>;

    callback_base* clone() const override {
        return make(*this);
    }

    void destroy() override {
        my_allocator_type alloc;
        tbb::detail::allocator_traits<my_allocator_type>::destroy(alloc, this);
        tbb::detail::allocator_traits<my_allocator_type>::deallocate(alloc, this, 1);
    }

    void construct(void* where) override {
        Constructor::construct(where);
    }

public:
    template<typename... P>
    static callback_base* make( P&& ... params ) {
        void* where = my_allocator_type().allocate(1);
        return new(where) callback_leaf( std::forward<P>(params)... );
    }
};

//! Template for recording construction of objects in table
/** All maintenance of the space will be done explicitly on push_back,
    and all thread local copies must be destroyed before the concurrent
    vector is deleted.

    The flag is_built is initialized to false.  When the local is
    successfully constructed, set the flag to true or call value_committed().
    If the constructor throws, the flag remains false.
*/
template<typename U>
struct ets_element {
    detail::aligned_space<U> my_space;
    bool is_built;
    ets_element() { is_built = false; }  // not built yet
    U* value() { return my_space.begin(); }
    U* value_committed() { is_built = true; return my_space.begin(); }
    ~ets_element() {
        if(is_built) {
            my_space.begin()->~U();
            is_built = false;
        }
    }
};
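// The commit pattern used by create_local() and combine() below: construct the value
// in place first, and mark the element built only after construction succeeds, e.g.
//     ets_element<T> e;
//     my_construct_callback->construct(e.value()); // may throw; e stays "not built"
//     return *e.value_committed();                 // from now on ~ets_element() runs ~T()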

// A predicate that can be used for a compile-time compatibility check of ETS instances
// Ideally, it should have been declared inside the ETS class, but unfortunately
// in that case VS2013 does not enable the variadic constructor.
template<typename T, typename ETS> struct is_compatible_ets : std::false_type {};
template<typename T, typename U, typename A, ets_key_usage_type C>
struct is_compatible_ets< T, enumerable_thread_specific<U,A,C> > : std::is_same<T, U> {};

// A predicate that checks whether, for a variable 'foo' of type T, foo() is a valid expression
template <typename T> using has_empty_braces_operator = decltype(std::declval<T>()());
template <typename T> using is_callable_no_args = supports<T, has_empty_braces_operator>;
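// For example, is_callable_no_args<int(*)()>::value is true while
// is_callable_no_args<int>::value is false; this is what lets the constructors below
// distinguish an initializer functor from an exemplar value.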

//! The enumerable_thread_specific container
/** enumerable_thread_specific has the following properties:
    - thread-local copies are lazily created, with default, exemplar or function initialization.
    - thread-local copies do not move (during lifetime, and excepting clear()) so the address of a copy is invariant.
    - the contained objects need not have operator=() defined if combine is not used.
    - enumerable_thread_specific containers may be copy-constructed or assigned.
    - thread-local copies can be managed by hash-table, or can be accessed via TLS storage for speed.
    - outside of parallel contexts, the contents of all thread-local copies are accessible by iterator or using combine or combine_each methods

@par Segmented iterator
    When the thread-local objects are containers with input_iterators defined, a segmented iterator may
    be used to iterate over all the elements of all thread-local copies.

@par combine and combine_each
    - Both methods are defined for enumerable_thread_specific.
    - combine() requires the type T have operator=() defined.
    - neither method modifies the contents of the object (though there is no guarantee that the applied methods do not modify the object.)
    - Both are evaluated in serial context (the methods are assumed to be non-benign.)

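@par Example
    A minimal usage sketch (assumes tbb/parallel_for.h and <functional> are also included):
    @code
    tbb::enumerable_thread_specific<int> counters;
    tbb::parallel_for(0, 1000, [&](int) { ++counters.local(); });
    int total = counters.combine(std::plus<int>{});
    @endcode
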
@ingroup containers */
template <typename T, typename Allocator=cache_aligned_allocator<T>,
          ets_key_usage_type ETS_key_type=ets_no_key >
class enumerable_thread_specific: ets_base<ETS_key_type> {

    template<typename U, typename A, ets_key_usage_type C> friend class enumerable_thread_specific;

    using padded_element = padded<ets_element<T>>;

    //! A generic range, used to create range objects from the iterators
    template<typename I>
    class generic_range_type: public blocked_range<I> {
    public:
        using value_type = T;
        using reference = T&;
        using const_reference = const T&;
        using iterator = I;
        using difference_type = std::ptrdiff_t;

        generic_range_type( I begin_, I end_, std::size_t grainsize_ = 1) : blocked_range<I>(begin_,end_,grainsize_) {}
        template<typename U>
        generic_range_type( const generic_range_type<U>& r) : blocked_range<I>(r.begin(),r.end(),r.grainsize()) {}
        generic_range_type( generic_range_type& r, split ) : blocked_range<I>(r,split()) {}
    };

    using allocator_traits_type = tbb::detail::allocator_traits<Allocator>;

    using padded_allocator_type = typename allocator_traits_type::template rebind_alloc<padded_element>;
    using internal_collection_type = tbb::concurrent_vector< padded_element, padded_allocator_type >;

    callback_base *my_construct_callback;

    internal_collection_type my_locals;

    // TODO: consider unifying the callback mechanism for all create_local* methods below
    //   (likely non-compatible and requires interface version increase)
    void* create_local() override {
        padded_element& lref = *my_locals.grow_by(1);
        my_construct_callback->construct(lref.value());
        return lref.value_committed();
    }

    static void* create_local_by_copy( ets_base<ETS_key_type>& base, void* p ) {
        enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base);
        padded_element& lref = *ets.my_locals.grow_by(1);
        new(lref.value()) T(*static_cast<T*>(p));
        return lref.value_committed();
    }

    static void* create_local_by_move( ets_base<ETS_key_type>& base, void* p ) {
        enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base);
        padded_element& lref = *ets.my_locals.grow_by(1);
        new(lref.value()) T(std::move(*static_cast<T*>(p)));
        return lref.value_committed();
    }

    using array_allocator_type = typename allocator_traits_type::template rebind_alloc<uintptr_t>;

    // _size is in bytes
    void* create_array(std::size_t _size) override {
        std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t);
        return array_allocator_type().allocate(nelements);
    }

    void free_array( void* _ptr, std::size_t _size) override {
        std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t);
        array_allocator_type().deallocate( reinterpret_cast<uintptr_t *>(_ptr),nelements);
    }

public:

    //! Basic types
    using value_type = T;
    using allocator_type = Allocator;
    using size_type = typename internal_collection_type::size_type;
    using difference_type = typename internal_collection_type::difference_type;
    using reference = value_type&;
    using const_reference = const value_type&;

    using pointer = typename allocator_traits_type::pointer;
    using const_pointer = typename allocator_traits_type::const_pointer;

    // Iterator types
    using iterator = enumerable_thread_specific_iterator<internal_collection_type, value_type>;
    using const_iterator = enumerable_thread_specific_iterator<internal_collection_type, const value_type>;

    // Parallel range types
    using range_type = generic_range_type<iterator>;
    using const_range_type = generic_range_type<const_iterator>;

    //! Default constructor.  Each local instance of T is default constructed.
    enumerable_thread_specific() : my_construct_callback(
        callback_leaf<construct_by_default<T> >::make(/*dummy argument*/0)
    ){}

    //! Constructor with initializer functor. Each local instance of T is constructed by T(finit()).
    template <typename Finit , typename = typename std::enable_if<is_callable_no_args<typename std::decay<Finit>::type>::value>::type>
    explicit enumerable_thread_specific( Finit finit ) : my_construct_callback(
        callback_leaf<construct_by_finit<T,Finit> >::make( std::move(finit) )
    ){}

    //! Constructor with exemplar. Each local instance of T is copy-constructed from the exemplar.
    explicit enumerable_thread_specific( const T& exemplar ) : my_construct_callback(
        callback_leaf<construct_by_exemplar<T> >::make( exemplar )
    ){}

    explicit enumerable_thread_specific( T&& exemplar ) : my_construct_callback(
        callback_leaf<construct_by_exemplar<T> >::make( std::move(exemplar) )
    ){}

    //! Variadic constructor with initializer arguments.  Each local instance of T is constructed by T(args...)
    template <typename P1, typename... P,
              typename = typename std::enable_if<!is_callable_no_args<typename std::decay<P1>::type>::value
                                                      && !is_compatible_ets<T, typename std::decay<P1>::type>::value
                                                      && !std::is_same<T, typename std::decay<P1>::type>::value
                                                     >::type>
    enumerable_thread_specific( P1&& arg1, P&& ... args ) : my_construct_callback(
        callback_leaf<construct_by_args<T,P1,P...> >::make( std::forward<P1>(arg1), std::forward<P>(args)... )
    ){}

    //! Destructor
    ~enumerable_thread_specific() {
        if(my_construct_callback) my_construct_callback->destroy();
        // Deallocate the hash table before overridden free_array() becomes inaccessible
        this->ets_base<ETS_key_type>::table_clear();
    }

    //! Returns reference to the calling thread's local copy, discarding the exists flag
    reference local() {
        bool exists;
        return local(exists);
    }

    //! Returns reference to calling thread's local copy, creating one if necessary
    reference local(bool& exists)  {
        void* ptr = this->table_lookup(exists);
        return *static_cast<T*>(ptr);
    }

    //! Get the number of local copies
    size_type size() const { return my_locals.size(); }

    //! True if no local copies have been created
    bool empty() const { return my_locals.empty(); }

    //! begin iterator
    iterator begin() { return iterator( my_locals, 0 ); }
    //! end iterator
    iterator end() { return iterator(my_locals, my_locals.size() ); }

    //! begin const iterator
    const_iterator begin() const { return const_iterator(my_locals, 0); }

    //! end const iterator
    const_iterator end() const { return const_iterator(my_locals, my_locals.size()); }

    //! Get range for parallel algorithms
    range_type range( std::size_t grainsize=1 ) { return range_type( begin(), end(), grainsize ); }

    //! Get const range for parallel algorithms
    const_range_type range( std::size_t grainsize=1 ) const { return const_range_type( begin(), end(), grainsize ); }

    //! Destroys local copies
    void clear() {
        my_locals.clear();
        this->table_clear();
        // callback is not destroyed
    }

private:
    template<typename A2, ets_key_usage_type C2>
    void internal_copy(const enumerable_thread_specific<T, A2, C2>& other) {
        // Compile-time check that the source ETS type is compatible (holds the same T)
        static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" );
        // Initialize my_construct_callback first, so that it is valid even if rest of this routine throws an exception.
        my_construct_callback = other.my_construct_callback->clone();
        __TBB_ASSERT(my_locals.size()==0, nullptr);
        my_locals.reserve(other.size());
        this->table_elementwise_copy( other, create_local_by_copy );
    }

    void internal_swap(enumerable_thread_specific& other) {
        using std::swap;
        __TBB_ASSERT( this!=&other, nullptr);
        swap(my_construct_callback, other.my_construct_callback);
        // concurrent_vector::swap() preserves storage space,
        // so addresses to the vector kept in ETS hash table remain valid.
        swap(my_locals, other.my_locals);
        this->ets_base<ETS_key_type>::table_swap(other);
    }

    template<typename A2, ets_key_usage_type C2>
    void internal_move(enumerable_thread_specific<T, A2, C2>&& other) {
        static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" );
        my_construct_callback = other.my_construct_callback;
        other.my_construct_callback = nullptr;
        __TBB_ASSERT(my_locals.size()==0, nullptr);
        my_locals.reserve(other.size());
        this->table_elementwise_copy( other, create_local_by_move );
    }

public:
    enumerable_thread_specific( const enumerable_thread_specific& other )
    : ets_base<ETS_key_type>() /* prevents GCC warnings with -Wextra */
    {
        internal_copy(other);
    }

    template<typename Alloc, ets_key_usage_type Cachetype>
    enumerable_thread_specific( const enumerable_thread_specific<T, Alloc, Cachetype>& other )
    {
        internal_copy(other);
    }

    enumerable_thread_specific( enumerable_thread_specific&& other ) : my_construct_callback()
    {
        // TODO: use internal_move correctly here
        internal_swap(other);
    }

    template<typename Alloc, ets_key_usage_type Cachetype>
    enumerable_thread_specific( enumerable_thread_specific<T, Alloc, Cachetype>&& other ) : my_construct_callback()
    {
        internal_move(std::move(other));
    }

    enumerable_thread_specific& operator=( const enumerable_thread_specific& other )
    {
        if( this != &other ) {
            this->clear();
            my_construct_callback->destroy();
            internal_copy( other );
        }
        return *this;
    }

    template<typename Alloc, ets_key_usage_type Cachetype>
    enumerable_thread_specific& operator=( const enumerable_thread_specific<T, Alloc, Cachetype>& other )
    {
        __TBB_ASSERT( static_cast<void*>(this)!=static_cast<const void*>(&other), nullptr); // Objects of different types
        this->clear();
        my_construct_callback->destroy();
        internal_copy(other);
        return *this;
    }

    enumerable_thread_specific& operator=( enumerable_thread_specific&& other )
    {
        if( this != &other ) {
            // TODO: use internal_move correctly here
            internal_swap(other);
        }
        return *this;
    }

    template<typename Alloc, ets_key_usage_type Cachetype>
    enumerable_thread_specific& operator=( enumerable_thread_specific<T, Alloc, Cachetype>&& other )
    {
        __TBB_ASSERT( static_cast<void*>(this)!=static_cast<const void*>(&other), nullptr); // Objects of different types
        this->clear();
        my_construct_callback->destroy();
        internal_move(std::move(other));
        return *this;
    }

    // CombineFunc has signature T(T,T) or T(const T&, const T&)
    template <typename CombineFunc>
    T combine(CombineFunc f_combine) {
        if(begin() == end()) {
            ets_element<T> location;
            my_construct_callback->construct(location.value());
            return *location.value_committed();
        }
        const_iterator ci = begin();
        T my_result = *ci;
        while(++ci != end())
            my_result = f_combine( my_result, *ci );
        return my_result;
    }

    // CombineFunc takes T by value or by [const] reference, and returns nothing
    template <typename CombineFunc>
    void combine_each(CombineFunc f_combine) {
        for(iterator ci = begin(); ci != end(); ++ci) {
            f_combine( *ci );
        }
    }

}; // enumerable_thread_specific

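//! Adaptor that presents a container of containers as a single flattened sequence
/** Traversal is delegated to segmented_iterator, which walks each non-empty
    inner container in turn. */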
template< typename Container >
class flattened2d {
    // This intermediate typedef is to address issues with VC7.1 compilers
    using conval_type = typename Container::value_type;

public:
    //! Basic types
    using size_type = typename conval_type::size_type;
    using difference_type = typename conval_type::difference_type;
    using allocator_type = typename conval_type::allocator_type;
    using value_type = typename conval_type::value_type;
    using reference = typename conval_type::reference;
    using const_reference = typename conval_type::const_reference;
    using pointer = typename conval_type::pointer;
    using const_pointer = typename conval_type::const_pointer;

    using iterator = segmented_iterator<Container, value_type>;
    using const_iterator = segmented_iterator<Container, const value_type>;

    flattened2d( const Container &c, typename Container::const_iterator b, typename Container::const_iterator e ) :
        my_container(const_cast<Container*>(&c)), my_begin(b), my_end(e) { }

    explicit flattened2d( const Container &c ) :
        my_container(const_cast<Container*>(&c)), my_begin(c.begin()), my_end(c.end()) { }

    iterator begin() { return iterator(*my_container) = my_begin; }
    iterator end() { return iterator(*my_container) = my_end; }
    const_iterator begin() const { return const_iterator(*my_container) = my_begin; }
    const_iterator end() const { return const_iterator(*my_container) = my_end; }

    size_type size() const {
        size_type tot_size = 0;
        for(typename Container::const_iterator i = my_begin; i != my_end; ++i) {
            tot_size += i->size();
        }
        return tot_size;
    }

private:
    Container *my_container;
    typename Container::const_iterator my_begin;
    typename Container::const_iterator my_end;
};

template <typename Container>
flattened2d<Container> flatten2d(const Container &c, const typename Container::const_iterator b, const typename Container::const_iterator e) {
    return flattened2d<Container>(c, b, e);
}

template <typename Container>
flattened2d<Container> flatten2d(const Container &c) {
    return flattened2d<Container>(c);
}
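// A minimal flatten2d usage sketch (illustrative; assumes <numeric>, <vector>, and
// tbb/parallel_for.h are also included):
//
//     tbb::enumerable_thread_specific<std::vector<int>> parts;
//     tbb::parallel_for(0, 100, [&](int i) { parts.local().push_back(i); });
//     auto flat = tbb::flatten2d(parts);
//     long sum = std::accumulate(flat.begin(), flat.end(), 0L);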

} // namespace d1
} // namespace detail

inline namespace v1 {
using detail::d1::enumerable_thread_specific;
using detail::d1::flattened2d;
using detail::d1::flatten2d;
// ets enum keys
using detail::d1::ets_key_usage_type;
using detail::d1::ets_key_per_instance;
using detail::d1::ets_no_key;
#if __TBB_RESUMABLE_TASKS
using detail::d1::ets_suspend_aware;
#endif
} // inline namespace v1

} // namespace tbb

#endif // __TBB_enumerable_thread_specific_H