/*
    Copyright (c) 2005-2022 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_enumerable_thread_specific_H
#define __TBB_enumerable_thread_specific_H

#include "detail/_config.h"
#include "detail/_namespace_injection.h"
#include "detail/_assert.h"
#include "detail/_template_helpers.h"
#include "detail/_aligned_space.h"

#include "concurrent_vector.h"
#include "tbb_allocator.h"
#include "cache_aligned_allocator.h"
#include "profiling.h"

#include <atomic>
#include <thread>
#include <cstring> // memcpy
#include <cstddef> // std::ptrdiff_t

#include "task.h" // for task::suspend_point

#if _WIN32 || _WIN64
#include <windows.h>
#else
#include <pthread.h>
#endif

namespace tbb {
namespace detail {
namespace d1 {

//! enum for selecting between single key and key-per-instance versions
enum ets_key_usage_type {
    ets_key_per_instance
    , ets_no_key
#if __TBB_RESUMABLE_TASKS
    , ets_suspend_aware
#endif
};
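
// The key usage type is selected through the third template parameter of
// enumerable_thread_specific (see the class template below). A minimal, illustrative
// sketch, assuming the usual public oneTBB include layout; the variable names are
// hypothetical:
//
//     #include "oneapi/tbb/enumerable_thread_specific.h"
//
//     // Hash-table based lookup (the default):
//     tbb::enumerable_thread_specific<int> counters_no_key;
//
//     // Native TLS based lookup, for faster local() access:
//     tbb::enumerable_thread_specific<int, tbb::cache_aligned_allocator<int>,
//                                     tbb::ets_key_per_instance> counters_per_instance;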

// Forward declaration to use in internal classes
template <typename T, typename Allocator, ets_key_usage_type ETS_key_type>
class enumerable_thread_specific;

template <std::size_t ThreadIDSize>
struct internal_ets_key_selector {
    using key_type = std::thread::id;
    static key_type current_key() {
        return std::this_thread::get_id();
    }
};
// The Intel Compiler on macOS cannot create atomic objects instantiated from non-fundamental types
#if __INTEL_COMPILER && __APPLE__
template<>
struct internal_ets_key_selector<sizeof(std::size_t)> {
    using key_type = std::size_t;
    static key_type current_key() {
        auto id = std::this_thread::get_id();
        return reinterpret_cast<key_type&>(id);
    }
};
#endif

template <ets_key_usage_type ETS_key_type>
struct ets_key_selector : internal_ets_key_selector<sizeof(std::thread::id)> {};

#if __TBB_RESUMABLE_TASKS
template <>
struct ets_key_selector<ets_suspend_aware> {
    using key_type = suspend_point;
    static key_type current_key() {
        return r1::current_suspend_point();
    }
};
#endif

template<ets_key_usage_type ETS_key_type>
class ets_base : detail::no_copy {
protected:
    using key_type = typename ets_key_selector<ETS_key_type>::key_type;

public:
    struct slot;
    struct array {
        array* next;
        std::size_t lg_size;
        slot& at( std::size_t k ) {
            return (reinterpret_cast<slot*>(reinterpret_cast<void*>(this+1)))[k];
        }
        std::size_t size() const { return std::size_t(1) << lg_size; }
        std::size_t mask() const { return size() - 1; }
        std::size_t start( std::size_t h ) const {
            return h >> (8 * sizeof(std::size_t) - lg_size);
        }
    };
    struct slot {
        std::atomic<key_type> key;
        void* ptr;
        bool empty() const { return key.load(std::memory_order_relaxed) == key_type(); }
        bool match( key_type k ) const { return key.load(std::memory_order_relaxed) == k; }
        bool claim( key_type k ) {
            // TODO: maybe claim ptr, because key_type is not guaranteed to fit into word size
            key_type expected = key_type();
            return key.compare_exchange_strong(expected, k);
        }
    };
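
    // Layout note (illustrative only, not a separate data structure): each array header is
    // immediately followed in memory by 2^lg_size slots, which is why at() indexes off of
    // (this + 1). start() maps the top lg_size bits of a hash to the initial probe position,
    // and lookups proceed by linear probing modulo mask():
    //
    //     // [ array header | slot 0 | slot 1 | ... | slot (2^lg_size - 1) ]
    //     std::size_t i = a->start(h);    // first probe position for hash h
    //     i = (i + 1) & a->mask();        // subsequent probes wrap around the table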

protected:
    //! Root of linked list of arrays of decreasing size.
    /** nullptr if and only if my_count==0.
        Each array in the list is half the size of its predecessor. */
    std::atomic<array*> my_root;
    std::atomic<std::size_t> my_count;

    virtual void* create_local() = 0;
    virtual void* create_array(std::size_t _size) = 0;  // _size in bytes
    virtual void free_array(void* ptr, std::size_t _size) = 0; // _size in bytes

    array* allocate( std::size_t lg_size ) {
        std::size_t n = std::size_t(1) << lg_size;
        array* a = static_cast<array*>(create_array(sizeof(array) + n * sizeof(slot)));
        a->lg_size = lg_size;
        std::memset( a + 1, 0, n * sizeof(slot) );
        return a;
    }
    void deallocate(array* a) {
        std::size_t n = std::size_t(1) << (a->lg_size);
        free_array( static_cast<void*>(a), std::size_t(sizeof(array) + n * sizeof(slot)) );
    }

    ets_base() : my_root{nullptr}, my_count{0} {}
    virtual ~ets_base();  // g++ complains if this is not virtual

    void* table_lookup( bool& exists );
    void  table_clear();
    // The following functions are not used in a concurrent context,
    // so they need no synchronization or ITT annotations.
    template <ets_key_usage_type E2>
    void table_elementwise_copy( const ets_base& other,
                                 void*(*add_element)(ets_base<E2>&, void*) ) {
        __TBB_ASSERT(!my_root.load(std::memory_order_relaxed), nullptr);
        __TBB_ASSERT(!my_count.load(std::memory_order_relaxed), nullptr);
        if( !other.my_root.load(std::memory_order_relaxed) ) return;
        array* root = allocate(other.my_root.load(std::memory_order_relaxed)->lg_size);
        my_root.store(root, std::memory_order_relaxed);
        root->next = nullptr;
        my_count.store(other.my_count.load(std::memory_order_relaxed), std::memory_order_relaxed);
        std::size_t mask = root->mask();
        for( array* r = other.my_root.load(std::memory_order_relaxed); r; r = r->next ) {
            for( std::size_t i = 0; i < r->size(); ++i ) {
                slot& s1 = r->at(i);
                if( !s1.empty() ) {
                    for( std::size_t j = root->start(std::hash<key_type>{}(s1.key.load(std::memory_order_relaxed))); ; j = (j+1)&mask ) {
                        slot& s2 = root->at(j);
                        if( s2.empty() ) {
                            s2.ptr = add_element(static_cast<ets_base<E2>&>(*this), s1.ptr);
                            s2.key.store(s1.key.load(std::memory_order_relaxed), std::memory_order_relaxed);
                            break;
                        }
                        else if( s2.match(s1.key.load(std::memory_order_relaxed)) )
                            break;
                    }
                }
            }
        }
    }
    void table_swap( ets_base& other ) {
       __TBB_ASSERT(this!=&other, "Don't swap an instance with itself");
       swap_atomics_relaxed(my_root, other.my_root);
       swap_atomics_relaxed(my_count, other.my_count);
    }
};

template<ets_key_usage_type ETS_key_type>
ets_base<ETS_key_type>::~ets_base() {
    __TBB_ASSERT(!my_root.load(std::memory_order_relaxed), nullptr);
}

template<ets_key_usage_type ETS_key_type>
void ets_base<ETS_key_type>::table_clear() {
    while ( array* r = my_root.load(std::memory_order_relaxed) ) {
        my_root.store(r->next, std::memory_order_relaxed);
        deallocate(r);
    }
    my_count.store(0, std::memory_order_relaxed);
}

template<ets_key_usage_type ETS_key_type>
void* ets_base<ETS_key_type>::table_lookup( bool& exists ) {
    const key_type k = ets_key_selector<ETS_key_type>::current_key();

    __TBB_ASSERT(k != key_type(), nullptr);
    void* found;
    std::size_t h = std::hash<key_type>{}(k);
    for( array* r = my_root.load(std::memory_order_acquire); r; r = r->next ) {
        call_itt_notify(acquired,r);
        std::size_t mask=r->mask();
        for(std::size_t i = r->start(h); ;i=(i+1)&mask) {
            slot& s = r->at(i);
            if( s.empty() ) break;
            if( s.match(k) ) {
                if( r == my_root.load(std::memory_order_acquire) ) {
                    // Success at top level
                    exists = true;
                    return s.ptr;
                } else {
                    // Success at some other level.  Need to insert at top level.
                    exists = true;
                    found = s.ptr;
                    goto insert;
                }
            }
        }
    }
    // Key does not yet exist.  The density of occupied slots in the table never exceeds 0.5:
    // if it would, a new table of at least double the current size is allocated and swapped
    // in as the new root table.  So an empty slot is guaranteed.
    exists = false;
    found = create_local();
    {
        std::size_t c = ++my_count;
        array* r = my_root.load(std::memory_order_acquire);
        call_itt_notify(acquired,r);
        if( !r || c > r->size()/2 ) {
            std::size_t s = r ? r->lg_size : 2;
            while( c > std::size_t(1)<<(s-1) ) ++s;
            array* a = allocate(s);
            for(;;) {
                a->next = r;
                call_itt_notify(releasing,a);
                array* new_r = r;
                if( my_root.compare_exchange_strong(new_r, a) ) break;
                call_itt_notify(acquired, new_r);
                __TBB_ASSERT(new_r != nullptr, nullptr);
                if( new_r->lg_size >= s ) {
                    // Another thread inserted an equal or bigger array, so our array is superfluous.
                    deallocate(a);
                    break;
                }
                r = new_r;
            }
        }
    }
    insert:
    // Whether the slot was found in an older table or the item was created at this level,
    // it has already been accounted for in the total.  Room for it in the root table is
    // guaranteed and it is not present there, so search for an empty slot and use it.
    array* ir = my_root.load(std::memory_order_acquire);
    call_itt_notify(acquired, ir);
    std::size_t mask = ir->mask();
    for(std::size_t i = ir->start(h);; i = (i+1)&mask) {
        slot& s = ir->at(i);
        if( s.empty() ) {
            if( s.claim(k) ) {
                s.ptr = found;
                return found;
            }
        }
    }
}

//! Specialization that exploits native TLS
template <>
class ets_base<ets_key_per_instance>: public ets_base<ets_no_key> {
    using super = ets_base<ets_no_key>;
#if _WIN32||_WIN64
#if __TBB_WIN8UI_SUPPORT
    using tls_key_t = DWORD;
    void create_key() { my_key = FlsAlloc(nullptr); }
    void destroy_key() { FlsFree(my_key); }
    void set_tls(void * value) { FlsSetValue(my_key, (LPVOID)value); }
    void* get_tls() { return (void *)FlsGetValue(my_key); }
#else
    using tls_key_t = DWORD;
    void create_key() { my_key = TlsAlloc(); }
    void destroy_key() { TlsFree(my_key); }
    void set_tls(void * value) { TlsSetValue(my_key, (LPVOID)value); }
    void* get_tls() { return (void *)TlsGetValue(my_key); }
#endif
#else
    using tls_key_t = pthread_key_t;
    void create_key() { pthread_key_create(&my_key, nullptr); }
    void destroy_key() { pthread_key_delete(my_key); }
    void set_tls( void * value ) const { pthread_setspecific(my_key, value); }
    void* get_tls() const { return pthread_getspecific(my_key); }
#endif
    tls_key_t my_key;
    virtual void* create_local() override = 0;
    virtual void* create_array(std::size_t _size) override = 0;  // _size in bytes
    virtual void free_array(void* ptr, std::size_t _size) override = 0; // _size in bytes
protected:
    ets_base() {create_key();}
    ~ets_base() {destroy_key();}
    void* table_lookup( bool& exists ) {
        void* found = get_tls();
        if( found ) {
            exists=true;
        } else {
            found = super::table_lookup(exists);
            set_tls(found);
        }
        return found;
    }
    void table_clear() {
        destroy_key();
        create_key();
        super::table_clear();
    }
    void table_swap( ets_base& other ) {
       using std::swap;
       __TBB_ASSERT(this!=&other, "Don't swap an instance with itself");
       swap(my_key, other.my_key);
       super::table_swap(other);
    }
};

//! Random access iterator for traversing the thread local copies.
template< typename Container, typename Value >
class enumerable_thread_specific_iterator
{
    //! current position in the concurrent_vector

    Container *my_container;
    typename Container::size_type my_index;
    mutable Value *my_value;

    template<typename C, typename T, typename U>
    friend bool operator==( const enumerable_thread_specific_iterator<C, T>& i,
                     const enumerable_thread_specific_iterator<C, U>& j );

    template<typename C, typename T, typename U>
    friend bool operator<( const enumerable_thread_specific_iterator<C,T>& i,
                           const enumerable_thread_specific_iterator<C,U>& j );

    template<typename C, typename T, typename U>
    friend std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<C,T>& i,
                                const enumerable_thread_specific_iterator<C,U>& j );

    template<typename C, typename U>
    friend class enumerable_thread_specific_iterator;

public:
    //! STL support
    using difference_type = std::ptrdiff_t;
    using value_type = Value;
    using pointer = Value*;
    using reference = Value&;
    using iterator_category = std::random_access_iterator_tag;

    enumerable_thread_specific_iterator( const Container &container, typename Container::size_type index ) :
        my_container(&const_cast<Container &>(container)), my_index(index), my_value(nullptr) {}

    //! Default constructor
    enumerable_thread_specific_iterator() : my_container(nullptr), my_index(0), my_value(nullptr) {}

    template<typename U>
    enumerable_thread_specific_iterator( const enumerable_thread_specific_iterator<Container, U>& other ) :
            my_container( other.my_container ), my_index( other.my_index), my_value( const_cast<Value *>(other.my_value) ) {}

    enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset ) const {
        return enumerable_thread_specific_iterator(*my_container, my_index + offset);
    }

    friend enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset, enumerable_thread_specific_iterator v ) {
        return enumerable_thread_specific_iterator(*v.my_container, v.my_index + offset);
    }

    enumerable_thread_specific_iterator &operator+=( std::ptrdiff_t offset ) {
        my_index += offset;
        my_value = nullptr;
        return *this;
    }

    enumerable_thread_specific_iterator operator-( std::ptrdiff_t offset ) const {
        return enumerable_thread_specific_iterator( *my_container, my_index-offset );
    }

    enumerable_thread_specific_iterator &operator-=( std::ptrdiff_t offset ) {
        my_index -= offset;
        my_value = nullptr;
        return *this;
    }

    Value& operator*() const {
        Value* value = my_value;
        if( !value ) {
            value = my_value = (*my_container)[my_index].value();
        }
        __TBB_ASSERT( value==(*my_container)[my_index].value(), "corrupt cache" );
        return *value;
    }

    Value& operator[]( std::ptrdiff_t k ) const {
       return *(*my_container)[my_index + k].value();
    }

    Value* operator->() const {return &operator*();}

    enumerable_thread_specific_iterator& operator++() {
        ++my_index;
        my_value = nullptr;
        return *this;
    }

    enumerable_thread_specific_iterator& operator--() {
        --my_index;
        my_value = nullptr;
        return *this;
    }

    //! Post increment
    enumerable_thread_specific_iterator operator++(int) {
        enumerable_thread_specific_iterator result = *this;
        ++my_index;
        my_value = nullptr;
        return result;
    }

    //! Post decrement
    enumerable_thread_specific_iterator operator--(int) {
        enumerable_thread_specific_iterator result = *this;
        --my_index;
        my_value = nullptr;
        return result;
    }
};

template<typename Container, typename T, typename U>
bool operator==( const enumerable_thread_specific_iterator<Container, T>& i,
                 const enumerable_thread_specific_iterator<Container, U>& j ) {
    return i.my_index == j.my_index && i.my_container == j.my_container;
}

template<typename Container, typename T, typename U>
bool operator!=( const enumerable_thread_specific_iterator<Container,T>& i,
                 const enumerable_thread_specific_iterator<Container,U>& j ) {
    return !(i==j);
}

template<typename Container, typename T, typename U>
bool operator<( const enumerable_thread_specific_iterator<Container,T>& i,
                const enumerable_thread_specific_iterator<Container,U>& j ) {
    return i.my_index<j.my_index;
}

template<typename Container, typename T, typename U>
bool operator>( const enumerable_thread_specific_iterator<Container,T>& i,
                const enumerable_thread_specific_iterator<Container,U>& j ) {
    return j<i;
}

template<typename Container, typename T, typename U>
bool operator>=( const enumerable_thread_specific_iterator<Container,T>& i,
                 const enumerable_thread_specific_iterator<Container,U>& j ) {
    return !(i<j);
}

template<typename Container, typename T, typename U>
bool operator<=( const enumerable_thread_specific_iterator<Container,T>& i,
                 const enumerable_thread_specific_iterator<Container,U>& j ) {
    return !(j<i);
}

template<typename Container, typename T, typename U>
std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<Container,T>& i,
                     const enumerable_thread_specific_iterator<Container,U>& j ) {
    return i.my_index-j.my_index;
}

template<typename SegmentedContainer, typename Value >
class segmented_iterator
{
    template<typename C, typename T, typename U>
    friend bool operator==(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j);

    template<typename C, typename T, typename U>
    friend bool operator!=(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j);

    template<typename C, typename U>
    friend class segmented_iterator;

public:
    segmented_iterator() {my_segcont = nullptr;}

    segmented_iterator( const SegmentedContainer& _segmented_container ) :
        my_segcont(const_cast<SegmentedContainer*>(&_segmented_container)),
        outer_iter(my_segcont->end()) { }

    ~segmented_iterator() {}

    using InnerContainer = typename SegmentedContainer::value_type;
    using inner_iterator = typename InnerContainer::iterator;
    using outer_iterator = typename SegmentedContainer::iterator;

    // STL support
    // TODO: inherit all types from segmented container?
    using difference_type = std::ptrdiff_t;
    using value_type = Value;
    using size_type = typename SegmentedContainer::size_type;
    using pointer = Value*;
    using reference = Value&;
    using iterator_category = std::input_iterator_tag;

    // Copy Constructor
    template<typename U>
    segmented_iterator(const segmented_iterator<SegmentedContainer, U>& other) :
        my_segcont(other.my_segcont),
        outer_iter(other.outer_iter),
        // can we assign a default-constructed iterator to inner if we're at the end?
        inner_iter(other.inner_iter)
    {}

    // assignment
    template<typename U>
    segmented_iterator& operator=( const segmented_iterator<SegmentedContainer, U>& other) {
        my_segcont = other.my_segcont;
        outer_iter = other.outer_iter;
        if(outer_iter != my_segcont->end()) inner_iter = other.inner_iter;
        return *this;
    }

    // allow assignment of outer iterator to segmented iterator.  Once it is
    // assigned, move forward until a non-empty inner container is found or
    // the end of the outer container is reached.
    segmented_iterator& operator=(const outer_iterator& new_outer_iter) {
        __TBB_ASSERT(my_segcont != nullptr, nullptr);
        // check that this iterator points to something inside the segmented container
        for(outer_iter = new_outer_iter ;outer_iter!=my_segcont->end(); ++outer_iter) {
            if( !outer_iter->empty() ) {
                inner_iter = outer_iter->begin();
                break;
            }
        }
        return *this;
    }

    // pre-increment
    segmented_iterator& operator++() {
        advance_me();
        return *this;
    }

    // post-increment
    segmented_iterator operator++(int) {
        segmented_iterator tmp = *this;
        operator++();
        return tmp;
    }

    bool operator==(const outer_iterator& other_outer) const {
        __TBB_ASSERT(my_segcont != nullptr, nullptr);
        return (outer_iter == other_outer &&
                (outer_iter == my_segcont->end() || inner_iter == outer_iter->begin()));
    }

    bool operator!=(const outer_iterator& other_outer) const {
        return !operator==(other_outer);
    }

    // (i)* RHS
    reference operator*() const {
        __TBB_ASSERT(my_segcont != nullptr, nullptr);
        __TBB_ASSERT(outer_iter != my_segcont->end(), "Dereferencing a pointer at end of container");
        __TBB_ASSERT(inner_iter != outer_iter->end(), nullptr); // should never happen
        return *inner_iter;
    }

    // i->
    pointer operator->() const { return &operator*();}

private:
    SegmentedContainer* my_segcont;
    outer_iterator outer_iter;
    inner_iterator inner_iter;

    void advance_me() {
        __TBB_ASSERT(my_segcont != nullptr, nullptr);
        __TBB_ASSERT(outer_iter != my_segcont->end(), nullptr); // not true if there are no inner containers
        __TBB_ASSERT(inner_iter != outer_iter->end(), nullptr); // not true if the inner containers are all empty.
        ++inner_iter;
        while(inner_iter == outer_iter->end() && ++outer_iter != my_segcont->end()) {
            inner_iter = outer_iter->begin();
        }
    }
};    // segmented_iterator

template<typename SegmentedContainer, typename T, typename U>
bool operator==( const segmented_iterator<SegmentedContainer,T>& i,
                 const segmented_iterator<SegmentedContainer,U>& j ) {
    if(i.my_segcont != j.my_segcont) return false;
    if(i.my_segcont == nullptr) return true;
    if(i.outer_iter != j.outer_iter) return false;
    if(i.outer_iter == i.my_segcont->end()) return true;
    return i.inner_iter == j.inner_iter;
}

// !=
template<typename SegmentedContainer, typename T, typename U>
bool operator!=( const segmented_iterator<SegmentedContainer,T>& i,
                 const segmented_iterator<SegmentedContainer,U>& j ) {
    return !(i==j);
}

template<typename T>
struct construct_by_default: no_assign {
    void construct(void*where) {new(where) T();} // C++ note: the () in T() ensures value initialization (e.g., built-in types are zero-initialized).
    construct_by_default( int ) {}
};

template<typename T>
struct construct_by_exemplar: no_assign {
    const T exemplar;
    void construct(void*where) {new(where) T(exemplar);}
    construct_by_exemplar( const T& t ) : exemplar(t) {}
    construct_by_exemplar( T&& t ) : exemplar(std::move(t)) {}
};

template<typename T, typename Finit>
struct construct_by_finit: no_assign {
    Finit f;
    void construct(void* where) {new(where) T(f());}
    construct_by_finit( Finit&& f_ ) : f(std::move(f_)) {}
};

template<typename T, typename... P>
struct construct_by_args: no_assign {
    stored_pack<P...> pack;
    void construct(void* where) {
        call( [where](const typename std::decay<P>::type&... args ){
           new(where) T(args...);
        }, pack );
    }
    construct_by_args( P&& ... args ) : pack(std::forward<P>(args)...) {}
};

// Storage for the initialization callback that constructs thread-local values
// TODO: consider removing the template parameter T here and in callback_leaf
class callback_base {
public:
    // Clone *this
    virtual callback_base* clone() const = 0;
    // Destruct and free *this
    virtual void destroy() = 0;
    // Need virtual destructor to satisfy GCC compiler warning
    virtual ~callback_base() { }
    // Construct T at where
    virtual void construct(void* where) = 0;
};

template <typename Constructor>
class callback_leaf: public callback_base, Constructor {
    template<typename... P> callback_leaf( P&& ... params ) : Constructor(std::forward<P>(params)...) {}
    // TODO: make the construction/destruction consistent (use allocator.construct/destroy)
    using my_allocator_type = typename tbb::tbb_allocator<callback_leaf>;

    callback_base* clone() const override {
        return make(*this);
    }

    void destroy() override {
        my_allocator_type alloc;
        tbb::detail::allocator_traits<my_allocator_type>::destroy(alloc, this);
        tbb::detail::allocator_traits<my_allocator_type>::deallocate(alloc, this, 1);
    }

    void construct(void* where) override {
        Constructor::construct(where);
    }

public:
    template<typename... P>
    static callback_base* make( P&& ... params ) {
        void* where = my_allocator_type().allocate(1);
        return new(where) callback_leaf( std::forward<P>(params)... );
    }
};

//! Template for recording construction of objects in table
/** All maintenance of the space will be done explicitly on push_back,
    and all thread local copies must be destroyed before the concurrent
    vector is deleted.

    The flag is_built is initialized to false.  When the local is
    successfully constructed, set the flag to true or call value_committed().
    If the constructor throws, the flag will be false.
*/
template<typename U>
struct ets_element {
    detail::aligned_space<U> my_space;
    bool is_built;
    ets_element() { is_built = false; }  // not currently built
    U* value() { return my_space.begin(); }
    U* value_committed() { is_built = true; return my_space.begin(); }
    ~ets_element() {
        if(is_built) {
            my_space.begin()->~U();
            is_built = false;
        }
    }
};
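
// The intended exception-safe pattern for ets_element (a sketch; the real use is in
// enumerable_thread_specific::create_local() below): construct into value(), which does not
// set is_built, so a throwing constructor leaves nothing to be destroyed; only mark the
// element as built once construction has succeeded.
//
//     ets_element<T>& e = ...;              // uninitialized storage, is_built == false
//     new (e.value()) T(/*args*/);          // may throw; element still counts as not built
//     T* committed = e.value_committed();   // construction succeeded; enable destruction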

// A predicate that can be used for a compile-time compatibility check of ETS instances
// Ideally, it should have been declared inside the ETS class, but unfortunately
// in that case VS2013 does not enable the variadic constructor.
template<typename T, typename ETS> struct is_compatible_ets : std::false_type {};
template<typename T, typename U, typename A, ets_key_usage_type C>
struct is_compatible_ets< T, enumerable_thread_specific<U,A,C> > : std::is_same<T, U> {};

// A predicate that checks whether, for a variable 'foo' of type T, foo() is a valid expression
template <typename T> using has_empty_braces_operator = decltype(std::declval<T>()());
template <typename T> using is_callable_no_args = supports<T, has_empty_braces_operator>;

//! The enumerable_thread_specific container
/** enumerable_thread_specific has the following properties:
    - thread-local copies are lazily created, with default, exemplar or function initialization.
    - thread-local copies do not move during their lifetime (except on clear()), so the address of a copy is invariant.
    - the contained objects need not have operator=() defined if combine is not used.
    - enumerable_thread_specific containers may be copy-constructed or assigned.
    - thread-local copies can be managed by a hash table, or accessed via native TLS storage for speed.
    - outside of parallel contexts, the contents of all thread-local copies are accessible by iterator
      or via the combine or combine_each methods (a usage sketch follows the class definition below).

@par Segmented iterator
    When the thread-local objects are containers with input_iterators defined, a segmented iterator may
    be used to iterate over all the elements of all thread-local copies (see the flatten2d sketch near
    the end of this header).

@par combine and combine_each
    - Both methods are defined for enumerable_thread_specific.
    - combine() requires that the type T have operator=() defined.
    - Neither method modifies the contents of the container (though there is no guarantee that the applied functions do not modify the local copies).
    - Both are evaluated in a serial context (the functions are assumed to be non-benign).

@ingroup containers */
template <typename T, typename Allocator=cache_aligned_allocator<T>,
          ets_key_usage_type ETS_key_type=ets_no_key >
class enumerable_thread_specific: ets_base<ETS_key_type> {

    template<typename U, typename A, ets_key_usage_type C> friend class enumerable_thread_specific;

    using padded_element = padded<ets_element<T>>;

    //! A generic range, used to create range objects from the iterators
    template<typename I>
    class generic_range_type: public blocked_range<I> {
    public:
        using value_type = T;
        using reference = T&;
        using const_reference = const T&;
        using iterator = I;
        using difference_type = std::ptrdiff_t;

        generic_range_type( I begin_, I end_, std::size_t grainsize_ = 1) : blocked_range<I>(begin_,end_,grainsize_) {}
        template<typename U>
        generic_range_type( const generic_range_type<U>& r) : blocked_range<I>(r.begin(),r.end(),r.grainsize()) {}
        generic_range_type( generic_range_type& r, split ) : blocked_range<I>(r,split()) {}
    };

    using allocator_traits_type = tbb::detail::allocator_traits<Allocator>;

    using padded_allocator_type = typename allocator_traits_type::template rebind_alloc<padded_element>;
    using internal_collection_type = tbb::concurrent_vector< padded_element, padded_allocator_type >;

    callback_base *my_construct_callback;

    internal_collection_type my_locals;

    // TODO: consider unifying the callback mechanism for all create_local* methods below
    //   (likely non-compatible and requires interface version increase)
    void* create_local() override {
        padded_element& lref = *my_locals.grow_by(1);
        my_construct_callback->construct(lref.value());
        return lref.value_committed();
    }

    static void* create_local_by_copy( ets_base<ETS_key_type>& base, void* p ) {
        enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base);
        padded_element& lref = *ets.my_locals.grow_by(1);
        new(lref.value()) T(*static_cast<T*>(p));
        return lref.value_committed();
    }

    static void* create_local_by_move( ets_base<ETS_key_type>& base, void* p ) {
        enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base);
        padded_element& lref = *ets.my_locals.grow_by(1);
        new(lref.value()) T(std::move(*static_cast<T*>(p)));
        return lref.value_committed();
    }

    using array_allocator_type = typename allocator_traits_type::template rebind_alloc<uintptr_t>;

    // _size is in bytes
    void* create_array(std::size_t _size) override {
        std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t);
        return array_allocator_type().allocate(nelements);
    }

    void free_array( void* _ptr, std::size_t _size) override {
        std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t);
        array_allocator_type().deallocate( reinterpret_cast<uintptr_t *>(_ptr),nelements);
    }

public:

    //! Basic types
    using value_type = T;
    using allocator_type = Allocator;
    using size_type = typename internal_collection_type::size_type;
    using difference_type = typename internal_collection_type::difference_type;
    using reference = value_type&;
    using const_reference = const value_type&;

    using pointer = typename allocator_traits_type::pointer;
    using const_pointer = typename allocator_traits_type::const_pointer;

    // Iterator types
    using iterator = enumerable_thread_specific_iterator<internal_collection_type, value_type>;
    using const_iterator = enumerable_thread_specific_iterator<internal_collection_type, const value_type>;

    // Parallel range types
    using range_type = generic_range_type<iterator>;
    using const_range_type = generic_range_type<const_iterator>;

    //! Default constructor.  Each local instance of T is default constructed.
    enumerable_thread_specific() : my_construct_callback(
        callback_leaf<construct_by_default<T> >::make(/*dummy argument*/0)
    ){}

    //! Constructor with initializer functor. Each local instance of T is constructed by T(finit()).
    template <typename Finit , typename = typename std::enable_if<is_callable_no_args<typename std::decay<Finit>::type>::value>::type>
    explicit enumerable_thread_specific( Finit finit ) : my_construct_callback(
        callback_leaf<construct_by_finit<T,Finit> >::make( std::move(finit) )
    ){}

    //! Constructor with exemplar. Each local instance of T is copy-constructed from the exemplar.
    explicit enumerable_thread_specific( const T& exemplar ) : my_construct_callback(
        callback_leaf<construct_by_exemplar<T> >::make( exemplar )
    ){}

    explicit enumerable_thread_specific( T&& exemplar ) : my_construct_callback(
        callback_leaf<construct_by_exemplar<T> >::make( std::move(exemplar) )
    ){}

    //! Variadic constructor with initializer arguments.  Each local instance of T is constructed by T(args...)
    template <typename P1, typename... P,
              typename = typename std::enable_if<!is_callable_no_args<typename std::decay<P1>::type>::value
                                                      && !is_compatible_ets<T, typename std::decay<P1>::type>::value
                                                      && !std::is_same<T, typename std::decay<P1>::type>::value
                                                     >::type>
    enumerable_thread_specific( P1&& arg1, P&& ... args ) : my_construct_callback(
        callback_leaf<construct_by_args<T,P1,P...> >::make( std::forward<P1>(arg1), std::forward<P>(args)... )
    ){}

    //! Destructor
    ~enumerable_thread_specific() {
        if(my_construct_callback) my_construct_callback->destroy();
        // Deallocate the hash table before overridden free_array() becomes inaccessible
        this->ets_base<ETS_key_type>::table_clear();
    }

    //! Returns a reference to the calling thread's local copy, discarding the exists flag
    reference local() {
        bool exists;
        return local(exists);
    }

    //! Returns reference to calling thread's local copy, creating one if necessary
    reference local(bool& exists)  {
        void* ptr = this->table_lookup(exists);
        return *static_cast<T*>(ptr);
    }

    //! Get the number of local copies
    size_type size() const { return my_locals.size(); }

    //! True if no local copies have been created
    bool empty() const { return my_locals.empty(); }

    //! begin iterator
    iterator begin() { return iterator( my_locals, 0 ); }
    //! end iterator
    iterator end() { return iterator(my_locals, my_locals.size() ); }

    //! begin const iterator
    const_iterator begin() const { return const_iterator(my_locals, 0); }

    //! end const iterator
    const_iterator end() const { return const_iterator(my_locals, my_locals.size()); }

    //! Get range for parallel algorithms
    range_type range( std::size_t grainsize=1 ) { return range_type( begin(), end(), grainsize ); }

    //! Get const range for parallel algorithms
    const_range_type range( std::size_t grainsize=1 ) const { return const_range_type( begin(), end(), grainsize ); }

    //! Destroys local copies
    void clear() {
        my_locals.clear();
        this->table_clear();
        // callback is not destroyed
    }

private:
    template<typename A2, ets_key_usage_type C2>
    void internal_copy(const enumerable_thread_specific<T, A2, C2>& other) {
        // this tests is_compatible_ets
        static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" );
        // Initialize my_construct_callback first, so that it is valid even if rest of this routine throws an exception.
        my_construct_callback = other.my_construct_callback->clone();
        __TBB_ASSERT(my_locals.size()==0, nullptr);
        my_locals.reserve(other.size());
        this->table_elementwise_copy( other, create_local_by_copy );
    }

    void internal_swap(enumerable_thread_specific& other) {
        using std::swap;
        __TBB_ASSERT( this!=&other, nullptr);
        swap(my_construct_callback, other.my_construct_callback);
        // concurrent_vector::swap() preserves storage space,
        // so addresses to the vector kept in ETS hash table remain valid.
        swap(my_locals, other.my_locals);
        this->ets_base<ETS_key_type>::table_swap(other);
    }

    template<typename A2, ets_key_usage_type C2>
    void internal_move(enumerable_thread_specific<T, A2, C2>&& other) {
        static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" );
        my_construct_callback = other.my_construct_callback;
        other.my_construct_callback = nullptr;
        __TBB_ASSERT(my_locals.size()==0, nullptr);
        my_locals.reserve(other.size());
        this->table_elementwise_copy( other, create_local_by_move );
    }

public:
    enumerable_thread_specific( const enumerable_thread_specific& other )
    : ets_base<ETS_key_type>() /* prevents GCC warnings with -Wextra */
    {
        internal_copy(other);
    }

    template<typename Alloc, ets_key_usage_type Cachetype>
    enumerable_thread_specific( const enumerable_thread_specific<T, Alloc, Cachetype>& other )
    {
        internal_copy(other);
    }

    enumerable_thread_specific( enumerable_thread_specific&& other ) : my_construct_callback()
    {
        // TODO: use internal_move correctly here
        internal_swap(other);
    }

    template<typename Alloc, ets_key_usage_type Cachetype>
    enumerable_thread_specific( enumerable_thread_specific<T, Alloc, Cachetype>&& other ) : my_construct_callback()
    {
        internal_move(std::move(other));
    }

    enumerable_thread_specific& operator=( const enumerable_thread_specific& other )
    {
        if( this != &other ) {
            this->clear();
            my_construct_callback->destroy();
            internal_copy( other );
        }
        return *this;
    }

    template<typename Alloc, ets_key_usage_type Cachetype>
    enumerable_thread_specific& operator=( const enumerable_thread_specific<T, Alloc, Cachetype>& other )
    {
        __TBB_ASSERT( static_cast<void*>(this)!=static_cast<const void*>(&other), nullptr); // Objects of different types
        this->clear();
        my_construct_callback->destroy();
        internal_copy(other);
        return *this;
    }

    enumerable_thread_specific& operator=( enumerable_thread_specific&& other )
    {
        if( this != &other ) {
            // TODO: use internal_move correctly here
            internal_swap(other);
        }
        return *this;
    }

    template<typename Alloc, ets_key_usage_type Cachetype>
    enumerable_thread_specific& operator=( enumerable_thread_specific<T, Alloc, Cachetype>&& other )
    {
        __TBB_ASSERT( static_cast<void*>(this)!=static_cast<const void*>(&other), nullptr); // Objects of different types
        this->clear();
        my_construct_callback->destroy();
        internal_move(std::move(other));
        return *this;
    }

    // CombineFunc has signature T(T,T) or T(const T&, const T&)
    template <typename CombineFunc>
    T combine(CombineFunc f_combine) {
        if(begin() == end()) {
            ets_element<T> location;
            my_construct_callback->construct(location.value());
            return *location.value_committed();
        }
        const_iterator ci = begin();
        T my_result = *ci;
        while(++ci != end())
            my_result = f_combine( my_result, *ci );
        return my_result;
    }

    // combine_func_t takes T by value or by [const] reference, and returns nothing
    template <typename CombineFunc>
    void combine_each(CombineFunc f_combine) {
        for(iterator ci = begin(); ci != end(); ++ci) {
            f_combine( *ci );
        }
    }

}; // enumerable_thread_specific
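
// A minimal usage sketch (illustrative only, not part of the interface; `partial_sums`
// and `accumulate` are hypothetical names). It assumes the usual public oneTBB headers:
//
//     #include "oneapi/tbb/enumerable_thread_specific.h"
//     #include "oneapi/tbb/parallel_for.h"
//     #include <functional>
//
//     tbb::enumerable_thread_specific<long> partial_sums(0L);    // exemplar-initialized
//
//     void accumulate(int n) {
//         tbb::parallel_for(0, n, [&](int i) {
//             partial_sums.local() += i;     // lazily creates this thread's copy on first use
//         });
//         // Outside the parallel region, reduce or visit the per-thread copies:
//         long total = partial_sums.combine(std::plus<long>());
//         partial_sums.combine_each([](long& x) { x = 0; });     // e.g., reset every local copy
//         (void)total;
//     }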

template< typename Container >
class flattened2d {
    // This intermediate typedef is to address issues with VC7.1 compilers
    using conval_type = typename Container::value_type;

public:
    //! Basic types
    using size_type = typename conval_type::size_type;
    using difference_type = typename conval_type::difference_type;
    using allocator_type = typename conval_type::allocator_type;
    using value_type = typename conval_type::value_type;
    using reference = typename conval_type::reference;
    using const_reference = typename conval_type::const_reference;
    using pointer = typename conval_type::pointer;
    using const_pointer = typename conval_type::const_pointer;

    using iterator = segmented_iterator<Container, value_type>;
    using const_iterator = segmented_iterator<Container, const value_type>;

    flattened2d( const Container &c, typename Container::const_iterator b, typename Container::const_iterator e ) :
        my_container(const_cast<Container*>(&c)), my_begin(b), my_end(e) { }

    explicit flattened2d( const Container &c ) :
        my_container(const_cast<Container*>(&c)), my_begin(c.begin()), my_end(c.end()) { }

    iterator begin() { return iterator(*my_container) = my_begin; }
    iterator end() { return iterator(*my_container) = my_end; }
    const_iterator begin() const { return const_iterator(*my_container) = my_begin; }
    const_iterator end() const { return const_iterator(*my_container) = my_end; }

    size_type size() const {
        size_type tot_size = 0;
        for(typename Container::const_iterator i = my_begin; i != my_end; ++i) {
            tot_size += i->size();
        }
        return tot_size;
    }

private:
    Container *my_container;
    typename Container::const_iterator my_begin;
    typename Container::const_iterator my_end;
};

template <typename Container>
flattened2d<Container> flatten2d(const Container &c, const typename Container::const_iterator b, const typename Container::const_iterator e) {
    return flattened2d<Container>(c, b, e);
}

template <typename Container>
flattened2d<Container> flatten2d(const Container &c) {
    return flattened2d<Container>(c);
}
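
// A minimal flatten2d sketch (illustrative only; `per_thread_hits` and `dump_all_hits` are
// hypothetical names). It shows a segmented iterator walking every element of every
// thread-local container:
//
//     #include "oneapi/tbb/enumerable_thread_specific.h"
//     #include <vector>
//
//     tbb::enumerable_thread_specific<std::vector<int>> per_thread_hits;
//     // ... each thread appends to per_thread_hits.local() inside a parallel region ...
//
//     void dump_all_hits() {
//         auto flat = tbb::flatten2d(per_thread_hits);   // view over all local vectors
//         for (auto it = flat.begin(); it != flat.end(); ++it) {
//             int hit = *it;    // visits the elements of every thread's vector, segment by segment
//             (void)hit;
//         }
//     }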

} // namespace d1
} // namespace detail

inline namespace v1 {
using detail::d1::enumerable_thread_specific;
using detail::d1::flattened2d;
using detail::d1::flatten2d;
// ets enum keys
using detail::d1::ets_key_usage_type;
using detail::d1::ets_key_per_instance;
using detail::d1::ets_no_key;
#if __TBB_RESUMABLE_TASKS
using detail::d1::ets_suspend_aware;
#endif
} // inline namespace v1

} // namespace tbb

#endif // __TBB_enumerable_thread_specific_H