1 /*
2     Copyright (c) 2005-2021 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #ifndef __TBB_concurrent_hash_map_H
18 #define __TBB_concurrent_hash_map_H
19 
20 #include "detail/_namespace_injection.h"
21 #include "detail/_utils.h"
22 #include "detail/_assert.h"
23 #include "detail/_allocator_traits.h"
24 #include "detail/_containers_helpers.h"
25 #include "detail/_template_helpers.h"
26 #include "detail/_hash_compare.h"
27 #include "detail/_range_common.h"
28 #include "tbb_allocator.h"
29 #include "spin_rw_mutex.h"
30 
31 #include <atomic>
32 #include <initializer_list>
33 #include <tuple>
34 #include <iterator>
35 #include <utility>      // Need std::pair
36 #include <cstring>      // Need std::memset
37 
38 namespace tbb {
39 namespace detail {
40 namespace d2 {
41 
42 #if __TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS && __TBB_CPP20_CONCEPTS_PRESENT
43 template <typename Mutex>
44 concept ch_map_rw_scoped_lockable = rw_scoped_lockable<Mutex> &&
45 	requires(const typename Mutex::scoped_lock& sl) {
46 		{ sl.is_writer() } -> std::convertible_to<bool>;
47 };
48 #endif
49 
50 template <typename MutexType>
51 struct hash_map_node_base : no_copy {
52     using mutex_type = MutexType;
53     // Scoped lock type for mutex
54     using scoped_type = typename MutexType::scoped_lock;
55     // Next node in chain
56     hash_map_node_base* next;
57     mutex_type mutex;
58 };
59 
60 // Incompleteness flag value
61 static void* const rehash_req_flag = reinterpret_cast<void*>(std::size_t(3));
62 // Rehashed empty bucket flag
63 static void* const empty_rehashed_flag = reinterpret_cast<void*>(std::size_t(0));
64 
65 template <typename MutexType>
66 bool rehash_required( hash_map_node_base<MutexType>* node_ptr ) {
67     return reinterpret_cast<void*>(node_ptr) == rehash_req_flag;
68 }
69 
70 #if TBB_USE_ASSERT
71 template <typename MutexType>
72 bool empty_rehashed( hash_map_node_base<MutexType>* node_ptr ) {
73     return reinterpret_cast<void*>(node_ptr) == empty_rehashed_flag;
74 }
75 #endif
76 
77 // base class of concurrent_hash_map
78 
79 template <typename Allocator, typename MutexType>
80 class hash_map_base {
81 public:
82     using size_type = std::size_t;
83     using hashcode_type = std::size_t;
84     using segment_index_type = std::size_t;
85     using node_base = hash_map_node_base<MutexType>;
86 
87     struct bucket : no_copy {
88         using mutex_type = MutexType;
89         using scoped_type = typename mutex_type::scoped_lock;
90 
91         bucket() : node_list(nullptr) {}
92         bucket( node_base* ptr ) : node_list(ptr) {}
93 
94         mutex_type mutex;
95         std::atomic<node_base*> node_list;
96     };
97 
98     using allocator_type = Allocator;
99     using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>;
100     using bucket_allocator_type = typename allocator_traits_type::template rebind_alloc<bucket>;
101     using bucket_allocator_traits = tbb::detail::allocator_traits<bucket_allocator_type>;
102 
103     // Count of segments in the first block
104     static constexpr size_type embedded_block = 1;
105     // Count of segments in the first block
106     static constexpr size_type embedded_buckets = 1 << embedded_block;
107     // Count of segments in the first block
108     static constexpr size_type first_block = 8; //including embedded_block. perfect with bucket size 16, so the allocations are power of 4096
109     // Size of a pointer / table size
110     static constexpr size_type pointers_per_table = sizeof(segment_index_type) * 8; // one segment per bit
111 
112     using segment_ptr_type = bucket*;
113     using atomic_segment_type = std::atomic<segment_ptr_type>;
114     using segments_table_type = atomic_segment_type[pointers_per_table];
115 
116     hash_map_base( const allocator_type& alloc ) : my_allocator(alloc), my_mask(embedded_buckets - 1), my_size(0) {
117         for (size_type i = 0; i != embedded_buckets; ++i) {
118             my_embedded_segment[i].node_list.store(nullptr, std::memory_order_relaxed);
119         }
120 
121         for (size_type segment_index = 0; segment_index < pointers_per_table; ++segment_index) {
122             auto argument = segment_index < embedded_block ? my_embedded_segment + segment_base(segment_index) : nullptr;
123             my_table[segment_index].store(argument, std::memory_order_relaxed);
124         }
125 
126         __TBB_ASSERT( embedded_block <= first_block, "The first block number must include embedded blocks");
127     }
128 
129     // segment index of given index in the array
130     static segment_index_type segment_index_of( size_type index ) {
131         return segment_index_type(tbb::detail::log2( index|1 ));
132     }
133 
134     // the first array index of given segment
135     static segment_index_type segment_base( segment_index_type k ) {
136         return (segment_index_type(1) << k & ~segment_index_type(1));
137     }
138 
139     // segment size except for k == 0
140     static size_type segment_size( segment_index_type k ) {
141         return size_type(1) << k; // fake value for k==0
142     }
143 
144     // true if ptr is valid pointer
145     static bool is_valid( void* ptr ) {
146         return reinterpret_cast<uintptr_t>(ptr) > uintptr_t(63);
147     }
148 
149     template <typename... Args>
150     void init_buckets_impl( segment_ptr_type ptr, size_type sz, Args&&... args ) {
151         for (size_type i = 0; i < sz; ++i) {
152             bucket_allocator_traits::construct(my_allocator, ptr + i, std::forward<Args>(args)...);
153         }
154     }
155 
156     // Initialize buckets
157     void init_buckets( segment_ptr_type ptr, size_type sz, bool is_initial ) {
158         if (is_initial) {
159             init_buckets_impl(ptr, sz);
160         } else {
161             init_buckets_impl(ptr, sz, reinterpret_cast<node_base*>(rehash_req_flag));
162         }
163     }
164 
165     // Add node n to bucket b
166     static void add_to_bucket( bucket* b, node_base* n ) {
167         __TBB_ASSERT(!rehash_required(b->node_list.load(std::memory_order_relaxed)), nullptr);
168         n->next = b->node_list.load(std::memory_order_relaxed);
169         b->node_list.store(n, std::memory_order_relaxed); // its under lock and flag is set
170     }
171 
172     const bucket_allocator_type& get_allocator() const {
173         return my_allocator;
174     }
175 
176     bucket_allocator_type& get_allocator() {
177         return my_allocator;
178     }
179 
180     // Enable segment
181     void enable_segment( segment_index_type k, bool is_initial = false ) {
182         __TBB_ASSERT( k, "Zero segment must be embedded" );
183         size_type sz;
184         __TBB_ASSERT( !is_valid(my_table[k].load(std::memory_order_relaxed)), "Wrong concurrent assignment");
185         if (k >= first_block) {
186             sz = segment_size(k);
187             segment_ptr_type ptr = nullptr;
188             try_call( [&] {
189                 ptr = bucket_allocator_traits::allocate(my_allocator, sz);
190             } ).on_exception( [&] {
191                 my_table[k].store(nullptr, std::memory_order_relaxed);
192             });
193 
194             __TBB_ASSERT(ptr, nullptr);
195             init_buckets(ptr, sz, is_initial);
196             my_table[k].store(ptr, std::memory_order_release);
197             sz <<= 1;// double it to get entire capacity of the container
198         } else { // the first block
199             __TBB_ASSERT( k == embedded_block, "Wrong segment index" );
200             sz = segment_size(first_block);
201             segment_ptr_type ptr = nullptr;
202             try_call( [&] {
203                 ptr = bucket_allocator_traits::allocate(my_allocator, sz - embedded_buckets);
204             } ).on_exception( [&] {
205                 my_table[k].store(nullptr, std::memory_order_relaxed);
206             });
207 
208             __TBB_ASSERT(ptr, nullptr);
209             init_buckets(ptr, sz - embedded_buckets, is_initial);
210             ptr -= segment_base(embedded_block);
211             for(segment_index_type i = embedded_block; i < first_block; i++) // calc the offsets
212                 my_table[i].store(ptr + segment_base(i), std::memory_order_release);
213         }
214         my_mask.store(sz-1, std::memory_order_release);
215     }
216 
217     void delete_segment( segment_index_type s ) {
218         segment_ptr_type buckets_ptr = my_table[s].load(std::memory_order_relaxed);
219         size_type sz = segment_size( s ? s : 1 );
220 
221         size_type deallocate_size = 0;
222 
223         if (s >= first_block) { // the first segment or the next
224             deallocate_size = sz;
225         } else if (s == embedded_block && embedded_block != first_block) {
226             deallocate_size = segment_size(first_block) - embedded_buckets;
227         }
228 
229         for (size_type i = 0; i < deallocate_size; ++i) {
230             bucket_allocator_traits::destroy(my_allocator, buckets_ptr + i);
231         }
232         if (deallocate_size != 0) {
233             bucket_allocator_traits::deallocate(my_allocator, buckets_ptr, deallocate_size);
234         }
235 
236         if (s >= embedded_block) my_table[s].store(nullptr, std::memory_order_relaxed);
237     }
238 
239     // Get bucket by (masked) hashcode
240     bucket *get_bucket( hashcode_type h ) const noexcept {
241         segment_index_type s = segment_index_of( h );
242         h -= segment_base(s);
243         segment_ptr_type seg = my_table[s].load(std::memory_order_acquire);
244         __TBB_ASSERT( is_valid(seg), "hashcode must be cut by valid mask for allocated segments" );
245         return &seg[h];
246     }
247 
248     // detail serial rehashing helper
249     void mark_rehashed_levels( hashcode_type h ) noexcept {
250         segment_index_type s = segment_index_of( h );
251         while (segment_ptr_type seg = my_table[++s].load(std::memory_order_relaxed))
252             if (rehash_required(seg[h].node_list.load(std::memory_order_relaxed))) {
253                 seg[h].node_list.store(reinterpret_cast<node_base*>(empty_rehashed_flag), std::memory_order_relaxed);
254                 mark_rehashed_levels( h + ((hashcode_type)1<<s) ); // optimized segment_base(s)
255             }
256     }
257 
258     // Check for mask race
259     // Splitting into two functions should help inlining
260     inline bool check_mask_race( const hashcode_type h, hashcode_type &m ) const {
261         hashcode_type m_now, m_old = m;
262         m_now = my_mask.load(std::memory_order_acquire);
263         if (m_old != m_now) {
264             return check_rehashing_collision(h, m_old, m = m_now);
265         }
266         return false;
267     }
268 
269     // Process mask race, check for rehashing collision
270     bool check_rehashing_collision( const hashcode_type h, hashcode_type m_old, hashcode_type m ) const {
271         __TBB_ASSERT(m_old != m, nullptr); // TODO?: m arg could be optimized out by passing h = h&m
272         if( (h & m_old) != (h & m) ) { // mask changed for this hashcode, rare event
273             // condition above proves that 'h' has some other bits set beside 'm_old'
274             // find next applicable mask after m_old    //TODO: look at bsl instruction
275             for( ++m_old; !(h & m_old); m_old <<= 1 ) // at maximum few rounds depending on the first block size
276                 ;
277             m_old = (m_old<<1) - 1; // get full mask from a bit
278             __TBB_ASSERT((m_old&(m_old+1))==0 && m_old <= m, nullptr);
279             // check whether it is rehashing/ed
280             if (!rehash_required(get_bucket(h & m_old)->node_list.load(std::memory_order_acquire))) {
281                 return true;
282             }
283         }
284         return false;
285     }
286 
287     // Insert a node and check for load factor. @return segment index to enable.
288     segment_index_type insert_new_node( bucket *b, node_base *n, hashcode_type mask ) {
289         size_type sz = ++my_size; // prefix form is to enforce allocation after the first item inserted
290         add_to_bucket( b, n );
291         // check load factor
292         if( sz >= mask ) { // TODO: add custom load_factor
293             segment_index_type new_seg = tbb::detail::log2( mask+1 ); //optimized segment_index_of
294             __TBB_ASSERT( is_valid(my_table[new_seg-1].load(std::memory_order_relaxed)), "new allocations must not publish new mask until segment has allocated");
295             static const segment_ptr_type is_allocating = segment_ptr_type(2);;
296             segment_ptr_type disabled = nullptr;
297             if (!(my_table[new_seg].load(std::memory_order_acquire))
298                 && my_table[new_seg].compare_exchange_strong(disabled, is_allocating))
299                 return new_seg; // The value must be processed
300         }
301         return 0;
302     }
303 
304     // Prepare enough segments for number of buckets
305     void reserve(size_type buckets) {
306         if( !buckets-- ) return;
307         bool is_initial = !my_size.load(std::memory_order_relaxed);
308         for (size_type m = my_mask.load(std::memory_order_relaxed); buckets > m;
309             m = my_mask.load(std::memory_order_relaxed))
310         {
311             enable_segment( segment_index_of( m+1 ), is_initial );
312         }
313     }
314 
315     // Swap hash_map_bases
316     void internal_swap_content(hash_map_base &table) {
317         using std::swap;
318         swap_atomics_relaxed(my_mask, table.my_mask);
319         swap_atomics_relaxed(my_size, table.my_size);
320 
321         for(size_type i = 0; i < embedded_buckets; i++) {
322             auto temp = my_embedded_segment[i].node_list.load(std::memory_order_relaxed);
323             my_embedded_segment[i].node_list.store(table.my_embedded_segment[i].node_list.load(std::memory_order_relaxed),
324                 std::memory_order_relaxed);
325             table.my_embedded_segment[i].node_list.store(temp, std::memory_order_relaxed);
326         }
327         for(size_type i = embedded_block; i < pointers_per_table; i++) {
328             auto temp = my_table[i].load(std::memory_order_relaxed);
329             my_table[i].store(table.my_table[i].load(std::memory_order_relaxed),
330                 std::memory_order_relaxed);
331             table.my_table[i].store(temp, std::memory_order_relaxed);
332         }
333     }
334 
335     void internal_move(hash_map_base&& other) {
336         my_mask.store(other.my_mask.load(std::memory_order_relaxed), std::memory_order_relaxed);
337         other.my_mask.store(embedded_buckets - 1, std::memory_order_relaxed);
338 
339         my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed);
340         other.my_size.store(0, std::memory_order_relaxed);
341 
342         for (size_type i = 0; i < embedded_buckets; ++i) {
343             my_embedded_segment[i].node_list.store(other.my_embedded_segment[i].node_list, std::memory_order_relaxed);
344             other.my_embedded_segment[i].node_list.store(nullptr, std::memory_order_relaxed);
345         }
346 
347         for (size_type i = embedded_block; i < pointers_per_table; ++i) {
348             my_table[i].store(other.my_table[i].load(std::memory_order_relaxed),
349                 std::memory_order_relaxed);
350             other.my_table[i].store(nullptr, std::memory_order_relaxed);
351         }
352     }
353 
354 protected:
355     bucket_allocator_type my_allocator;
356     // Hash mask = sum of allocated segment sizes - 1
357     std::atomic<hashcode_type> my_mask;
358     // Size of container in stored items
359     std::atomic<size_type> my_size; // It must be in separate cache line from my_mask due to performance effects
360     // Zero segment
361     bucket my_embedded_segment[embedded_buckets];
362     // Segment pointers table. Also prevents false sharing between my_mask and my_size
363     segments_table_type my_table;
364 };
365 
366 template <typename Iterator>
367 class hash_map_range;
368 
369 // Meets requirements of a forward iterator for STL
370 // Value is either the T or const T type of the container.
371 template <typename Container, typename Value>
372 class hash_map_iterator {
373     using map_type = Container;
374     using node = typename Container::node;
375     using map_base = typename Container::base_type;
376     using node_base = typename map_base::node_base;
377     using bucket = typename map_base::bucket;
378 public:
379     using value_type = Value;
380     using size_type = typename Container::size_type;
381     using difference_type = typename Container::difference_type;
382     using pointer = value_type*;
383     using reference = value_type&;
384     using iterator_category = std::forward_iterator_tag;
385 
386     // Construct undefined iterator
387     hash_map_iterator(): my_map(), my_index(), my_bucket(), my_node() {}
388     hash_map_iterator( const hash_map_iterator<Container, typename Container::value_type>& other ) :
389         my_map(other.my_map),
390         my_index(other.my_index),
391         my_bucket(other.my_bucket),
392         my_node(other.my_node)
393     {}
394 
395     hash_map_iterator& operator=( const hash_map_iterator<Container, typename Container::value_type>& other ) {
396         my_map = other.my_map;
397         my_index = other.my_index;
398         my_bucket = other.my_bucket;
399         my_node = other.my_node;
400         return *this;
401     }
402 
403     Value& operator*() const {
404         __TBB_ASSERT( map_base::is_valid(my_node), "iterator uninitialized or at end of container?" );
405         return my_node->value();
406     }
407 
408     Value* operator->() const {return &operator*();}
409 
410     hash_map_iterator& operator++() {
411         my_node = static_cast<node*>( my_node->next );
412         if( !my_node ) advance_to_next_bucket();
413         return *this;
414     }
415 
416     // Post increment
417     hash_map_iterator operator++(int) {
418         hash_map_iterator old(*this);
419         operator++();
420         return old;
421     }
422 private:
423     template <typename C, typename T, typename U>
424     friend bool operator==( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j );
425 
426     template <typename C, typename T, typename U>
427     friend bool operator!=( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j );
428 
429     template <typename C, typename T, typename U>
430     friend ptrdiff_t operator-( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j );
431 
432     template <typename C, typename U>
433     friend class hash_map_iterator;
434 
435     template <typename I>
436     friend class hash_map_range;
437 
438     void advance_to_next_bucket() { // TODO?: refactor to iterator_base class
439         size_t k = my_index+1;
440         __TBB_ASSERT( my_bucket, "advancing an invalid iterator?");
441         while (k <= my_map->my_mask.load(std::memory_order_relaxed)) {
442             // Following test uses 2's-complement wizardry
443             if( k&(k-2) ) // not the beginning of a segment
444                 ++my_bucket;
445             else my_bucket = my_map->get_bucket( k );
446             my_node = static_cast<node*>( my_bucket->node_list.load(std::memory_order_relaxed) );
447             if( map_base::is_valid(my_node) ) {
448                 my_index = k; return;
449             }
450             ++k;
451         }
452         my_bucket = 0; my_node = 0; my_index = k; // the end
453     }
454 
455     template <typename Key, typename T, typename HashCompare, typename A
456 #if __TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS
457             , typename M
458              >
459         __TBB_requires(tbb::detail::hash_compare<HashCompare, Key> &&
460                        ch_map_rw_scoped_lockable<M>)
461 #else
462              >
463         __TBB_requires(tbb::detail::hash_compare<HashCompare, Key>)
464 #endif
465     friend class concurrent_hash_map;
466 
467     hash_map_iterator( const Container &map, std::size_t index, const bucket *b, node_base *n ) :
468         my_map(&map), my_index(index), my_bucket(b), my_node(static_cast<node*>(n))
469     {
470         if( b && !map_base::is_valid(n) )
471             advance_to_next_bucket();
472     }
473 
474     // concurrent_hash_map over which we are iterating.
475     const Container *my_map;
476     // Index in hash table for current item
477     size_t my_index;
478     // Pointer to bucket
479     const bucket* my_bucket;
480     // Pointer to node that has current item
481     node* my_node;
482 };
483 
484 template <typename Container, typename T, typename U>
485 bool operator==( const hash_map_iterator<Container,T>& i, const hash_map_iterator<Container,U>& j ) {
486     return i.my_node == j.my_node && i.my_map == j.my_map;
487 }
488 
489 template <typename Container, typename T, typename U>
490 bool operator!=( const hash_map_iterator<Container,T>& i, const hash_map_iterator<Container,U>& j ) {
491     return i.my_node != j.my_node || i.my_map != j.my_map;
492 }
493 
494 // Range class used with concurrent_hash_map
495 template <typename Iterator>
496 class hash_map_range {
497     using map_type = typename Iterator::map_type;
498 public:
499     // Type for size of a range
500     using size_type = std::size_t;
501     using value_type = typename Iterator::value_type;
502     using reference = typename Iterator::reference;
503     using difference_type = typename Iterator::difference_type;
504     using iterator = Iterator;
505 
506     // True if range is empty.
507     bool empty() const { return my_begin == my_end; }
508 
509     // True if range can be partitioned into two subranges.
510     bool is_divisible() const {
511         return my_midpoint != my_end;
512     }
513 
514     // Split range.
515     hash_map_range( hash_map_range& r, split ) :
516         my_end(r.my_end),
517         my_grainsize(r.my_grainsize)
518     {
519         r.my_end = my_begin = r.my_midpoint;
520         __TBB_ASSERT( !empty(), "Splitting despite the range is not divisible" );
521         __TBB_ASSERT( !r.empty(), "Splitting despite the range is not divisible" );
522         set_midpoint();
523         r.set_midpoint();
524     }
525 
526     // Init range with container and grainsize specified
527     hash_map_range( const map_type &map, size_type grainsize_ = 1 ) :
528         my_begin( Iterator( map, 0, map.my_embedded_segment, map.my_embedded_segment->node_list.load(std::memory_order_relaxed) ) ),
529         my_end( Iterator( map, map.my_mask.load(std::memory_order_relaxed) + 1, 0, 0 ) ),
530         my_grainsize( grainsize_ )
531     {
532         __TBB_ASSERT( grainsize_>0, "grainsize must be positive" );
533         set_midpoint();
534     }
535 
536     Iterator begin() const { return my_begin; }
537     Iterator end() const { return my_end; }
538     // The grain size for this range.
539     size_type grainsize() const { return my_grainsize; }
540 
541 private:
542     Iterator my_begin;
543     Iterator my_end;
544     mutable Iterator my_midpoint;
545     size_t my_grainsize;
546     // Set my_midpoint to point approximately half way between my_begin and my_end.
547     void set_midpoint() const;
548     template <typename U> friend class hash_map_range;
549 };
550 
551 template <typename Iterator>
552 void hash_map_range<Iterator>::set_midpoint() const {
553     // Split by groups of nodes
554     size_t m = my_end.my_index-my_begin.my_index;
555     if( m > my_grainsize ) {
556         m = my_begin.my_index + m/2u;
557         auto b = my_begin.my_map->get_bucket(m);
558         my_midpoint = Iterator(*my_begin.my_map,m,b,b->node_list.load(std::memory_order_relaxed));
559     } else {
560         my_midpoint = my_end;
561     }
562     __TBB_ASSERT( my_begin.my_index <= my_midpoint.my_index,
563         "my_begin is after my_midpoint" );
564     __TBB_ASSERT( my_midpoint.my_index <= my_end.my_index,
565         "my_midpoint is after my_end" );
566     __TBB_ASSERT( my_begin != my_midpoint || my_begin == my_end,
567         "[my_begin, my_midpoint) range should not be empty" );
568 }
569 
570 template <typename Key, typename T,
571           typename HashCompare = d1::tbb_hash_compare<Key>,
572           typename Allocator = tbb_allocator<std::pair<const Key, T>>
573 #if __TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS
574         , typename MutexType = spin_rw_mutex
575          >
576     __TBB_requires(tbb::detail::hash_compare<HashCompare, Key> &&
577                    ch_map_rw_scoped_lockable<MutexType>)
578 #else
579          >
580     __TBB_requires(tbb::detail::hash_compare<HashCompare, Key>)
581 #endif
582 class concurrent_hash_map
583 #if __TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS
584     : protected hash_map_base<Allocator, MutexType>
585 #else
586     : protected hash_map_base<Allocator, spin_rw_mutex>
587 #endif
588 {
589     template <typename Container, typename Value>
590     friend class hash_map_iterator;
591 
592     template <typename I>
593     friend class hash_map_range;
594     using allocator_traits_type = tbb::detail::allocator_traits<Allocator>;
595 
596 #if __TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS
597     using base_type = hash_map_base<Allocator, MutexType>;
598 #else
599     using base_type = hash_map_base<Allocator, spin_rw_mutex>;
600 #endif
601 public:
602     using key_type = Key;
603     using mapped_type = T;
604     // type_identity is needed to disable implicit deduction guides for std::initializer_list constructors
605     // and copy/move constructor with explicit allocator argument
606     using allocator_type = tbb::detail::type_identity_t<Allocator>;
607     using hash_compare_type = tbb::detail::type_identity_t<HashCompare>;
608     using value_type = std::pair<const Key, T>;
609     using size_type = typename base_type::size_type;
610     using difference_type = std::ptrdiff_t;
611 #if __TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS
612     using mutex_type = MutexType;
613 #endif
614     using pointer = typename allocator_traits_type::pointer;
615     using const_pointer = typename allocator_traits_type::const_pointer;
616 
617     using reference = value_type&;
618     using const_reference = const value_type&;
619     using iterator = hash_map_iterator<concurrent_hash_map, value_type>;
620     using const_iterator = hash_map_iterator<concurrent_hash_map, const value_type>;
621     using range_type = hash_map_range<iterator>;
622     using const_range_type = hash_map_range<const_iterator>;
623 
624 protected:
625     static_assert(std::is_same<value_type, typename Allocator::value_type>::value,
626         "value_type of the container must be the same as its allocator's");
627 
628     friend class const_accessor;
629     class node;
630     using segment_index_type = typename base_type::segment_index_type;
631     using segment_ptr_type = typename base_type::segment_ptr_type;
632     using node_base = typename base_type::node_base;
633     using bucket = typename base_type::bucket;
634     using hashcode_type = typename base_type::hashcode_type;
635     using bucket_allocator_type = typename base_type::bucket_allocator_type;
636     using node_allocator_type = typename base_type::allocator_traits_type::template rebind_alloc<node>;
637     using node_allocator_traits = tbb::detail::allocator_traits<node_allocator_type>;
638     hash_compare_type my_hash_compare;
639 
640     class node : public node_base {
641     public:
642         node() {}
643         ~node() {}
644         pointer storage() { return &my_value; }
645         value_type& value() { return *storage(); }
646     private:
647         union {
648             value_type my_value;
649         };
650     };
651 
652     void delete_node( node_base *n ) {
653         node_allocator_type node_allocator(this->get_allocator());
654         node_allocator_traits::destroy(node_allocator, static_cast<node*>(n)->storage());
655         node_allocator_traits::destroy(node_allocator, static_cast<node*>(n));
656         node_allocator_traits::deallocate(node_allocator, static_cast<node*>(n), 1);
657     }
658 
659     template <typename... Args>
660     static node* create_node(bucket_allocator_type& allocator, Args&&... args) {
661         node_allocator_type node_allocator(allocator);
662         node* node_ptr = node_allocator_traits::allocate(node_allocator, 1);
663         auto guard = make_raii_guard([&] {
664             node_allocator_traits::destroy(node_allocator, node_ptr);
665             node_allocator_traits::deallocate(node_allocator, node_ptr, 1);
666         });
667 
668         node_allocator_traits::construct(node_allocator, node_ptr);
669         node_allocator_traits::construct(node_allocator, node_ptr->storage(), std::forward<Args>(args)...);
670         guard.dismiss();
671         return node_ptr;
672     }
673 
674     static node* allocate_node_copy_construct(bucket_allocator_type& allocator, const Key &key, const T * t){
675         return create_node(allocator, key, *t);
676     }
677 
678     static node* allocate_node_move_construct(bucket_allocator_type& allocator, const Key &key, const T * t){
679         return create_node(allocator, key, std::move(*const_cast<T*>(t)));
680     }
681 
682     template <typename K = Key>
683     static node* allocate_node_default_construct(bucket_allocator_type& allocator, const K &key, const T * ){
684         // Emplace construct an empty T object inside the pair
685         return create_node(allocator, std::piecewise_construct,
686                            std::forward_as_tuple(key), std::forward_as_tuple());
687     }
688 
689     static node* do_not_allocate_node(bucket_allocator_type& , const Key &, const T * ){
690         __TBB_ASSERT(false,"this dummy function should not be called");
691         return nullptr;
692     }
693 
694     template <typename K>
695     node *search_bucket( const K &key, bucket *b ) const {
696         node *n = static_cast<node*>( b->node_list.load(std::memory_order_relaxed) );
697         while (this->is_valid(n) && !my_hash_compare.equal(key, n->value().first))
698             n = static_cast<node*>( n->next );
699         __TBB_ASSERT(!rehash_required(n), "Search can be executed only for rehashed bucket");
700         return n;
701     }
702 
703     // bucket accessor is to find, rehash, acquire a lock, and access a bucket
704     class bucket_accessor : public bucket::scoped_type {
705         bucket *my_b;
706     public:
707         bucket_accessor( concurrent_hash_map *base, const hashcode_type h, bool writer = false ) { acquire( base, h, writer ); }
708         // find a bucket by masked hashcode, optionally rehash, and acquire the lock
709         inline void acquire( concurrent_hash_map *base, const hashcode_type h, bool writer = false ) {
710             my_b = base->get_bucket( h );
711             // TODO: actually, notification is unnecessary here, just hiding double-check
712             if (rehash_required(my_b->node_list.load(std::memory_order_acquire))
713                 && bucket::scoped_type::try_acquire( my_b->mutex, /*write=*/true ) )
714             {
715                 if (rehash_required(my_b->node_list.load(std::memory_order_relaxed))) base->rehash_bucket(my_b, h); // recursive rehashing
716             }
717             else bucket::scoped_type::acquire( my_b->mutex, writer );
718             __TBB_ASSERT(!rehash_required(my_b->node_list.load(std::memory_order_relaxed)), nullptr);
719         }
720 
721         // get bucket pointer
722         bucket *operator() () { return my_b; }
723     };
724 
725     // TODO refactor to hash_base
726     void rehash_bucket( bucket *b_new, const hashcode_type hash ) {
727         __TBB_ASSERT( hash > 1, "The lowermost buckets can't be rehashed" );
728         b_new->node_list.store(reinterpret_cast<node_base*>(empty_rehashed_flag), std::memory_order_release); // mark rehashed
729         hashcode_type mask = (hashcode_type(1) << tbb::detail::log2(hash)) - 1; // get parent mask from the topmost bit
730         bucket_accessor b_old( this, hash & mask );
731 
732         mask = (mask<<1) | 1; // get full mask for new bucket
733         __TBB_ASSERT( (mask&(mask+1))==0 && (hash & mask) == hash, nullptr );
734     restart:
735         node_base* prev = nullptr;
736         node_base* curr = b_old()->node_list.load(std::memory_order_acquire);
737         while (this->is_valid(curr)) {
738             hashcode_type curr_node_hash = my_hash_compare.hash(static_cast<node*>(curr)->value().first);
739 
740             if ((curr_node_hash & mask) == hash) {
741                 if (!b_old.is_writer()) {
742                     if (!b_old.upgrade_to_writer()) {
743                         goto restart; // node ptr can be invalid due to concurrent erase
744                     }
745                 }
746                 node_base* next = curr->next;
747                 // exclude from b_old
748                 if (prev == nullptr) {
749                     b_old()->node_list.store(curr->next, std::memory_order_relaxed);
750                 } else {
751                     prev->next = curr->next;
752                 }
753                 this->add_to_bucket(b_new, curr);
754                 curr = next;
755             } else {
756                 prev = curr;
757                 curr = curr->next;
758             }
759         }
760     }
761 
762     template <typename U>
763     using hash_compare_is_transparent = dependent_bool<comp_is_transparent<hash_compare_type>, U>;
764 
765 public:
766 
767     class accessor;
768     // Combines data access, locking, and garbage collection.
769     class const_accessor : private node::scoped_type /*which derived from no_copy*/ {
770 #if __TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS
771         friend class concurrent_hash_map<Key,T,HashCompare,Allocator,MutexType>;
772 #else
773         friend class concurrent_hash_map<Key,T,HashCompare,Allocator>;
774 #endif
775         friend class accessor;
776     public:
777         // Type of value
778         using value_type = const typename concurrent_hash_map::value_type;
779 
780         // True if result is empty.
781         bool empty() const { return !my_node; }
782 
783         // Set to null
784         void release() {
785             if( my_node ) {
786                 node::scoped_type::release();
787                 my_node = nullptr;
788             }
789         }
790 
791         // Return reference to associated value in hash table.
792         const_reference operator*() const {
793             __TBB_ASSERT( my_node, "attempt to dereference empty accessor" );
794             return my_node->value();
795         }
796 
797         // Return pointer to associated value in hash table.
798         const_pointer operator->() const {
799             return &operator*();
800         }
801 
802         // Create empty result
803         const_accessor() : my_node(nullptr), my_hash() {}
804 
805         // Destroy result after releasing the underlying reference.
806         ~const_accessor() {
807             my_node = nullptr; // scoped lock's release() is called in its destructor
808         }
809     protected:
810         bool is_writer() { return node::scoped_type::is_writer(); }
811         node *my_node;
812         hashcode_type my_hash;
813     };
814 
815     // Allows write access to elements and combines data access, locking, and garbage collection.
816     class accessor: public const_accessor {
817     public:
818         // Type of value
819         using value_type = typename concurrent_hash_map::value_type;
820 
821         // Return reference to associated value in hash table.
822         reference operator*() const {
823             __TBB_ASSERT( this->my_node, "attempt to dereference empty accessor" );
824             return this->my_node->value();
825         }
826 
827         // Return pointer to associated value in hash table.
828         pointer operator->() const {
829             return &operator*();
830         }
831     };
832 
833     explicit concurrent_hash_map( const hash_compare_type& compare, const allocator_type& a = allocator_type() )
834         : base_type(a)
835         , my_hash_compare(compare)
836     {}
837 
838     concurrent_hash_map() : concurrent_hash_map(hash_compare_type()) {}
839 
840     explicit concurrent_hash_map( const allocator_type& a )
841         : concurrent_hash_map(hash_compare_type(), a)
842     {}
843 
844     // Construct empty table with n preallocated buckets. This number serves also as initial concurrency level.
845     concurrent_hash_map( size_type n, const allocator_type &a = allocator_type() )
846         : concurrent_hash_map(a)
847     {
848         this->reserve(n);
849     }
850 
851     concurrent_hash_map( size_type n, const hash_compare_type& compare, const allocator_type& a = allocator_type() )
852         : concurrent_hash_map(compare, a)
853     {
854         this->reserve(n);
855     }
856 
857     // Copy constructor
858     concurrent_hash_map( const concurrent_hash_map &table )
859         : concurrent_hash_map(node_allocator_traits::select_on_container_copy_construction(table.get_allocator()))
860     {
861         try_call( [&] {
862             internal_copy(table);
863         }).on_exception( [&] {
864             this->clear();
865         });
866     }
867 
868     concurrent_hash_map( const concurrent_hash_map &table, const allocator_type &a)
869         : concurrent_hash_map(a)
870     {
871         try_call( [&] {
872             internal_copy(table);
873         }).on_exception( [&] {
874             this->clear();
875         });
876     }
877 
878     // Move constructor
879     concurrent_hash_map( concurrent_hash_map &&table )
880         : concurrent_hash_map(std::move(table.get_allocator()))
881     {
882         this->internal_move(std::move(table));
883     }
884 
885     // Move constructor
886     concurrent_hash_map( concurrent_hash_map &&table, const allocator_type &a )
887         : concurrent_hash_map(a)
888     {
889         using is_equal_type = typename node_allocator_traits::is_always_equal;
890         internal_move_construct_with_allocator(std::move(table), a, is_equal_type());
891     }
892 
893     // Construction with copying iteration range and given allocator instance
894     template <typename I>
895     concurrent_hash_map( I first, I last, const allocator_type &a = allocator_type() )
896         : concurrent_hash_map(a)
897     {
898         try_call( [&] {
899             internal_copy(first, last, std::distance(first, last));
900         }).on_exception( [&] {
901             this->clear();
902         });
903     }
904 
905     template <typename I>
906     concurrent_hash_map( I first, I last, const hash_compare_type& compare, const allocator_type& a = allocator_type() )
907         : concurrent_hash_map(compare, a)
908     {
909         try_call( [&] {
910             internal_copy(first, last, std::distance(first, last));
911         }).on_exception( [&] {
912             this->clear();
913         });
914     }
915 
916     concurrent_hash_map( std::initializer_list<value_type> il, const hash_compare_type& compare = hash_compare_type(), const allocator_type& a = allocator_type() )
917         : concurrent_hash_map(compare, a)
918     {
919         try_call( [&] {
920             internal_copy(il.begin(), il.end(), il.size());
921         }).on_exception( [&] {
922             this->clear();
923         });
924     }
925 
926     concurrent_hash_map( std::initializer_list<value_type> il, const allocator_type& a )
927         : concurrent_hash_map(il, hash_compare_type(), a) {}
928 
929     // Assignment
930     concurrent_hash_map& operator=( const concurrent_hash_map &table ) {
931         if( this != &table ) {
932             clear();
933             copy_assign_allocators(this->my_allocator, table.my_allocator);
934             internal_copy(table);
935         }
936         return *this;
937     }
938 
939     // Move Assignment
940     concurrent_hash_map& operator=( concurrent_hash_map &&table ) {
941         if( this != &table ) {
942             using pocma_type = typename node_allocator_traits::propagate_on_container_move_assignment;
943             using is_equal_type = typename node_allocator_traits::is_always_equal;
944             move_assign_allocators(this->my_allocator, table.my_allocator);
945             internal_move_assign(std::move(table), tbb::detail::disjunction<is_equal_type, pocma_type>());
946         }
947         return *this;
948     }
949 
950     // Assignment
951     concurrent_hash_map& operator=( std::initializer_list<value_type> il ) {
952         clear();
953         internal_copy(il.begin(), il.end(), il.size());
954         return *this;
955     }
956 
957     // Rehashes and optionally resizes the whole table.
958     /** Useful to optimize performance before or after concurrent operations.
959         Also enables using of find() and count() concurrent methods in serial context. */
960     void rehash(size_type sz = 0) {
961         this->reserve(sz); // TODO: add reduction of number of buckets as well
962         hashcode_type mask = this->my_mask.load(std::memory_order_relaxed);
963         hashcode_type b = (mask+1)>>1; // size or first index of the last segment
964         __TBB_ASSERT((b&(b-1))==0, nullptr); // zero or power of 2
965         bucket *bp = this->get_bucket( b ); // only the last segment should be scanned for rehashing
966         for(; b <= mask; b++, bp++ ) {
967             node_base *n = bp->node_list.load(std::memory_order_relaxed);
968             __TBB_ASSERT( this->is_valid(n) || empty_rehashed(n) || rehash_required(n), "Broken internal structure" );
969             __TBB_ASSERT( *reinterpret_cast<intptr_t*>(&bp->mutex) == 0, "concurrent or unexpectedly terminated operation during rehash() execution" );
970             if (rehash_required(n)) { // rehash bucket, conditional because rehashing of a previous bucket may affect this one
971                 hashcode_type h = b; bucket *b_old = bp;
972                 do {
973                     __TBB_ASSERT( h > 1, "The lowermost buckets can't be rehashed" );
974                     hashcode_type m = ( hashcode_type(1) << tbb::detail::log2( h ) ) - 1; // get parent mask from the topmost bit
975                     b_old = this->get_bucket( h &= m );
976                 } while( rehash_required(b_old->node_list.load(std::memory_order_relaxed)) );
977                 // now h - is index of the root rehashed bucket b_old
978                 this->mark_rehashed_levels( h ); // mark all non-rehashed children recursively across all segments
979                 node_base* prev = nullptr;
980                 node_base* curr = b_old->node_list.load(std::memory_order_relaxed);
981                 while (this->is_valid(curr)) {
982                     hashcode_type curr_node_hash = my_hash_compare.hash(static_cast<node*>(curr)->value().first);
983 
984                     if ((curr_node_hash & mask) != h) { // should be rehashed
985                         node_base* next = curr->next;
986                         // exclude from b_old
987                         if (prev == nullptr) {
988                             b_old->node_list.store(curr->next, std::memory_order_relaxed);
989                         } else {
990                             prev->next = curr->next;
991                         }
992                         bucket *b_new = this->get_bucket(curr_node_hash & mask);
993                         __TBB_ASSERT(!rehash_required(b_new->node_list.load(std::memory_order_relaxed)), "hash() function changed for key in table or internal error");
994                         this->add_to_bucket(b_new, curr);
995                         curr = next;
996                     } else {
997                         prev = curr;
998                         curr = curr->next;
999                     }
1000                 }
1001             }
1002         }
1003     }
1004 
1005     // Clear table
1006     void clear() {
1007         hashcode_type m = this->my_mask.load(std::memory_order_relaxed);
1008         __TBB_ASSERT((m&(m+1))==0, "data structure is invalid");
1009         this->my_size.store(0, std::memory_order_relaxed);
1010         segment_index_type s = this->segment_index_of( m );
1011         __TBB_ASSERT( s+1 == this->pointers_per_table || !this->my_table[s+1].load(std::memory_order_relaxed), "wrong mask or concurrent grow" );
1012         do {
1013             __TBB_ASSERT(this->is_valid(this->my_table[s].load(std::memory_order_relaxed)), "wrong mask or concurrent grow" );
1014             segment_ptr_type buckets_ptr = this->my_table[s].load(std::memory_order_relaxed);
1015             size_type sz = this->segment_size( s ? s : 1 );
1016             for( segment_index_type i = 0; i < sz; i++ )
1017                 for( node_base *n = buckets_ptr[i].node_list.load(std::memory_order_relaxed);
1018                     this->is_valid(n); n = buckets_ptr[i].node_list.load(std::memory_order_relaxed) )
1019                 {
1020                     buckets_ptr[i].node_list.store(n->next, std::memory_order_relaxed);
1021                     delete_node( n );
1022                 }
1023             this->delete_segment(s);
1024         } while(s-- > 0);
1025         this->my_mask.store(this->embedded_buckets - 1, std::memory_order_relaxed);
1026     }
1027 
1028     // Clear table and destroy it.
1029     ~concurrent_hash_map() { clear(); }
1030 
1031     //------------------------------------------------------------------------
1032     // Parallel algorithm support
1033     //------------------------------------------------------------------------
1034     range_type range( size_type grainsize=1 ) {
1035         return range_type( *this, grainsize );
1036     }
1037     const_range_type range( size_type grainsize=1 ) const {
1038         return const_range_type( *this, grainsize );
1039     }
1040 
1041     //------------------------------------------------------------------------
1042     // STL support - not thread-safe methods
1043     //------------------------------------------------------------------------
1044     iterator begin() { return iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); }
1045     const_iterator begin() const { return const_iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); }
1046     const_iterator cbegin() const { return const_iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); }
1047     iterator end() { return iterator( *this, 0, 0, 0 ); }
1048     const_iterator end() const { return const_iterator( *this, 0, 0, 0 ); }
1049     const_iterator cend() const { return const_iterator( *this, 0, 0, 0 ); }
1050     std::pair<iterator, iterator> equal_range( const Key& key ) { return internal_equal_range( key, end() ); }
1051     std::pair<const_iterator, const_iterator> equal_range( const Key& key ) const { return internal_equal_range( key, end() ); }
1052 
1053     template <typename K>
1054     typename std::enable_if<hash_compare_is_transparent<K>::value,
1055                             std::pair<iterator, iterator>>::type equal_range( const K& key ) {
1056         return internal_equal_range(key, end());
1057     }
1058 
1059     template <typename K>
1060     typename std::enable_if<hash_compare_is_transparent<K>::value,
1061                             std::pair<const_iterator, const_iterator>>::type equal_range( const K& key ) const {
1062         return internal_equal_range(key, end());
1063     }
1064 
1065     // Number of items in table.
1066     size_type size() const { return this->my_size.load(std::memory_order_acquire); }
1067 
1068     // True if size()==0.
1069     __TBB_nodiscard bool empty() const { return size() == 0; }
1070 
1071     // Upper bound on size.
1072     size_type max_size() const {
1073         return allocator_traits_type::max_size(base_type::get_allocator());
1074     }
1075 
1076     // Returns the current number of buckets
1077     size_type bucket_count() const { return this->my_mask.load(std::memory_order_relaxed) + 1; }
1078 
1079     // return allocator object
1080     allocator_type get_allocator() const { return base_type::get_allocator(); }
1081 
1082     // swap two instances. Iterators are invalidated
1083     void swap(concurrent_hash_map& table) {
1084         using pocs_type = typename node_allocator_traits::propagate_on_container_swap;
1085         using is_equal_type = typename node_allocator_traits::is_always_equal;
1086         swap_allocators(this->my_allocator, table.my_allocator);
1087         internal_swap(table, tbb::detail::disjunction<pocs_type, is_equal_type>());
1088     }
1089 
1090     //------------------------------------------------------------------------
1091     // concurrent map operations
1092     //------------------------------------------------------------------------
1093 
1094     // Return count of items (0 or 1)
1095     size_type count( const Key &key ) const {
1096         return const_cast<concurrent_hash_map*>(this)->lookup</*insert*/false>(key, nullptr, nullptr, /*write=*/false, &do_not_allocate_node);
1097     }
1098 
1099     template <typename K>
1100     typename std::enable_if<hash_compare_is_transparent<K>::value,
1101                             size_type>::type count( const K& key ) const {
1102         return const_cast<concurrent_hash_map*>(this)->lookup</*insert*/false>(key, nullptr, nullptr, /*write=*/false, &do_not_allocate_node);
1103     }
1104 
1105     // Find item and acquire a read lock on the item.
1106     /** Return true if item is found, false otherwise. */
1107     bool find( const_accessor &result, const Key &key ) const {
1108         result.release();
1109         return const_cast<concurrent_hash_map*>(this)->lookup</*insert*/false>(key, nullptr, &result, /*write=*/false, &do_not_allocate_node );
1110     }
1111 
1112     // Find item and acquire a write lock on the item.
1113     /** Return true if item is found, false otherwise. */
1114     bool find( accessor &result, const Key &key ) {
1115         result.release();
1116         return lookup</*insert*/false>(key, nullptr, &result, /*write=*/true, &do_not_allocate_node);
1117     }
1118 
1119     template <typename K>
1120     typename std::enable_if<hash_compare_is_transparent<K>::value,
1121                             bool>::type find( const_accessor& result, const K& key ) {
1122         result.release();
1123         return lookup</*insert*/false>(key, nullptr, &result, /*write=*/false, &do_not_allocate_node);
1124     }
1125 
1126     template <typename K>
1127     typename std::enable_if<hash_compare_is_transparent<K>::value,
1128                             bool>::type find( accessor& result, const K& key ) {
1129         result.release();
1130         return lookup</*insert*/false>(key, nullptr, &result, /*write=*/true, &do_not_allocate_node);
1131     }
1132 
1133     // Insert item (if not already present) and acquire a read lock on the item.
1134     /** Returns true if item is new. */
1135     bool insert( const_accessor &result, const Key &key ) {
1136         result.release();
1137         return lookup</*insert*/true>(key, nullptr, &result, /*write=*/false, &allocate_node_default_construct<>);
1138     }
1139 
1140     // Insert item (if not already present) and acquire a write lock on the item.
1141     /** Returns true if item is new. */
1142     bool insert( accessor &result, const Key &key ) {
1143         result.release();
1144         return lookup</*insert*/true>(key, nullptr, &result, /*write=*/true, &allocate_node_default_construct<>);
1145     }
1146 
1147     template <typename K>
1148     typename std::enable_if<hash_compare_is_transparent<K>::value &&
1149                             std::is_constructible<key_type, const K&>::value,
1150                             bool>::type insert( const_accessor& result, const K& key ) {
1151         result.release();
1152         return lookup</*insert*/true>(key, nullptr, &result, /*write=*/true, &allocate_node_default_construct<K>);
1153     }
1154 
1155     template <typename K>
1156     typename std::enable_if<hash_compare_is_transparent<K>::value &&
1157                             std::is_constructible<key_type, const K&>::value,
1158                             bool>::type insert( accessor& result, const K& key ) {
1159         result.release();
1160         return lookup</*insert*/true>(key, nullptr, &result, /*write=*/true, &allocate_node_default_construct<K>);
1161     }
1162 
1163     // Insert item by copying if there is no such key present already and acquire a read lock on the item.
1164     /** Returns true if item is new. */
1165     bool insert( const_accessor &result, const value_type &value ) {
1166         result.release();
1167         return lookup</*insert*/true>(value.first, &value.second, &result, /*write=*/false, &allocate_node_copy_construct);
1168     }
1169 
1170     // Insert item by copying if there is no such key present already and acquire a write lock on the item.
1171     /** Returns true if item is new. */
1172     bool insert( accessor &result, const value_type &value ) {
1173         result.release();
1174         return lookup</*insert*/true>(value.first, &value.second, &result, /*write=*/true, &allocate_node_copy_construct);
1175     }
1176 
1177     // Insert item by copying if there is no such key present already
1178     /** Returns true if item is inserted. */
1179     bool insert( const value_type &value ) {
1180         return lookup</*insert*/true>(value.first, &value.second, nullptr, /*write=*/false, &allocate_node_copy_construct);
1181     }
1182 
1183     // Insert item by copying if there is no such key present already and acquire a read lock on the item.
1184     /** Returns true if item is new. */
1185     bool insert( const_accessor &result, value_type && value ) {
1186         return generic_move_insert(result, std::move(value));
1187     }
1188 
1189     // Insert item by copying if there is no such key present already and acquire a write lock on the item.
1190     /** Returns true if item is new. */
1191     bool insert( accessor &result, value_type && value ) {
1192         return generic_move_insert(result, std::move(value));
1193     }
1194 
1195     // Insert item by copying if there is no such key present already
1196     /** Returns true if item is inserted. */
1197     bool insert( value_type && value ) {
1198         return generic_move_insert(accessor_not_used(), std::move(value));
1199     }
1200 
1201     // Insert item by copying if there is no such key present already and acquire a read lock on the item.
1202     /** Returns true if item is new. */
1203     template <typename... Args>
1204     bool emplace( const_accessor &result, Args&&... args ) {
1205         return generic_emplace(result, std::forward<Args>(args)...);
1206     }
1207 
1208     // Insert item by copying if there is no such key present already and acquire a write lock on the item.
1209     /** Returns true if item is new. */
1210     template <typename... Args>
1211     bool emplace( accessor &result, Args&&... args ) {
1212         return generic_emplace(result, std::forward<Args>(args)...);
1213     }
1214 
1215     // Insert item by copying if there is no such key present already
1216     /** Returns true if item is inserted. */
1217     template <typename... Args>
1218     bool emplace( Args&&... args ) {
1219         return generic_emplace(accessor_not_used(), std::forward<Args>(args)...);
1220     }
1221 
1222     // Insert range [first, last)
1223     template <typename I>
1224     void insert( I first, I last ) {
1225         for ( ; first != last; ++first )
1226             insert( *first );
1227     }
1228 
1229     // Insert initializer list
1230     void insert( std::initializer_list<value_type> il ) {
1231         insert( il.begin(), il.end() );
1232     }
1233 
1234     // Erase item.
1235     /** Return true if item was erased by particularly this call. */
1236     bool erase( const Key &key ) {
1237         return internal_erase(key);
1238     }
1239 
1240     template <typename K>
1241     typename std::enable_if<hash_compare_is_transparent<K>::value,
1242                             bool>::type erase( const K& key ) {
1243         return internal_erase(key);
1244     }
1245 
1246     // Erase item by const_accessor.
1247     /** Return true if item was erased by particularly this call. */
1248     bool erase( const_accessor& item_accessor ) {
1249         return exclude( item_accessor );
1250     }
1251 
1252     // Erase item by accessor.
1253     /** Return true if item was erased by particularly this call. */
1254     bool erase( accessor& item_accessor ) {
1255         return exclude( item_accessor );
1256     }
1257 
1258 protected:
1259     template <typename K, typename AllocateNodeType>
1260     node* allocate_node_helper( const K& key, const T* t, AllocateNodeType allocate_node, std::true_type ) {
1261         return allocate_node(base_type::get_allocator(), key, t);
1262     }
1263 
1264     template <typename K, typename AllocateNodeType>
1265     node* allocate_node_helper( const K&, const T*, AllocateNodeType, std::false_type ) {
1266         __TBB_ASSERT(false, "allocate_node_helper with std::false_type should never been called");
1267         return nullptr;
1268     }
1269 
1270     // Insert or find item and optionally acquire a lock on the item.
1271     template <bool OpInsert, typename K, typename AllocateNodeType>
1272     bool lookup( const K &key, const T *t, const_accessor *result, bool write, AllocateNodeType allocate_node, node *tmp_n  = 0)
1273     {
1274         __TBB_ASSERT( !result || !result->my_node, nullptr );
1275         bool return_value;
1276         hashcode_type const h = my_hash_compare.hash( key );
1277         hashcode_type m = this->my_mask.load(std::memory_order_acquire);
1278         segment_index_type grow_segment = 0;
1279         node *n;
1280         restart:
1281         {//lock scope
1282             __TBB_ASSERT((m&(m+1))==0, "data structure is invalid");
1283             return_value = false;
1284             // get bucket
1285             bucket_accessor b( this, h & m );
1286             // find a node
1287             n = search_bucket( key, b() );
1288             if( OpInsert ) {
1289                 // [opt] insert a key
1290                 if( !n ) {
1291                     if( !tmp_n ) {
1292                         tmp_n = allocate_node_helper(key, t, allocate_node, std::integral_constant<bool, OpInsert>{});
1293                     }
1294                     while ( !b.is_writer() && !b.upgrade_to_writer() ) { // TODO: improved insertion
1295                         // Rerun search list, in case another thread inserted the intem during the upgrade
1296                         n = search_bucket(key, b());
1297                         if (this->is_valid(n)) { // unfortunately, it did
1298                             if (!b.downgrade_to_reader()) {
1299                                 // If the lock was downgraded with reacquiring the mutex
1300                                 // Rerun search list in case another thread removed the item during the downgrade
1301                                 n = search_bucket(key, b());
1302                                 if (!this->is_valid(n)) {
1303                                     // Unfortunately, it did
1304                                     // We need to try upgrading to writer again
1305                                     continue;
1306                                 }
1307                             }
1308                             goto exists;
1309                         }
1310                     }
1311 
1312                     if( this->check_mask_race(h, m) )
1313                         goto restart; // b.release() is done in ~b().
1314                     // insert and set flag to grow the container
1315                     grow_segment = this->insert_new_node( b(), n = tmp_n, m );
1316                     tmp_n = 0;
1317                     return_value = true;
1318                 }
1319             } else { // find or count
1320                 if( !n ) {
1321                     if( this->check_mask_race( h, m ) )
1322                         goto restart; // b.release() is done in ~b(). TODO: replace by continue
1323                     return false;
1324                 }
1325                 return_value = true;
1326             }
1327         exists:
1328             if( !result ) goto check_growth;
1329             // TODO: the following seems as generic/regular operation
1330             // acquire the item
1331             if( !result->try_acquire( n->mutex, write ) ) {
1332                 for( tbb::detail::atomic_backoff backoff(true);; ) {
1333                     if( result->try_acquire( n->mutex, write ) ) break;
1334                     if( !backoff.bounded_pause() ) {
1335                         // the wait takes really long, restart the operation
1336                         b.release();
1337                         __TBB_ASSERT( !OpInsert || !return_value, "Can't acquire new item in locked bucket?" );
1338                         yield();
1339                         m = this->my_mask.load(std::memory_order_acquire);
1340                         goto restart;
1341                     }
1342                 }
1343             }
1344         }//lock scope
1345         result->my_node = n;
1346         result->my_hash = h;
1347     check_growth:
1348         // [opt] grow the container
1349         if( grow_segment ) {
1350             this->enable_segment( grow_segment );
1351         }
1352         if( tmp_n ) // if OpInsert only
1353             delete_node( tmp_n );
1354         return return_value;
1355     }
1356 
1357     struct accessor_not_used { void release(){}};
1358     friend const_accessor* accessor_location( accessor_not_used const& ){ return nullptr;}
1359     friend const_accessor* accessor_location( const_accessor & a )      { return &a;}
1360 
1361     friend bool is_write_access_needed( accessor const& )           { return true;}
1362     friend bool is_write_access_needed( const_accessor const& )     { return false;}
1363     friend bool is_write_access_needed( accessor_not_used const& )  { return false;}
1364 
1365     template <typename Accessor>
1366     bool generic_move_insert( Accessor && result, value_type && value ) {
1367         result.release();
1368         return lookup</*insert*/true>(value.first, &value.second, accessor_location(result), is_write_access_needed(result), &allocate_node_move_construct);
1369     }
1370 
1371     template <typename Accessor, typename... Args>
1372     bool generic_emplace( Accessor && result, Args &&... args ) {
1373         result.release();
1374         node * node_ptr = create_node(base_type::get_allocator(), std::forward<Args>(args)...);
1375         return lookup</*insert*/true>(node_ptr->value().first, nullptr, accessor_location(result), is_write_access_needed(result), &do_not_allocate_node, node_ptr);
1376     }
1377 
1378     // delete item by accessor
1379     bool exclude( const_accessor &item_accessor ) {
1380         __TBB_ASSERT( item_accessor.my_node, nullptr );
1381         node_base *const exclude_node = item_accessor.my_node;
1382         hashcode_type const hash = item_accessor.my_hash;
1383         hashcode_type mask = this->my_mask.load(std::memory_order_acquire);
1384         do {
1385             // get bucket
1386             bucket_accessor b( this, hash & mask, /*writer=*/true );
1387             node_base* prev = nullptr;
1388             node_base* curr = b()->node_list.load(std::memory_order_relaxed);
1389 
1390             while (curr && curr != exclude_node) {
1391                 prev = curr;
1392                 curr = curr->next;
1393             }
1394 
1395             if (curr == nullptr) { // someone else was first
1396                 if (this->check_mask_race(hash, mask))
1397                     continue;
1398                 item_accessor.release();
1399                 return false;
1400             }
1401             __TBB_ASSERT( curr == exclude_node, nullptr );
1402             // remove from container
1403             if (prev == nullptr) {
1404                 b()->node_list.store(curr->next, std::memory_order_relaxed);
1405             } else {
1406                 prev->next = curr->next;
1407             }
1408 
1409             this->my_size--;
1410             break;
1411         } while(true);
1412         if (!item_accessor.is_writer()) { // need to get exclusive lock
1413             item_accessor.upgrade_to_writer(); // return value means nothing here
1414         }
1415 
1416         item_accessor.release();
1417         delete_node(exclude_node); // Only one thread can delete it
1418         return true;
1419     }
1420 
1421     template <typename K>
1422     bool internal_erase( const K& key ) {
1423         node_base *erase_node;
1424         hashcode_type const hash = my_hash_compare.hash(key);
1425         hashcode_type mask = this->my_mask.load(std::memory_order_acquire);
1426     restart:
1427         {//lock scope
1428             // get bucket
1429             bucket_accessor b( this, hash & mask );
1430         search:
1431             node_base* prev = nullptr;
1432             erase_node = b()->node_list.load(std::memory_order_relaxed);
1433             while (this->is_valid(erase_node) && !my_hash_compare.equal(key, static_cast<node*>(erase_node)->value().first ) ) {
1434                 prev = erase_node;
1435                 erase_node = erase_node->next;
1436             }
1437 
1438             if (erase_node == nullptr) { // not found, but mask could be changed
1439                 if (this->check_mask_race(hash, mask))
1440                     goto restart;
1441                 return false;
1442             } else if (!b.is_writer() && !b.upgrade_to_writer()) {
1443                 if (this->check_mask_race(hash, mask)) // contended upgrade, check mask
1444                     goto restart;
1445                 goto search;
1446             }
1447 
1448             // remove from container
1449             if (prev == nullptr) {
1450                 b()->node_list.store(erase_node->next, std::memory_order_relaxed);
1451             } else {
1452                 prev->next = erase_node->next;
1453             }
1454             this->my_size--;
1455         }
1456         {
1457             typename node::scoped_type item_locker( erase_node->mutex, /*write=*/true );
1458         }
1459         // note: there should be no threads pretending to acquire this mutex again, do not try to upgrade const_accessor!
1460         delete_node(erase_node); // Only one thread can delete it due to write lock on the bucket
1461         return true;
1462     }
1463 
1464     // Returns an iterator for an item defined by the key, or for the next item after it (if upper==true)
1465     template <typename K, typename I>
1466     std::pair<I, I> internal_equal_range( const K& key, I end_ ) const {
1467         hashcode_type h = my_hash_compare.hash( key );
1468         hashcode_type m = this->my_mask.load(std::memory_order_relaxed);
1469         __TBB_ASSERT((m&(m+1))==0, "data structure is invalid");
1470         h &= m;
1471         bucket *b = this->get_bucket( h );
1472         while (rehash_required(b->node_list.load(std::memory_order_relaxed))) {
1473             m = ( hashcode_type(1) << tbb::detail::log2( h ) ) - 1; // get parent mask from the topmost bit
1474             b = this->get_bucket( h &= m );
1475         }
1476         node *n = search_bucket( key, b );
1477         if( !n )
1478             return std::make_pair(end_, end_);
1479         iterator lower(*this, h, b, n), upper(lower);
1480         return std::make_pair(lower, ++upper);
1481     }
1482 
1483     // Copy "source" to *this, where *this must start out empty.
1484     void internal_copy( const concurrent_hash_map& source ) {
1485         hashcode_type mask = source.my_mask.load(std::memory_order_relaxed);
1486         if( this->my_mask.load(std::memory_order_relaxed) == mask ) { // optimized version
1487             this->reserve(source.my_size.load(std::memory_order_relaxed)); // TODO: load_factor?
1488             bucket *dst = 0, *src = 0;
1489             bool rehashing_required = false;
1490             for( hashcode_type k = 0; k <= mask; k++ ) {
1491                 if( k & (k-2) ) ++dst,src++; // not the beginning of a segment
1492                 else { dst = this->get_bucket( k ); src = source.get_bucket( k ); }
1493                 __TBB_ASSERT(!rehash_required(dst->node_list.load(std::memory_order_relaxed)), "Invalid bucket in destination table");
1494                 node *n = static_cast<node*>( src->node_list.load(std::memory_order_relaxed) );
1495                 if (rehash_required(n)) { // source is not rehashed, items are in previous buckets
1496                     rehashing_required = true;
1497                     dst->node_list.store(reinterpret_cast<node_base*>(rehash_req_flag), std::memory_order_relaxed);
1498                 } else for(; n; n = static_cast<node*>( n->next ) ) {
1499                     node* node_ptr = create_node(base_type::get_allocator(), n->value().first, n->value().second);
1500                     this->add_to_bucket( dst, node_ptr);
1501                     this->my_size.fetch_add(1, std::memory_order_relaxed);
1502                 }
1503             }
1504             if( rehashing_required ) rehash();
1505         } else internal_copy(source.begin(), source.end(), source.my_size.load(std::memory_order_relaxed));
1506     }
1507 
1508     template <typename I>
1509     void internal_copy( I first, I last, size_type reserve_size ) {
1510         this->reserve(reserve_size); // TODO: load_factor?
1511         hashcode_type m = this->my_mask.load(std::memory_order_relaxed);
1512         for(; first != last; ++first) {
1513             hashcode_type h = my_hash_compare.hash( (*first).first );
1514             bucket *b = this->get_bucket( h & m );
1515             __TBB_ASSERT(!rehash_required(b->node_list.load(std::memory_order_relaxed)), "Invalid bucket in destination table");
1516             node* node_ptr = create_node(base_type::get_allocator(), (*first).first, (*first).second);
1517             this->add_to_bucket( b, node_ptr );
1518             ++this->my_size; // TODO: replace by non-atomic op
1519         }
1520     }
1521 
1522     void internal_move_construct_with_allocator( concurrent_hash_map&& other, const allocator_type&,
1523                                                 /*is_always_equal=*/std::true_type )
1524     {
1525         this->internal_move(std::move(other));
1526     }
1527 
1528     void internal_move_construct_with_allocator( concurrent_hash_map&& other, const allocator_type& a,
1529                                                 /*is_always_equal=*/std::false_type )
1530     {
1531         if (a == other.get_allocator()){
1532             this->internal_move(std::move(other));
1533         } else {
1534             try_call( [&] {
1535                 internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end()),
1536                     other.size());
1537             }).on_exception( [&] {
1538                 this->clear();
1539             });
1540         }
1541     }
1542 
1543     void internal_move_assign( concurrent_hash_map&& other,
1544         /*is_always_equal || POCMA = */std::true_type)
1545     {
1546         this->internal_move(std::move(other));
1547     }
1548 
1549     void internal_move_assign(concurrent_hash_map&& other, /*is_always_equal=*/ std::false_type) {
1550         if (this->my_allocator == other.my_allocator) {
1551             this->internal_move(std::move(other));
1552         } else {
1553             //do per element move
1554             internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end()),
1555                 other.size());
1556         }
1557     }
1558 
1559     void internal_swap(concurrent_hash_map& other, /*is_always_equal || POCS = */ std::true_type) {
1560         this->internal_swap_content(other);
1561     }
1562 
1563     void internal_swap(concurrent_hash_map& other, /*is_always_equal || POCS = */ std::false_type) {
1564         __TBB_ASSERT(this->my_allocator == other.my_allocator, nullptr);
1565         this->internal_swap_content(other);
1566     }
1567 
1568     // Fast find when no concurrent erasure is used. For internal use inside TBB only!
1569     /** Return pointer to item with given key, or nullptr if no such item exists.
1570         Must not be called concurrently with erasure operations. */
1571     const_pointer internal_fast_find( const Key& key ) const {
1572         hashcode_type h = my_hash_compare.hash( key );
1573         hashcode_type m = this->my_mask.load(std::memory_order_acquire);
1574         node *n;
1575     restart:
1576         __TBB_ASSERT((m&(m+1))==0, "data structure is invalid");
1577         bucket *b = this->get_bucket( h & m );
1578         // TODO: actually, notification is unnecessary here, just hiding double-check
1579         if (rehash_required(b->node_list.load(std::memory_order_acquire)))
1580         {
1581             typename bucket::scoped_type lock;
1582             if( lock.try_acquire( b->mutex, /*write=*/true ) ) {
1583                 if (rehash_required(b->node_list.load(std::memory_order_relaxed)))
1584                     const_cast<concurrent_hash_map*>(this)->rehash_bucket( b, h & m ); //recursive rehashing
1585             }
1586             else lock.acquire( b->mutex, /*write=*/false );
1587             __TBB_ASSERT(!rehash_required(b->node_list.load(std::memory_order_relaxed)), nullptr);
1588         }
1589         n = search_bucket( key, b );
1590         if( n )
1591             return n->storage();
1592         else if( this->check_mask_race( h, m ) )
1593             goto restart;
1594         return 0;
1595     }
1596 };
1597 
1598 #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
1599 template <typename It,
1600           typename HashCompare = d1::tbb_hash_compare<iterator_key_t<It>>,
1601           typename Alloc = tbb_allocator<iterator_alloc_pair_t<It>>,
1602           typename = std::enable_if_t<is_input_iterator_v<It>>,
1603           typename = std::enable_if_t<is_allocator_v<Alloc>>,
1604           typename = std::enable_if_t<!is_allocator_v<HashCompare>>>
1605 concurrent_hash_map( It, It, HashCompare = HashCompare(), Alloc = Alloc() )
1606 -> concurrent_hash_map<iterator_key_t<It>, iterator_mapped_t<It>, HashCompare, Alloc>;
1607 
1608 template <typename It, typename Alloc,
1609           typename = std::enable_if_t<is_input_iterator_v<It>>,
1610           typename = std::enable_if_t<is_allocator_v<Alloc>>>
1611 concurrent_hash_map( It, It, Alloc )
1612 -> concurrent_hash_map<iterator_key_t<It>, iterator_mapped_t<It>, d1::tbb_hash_compare<iterator_key_t<It>>, Alloc>;
1613 
1614 template <typename Key, typename T,
1615           typename HashCompare = d1::tbb_hash_compare<std::remove_const_t<Key>>,
1616           typename Alloc = tbb_allocator<std::pair<const Key, T>>,
1617           typename = std::enable_if_t<is_allocator_v<Alloc>>,
1618           typename = std::enable_if_t<!is_allocator_v<HashCompare>>>
1619 concurrent_hash_map( std::initializer_list<std::pair<Key, T>>, HashCompare = HashCompare(), Alloc = Alloc() )
1620 -> concurrent_hash_map<std::remove_const_t<Key>, T, HashCompare, Alloc>;
1621 
1622 template <typename Key, typename T, typename Alloc,
1623           typename = std::enable_if_t<is_allocator_v<Alloc>>>
1624 concurrent_hash_map( std::initializer_list<std::pair<Key, T>>, Alloc )
1625 -> concurrent_hash_map<std::remove_const_t<Key>, T, d1::tbb_hash_compare<std::remove_const_t<Key>>, Alloc>;
1626 
1627 #endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
1628 
1629 template <typename Key, typename T, typename HashCompare, typename A1, typename A2>
1630 inline bool operator==(const concurrent_hash_map<Key, T, HashCompare, A1> &a, const concurrent_hash_map<Key, T, HashCompare, A2> &b) {
1631     if(a.size() != b.size()) return false;
1632     typename concurrent_hash_map<Key, T, HashCompare, A1>::const_iterator i(a.begin()), i_end(a.end());
1633     typename concurrent_hash_map<Key, T, HashCompare, A2>::const_iterator j, j_end(b.end());
1634     for(; i != i_end; ++i) {
1635         j = b.equal_range(i->first).first;
1636         if( j == j_end || !(i->second == j->second) ) return false;
1637     }
1638     return true;
1639 }
1640 
1641 #if !__TBB_CPP20_COMPARISONS_PRESENT
1642 template <typename Key, typename T, typename HashCompare, typename A1, typename A2>
1643 inline bool operator!=(const concurrent_hash_map<Key, T, HashCompare, A1> &a, const concurrent_hash_map<Key, T, HashCompare, A2> &b)
1644 {    return !(a == b); }
1645 #endif // !__TBB_CPP20_COMPARISONS_PRESENT
1646 
1647 template <typename Key, typename T, typename HashCompare, typename A>
1648 inline void swap(concurrent_hash_map<Key, T, HashCompare, A> &a, concurrent_hash_map<Key, T, HashCompare, A> &b)
1649 {    a.swap( b ); }
1650 
1651 } // namespace d2
1652 } // namespace detail
1653 
1654 inline namespace v1 {
1655     using detail::split;
1656     using detail::d2::concurrent_hash_map;
1657     using detail::d1::tbb_hash_compare;
1658 } // namespace v1
1659 
1660 } // namespace tbb
1661 
1662 #endif /* __TBB_concurrent_hash_map_H */
1663