1 /*
2     Copyright (c) 2005-2022 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #ifndef __TBB_concurrent_vector_H
18 #define __TBB_concurrent_vector_H
19 
20 #include "detail/_namespace_injection.h"
21 #include "detail/_utils.h"
22 #include "detail/_assert.h"
23 #include "detail/_allocator_traits.h"
24 #include "detail/_segment_table.h"
25 #include "detail/_containers_helpers.h"
26 #include "blocked_range.h"
27 #include "cache_aligned_allocator.h"
28 
#include <algorithm>
#include <cstring> // std::memset (used by zero_unconstructed_elements)
#include <utility> // std::move_if_noexcept
#if __TBB_CPP20_COMPARISONS_PRESENT
#include <compare>
#endif
35 
36 namespace tbb {
37 namespace detail {
38 namespace d1 {
39 
40 template <typename Vector, typename Value>
41 class vector_iterator {
42     using vector_type = Vector;
43 
44 public:
45     using value_type = Value;
46     using size_type = typename vector_type::size_type;
47     using difference_type = typename vector_type::difference_type;
48     using pointer = value_type*;
49     using reference = value_type&;
50     using iterator_category = std::random_access_iterator_tag;
51 
52     template <typename Vec, typename Val>
53     friend vector_iterator<Vec, Val> operator+( typename vector_iterator<Vec, Val>::difference_type, const vector_iterator<Vec, Val>& );
54 
55     template <typename Vec, typename Val1, typename Val2>
56     friend typename vector_iterator<Vec, Val1>::difference_type operator-( const vector_iterator<Vec, Val1>&, const vector_iterator<Vec, Val2>& );
57 
58     template <typename Vec, typename Val1, typename Val2>
59     friend bool operator==( const vector_iterator<Vec, Val1>&, const vector_iterator<Vec, Val2>& );
60 
61     template <typename Vec, typename Val1, typename Val2>
62     friend bool operator<( const vector_iterator<Vec, Val1>&, const vector_iterator<Vec, Val2>& );
63 
64     template <typename Vec, typename Val>
65     friend class vector_iterator;
66 
67     template <typename T, typename Allocator>
68     friend class concurrent_vector;
69 
70 private:
71     vector_iterator( const vector_type& vector, size_type index, value_type* item = nullptr )
72         : my_vector(const_cast<vector_type*>(&vector)), my_index(index), my_item(item)
73     {}
74 
75 public:
76     vector_iterator() : my_vector(nullptr), my_index(~size_type(0)), my_item(nullptr)
77     {}
78 
79     vector_iterator( const vector_iterator<vector_type, typename vector_type::value_type>& other )
80         : my_vector(other.my_vector), my_index(other.my_index), my_item(other.my_item)
81     {}
82 
83     vector_iterator& operator=( const vector_iterator<vector_type, typename vector_type::value_type>& other ) {
84         my_vector = other.my_vector;
85         my_index = other.my_index;
86         my_item = other.my_item;
87         return *this;
88     }
89 
90     vector_iterator operator+( difference_type offset ) const {
91         return vector_iterator(*my_vector, my_index + offset);
92     }
93 
94     vector_iterator& operator+=( difference_type offset ) {
95         my_index += offset;
96         my_item = nullptr;
97         return *this;
98     }
99 
100     vector_iterator operator-( difference_type offset ) const {
101         return vector_iterator(*my_vector, my_index - offset);
102     }
103 
104     vector_iterator& operator-=( difference_type offset ) {
105         my_index -= offset;
106         my_item = nullptr;
107         return *this;
108     }
109 
110     reference operator*() const {
111         value_type *item = my_item;
112         if (item == nullptr) {
113             item = &my_vector->internal_subscript(my_index);
114         } else {
115             __TBB_ASSERT(item == &my_vector->internal_subscript(my_index), "corrupt cache");
116         }
117         return *item;
118     }
119 
120     pointer operator->() const { return &(operator*()); }
121 
122     reference operator[]( difference_type k ) const {
123         return my_vector->internal_subscript(my_index + k);
124     }
125 
126     vector_iterator& operator++() {
127         ++my_index;
128         if (my_item != nullptr) {
129             if (vector_type::is_first_element_in_segment(my_index)) {
130                 // If the iterator crosses a segment boundary, the pointer become invalid
131                 // as possibly next segment is in another memory location
132                 my_item = nullptr;
133             } else {
134                 ++my_item;
135             }
136         }
137         return *this;
138     }
139 
140     vector_iterator operator++(int) {
141         vector_iterator result = *this;
142         ++(*this);
143         return result;
144     }
145 
146     vector_iterator& operator--() {
147         __TBB_ASSERT(my_index > 0, "operator--() applied to iterator already at beginning of concurrent_vector");
148         --my_index;
149         if (my_item != nullptr) {
150             if (vector_type::is_first_element_in_segment(my_index)) {
151                 // If the iterator crosses a segment boundary, the pointer become invalid
152                 // as possibly next segment is in another memory location
153                 my_item = nullptr;
154             } else {
155                 --my_item;
156             }
157         }
158         return *this;
159     }
160 
161     vector_iterator operator--(int) {
162         vector_iterator result = *this;
163         --(*this);
164         return result;
165     }
166 
167 private:
168     // concurrent_vector over which we are iterating.
169     vector_type* my_vector;
170 
171     // Index into the vector
172     size_type my_index;
173 
174     // Caches my_vector *it;
175     // If my_item == nullptr cached value is not available use internal_subscript(my_index)
176     mutable value_type* my_item;
177 }; // class vector_iterator
178 
179 template <typename Vector, typename T>
180 vector_iterator<Vector, T> operator+( typename vector_iterator<Vector, T>::difference_type offset,
181                                       const vector_iterator<Vector, T>& v )
182 {
183     return vector_iterator<Vector, T>(*v.my_vector, v.my_index + offset);
184 }
185 
186 template <typename Vector, typename T, typename U>
187 typename vector_iterator<Vector, T>::difference_type operator-( const vector_iterator<Vector, T>& i,
188                                                                 const vector_iterator<Vector, U>& j )
189 {
190     using difference_type = typename vector_iterator<Vector, T>::difference_type;
191     return static_cast<difference_type>(i.my_index) - static_cast<difference_type>(j.my_index);
192 }
193 
194 template <typename Vector, typename T, typename U>
195 bool operator==( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) {
196     return i.my_vector == j.my_vector && i.my_index == j.my_index;
197 }
198 
199 template <typename Vector, typename T, typename U>
200 bool operator!=( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) {
201     return !(i == j);
202 }
203 
204 template <typename Vector, typename T, typename U>
205 bool operator<( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) {
206     return i.my_index < j.my_index;
207 }
208 
209 template <typename Vector, typename T, typename U>
210 bool operator>( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) {
211     return j < i;
212 }
213 
214 template <typename Vector, typename T, typename U>
215 bool operator>=( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) {
216     return !(i < j);
217 }
218 
219 template <typename Vector, typename T, typename U>
220 bool operator<=( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) {
221     return !(j < i);
222 }
223 
224 static constexpr std::size_t embedded_table_num_segments = 3;
225 
226 template <typename T, typename Allocator = tbb::cache_aligned_allocator<T>>
227 class concurrent_vector
228     : private segment_table<T, Allocator, concurrent_vector<T, Allocator>, embedded_table_num_segments>
229 {
230     using self_type = concurrent_vector<T, Allocator>;
231     using base_type = segment_table<T, Allocator, self_type, embedded_table_num_segments>;
232 
233     friend class segment_table<T, Allocator, self_type, embedded_table_num_segments>;
234 
235     template <typename Iterator>
236     class generic_range_type : public tbb::blocked_range<Iterator> {
237         using base_type = tbb::blocked_range<Iterator>;
238     public:
239         using value_type = T;
240         using reference = T&;
241         using const_reference = const T&;
242         using iterator = Iterator;
243         using difference_type = std::ptrdiff_t;
244 
245         using base_type::base_type;
246 
247         template<typename U>
248         generic_range_type( const generic_range_type<U>& r) : blocked_range<Iterator>(r.begin(), r.end(), r.grainsize()) {}
249         generic_range_type( generic_range_type& r, split ) : blocked_range<Iterator>(r, split()) {}
250     }; // class generic_range_type
251 
252     static_assert(std::is_same<T, typename Allocator::value_type>::value,
253                   "value_type of the container must be the same as its allocator's");
254     using allocator_traits_type = tbb::detail::allocator_traits<Allocator>;
255     // Segment table for concurrent_vector can be extended
256     static constexpr bool allow_table_extending = true;
257     static constexpr bool is_noexcept_assignment = allocator_traits_type::propagate_on_container_move_assignment::value ||
258                                                    allocator_traits_type::is_always_equal::value;
259     static constexpr bool is_noexcept_swap = allocator_traits_type::propagate_on_container_swap::value ||
260                                              allocator_traits_type::is_always_equal::value;
261 
262 public:
263     using value_type = T;
264     using allocator_type = Allocator;
265     using size_type = std::size_t;
266     using difference_type = std::ptrdiff_t;
267     using reference = value_type&;
268     using const_reference = const value_type&;
269 
270     using pointer = typename allocator_traits_type::pointer;
271     using const_pointer = typename allocator_traits_type::const_pointer;
272 
273     using iterator = vector_iterator<concurrent_vector, value_type>;
274     using const_iterator = vector_iterator<concurrent_vector, const value_type>;
275     using reverse_iterator = std::reverse_iterator<iterator>;
276     using const_reverse_iterator = std::reverse_iterator<const_iterator>;
277 
278     using range_type = generic_range_type<iterator>;
279     using const_range_type = generic_range_type<const_iterator>;
280 
281     concurrent_vector() : concurrent_vector(allocator_type()) {}
282 
283     explicit concurrent_vector( const allocator_type& alloc ) noexcept
284         : base_type(alloc)
285     {}
286 
287     explicit concurrent_vector( size_type count, const value_type& value,
288                                 const allocator_type& alloc = allocator_type() )
289         : concurrent_vector(alloc)
290     {
291         try_call( [&] {
292             grow_by(count, value);
293         } ).on_exception( [&] {
294             base_type::clear();
295         });
296     }
297 
298     explicit concurrent_vector( size_type count, const allocator_type& alloc = allocator_type() )
299         : concurrent_vector(alloc)
300     {
301         try_call( [&] {
302             grow_by(count);
303         } ).on_exception( [&] {
304             base_type::clear();
305         });
306     }
307 
308     template <typename InputIterator>
309     concurrent_vector( InputIterator first, InputIterator last, const allocator_type& alloc = allocator_type() )
310         : concurrent_vector(alloc)
311     {
312         try_call( [&] {
313             grow_by(first, last);
314         } ).on_exception( [&] {
315             base_type::clear();
316         });
317     }
318 
319     concurrent_vector( const concurrent_vector& other )
320         : base_type(segment_table_allocator_traits::select_on_container_copy_construction(other.get_allocator()))
321     {
322         try_call( [&] {
323             grow_by(other.begin(), other.end());
324         } ).on_exception( [&] {
325             base_type::clear();
326         });
327     }
328 
329     concurrent_vector( const concurrent_vector& other, const allocator_type& alloc )
330         : base_type(other, alloc) {}
331 
332     concurrent_vector(concurrent_vector&& other) noexcept
333         : base_type(std::move(other))
334     {}
335 
336     concurrent_vector( concurrent_vector&& other, const allocator_type& alloc )
337         : base_type(std::move(other), alloc)
338     {}
339 
340     concurrent_vector( std::initializer_list<value_type> init,
341                        const allocator_type& alloc = allocator_type() )
342         : concurrent_vector(init.begin(), init.end(), alloc)
343     {}
344 
345     ~concurrent_vector() {}
346 
347     // Assignment
348     concurrent_vector& operator=( const concurrent_vector& other ) {
349         base_type::operator=(other);
350         return *this;
351     }
352 
353     concurrent_vector& operator=( concurrent_vector&& other ) noexcept(is_noexcept_assignment) {
354         base_type::operator=(std::move(other));
355         return *this;
356     }
357 
358     concurrent_vector& operator=( std::initializer_list<value_type> init ) {
359         assign(init);
360         return *this;
361     }
362 
363     void assign( size_type count, const value_type& value ) {
364         destroy_elements();
365         grow_by(count, value);
366     }
367 
368     template <typename InputIterator>
369     typename std::enable_if<is_input_iterator<InputIterator>::value, void>::type
370     assign( InputIterator first, InputIterator last ) {
371         destroy_elements();
372         grow_by(first, last);
373     }
374 
375     void assign( std::initializer_list<value_type> init ) {
376         destroy_elements();
377         assign(init.begin(), init.end());
378     }
379 
380     // Concurrent growth
381     iterator grow_by( size_type delta ) {
382         return internal_grow_by_delta(delta);
383     }
384 
385     iterator grow_by( size_type delta, const value_type& value ) {
386         return internal_grow_by_delta(delta, value);
387     }
388 
389     template <typename ForwardIterator>
390     typename std::enable_if<is_input_iterator<ForwardIterator>::value, iterator>::type
391     grow_by( ForwardIterator first, ForwardIterator last ) {
392         auto delta = std::distance(first, last);
393         return internal_grow_by_delta(delta, first, last);
394     }
395 
396     iterator grow_by( std::initializer_list<value_type> init ) {
397         return grow_by(init.begin(), init.end());
398     }
399 
400     iterator grow_to_at_least( size_type n ) {
401         return internal_grow_to_at_least(n);
402     }
403     iterator grow_to_at_least( size_type n, const value_type& value ) {
404         return internal_grow_to_at_least(n, value);
405     }
406 
407     iterator push_back( const value_type& item ) {
408         return internal_emplace_back(item);
409     }
410 
411     iterator push_back( value_type&& item ) {
412         return internal_emplace_back(std::move(item));
413     }
414 
415     template <typename... Args>
416     iterator emplace_back( Args&&... args ) {
417         return internal_emplace_back(std::forward<Args>(args)...);
418     }
419 
420     // Items access
421     reference operator[]( size_type index ) {
422         return internal_subscript(index);
423     }
424     const_reference operator[]( size_type index ) const {
425         return internal_subscript(index);
426     }
427 
428     reference at( size_type index ) {
429         return internal_subscript_with_exceptions(index);
430     }
431     const_reference at( size_type index ) const {
432         return internal_subscript_with_exceptions(index);
433     }
434 
435     // Get range for iterating with parallel algorithms
436     range_type range( size_t grainsize = 1 ) {
437         return range_type(begin(), end(), grainsize);
438     }
439 
440     // Get const range for iterating with parallel algorithms
441     const_range_type range( size_t grainsize = 1 ) const {
442         return const_range_type(begin(), end(), grainsize);
443     }
444 
445     reference front() {
446         return internal_subscript(0);
447     }
448 
449     const_reference front() const {
450         return internal_subscript(0);
451     }
452 
453     reference back() {
454         return internal_subscript(size() - 1);
455     }
456 
457     const_reference back() const {
458         return internal_subscript(size() - 1);
459     }
460 
461     // Iterators
462     iterator begin() { return iterator(*this, 0); }
463     const_iterator begin() const { return const_iterator(*this, 0); }
464     const_iterator cbegin() const { return const_iterator(*this, 0); }
465 
466     iterator end() { return iterator(*this, size()); }
467     const_iterator end() const { return const_iterator(*this, size()); }
468     const_iterator cend() const { return const_iterator(*this, size()); }
469 
470     reverse_iterator rbegin() { return reverse_iterator(end()); }
471     const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
472     const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); }
473 
474     reverse_iterator rend() { return reverse_iterator(begin()); }
475     const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
476     const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); }
477 
478     allocator_type get_allocator() const {
479         return base_type::get_allocator();
480     }
481 
482     // Storage
483     bool empty() const noexcept {
484         return 0 == size();
485     }
486 
487     size_type size() const noexcept {
488         return std::min(this->my_size.load(std::memory_order_acquire), capacity());
489     }
490 
491     size_type max_size() const noexcept {
492         return allocator_traits_type::max_size(base_type::get_allocator());
493     }
494 
495     size_type capacity() const noexcept {
496         return base_type::capacity();
497     }
498 
499     void reserve( size_type n ) {
500         if (n == 0) return;
501 
502         if (n > max_size()) {
503             tbb::detail::throw_exception(exception_id::reservation_length_error);
504         }
505 
506         this->assign_first_block_if_necessary(this->segment_index_of(n - 1) + 1);
507         base_type::reserve(n);
508     }
509 
510     void resize( size_type n ) {
511         internal_resize(n);
512     }
513 
514     void resize( size_type n, const value_type& val ) {
515         internal_resize(n, val);
516     }
517 
518     void shrink_to_fit() {
519         internal_compact();
520     }
521 
522     void swap(concurrent_vector& other) noexcept(is_noexcept_swap) {
523         base_type::swap(other);
524     }
525 
526     void clear() {
527         destroy_elements();
528     }
529 
530 private:
531     using segment_type = typename base_type::segment_type;
532     using segment_table_type = typename base_type::segment_table_type;
533     using segment_table_allocator_traits = typename base_type::segment_table_allocator_traits;
534     using segment_index_type = typename base_type::segment_index_type;
535 
536     using segment_element_type = typename base_type::value_type;
537     using segment_element_allocator_type = typename allocator_traits_type::template rebind_alloc<segment_element_type>;
538     using segment_element_allocator_traits = tbb::detail::allocator_traits<segment_element_allocator_type>;
539 
540     segment_table_type allocate_long_table( const typename base_type::atomic_segment* embedded_table, size_type start_index ) {
541         __TBB_ASSERT(start_index <= this->embedded_table_size, "Start index out of embedded table");
542 
543         // If other threads are trying to set pointers in the short segment, wait for them to finish their
544         // assignments before we copy the short segment to the long segment. Note: grow_to_at_least depends on it
545         for (segment_index_type i = 0; this->segment_base(i) < start_index; ++i) {
546             spin_wait_while_eq(embedded_table[i], segment_type(nullptr));
547         }
548 
549         // It is possible that the table was extend by a thread allocating first_block, need to check this.
550         if (this->get_table() != embedded_table) {
551             return nullptr;
552         }
553 
554         // Allocate long segment table and fill with null pointers
555         segment_table_type new_segment_table = segment_table_allocator_traits::allocate(base_type::get_allocator(), this->pointers_per_long_table);
556         // Copy segment pointers from the embedded table
557         for (size_type segment_index = 0; segment_index < this->pointers_per_embedded_table; ++segment_index) {
558             segment_table_allocator_traits::construct(base_type::get_allocator(), &new_segment_table[segment_index],
559                 embedded_table[segment_index].load(std::memory_order_relaxed));
560         }
561         for (size_type segment_index = this->pointers_per_embedded_table; segment_index < this->pointers_per_long_table; ++segment_index) {
562             segment_table_allocator_traits::construct(base_type::get_allocator(), &new_segment_table[segment_index], nullptr);
563         }
564 
565         return new_segment_table;
566     }
567 
568     // create_segment function is required by the segment_table base class
569     segment_type create_segment( segment_table_type table, segment_index_type seg_index, size_type index ) {
570         size_type first_block = this->my_first_block.load(std::memory_order_relaxed);
571         // First block allocation
572         if (seg_index < first_block) {
573             // If 0 segment is already allocated, then it remains to wait until the segments are filled to requested
574             if (table[0].load(std::memory_order_acquire) != nullptr) {
575                 spin_wait_while_eq(table[seg_index], segment_type(nullptr));
576                 return nullptr;
577             }
578 
579             segment_element_allocator_type segment_allocator(base_type::get_allocator());
580             segment_type new_segment = nullptr;
581             size_type first_block_size = this->segment_size(first_block);
582             try_call( [&] {
583                 new_segment = segment_element_allocator_traits::allocate(segment_allocator, first_block_size);
584             } ).on_exception( [&] {
585                 segment_type disabled_segment = nullptr;
586                 if (table[0].compare_exchange_strong(disabled_segment, this->segment_allocation_failure_tag)) {
587                     size_type end_segment = table == this->my_embedded_table ? this->pointers_per_embedded_table : first_block;
588                     for (size_type i = 1; i < end_segment; ++i) {
589                         table[i].store(this->segment_allocation_failure_tag, std::memory_order_release);
590                     }
591                 }
592             });
593 
594             segment_type disabled_segment = nullptr;
595             if (table[0].compare_exchange_strong(disabled_segment, new_segment)) {
596                 this->extend_table_if_necessary(table, 0, first_block_size);
597                 for (size_type i = 1; i < first_block; ++i) {
598                     table[i].store(new_segment, std::memory_order_release);
599                 }
600 
601                 // Other threads can wait on a snapshot of an embedded table, need to fill it.
602                 for (size_type i = 1; i < first_block && i < this->pointers_per_embedded_table; ++i) {
603                     this->my_embedded_table[i].store(new_segment, std::memory_order_release);
604                 }
605             } else if (new_segment != this->segment_allocation_failure_tag) {
606                 // Deallocate the memory
607                 segment_element_allocator_traits::deallocate(segment_allocator, new_segment, first_block_size);
608                 // 0 segment is already allocated, then it remains to wait until the segments are filled to requested
609                 spin_wait_while_eq(table[seg_index], segment_type(nullptr));
610             }
611         } else {
612             size_type offset = this->segment_base(seg_index);
613             if (index == offset) {
614                 __TBB_ASSERT(table[seg_index].load(std::memory_order_relaxed) == nullptr, "Only this thread can enable this segment");
615                 segment_element_allocator_type segment_allocator(base_type::get_allocator());
616                 segment_type new_segment = this->segment_allocation_failure_tag;
617                 try_call( [&] {
618                     new_segment = segment_element_allocator_traits::allocate(segment_allocator,this->segment_size(seg_index));
619                     // Shift base address to simplify access by index
620                     new_segment -= this->segment_base(seg_index);
621                 } ).on_completion( [&] {
622                     table[seg_index].store(new_segment, std::memory_order_release);
623                 });
624             } else {
625                 spin_wait_while_eq(table[seg_index], segment_type(nullptr));
626             }
627         }
628         return nullptr;
629     }
630 
631     // Returns the number of elements in the segment to be destroy
632     size_type number_of_elements_in_segment( segment_index_type seg_index ) {
633         size_type curr_vector_size = this->my_size.load(std::memory_order_relaxed);
634         size_type curr_segment_base = this->segment_base(seg_index);
635 
636         if (seg_index == 0) {
637             return std::min(curr_vector_size, this->segment_size(seg_index));
638         } else {
639             // Perhaps the segment is allocated, but there are no elements in it.
640             if (curr_vector_size < curr_segment_base) {
641                 return 0;
642             }
643             return curr_segment_base * 2 > curr_vector_size ? curr_vector_size - curr_segment_base : curr_segment_base;
644         }
645     }
646 
647     segment_type nullify_segment( segment_table_type table, size_type segment_index ) {
648         segment_type target_segment = table[segment_index].load(std::memory_order_relaxed);
649         if (segment_index >= this->my_first_block) {
650             table[segment_index].store(nullptr, std::memory_order_relaxed);
651         } else {
652             if (segment_index == 0) {
653                 for (size_type i = 0; i < this->my_first_block; ++i) {
654                     table[i].store(nullptr, std::memory_order_relaxed);
655                 }
656             }
657         }
658 
659         return target_segment;
660     }
661 
662     void deallocate_segment( segment_type address, segment_index_type seg_index ) {
663         segment_element_allocator_type segment_allocator(base_type::get_allocator());
664         size_type first_block = this->my_first_block.load(std::memory_order_relaxed);
665         if (seg_index >= first_block) {
666             segment_element_allocator_traits::deallocate(segment_allocator, address, this->segment_size(seg_index));
667         }
668         else if (seg_index == 0) {
669             size_type elements_to_deallocate = first_block > 0 ? this->segment_size(first_block) : this->segment_size(0);
670             segment_element_allocator_traits::deallocate(segment_allocator, address, elements_to_deallocate);
671         }
672     }
673 
674     // destroy_segment function is required by the segment_table base class
675     void destroy_segment( segment_type address, segment_index_type seg_index ) {
676         size_type elements_to_destroy = number_of_elements_in_segment(seg_index);
677         segment_element_allocator_type segment_allocator(base_type::get_allocator());
678 
679         for (size_type i = 0; i < elements_to_destroy; ++i) {
680             segment_element_allocator_traits::destroy(segment_allocator, address + i);
681         }
682 
683         deallocate_segment(address, seg_index);
684     }
685 
686     // copy_segment function is required by the segment_table base class
687     void copy_segment( segment_index_type seg_index, segment_type from, segment_type to ) {
688         size_type i = 0;
689         try_call( [&] {
690             for (; i != number_of_elements_in_segment(seg_index); ++i) {
691                 segment_table_allocator_traits::construct(base_type::get_allocator(), to + i, from[i]);
692             }
693         } ).on_exception( [&] {
694             // Zero-initialize items left not constructed after the exception
695             zero_unconstructed_elements(this->get_segment(seg_index) + i, this->segment_size(seg_index) - i);
696 
697             segment_index_type last_segment = this->segment_index_of(this->my_size.load(std::memory_order_relaxed));
698             auto table = this->get_table();
699             for (segment_index_type j = seg_index + 1; j != last_segment; ++j) {
700                 auto curr_segment = table[j].load(std::memory_order_relaxed);
701                 if (curr_segment) {
702                     zero_unconstructed_elements(curr_segment + this->segment_base(j), this->segment_size(j));
703                 }
704             }
705             this->my_size.store(this->segment_size(seg_index) + i, std::memory_order_relaxed);
706         });
707     }
708 
709     // move_segment function is required by the segment_table base class
710     void move_segment( segment_index_type seg_index, segment_type from, segment_type to ) {
711         size_type i = 0;
712         try_call( [&] {
713             for (; i != number_of_elements_in_segment(seg_index); ++i) {
714                 segment_table_allocator_traits::construct(base_type::get_allocator(), to + i, std::move(from[i]));
715             }
716         } ).on_exception( [&] {
717             // Zero-initialize items left not constructed after the exception
718             zero_unconstructed_elements(this->get_segment(seg_index) + i, this->segment_size(seg_index) - i);
719 
720             segment_index_type last_segment = this->segment_index_of(this->my_size.load(std::memory_order_relaxed));
721             auto table = this->get_table();
722             for (segment_index_type j = seg_index + 1; j != last_segment; ++j) {
723                 auto curr_segment = table[j].load(std::memory_order_relaxed);
724                 if (curr_segment) {
725                     zero_unconstructed_elements(curr_segment + this->segment_base(j), this->segment_size(j));
726                 }
727             }
728             this->my_size.store(this->segment_size(seg_index) + i, std::memory_order_relaxed);
729         });
730     }
731 
732     static constexpr bool is_first_element_in_segment( size_type index ) {
733         // An element is the first in a segment if its index is equal to a power of two
734         return is_power_of_two_at_least(index, 2);
735     }
736 
737     const_reference internal_subscript( size_type index ) const {
738         return const_cast<self_type*>(this)->internal_subscript(index);
739     }
740 
    // Unchecked element access used by operator[]; the caller must guarantee
    // that index is below the current size.
    reference internal_subscript( size_type index ) {
        __TBB_ASSERT(index < this->my_size.load(std::memory_order_relaxed), "Invalid subscript index");
        return base_type::template internal_subscript</*allow_out_of_range_access=*/false>(index);
    }
745 
746     const_reference internal_subscript_with_exceptions( size_type index ) const {
747         return const_cast<self_type*>(this)->internal_subscript_with_exceptions(index);
748     }
749 
    // Bounds-checked element access used by at(); throws out_of_range when the
    // index is beyond the current size or its segment is not usable.
    reference internal_subscript_with_exceptions( size_type index ) {
        if (index >= this->my_size.load(std::memory_order_acquire)) {
            tbb::detail::throw_exception(exception_id::out_of_range);
        }

        segment_table_type table = this->my_segment_table.load(std::memory_order_acquire);

        size_type seg_index = this->segment_index_of(index);
        // NOTE(review): '<' admits seg_index == number_of_segments(table) —
        // presumably number_of_segments returns the table capacity (one past
        // the largest valid index); confirm against the base class.
        if (base_type::number_of_segments(table) < seg_index) {
            tbb::detail::throw_exception(exception_id::out_of_range);
        }

        // Segment pointers at or below the failure tag mark segments that were
        // never successfully allocated.
        if (table[seg_index] <= this->segment_allocation_failure_tag) {
            tbb::detail::throw_exception(exception_id::out_of_range);
        }

        return base_type::template internal_subscript</*allow_out_of_range_access=*/false>(index);
    }
768 
769     static void zero_unconstructed_elements( pointer start, size_type count ) {
770         std::memset(static_cast<void *>(start), 0, count * sizeof(value_type));
771     }
772 
    // Appends one element constructed in place from args.
    // Claims a slot by bumping my_size first, so concurrent growers never
    // collide on the same index; returns an iterator to the new element.
    template <typename... Args>
    iterator internal_emplace_back( Args&&... args ) {
        size_type old_size = this->my_size++;
        this->assign_first_block_if_necessary(default_first_block_size);
        auto element_address = &base_type::template internal_subscript</*allow_out_of_range_access=*/true>(old_size);

        // try_call API is not convenient here due to broken
        // variadic capture on GCC 4.8.5
        auto value_guard = make_raii_guard([&] {
            // If construction throws, zero the claimed slot so it reads as a
            // never-constructed ("broken") element during later cleanup.
            zero_unconstructed_elements(element_address, /*count =*/1);
        });

        segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, std::forward<Args>(args)...);
        value_guard.dismiss();
        return iterator(*this, old_size, element_address);
    }
789 
    // Constructs elements at indices [start_idx, end_idx) from the given
    // argument pack: default construction when empty, copy construction from a
    // single value otherwise (enforced by the static_assert).
    template <typename... Args>
    void internal_loop_construct( segment_table_type table, size_type start_idx, size_type end_idx, const Args&... args ) {
        static_assert(sizeof...(Args) < 2, "Too many parameters");
        for (size_type idx = start_idx; idx < end_idx; ++idx) {
            auto element_address = &base_type::template internal_subscript</*allow_out_of_range_access=*/true>(idx);
            // try_call API is not convenient here due to broken
            // variadic capture on GCC 4.8.5
            auto value_guard = make_raii_guard( [&] {
                // On exception: zero the remaining slots of this range that lie
                // in allocated segments, so they read as never-constructed.
                segment_index_type last_allocated_segment = this->find_last_allocated_segment(table);
                size_type segment_size = this->segment_size(last_allocated_segment);
                end_idx = end_idx < segment_size ? end_idx : segment_size;
                for (size_type i = idx; i < end_idx; ++i) {
                    zero_unconstructed_elements(&this->internal_subscript(i), /*count =*/1);
                }
            });
            segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, args...);
            value_guard.dismiss();
        }
    }
809 
    // Constructs elements at indices [start_idx, end_idx) by copying from the
    // iterator range starting at `first` (the end iterator is unused: the
    // index range already fixes the element count).
    template <typename ForwardIterator>
    void internal_loop_construct( segment_table_type table, size_type start_idx, size_type end_idx, ForwardIterator first, ForwardIterator ) {
        for (size_type idx = start_idx; idx < end_idx; ++idx) {
            auto element_address = &base_type::template internal_subscript</*allow_out_of_range_access=*/true>(idx);
            try_call( [&] {
                segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, *first++);
            } ).on_exception( [&] {
                // Zero the remaining slots of this range that lie in allocated
                // segments, so they read as never-constructed elements.
                segment_index_type last_allocated_segment = this->find_last_allocated_segment(table);
                size_type segment_size = this->segment_size(last_allocated_segment);
                end_idx = end_idx < segment_size ? end_idx : segment_size;
                for (size_type i = idx; i < end_idx; ++i) {
                    zero_unconstructed_elements(&this->internal_subscript(i), /*count =*/1);
                }
            });
        }
    }
826 
    // Constructs elements at indices [start_idx, end_idx), extending the
    // segment table and enabling new segments as needed.
    // Returns an iterator to the element at start_idx.
    template <typename... Args>
    iterator internal_grow( size_type start_idx, size_type end_idx, const Args&... args ) {
        this->assign_first_block_if_necessary(this->segment_index_of(end_idx - 1) + 1);
        size_type seg_index = this->segment_index_of(end_idx - 1);
        segment_table_type table = this->get_table();
        this->extend_table_if_necessary(table, start_idx, end_idx);

        if (seg_index > this->my_first_block.load(std::memory_order_relaxed)) {
            // So that other threads be able to work with the last segment of grow_by, allocate it immediately.
            // If the last segment is not less than the first block
            if (table[seg_index].load(std::memory_order_relaxed) == nullptr) {
                size_type first_element = this->segment_base(seg_index);
                // Only the thread whose claimed range contains the segment's
                // first element enables it; other threads spin on the pointer.
                if (first_element >= start_idx && first_element < end_idx) {
                    segment_type segment = table[seg_index].load(std::memory_order_relaxed);
                    base_type::enable_segment(segment, table, seg_index, first_element);
                }
            }
        }

        internal_loop_construct(table, start_idx, end_idx, args...);

        return iterator(*this, start_idx, &base_type::template internal_subscript</*allow_out_of_range_access=*/false>(start_idx));
    }
850 
851 
852     template <typename... Args>
853     iterator internal_grow_by_delta( size_type delta, const Args&... args ) {
854         if (delta == size_type(0)) {
855             return end();
856         }
857         size_type start_idx = this->my_size.fetch_add(delta);
858         size_type end_idx = start_idx + delta;
859 
860         return internal_grow(start_idx, end_idx, args...);
861     }
862 
863     template <typename... Args>
864     iterator internal_grow_to_at_least( size_type new_size, const Args&... args ) {
865         size_type old_size = this->my_size.load(std::memory_order_relaxed);
866         if (new_size == size_type(0)) return iterator(*this, 0);
867         while (old_size < new_size && !this->my_size.compare_exchange_weak(old_size, new_size))
868         {}
869 
870         int delta = static_cast<int>(new_size) - static_cast<int>(old_size);
871         if (delta > 0) {
872             return internal_grow(old_size, new_size, args...);
873         }
874 
875         size_type end_segment = this->segment_index_of(new_size - 1);
876 
877         // Check/wait for segments allocation completes
878         if (end_segment >= this->pointers_per_embedded_table &&
879             this->get_table() == this->my_embedded_table)
880         {
881             spin_wait_while_eq(this->my_segment_table, this->my_embedded_table);
882         }
883 
884         for (segment_index_type seg_idx = 0; seg_idx <= end_segment; ++seg_idx) {
885             if (this->get_table()[seg_idx].load(std::memory_order_relaxed) == nullptr) {
886                 atomic_backoff backoff(true);
887                 while (this->get_table()[seg_idx].load(std::memory_order_relaxed) == nullptr) {
888                     backoff.pause();
889                 }
890             }
891         }
892 
893     #if TBB_USE_DEBUG
894         size_type cap = capacity();
895         __TBB_ASSERT( cap >= new_size, NULL);
896     #endif
897         return iterator(*this, size());
898     }
899 
    // Serial resize helper: grows with args-constructed elements when n is
    // larger than the current size; otherwise destroys trailing elements and
    // deletes now-unused segments. n == 0 degenerates to clear().
    template <typename... Args>
    void internal_resize( size_type n, const Args&... args ) {
        if (n == 0) {
            clear();
            return;
        }

        size_type old_size = this->my_size.load(std::memory_order_acquire);
        if (n > old_size) {
            reserve(n);
            grow_to_at_least(n, args...);
        } else {
            if (old_size == n) {
                return;
            }
            size_type last_segment = this->segment_index_of(old_size - 1);
            // Delete segments that lie entirely past the new size
            for (size_type seg_idx = this->segment_index_of(n - 1) + 1; seg_idx <= last_segment; ++seg_idx) {
                this->delete_segment(seg_idx);
            }

            // If n > segment_size(n) => we need to destroy all of the items in the first segment
            // Otherwise, we need to destroy only items with the index < n
            size_type n_segment = this->segment_index_of(n - 1);
            size_type last_index_to_destroy = std::min(this->segment_base(n_segment) + this->segment_size(n_segment), old_size);
            // Destroy elements in curr segment
            for (size_type idx = n; idx < last_index_to_destroy; ++idx) {
                segment_table_allocator_traits::destroy(base_type::get_allocator(), &base_type::template internal_subscript</*allow_out_of_range_access=*/false>(idx));
            }
            this->my_size.store(n, std::memory_order_release);
        }
    }
932 
933     void destroy_elements() {
934         allocator_type alloc(base_type::get_allocator());
935         for (size_type i = 0; i < this->my_size.load(std::memory_order_relaxed); ++i) {
936             allocator_traits_type::destroy(alloc, &base_type::template internal_subscript</*allow_out_of_range_access=*/false>(i));
937         }
938         this->my_size.store(0, std::memory_order_relaxed);
939     }
940 
941     static bool incompact_predicate( size_type size ) {
942         // memory page size
943         const size_type page_size = 4096;
944         return size < page_size || ((size - 1) % page_size < page_size / 2 && size < page_size * 128);
945     }
946 
947     void internal_compact() {
948         const size_type curr_size = this->my_size.load(std::memory_order_relaxed);
949         segment_table_type table = this->get_table();
950         const segment_index_type k_end = this->find_last_allocated_segment(table);                   // allocated segments
951         const segment_index_type k_stop = curr_size ? this->segment_index_of(curr_size - 1) + 1 : 0; // number of segments to store existing items: 0=>0; 1,2=>1; 3,4=>2; [5-8]=>3;..
952         const segment_index_type first_block = this->my_first_block;                                 // number of merged segments, getting values from atomics
953 
954         segment_index_type k = first_block;
955         if (k_stop < first_block) {
956             k = k_stop;
957         }
958         else {
959             while (k < k_stop && incompact_predicate(this->segment_size(k) * sizeof(value_type))) k++;
960         }
961 
962         if (k_stop == k_end && k == first_block) {
963             return;
964         }
965 
966         // First segment optimization
967         if (k != first_block && k) {
968             size_type max_block = std::max(first_block, k);
969 
970             auto buffer_table = segment_table_allocator_traits::allocate(base_type::get_allocator(), max_block);
971 
972             for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) {
973                 segment_table_allocator_traits::construct(base_type::get_allocator(), &buffer_table[seg_idx],
974                     table[seg_idx].load(std::memory_order_relaxed));
975                 table[seg_idx].store(nullptr, std::memory_order_relaxed);
976             }
977 
978             this->my_first_block.store(k, std::memory_order_relaxed);
979             size_type index = 0;
980             try_call( [&] {
981                 for (; index < std::min(this->segment_size(max_block), curr_size); ++index) {
982                     auto element_address = &static_cast<base_type*>(this)->operator[](index);
983                     segment_index_type seg_idx = this->segment_index_of(index);
984                     segment_table_allocator_traits::construct(base_type::get_allocator(), element_address,
985                     std::move_if_noexcept(buffer_table[seg_idx].load(std::memory_order_relaxed)[index]));
986                 }
987             } ).on_exception( [&] {
988                 segment_element_allocator_type allocator(base_type::get_allocator());
989                 for (size_type i = 0; i < index; ++i) {
990                     auto element_adress = &this->operator[](i);
991                     segment_element_allocator_traits::destroy(allocator, element_adress);
992                 }
993                 segment_element_allocator_traits::deallocate(allocator,
994                     table[0].load(std::memory_order_relaxed), this->segment_size(max_block));
995 
996                 for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) {
997                     table[seg_idx].store(buffer_table[seg_idx].load(std::memory_order_relaxed),
998                         std::memory_order_relaxed);
999                     buffer_table[seg_idx].store(nullptr, std::memory_order_relaxed);
1000                 }
1001                 segment_table_allocator_traits::deallocate(base_type::get_allocator(),
1002                     buffer_table, max_block);
1003                 this->my_first_block.store(first_block, std::memory_order_relaxed);
1004             });
1005 
1006             // Need to correct deallocate old segments
1007             // Method destroy_segment respect active first_block, therefore,
1008             // in order for the segment deletion to work correctly, set the first_block size that was earlier,
1009             // destroy the unnecessary segments.
1010             this->my_first_block.store(first_block, std::memory_order_relaxed);
1011             for (size_type seg_idx = max_block; seg_idx > 0 ; --seg_idx) {
1012                 auto curr_segment = buffer_table[seg_idx - 1].load(std::memory_order_relaxed);
1013                 if (curr_segment != nullptr) {
1014                     destroy_segment(buffer_table[seg_idx - 1].load(std::memory_order_relaxed) + this->segment_base(seg_idx - 1),
1015                         seg_idx - 1);
1016                 }
1017             }
1018 
1019             this->my_first_block.store(k, std::memory_order_relaxed);
1020 
1021             for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) {
1022                 segment_table_allocator_traits::destroy(base_type::get_allocator(), &buffer_table[seg_idx]);
1023             }
1024 
1025             segment_table_allocator_traits::deallocate(base_type::get_allocator(), buffer_table, max_block);
1026         }
1027         // free unnecessary segments allocated by reserve() call
1028         if (k_stop < k_end) {
1029             for (size_type seg_idx = k_end; seg_idx != k_stop; --seg_idx) {
1030                 if (table[seg_idx - 1].load(std::memory_order_relaxed) != nullptr) {
1031                     this->delete_segment(seg_idx - 1);
1032                 }
1033             }
1034             if (!k) this->my_first_block.store(0, std::memory_order_relaxed);;
1035         }
1036     }
1037 
    // Lever for adjusting the size of first_block at the very first insertion.
    // TODO: consider >1 value, check performance
    static constexpr size_type default_first_block_size = 1;

    // vector_iterator resolves element addresses through the vector's internals.
    template <typename Vector, typename Value>
    friend class vector_iterator;
1044 }; // class concurrent_vector
1045 
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
// Deduction guide for the constructor from two iterators:
// deduces the element type from the iterator's value type; participates only
// for input iterators and a valid allocator type.
template <typename It, typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>,
          typename = std::enable_if_t<is_input_iterator_v<It>>,
          typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_vector( It, It, Alloc = Alloc() )
-> concurrent_vector<iterator_value_t<It>, Alloc>;
#endif
1054 
1055 template <typename T, typename Allocator>
1056 void swap(concurrent_vector<T, Allocator> &lhs,
1057           concurrent_vector<T, Allocator> &rhs)
1058 {
1059     lhs.swap(rhs);
1060 }
1061 
1062 template <typename T, typename Allocator>
1063 bool operator==(const concurrent_vector<T, Allocator> &lhs,
1064                 const concurrent_vector<T, Allocator> &rhs)
1065 {
1066     return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin());
1067 }
1068 
1069 #if !__TBB_CPP20_COMPARISONS_PRESENT
1070 template <typename T, typename Allocator>
1071 bool operator!=(const concurrent_vector<T, Allocator> &lhs,
1072                 const concurrent_vector<T, Allocator> &rhs)
1073 {
1074     return !(lhs == rhs);
1075 }
1076 #endif // !__TBB_CPP20_COMPARISONS_PRESENT
1077 
#if __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT
// C++20: a single three-way comparison from which the compiler synthesizes
// <, <=, > and >=.
template <typename T, typename Allocator>
tbb::detail::synthesized_three_way_result<typename concurrent_vector<T, Allocator>::value_type>
operator<=>(const concurrent_vector<T, Allocator> &lhs,
            const concurrent_vector<T, Allocator> &rhs)
{
    return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(),
                                                  rhs.begin(), rhs.end(),
                                                  tbb::detail::synthesized_three_way_comparator{});
}
1088 
1089 #else
1090 
1091 template <typename T, typename Allocator>
1092 bool operator<(const concurrent_vector<T, Allocator> &lhs,
1093                const concurrent_vector<T, Allocator> &rhs)
1094 {
1095     return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
1096 }
1097 
1098 template <typename T, typename Allocator>
1099 bool operator<=(const concurrent_vector<T, Allocator> &lhs,
1100                 const concurrent_vector<T, Allocator> &rhs)
1101 {
1102     return !(rhs < lhs);
1103 }
1104 
1105 template <typename T, typename Allocator>
1106 bool operator>(const concurrent_vector<T, Allocator> &lhs,
1107                const concurrent_vector<T, Allocator> &rhs)
1108 {
1109     return rhs < lhs;
1110 }
1111 
1112 template <typename T, typename Allocator>
1113 bool operator>=(const concurrent_vector<T, Allocator> &lhs,
1114                 const concurrent_vector<T, Allocator> &rhs)
1115 {
1116     return !(lhs < rhs);
1117 }
1118 #endif // __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT
1119 
1120 } // namespace d1
1121 } // namespace detail
1122 
1123 inline namespace v1 {
1124     using detail::d1::concurrent_vector;
1125 } // namespace v1
1126 
1127 } // namespace tbb
1128 
1129 #endif // __TBB_concurrent_vector_H
1130