/*
    Copyright (c) 2005-2024 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_enumerable_thread_specific_H
#define __TBB_enumerable_thread_specific_H

#include "detail/_config.h"
#include "detail/_namespace_injection.h"
#include "detail/_assert.h"
#include "detail/_template_helpers.h"
#include "detail/_aligned_space.h"

#include "concurrent_vector.h"
#include "tbb_allocator.h"
#include "cache_aligned_allocator.h"
#include "profiling.h"

#include <atomic>
#include <thread>
#include <cstring> // memcpy
#include <cstddef> // std::ptrdiff_t

#include "task.h" // for task::suspend_point

#if _WIN32 || _WIN64
#ifndef NOMINMAX
#define NOMINMAX
#define __TBB_DEFINED_NOMINMAX 1
#endif
#include <windows.h>
#if __TBB_DEFINED_NOMINMAX
#undef NOMINMAX
#undef __TBB_DEFINED_NOMINMAX
#endif
#else
#include <pthread.h>
#endif

namespace tbb {
namespace detail {
namespace d1 {

//! enum for selecting between single key and key-per-instance versions
enum ets_key_usage_type {
    ets_key_per_instance
    , ets_no_key
#if __TBB_RESUMABLE_TASKS
    , ets_suspend_aware
#endif
};
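// Note (illustrative, not part of the original documentation): the key usage type is normally
// passed as the third template argument of enumerable_thread_specific; for example, the
// hypothetical declaration
//     tbb::enumerable_thread_specific<int, tbb::cache_aligned_allocator<int>,
//                                     tbb::ets_key_per_instance> per_thread_counter;
// selects the ets_key_per_instance specialization defined below, which caches lookups in native TLS.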
// Forward declaration to use in internal classes
template <typename T, typename Allocator, ets_key_usage_type ETS_key_type>
class enumerable_thread_specific;

template <std::size_t ThreadIDSize>
struct internal_ets_key_selector {
    using key_type = std::thread::id;
    static key_type current_key() {
        return std::this_thread::get_id();
    }
};

// The Intel Compiler on OSX cannot create atomic objects instantiated from non-fundamental types
#if __INTEL_COMPILER && __APPLE__
template<>
struct internal_ets_key_selector<sizeof(std::size_t)> {
    using key_type = std::size_t;
    static key_type current_key() {
        auto id = std::this_thread::get_id();
        return reinterpret_cast<key_type&>(id);
    }
};
#endif

template <ets_key_usage_type ETS_key_type>
struct ets_key_selector : internal_ets_key_selector<sizeof(std::thread::id)> {};

#if __TBB_RESUMABLE_TASKS
template <>
struct ets_key_selector<ets_suspend_aware> {
    using key_type = suspend_point;
    static key_type current_key() {
        return r1::current_suspend_point();
    }
};
#endif

template<ets_key_usage_type ETS_key_type>
class ets_base : detail::no_copy {
protected:
    using key_type = typename ets_key_selector<ETS_key_type>::key_type;

public:
    struct slot;
    struct array {
        array* next;
        std::size_t lg_size;
        slot& at( std::size_t k ) {
            return (reinterpret_cast<slot*>(reinterpret_cast<void*>(this+1)))[k];
        }
        std::size_t size() const { return std::size_t(1) << lg_size; }
        std::size_t mask() const { return size() - 1; }
        std::size_t start( std::size_t h ) const {
            return h >> (8 * sizeof(std::size_t) - lg_size);
        }
    };
    struct slot {
        std::atomic<key_type> key;
        void* ptr;
        bool empty() const { return key.load(std::memory_order_relaxed) == key_type(); }
        bool match( key_type k ) const { return key.load(std::memory_order_relaxed) == k; }
        bool claim( key_type k ) {
            // TODO: maybe claim ptr, because key_type is not guaranteed to fit into word size
            key_type expected = key_type();
            return key.compare_exchange_strong(expected, k);
        }
    };

protected:
    //! Root of linked list of arrays of decreasing size.
    /** nullptr if and only if my_count==0.
        Each array in the list is half the size of its predecessor. */
    std::atomic<array*> my_root;
    std::atomic<std::size_t> my_count;

    virtual void* create_local() = 0;
    virtual void* create_array(std::size_t _size) = 0;   // _size in bytes
    virtual void free_array(void* ptr, std::size_t _size) = 0; // _size in bytes

    array* allocate( std::size_t lg_size ) {
        std::size_t n = std::size_t(1) << lg_size;
        array* a = static_cast<array*>(create_array(sizeof(array) + n * sizeof(slot)));
        a->lg_size = lg_size;
        std::memset( a + 1, 0, n * sizeof(slot) );
        return a;
    }
    void deallocate(array* a) {
        std::size_t n = std::size_t(1) << (a->lg_size);
        free_array( static_cast<void*>(a), std::size_t(sizeof(array) + n * sizeof(slot)) );
    }

    ets_base() : my_root{nullptr}, my_count{0} {}
    virtual ~ets_base(); // g++ complains if this is not virtual

    void* table_lookup( bool& exists );
    void table_clear();
    // The following functions are not used in concurrent context,
    // so we don't need synchronization and ITT annotations there.
    template <ets_key_usage_type E2>
    void table_elementwise_copy( const ets_base& other,
                                 void*(*add_element)(ets_base<E2>&, void*) ) {
        __TBB_ASSERT(!my_root.load(std::memory_order_relaxed), nullptr);
        __TBB_ASSERT(!my_count.load(std::memory_order_relaxed), nullptr);
        if( !other.my_root.load(std::memory_order_relaxed) ) return;
        array* root = allocate(other.my_root.load(std::memory_order_relaxed)->lg_size);
        my_root.store(root, std::memory_order_relaxed);
        root->next = nullptr;
        my_count.store(other.my_count.load(std::memory_order_relaxed), std::memory_order_relaxed);
        std::size_t mask = root->mask();
        for( array* r = other.my_root.load(std::memory_order_relaxed); r; r = r->next ) {
            for( std::size_t i = 0; i < r->size(); ++i ) {
                slot& s1 = r->at(i);
                if( !s1.empty() ) {
                    for( std::size_t j = root->start(std::hash<key_type>{}(s1.key.load(std::memory_order_relaxed))); ; j = (j+1)&mask ) {
                        slot& s2 = root->at(j);
                        if( s2.empty() ) {
                            s2.ptr = add_element(static_cast<ets_base<E2>&>(*this), s1.ptr);
                            s2.key.store(s1.key.load(std::memory_order_relaxed), std::memory_order_relaxed);
                            break;
                        }
                        else if( s2.match(s1.key.load(std::memory_order_relaxed)) )
                            break;
                    }
                }
            }
        }
    }
    void table_swap( ets_base& other ) {
        __TBB_ASSERT(this!=&other, "Don't swap an instance with itself");
        swap_atomics_relaxed(my_root, other.my_root);
        swap_atomics_relaxed(my_count, other.my_count);
    }
};

template<ets_key_usage_type ETS_key_type>
ets_base<ETS_key_type>::~ets_base() {
    __TBB_ASSERT(!my_root.load(std::memory_order_relaxed), nullptr);
}

template<ets_key_usage_type ETS_key_type>
void ets_base<ETS_key_type>::table_clear() {
    while ( array* r = my_root.load(std::memory_order_relaxed) ) {
        my_root.store(r->next, std::memory_order_relaxed);
        deallocate(r);
    }
    my_count.store(0, std::memory_order_relaxed);
}
template<ets_key_usage_type ETS_key_type>
void* ets_base<ETS_key_type>::table_lookup( bool& exists ) {
    const key_type k = ets_key_selector<ETS_key_type>::current_key();

    __TBB_ASSERT(k != key_type(), nullptr);
    void* found;
    std::size_t h = std::hash<key_type>{}(k);
    for( array* r = my_root.load(std::memory_order_acquire); r; r = r->next ) {
        call_itt_notify(acquired,r);
        std::size_t mask=r->mask();
        for(std::size_t i = r->start(h); ;i=(i+1)&mask) {
            slot& s = r->at(i);
            if( s.empty() ) break;
            if( s.match(k) ) {
                if( r == my_root.load(std::memory_order_acquire) ) {
                    // Success at top level
                    exists = true;
                    return s.ptr;
                } else {
                    // Success at some other level. Need to insert at top level.
                    exists = true;
                    found = s.ptr;
                    goto insert;
                }
            }
        }
    }
    // Key does not yet exist. The density of slots in the table never exceeds 0.5:
    // whenever that bound would be crossed, a new table of double the current size
    // is allocated and swapped in as the new root table. So an empty slot is guaranteed.
    exists = false;
    found = create_local();
    {
        std::size_t c = ++my_count;
        array* r = my_root.load(std::memory_order_acquire);
        call_itt_notify(acquired,r);
        if( !r || c > r->size()/2 ) {
            std::size_t s = r ? r->lg_size : 2;
            while( c > std::size_t(1)<<(s-1) ) ++s;
            array* a = allocate(s);
            for(;;) {
                a->next = r;
                call_itt_notify(releasing,a);
                array* new_r = r;
                if( my_root.compare_exchange_strong(new_r, a) ) break;
                call_itt_notify(acquired, new_r);
                __TBB_ASSERT(new_r != nullptr, nullptr);
                if( new_r->lg_size >= s ) {
                    // Another thread inserted an equal or bigger array, so our array is superfluous.
                    deallocate(a);
                    break;
                }
                r = new_r;
            }
        }
    }
insert:
    // Whether the slot was found in an older table or the element was just created,
    // it has already been accounted for in the total count. There is guaranteed to be
    // room for it in the root table, and it is not present there, so search for an
    // empty slot and use it.
    array* ir = my_root.load(std::memory_order_acquire);
    call_itt_notify(acquired, ir);
    std::size_t mask = ir->mask();
    for(std::size_t i = ir->start(h);; i = (i+1)&mask) {
        slot& s = ir->at(i);
        if( s.empty() ) {
            if( s.claim(k) ) {
                s.ptr = found;
                return found;
            }
        }
    }
}
//! Specialization that exploits native TLS
template <>
class ets_base<ets_key_per_instance>: public ets_base<ets_no_key> {
    using super = ets_base<ets_no_key>;
#if _WIN32||_WIN64
#if __TBB_WIN8UI_SUPPORT
    using tls_key_t = DWORD;
    void create_key() { my_key = FlsAlloc(nullptr); }
    void destroy_key() { FlsFree(my_key); }
    void set_tls(void * value) { FlsSetValue(my_key, (LPVOID)value); }
    void* get_tls() { return (void *)FlsGetValue(my_key); }
#else
    using tls_key_t = DWORD;
    void create_key() { my_key = TlsAlloc(); }
    void destroy_key() { TlsFree(my_key); }
    void set_tls(void * value) { TlsSetValue(my_key, (LPVOID)value); }
    void* get_tls() { return (void *)TlsGetValue(my_key); }
#endif
#else
    using tls_key_t = pthread_key_t;
    void create_key() { pthread_key_create(&my_key, nullptr); }
    void destroy_key() { pthread_key_delete(my_key); }
    void set_tls( void * value ) const { pthread_setspecific(my_key, value); }
    void* get_tls() const { return pthread_getspecific(my_key); }
#endif
    tls_key_t my_key;
    virtual void* create_local() override = 0;
    virtual void* create_array(std::size_t _size) override = 0;   // _size in bytes
    virtual void free_array(void* ptr, std::size_t _size) override = 0; // _size in bytes
protected:
    ets_base() {create_key();}
    ~ets_base() {destroy_key();}
    void* table_lookup( bool& exists ) {
        void* found = get_tls();
        if( found ) {
            exists=true;
        } else {
            found = super::table_lookup(exists);
            set_tls(found);
        }
        return found;
    }
    void table_clear() {
        destroy_key();
        create_key();
        super::table_clear();
    }
    void table_swap( ets_base& other ) {
        using std::swap;
        __TBB_ASSERT(this!=&other, "Don't swap an instance with itself");
        swap(my_key, other.my_key);
        super::table_swap(other);
    }
};

//! Random access iterator for traversing the thread local copies.
template< typename Container, typename Value >
class enumerable_thread_specific_iterator
{
    //! current position in the concurrent_vector

    Container *my_container;
    typename Container::size_type my_index;
    mutable Value *my_value;

    template<typename C, typename T, typename U>
    friend bool operator==( const enumerable_thread_specific_iterator<C, T>& i,
                            const enumerable_thread_specific_iterator<C, U>& j );

    template<typename C, typename T, typename U>
    friend bool operator<( const enumerable_thread_specific_iterator<C,T>& i,
                           const enumerable_thread_specific_iterator<C,U>& j );

    template<typename C, typename T, typename U>
    friend std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<C,T>& i,
                                     const enumerable_thread_specific_iterator<C,U>& j );

    template<typename C, typename U>
    friend class enumerable_thread_specific_iterator;

public:
    //! STL support
    using difference_type = std::ptrdiff_t;
    using value_type = Value;
    using pointer = Value*;
    using reference = Value&;
    using iterator_category = std::random_access_iterator_tag;

    enumerable_thread_specific_iterator( const Container &container, typename Container::size_type index ) :
        my_container(&const_cast<Container &>(container)), my_index(index), my_value(nullptr) {}
    //! Default constructor
    enumerable_thread_specific_iterator() : my_container(nullptr), my_index(0), my_value(nullptr) {}

    template<typename U>
    enumerable_thread_specific_iterator( const enumerable_thread_specific_iterator<Container, U>& other ) :
        my_container( other.my_container ), my_index( other.my_index), my_value( const_cast<Value *>(other.my_value) ) {}

    enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset ) const {
        return enumerable_thread_specific_iterator(*my_container, my_index + offset);
    }

    friend enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset, enumerable_thread_specific_iterator v ) {
        return enumerable_thread_specific_iterator(*v.my_container, v.my_index + offset);
    }

    enumerable_thread_specific_iterator &operator+=( std::ptrdiff_t offset ) {
        my_index += offset;
        my_value = nullptr;
        return *this;
    }

    enumerable_thread_specific_iterator operator-( std::ptrdiff_t offset ) const {
        return enumerable_thread_specific_iterator( *my_container, my_index-offset );
    }

    enumerable_thread_specific_iterator &operator-=( std::ptrdiff_t offset ) {
        my_index -= offset;
        my_value = nullptr;
        return *this;
    }

    Value& operator*() const {
        Value* value = my_value;
        if( !value ) {
            value = my_value = (*my_container)[my_index].value();
        }
        __TBB_ASSERT( value==(*my_container)[my_index].value(), "corrupt cache" );
        return *value;
    }

    Value& operator[]( std::ptrdiff_t k ) const {
        return *(*my_container)[my_index + k].value();
    }

    Value* operator->() const {return &operator*();}

    enumerable_thread_specific_iterator& operator++() {
        ++my_index;
        my_value = nullptr;
        return *this;
    }

    enumerable_thread_specific_iterator& operator--() {
        --my_index;
        my_value = nullptr;
        return *this;
    }

    //! Post increment
    enumerable_thread_specific_iterator operator++(int) {
        enumerable_thread_specific_iterator result = *this;
        ++my_index;
        my_value = nullptr;
        return result;
    }
    //! Post decrement
    enumerable_thread_specific_iterator operator--(int) {
        enumerable_thread_specific_iterator result = *this;
        --my_index;
        my_value = nullptr;
        return result;
    }
};

template<typename Container, typename T, typename U>
bool operator==( const enumerable_thread_specific_iterator<Container, T>& i,
                 const enumerable_thread_specific_iterator<Container, U>& j ) {
    return i.my_index == j.my_index && i.my_container == j.my_container;
}

template<typename Container, typename T, typename U>
bool operator!=( const enumerable_thread_specific_iterator<Container,T>& i,
                 const enumerable_thread_specific_iterator<Container,U>& j ) {
    return !(i==j);
}

template<typename Container, typename T, typename U>
bool operator<( const enumerable_thread_specific_iterator<Container,T>& i,
                const enumerable_thread_specific_iterator<Container,U>& j ) {
    return i.my_index<j.my_index;
}

template<typename Container, typename T, typename U>
bool operator>( const enumerable_thread_specific_iterator<Container,T>& i,
                const enumerable_thread_specific_iterator<Container,U>& j ) {
    return j<i;
}

template<typename Container, typename T, typename U>
bool operator>=( const enumerable_thread_specific_iterator<Container,T>& i,
                 const enumerable_thread_specific_iterator<Container,U>& j ) {
    return !(i<j);
}

template<typename Container, typename T, typename U>
bool operator<=( const enumerable_thread_specific_iterator<Container,T>& i,
                 const enumerable_thread_specific_iterator<Container,U>& j ) {
    return !(j<i);
}

template<typename Container, typename T, typename U>
std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<Container,T>& i,
                          const enumerable_thread_specific_iterator<Container,U>& j ) {
    return i.my_index-j.my_index;
}

template<typename SegmentedContainer, typename Value >
class segmented_iterator
{
    template<typename C, typename T, typename U>
    friend bool operator==(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j);

    template<typename C, typename T, typename U>
    friend bool operator!=(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j);

    template<typename C, typename U>
    friend class segmented_iterator;

public:
    segmented_iterator() {my_segcont = nullptr;}

    segmented_iterator( const SegmentedContainer& _segmented_container ) :
        my_segcont(const_cast<SegmentedContainer*>(&_segmented_container)),
        outer_iter(my_segcont->end()) { }

    ~segmented_iterator() {}

    using InnerContainer = typename SegmentedContainer::value_type;
    using inner_iterator = typename InnerContainer::iterator;
    using outer_iterator = typename SegmentedContainer::iterator;

    // STL support
    // TODO: inherit all types from segmented container?
    using difference_type = std::ptrdiff_t;
    using value_type = Value;
    using size_type = typename SegmentedContainer::size_type;
    using pointer = Value*;
    using reference = Value&;
    using iterator_category = std::input_iterator_tag;

    // Copy constructor
    template<typename U>
    segmented_iterator(const segmented_iterator<SegmentedContainer, U>& other) :
        my_segcont(other.my_segcont),
        outer_iter(other.outer_iter),
        // can we assign a default-constructed iterator to inner if we're at the end?
        inner_iter(other.inner_iter)
    {}

    // assignment
    template<typename U>
    segmented_iterator& operator=( const segmented_iterator<SegmentedContainer, U>& other) {
        my_segcont = other.my_segcont;
        outer_iter = other.outer_iter;
        if(outer_iter != my_segcont->end()) inner_iter = other.inner_iter;
        return *this;
    }

    // allow assignment of outer iterator to segmented iterator. Once it is
    // assigned, move forward until a non-empty inner container is found or
    // the end of the outer container is reached.
    segmented_iterator& operator=(const outer_iterator& new_outer_iter) {
        __TBB_ASSERT(my_segcont != nullptr, nullptr);
        // check that this iterator points to something inside the segmented container
        for(outer_iter = new_outer_iter ;outer_iter!=my_segcont->end(); ++outer_iter) {
            if( !outer_iter->empty() ) {
                inner_iter = outer_iter->begin();
                break;
            }
        }
        return *this;
    }

    // pre-increment
    segmented_iterator& operator++() {
        advance_me();
        return *this;
    }

    // post-increment
    segmented_iterator operator++(int) {
        segmented_iterator tmp = *this;
        operator++();
        return tmp;
    }

    bool operator==(const outer_iterator& other_outer) const {
        __TBB_ASSERT(my_segcont != nullptr, nullptr);
        return (outer_iter == other_outer &&
                (outer_iter == my_segcont->end() || inner_iter == outer_iter->begin()));
    }

    bool operator!=(const outer_iterator& other_outer) const {
        return !operator==(other_outer);
    }

    // (i)* RHS
    reference operator*() const {
        __TBB_ASSERT(my_segcont != nullptr, nullptr);
        __TBB_ASSERT(outer_iter != my_segcont->end(), "Dereferencing a pointer at end of container");
        __TBB_ASSERT(inner_iter != outer_iter->end(), nullptr); // should never happen
        return *inner_iter;
    }

    // i->
    pointer operator->() const { return &operator*();}

private:
    SegmentedContainer* my_segcont;
    outer_iterator outer_iter;
    inner_iterator inner_iter;

    void advance_me() {
        __TBB_ASSERT(my_segcont != nullptr, nullptr);
        __TBB_ASSERT(outer_iter != my_segcont->end(), nullptr); // not true if there are no inner containers
        __TBB_ASSERT(inner_iter != outer_iter->end(), nullptr); // not true if the inner containers are all empty.
        ++inner_iter;
        while(inner_iter == outer_iter->end() && ++outer_iter != my_segcont->end()) {
            inner_iter = outer_iter->begin();
        }
    }
}; // segmented_iterator

template<typename SegmentedContainer, typename T, typename U>
bool operator==( const segmented_iterator<SegmentedContainer,T>& i,
                 const segmented_iterator<SegmentedContainer,U>& j ) {
    if(i.my_segcont != j.my_segcont) return false;
    if(i.my_segcont == nullptr) return true;
    if(i.outer_iter != j.outer_iter) return false;
    if(i.outer_iter == i.my_segcont->end()) return true;
    return i.inner_iter == j.inner_iter;
}

// !=
template<typename SegmentedContainer, typename T, typename U>
bool operator!=( const segmented_iterator<SegmentedContainer,T>& i,
                 const segmented_iterator<SegmentedContainer,U>& j ) {
    return !(i==j);
}

template<typename T>
struct construct_by_default: no_assign {
    void construct(void*where) {new(where) T();} // C++ note: the () in T() ensures zero initialization.
    construct_by_default( int ) {}
};

template<typename T>
struct construct_by_exemplar: no_assign {
    const T exemplar;
    void construct(void*where) {new(where) T(exemplar);}
    construct_by_exemplar( const T& t ) : exemplar(t) {}
    construct_by_exemplar( T&& t ) : exemplar(std::move(t)) {}
};

template<typename T, typename Finit>
struct construct_by_finit: no_assign {
    Finit f;
    void construct(void* where) {new(where) T(f());}
    construct_by_finit( Finit&& f_ ) : f(std::move(f_)) {}
};

template<typename T, typename... P>
struct construct_by_args: no_assign {
    stored_pack<P...> pack;
    void construct(void* where) {
        call( [where](const typename std::decay<P>::type&... args ){
            new(where) T(args...);
        }, pack );
    }
    construct_by_args( P&& ... args ) : pack(std::forward<P>(args)...) {}
};

// storage for initialization function pointer
// TODO: consider removing the template parameter T here and in callback_leaf
class callback_base {
public:
    // Clone *this
    virtual callback_base* clone() const = 0;
    // Destruct and free *this
    virtual void destroy() = 0;
    // Need virtual destructor to satisfy GCC compiler warning
    virtual ~callback_base() { }
    // Construct T at where
    virtual void construct(void* where) = 0;
};

template <typename Constructor>
class callback_leaf: public callback_base, Constructor {
    template<typename... P> callback_leaf( P&& ... params ) : Constructor(std::forward<P>(params)...) {}
    // TODO: make the construction/destruction consistent (use allocator.construct/destroy)
    using my_allocator_type = typename tbb::tbb_allocator<callback_leaf>;

    callback_base* clone() const override {
        return make(*this);
    }

    void destroy() override {
        my_allocator_type alloc;
        tbb::detail::allocator_traits<my_allocator_type>::destroy(alloc, this);
        tbb::detail::allocator_traits<my_allocator_type>::deallocate(alloc, this, 1);
    }

    void construct(void* where) override {
        Constructor::construct(where);
    }

public:
    template<typename... P>
    static callback_base* make( P&& ... params ) {
        void* where = my_allocator_type().allocate(1);
        return new(where) callback_leaf( std::forward<P>(params)... );
    }
};

//! Template for recording construction of objects in table
/** All maintenance of the space will be done explicitly on push_back,
    and all thread local copies must be destroyed before the concurrent
    vector is deleted.

    The flag is_built is initialized to false. When the local is
    successfully constructed, set the flag to true or call value_committed().
    If the constructor throws, the flag will be false.
*/
template<typename U>
struct ets_element {
    detail::aligned_space<U> my_space;
    bool is_built;
    ets_element() { is_built = false; } // not built yet
    U* value() { return my_space.begin(); }
    U* value_committed() { is_built = true; return my_space.begin(); }
    ~ets_element() {
        if(is_built) {
            my_space.begin()->~U();
            is_built = false;
        }
    }
};
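// Note (illustrative, hypothetical variable names): the construct_by_* functors above,
// wrapped in callback_leaf objects, back the enumerable_thread_specific constructors
// defined further below, e.g.
//     tbb::enumerable_thread_specific<int> a;                       // construct_by_default
//     tbb::enumerable_thread_specific<int> b(42);                   // construct_by_exemplar
//     tbb::enumerable_thread_specific<int> c([]{ return 7; });      // construct_by_finit
//     tbb::enumerable_thread_specific<std::pair<int,int>> d(1, 2);  // construct_by_args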
// A predicate that can be used for a compile-time compatibility check of ETS instances
// Ideally, it should have been declared inside the ETS class, but unfortunately
// in that case VS2013 does not enable the variadic constructor.
template<typename T, typename ETS> struct is_compatible_ets : std::false_type {};
template<typename T, typename U, typename A, ets_key_usage_type C>
struct is_compatible_ets< T, enumerable_thread_specific<U,A,C> > : std::is_same<T, U> {};

// A predicate that checks whether, for a variable 'foo' of type T, foo() is a valid expression
template <typename T> using has_empty_braces_operator = decltype(std::declval<T>()());
template <typename T> using is_callable_no_args = supports<T, has_empty_braces_operator>;

//! The enumerable_thread_specific container
/** enumerable_thread_specific has the following properties:
    - thread-local copies are lazily created, with default, exemplar or function initialization.
    - thread-local copies do not move during their lifetime (except on clear()), so the address of a copy is invariant.
    - the contained objects need not have operator=() defined if combine is not used.
    - enumerable_thread_specific containers may be copy-constructed or assigned.
    - thread-local copies can be managed by hash-table, or can be accessed via TLS storage for speed.
    - outside of parallel contexts, the contents of all thread-local copies are accessible by iterator or using the combine or combine_each methods

    @par Segmented iterator
    When the thread-local objects are containers with input_iterators defined, a segmented iterator may
    be used to iterate over all the elements of all thread-local copies.

    @par combine and combine_each
    - Both methods are defined for enumerable_thread_specific.
    - combine() requires the type T to have operator=() defined.
    - neither method modifies the contents of the object (though there is no guarantee that the applied functions do not modify the copies.)
    - Both are evaluated in a serial context (the methods are assumed to be non-benign.)

    @ingroup containers */
template <typename T, typename Allocator=cache_aligned_allocator<T>,
          ets_key_usage_type ETS_key_type=ets_no_key >
class enumerable_thread_specific: ets_base<ETS_key_type> {

    template<typename U, typename A, ets_key_usage_type C> friend class enumerable_thread_specific;

    using padded_element = padded<ets_element<T>>;

    //! A generic range, used to create range objects from the iterators
    template<typename I>
    class generic_range_type: public blocked_range<I> {
    public:
        using value_type = T;
        using reference = T&;
        using const_reference = const T&;
        using iterator = I;
        using difference_type = std::ptrdiff_t;

        generic_range_type( I begin_, I end_, std::size_t grainsize_ = 1) : blocked_range<I>(begin_,end_,grainsize_) {}
        template<typename U>
        generic_range_type( const generic_range_type<U>& r) : blocked_range<I>(r.begin(),r.end(),r.grainsize()) {}
        generic_range_type( generic_range_type& r, split ) : blocked_range<I>(r,split()) {}
    };

    using allocator_traits_type = tbb::detail::allocator_traits<Allocator>;

    using padded_allocator_type = typename allocator_traits_type::template rebind_alloc<padded_element>;
    using internal_collection_type = tbb::concurrent_vector< padded_element, padded_allocator_type >;

    callback_base *my_construct_callback;

    internal_collection_type my_locals;

    // TODO: consider unifying the callback mechanism for all create_local* methods below
    //   (likely non-compatible and requires interface version increase)
    void* create_local() override {
        padded_element& lref = *my_locals.grow_by(1);
        my_construct_callback->construct(lref.value());
        return lref.value_committed();
    }

    static void* create_local_by_copy( ets_base<ETS_key_type>& base, void* p ) {
        enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base);
        padded_element& lref = *ets.my_locals.grow_by(1);
        new(lref.value()) T(*static_cast<T*>(p));
        return lref.value_committed();
    }

    static void* create_local_by_move( ets_base<ETS_key_type>& base, void* p ) {
        enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base);
        padded_element& lref = *ets.my_locals.grow_by(1);
        new(lref.value()) T(std::move(*static_cast<T*>(p)));
        return lref.value_committed();
    }

    using array_allocator_type = typename allocator_traits_type::template rebind_alloc<uintptr_t>;

    // _size is in bytes
    void* create_array(std::size_t _size) override {
        std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t);
        return array_allocator_type().allocate(nelements);
    }

    void free_array( void* _ptr, std::size_t _size) override {
        std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t);
        array_allocator_type().deallocate( reinterpret_cast<uintptr_t *>(_ptr),nelements);
    }

public:

    //! Basic types
    using value_type = T;
    using allocator_type = Allocator;
    using size_type = typename internal_collection_type::size_type;
    using difference_type = typename internal_collection_type::difference_type;
    using reference = value_type&;
    using const_reference = const value_type&;

    using pointer = typename allocator_traits_type::pointer;
    using const_pointer = typename allocator_traits_type::const_pointer;

    // Iterator types
    using iterator = enumerable_thread_specific_iterator<internal_collection_type, value_type>;
    using const_iterator = enumerable_thread_specific_iterator<internal_collection_type, const value_type>;

    // Parallel range types
    using range_type = generic_range_type<iterator>;
    using const_range_type = generic_range_type<const_iterator>;

    //! Default constructor. Each local instance of T is default constructed.
    enumerable_thread_specific() : my_construct_callback(
        callback_leaf<construct_by_default<T> >::make(/*dummy argument*/0)
    ){}

    //! Constructor with initializer functor. Each local instance of T is constructed by T(finit()).
    template <typename Finit , typename = typename std::enable_if<is_callable_no_args<typename std::decay<Finit>::type>::value>::type>
    explicit enumerable_thread_specific( Finit finit ) : my_construct_callback(
        callback_leaf<construct_by_finit<T,Finit> >::make( std::move(finit) )
    ){}

    //! Constructor with exemplar. Each local instance of T is copy-constructed from the exemplar.
    explicit enumerable_thread_specific( const T& exemplar ) : my_construct_callback(
        callback_leaf<construct_by_exemplar<T> >::make( exemplar )
    ){}

    explicit enumerable_thread_specific( T&& exemplar ) : my_construct_callback(
        callback_leaf<construct_by_exemplar<T> >::make( std::move(exemplar) )
    ){}

    //! Variadic constructor with initializer arguments. Each local instance of T is constructed by T(args...)
    template <typename P1, typename... P,
              typename = typename std::enable_if<!is_callable_no_args<typename std::decay<P1>::type>::value
                                                 && !is_compatible_ets<T, typename std::decay<P1>::type>::value
                                                 && !std::is_same<T, typename std::decay<P1>::type>::value
                                                >::type>
    enumerable_thread_specific( P1&& arg1, P&& ... args ) : my_construct_callback(
        callback_leaf<construct_by_args<T,P1,P...> >::make( std::forward<P1>(arg1), std::forward<P>(args)... )
    ){}

    //! Destructor
    ~enumerable_thread_specific() {
        if(my_construct_callback) my_construct_callback->destroy();
        // Deallocate the hash table before overridden free_array() becomes inaccessible
        this->ets_base<ETS_key_type>::table_clear();
    }

    //! returns reference to local, discarding exists
    reference local() {
        bool exists;
        return local(exists);
    }

    //! Returns reference to calling thread's local copy, creating one if necessary
    reference local(bool& exists) {
        void* ptr = this->table_lookup(exists);
        return *(T*)ptr;
    }

    //! Get the number of local copies
    size_type size() const { return my_locals.size(); }

    //! true if there have been no local copies created
    bool empty() const { return my_locals.empty(); }

    //! begin iterator
    iterator begin() { return iterator( my_locals, 0 ); }
    //! end iterator
    iterator end() { return iterator(my_locals, my_locals.size() ); }

    //! begin const iterator
    const_iterator begin() const { return const_iterator(my_locals, 0); }

    //! end const iterator
    const_iterator end() const { return const_iterator(my_locals, my_locals.size()); }

    //! Get range for parallel algorithms
    range_type range( std::size_t grainsize=1 ) { return range_type( begin(), end(), grainsize ); }

    //! Get const range for parallel algorithms
    const_range_type range( std::size_t grainsize=1 ) const { return const_range_type( begin(), end(), grainsize ); }
    //! Destroys local copies
    void clear() {
        my_locals.clear();
        this->table_clear();
        // callback is not destroyed
    }

private:
    template<typename A2, ets_key_usage_type C2>
    void internal_copy(const enumerable_thread_specific<T, A2, C2>& other) {
        // this tests is_compatible_ets
        static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" );
        // Initialize my_construct_callback first, so that it is valid even if rest of this routine throws an exception.
        my_construct_callback = other.my_construct_callback->clone();
        __TBB_ASSERT(my_locals.size()==0, nullptr);
        my_locals.reserve(other.size());
        this->table_elementwise_copy( other, create_local_by_copy );
    }

    void internal_swap(enumerable_thread_specific& other) {
        using std::swap;
        __TBB_ASSERT( this!=&other, nullptr);
        swap(my_construct_callback, other.my_construct_callback);
        // concurrent_vector::swap() preserves storage space,
        // so addresses to the vector kept in ETS hash table remain valid.
        swap(my_locals, other.my_locals);
        this->ets_base<ETS_key_type>::table_swap(other);
    }

    template<typename A2, ets_key_usage_type C2>
    void internal_move(enumerable_thread_specific<T, A2, C2>&& other) {
        static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" );
        my_construct_callback = other.my_construct_callback;
        other.my_construct_callback = nullptr;
        __TBB_ASSERT(my_locals.size()==0, nullptr);
        my_locals.reserve(other.size());
        this->table_elementwise_copy( other, create_local_by_move );
    }

public:
    enumerable_thread_specific( const enumerable_thread_specific& other )
        : ets_base<ETS_key_type>() /* prevents GCC warnings with -Wextra */
    {
        internal_copy(other);
    }

    template<typename Alloc, ets_key_usage_type Cachetype>
    enumerable_thread_specific( const enumerable_thread_specific<T, Alloc, Cachetype>& other )
    {
        internal_copy(other);
    }

    enumerable_thread_specific( enumerable_thread_specific&& other ) : my_construct_callback()
    {
        // TODO: use internal_move correctly here
        internal_swap(other);
    }

    template<typename Alloc, ets_key_usage_type Cachetype>
    enumerable_thread_specific( enumerable_thread_specific<T, Alloc, Cachetype>&& other ) : my_construct_callback()
    {
        internal_move(std::move(other));
    }

    enumerable_thread_specific& operator=( const enumerable_thread_specific& other )
    {
        if( this != &other ) {
            this->clear();
            my_construct_callback->destroy();
            internal_copy( other );
        }
        return *this;
    }

    template<typename Alloc, ets_key_usage_type Cachetype>
    enumerable_thread_specific& operator=( const enumerable_thread_specific<T, Alloc, Cachetype>& other )
    {
        __TBB_ASSERT( static_cast<void*>(this)!=static_cast<const void*>(&other), nullptr); // Objects of different types
        this->clear();
        my_construct_callback->destroy();
        internal_copy(other);
        return *this;
    }

    enumerable_thread_specific& operator=( enumerable_thread_specific&& other )
    {
        if( this != &other ) {
            // TODO: use internal_move correctly here
            internal_swap(other);
        }
        return *this;
    }

    template<typename Alloc, ets_key_usage_type Cachetype>
    enumerable_thread_specific& operator=( enumerable_thread_specific<T, Alloc, Cachetype>&& other )
    {
        __TBB_ASSERT( static_cast<void*>(this)!=static_cast<const void*>(&other), nullptr); // Objects of different types
        this->clear();
        my_construct_callback->destroy();
        internal_move(std::move(other));
        return *this;
    }

    // CombineFunc has signature T(T,T) or T(const T&, const T&)
    template <typename CombineFunc>
    T combine(CombineFunc f_combine) {
        if(begin() == end()) {
            ets_element<T> location;
            my_construct_callback->construct(location.value());
            return *location.value_committed();
        }
        const_iterator ci = begin();
        T my_result = *ci;
        while(++ci != end())
            my_result = f_combine( my_result, *ci );
        return my_result;
    }

    // combine_func_t takes T by value or by [const] reference, and returns nothing
    template <typename CombineFunc>
    void combine_each(CombineFunc f_combine) {
        for(iterator ci = begin(); ci != end(); ++ci) {
            f_combine( *ci );
        }
    }

}; // enumerable_thread_specific
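// Illustrative usage sketch (not part of the library interface; n, items and the lambdas
// are hypothetical, and the parallel_for header is assumed to be included separately):
//
//     tbb::enumerable_thread_specific<int> partial_sums(0);       // exemplar-initialized
//     tbb::parallel_for(0, n, [&](int i) {
//         partial_sums.local() += items[i];                       // lazily creates this thread's copy
//     });
//     // Serial context: reduce over all thread-local copies.
//     int total = partial_sums.combine([](int x, int y) { return x + y; });
//     // Or visit each copy in turn without producing a single result.
//     partial_sums.combine_each([](int s) { /* inspect s */ });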
template< typename Container >
class flattened2d {
    // This intermediate typedef is to address issues with VC7.1 compilers
    using conval_type = typename Container::value_type;

public:
    //! Basic types
    using size_type = typename conval_type::size_type;
    using difference_type = typename conval_type::difference_type;
    using allocator_type = typename conval_type::allocator_type;
    using value_type = typename conval_type::value_type;
    using reference = typename conval_type::reference;
    using const_reference = typename conval_type::const_reference;
    using pointer = typename conval_type::pointer;
    using const_pointer = typename conval_type::const_pointer;

    using iterator = segmented_iterator<Container, value_type>;
    using const_iterator = segmented_iterator<Container, const value_type>;

    flattened2d( const Container &c, typename Container::const_iterator b, typename Container::const_iterator e ) :
        my_container(const_cast<Container*>(&c)), my_begin(b), my_end(e) { }

    explicit flattened2d( const Container &c ) :
        my_container(const_cast<Container*>(&c)), my_begin(c.begin()), my_end(c.end()) { }

    iterator begin() { return iterator(*my_container) = my_begin; }
    iterator end() { return iterator(*my_container) = my_end; }
    const_iterator begin() const { return const_iterator(*my_container) = my_begin; }
    const_iterator end() const { return const_iterator(*my_container) = my_end; }

    size_type size() const {
        size_type tot_size = 0;
        for(typename Container::const_iterator i = my_begin; i != my_end; ++i) {
            tot_size += i->size();
        }
        return tot_size;
    }

private:
    Container *my_container;
    typename Container::const_iterator my_begin;
    typename Container::const_iterator my_end;
};

template <typename Container>
flattened2d<Container> flatten2d(const Container &c, const typename Container::const_iterator b, const typename Container::const_iterator e) {
    return flattened2d<Container>(c, b, e);
}

template <typename Container>
flattened2d<Container> flatten2d(const Container &c) {
    return flattened2d<Container>(c);
}
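// Illustrative usage sketch for flatten2d (hypothetical names; assumes a serial context):
//
//     tbb::enumerable_thread_specific<std::vector<int>> per_thread_buffers;
//     // ... each thread appends to per_thread_buffers.local() inside a parallel algorithm ...
//     auto flat = tbb::flatten2d(per_thread_buffers);
//     std::vector<int> merged(flat.begin(), flat.end()); // visits every element of every local copy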
} // namespace d1
} // namespace detail

inline namespace v1 {
using detail::d1::enumerable_thread_specific;
using detail::d1::flattened2d;
using detail::d1::flatten2d;
// ets enum keys
using detail::d1::ets_key_usage_type;
using detail::d1::ets_key_per_instance;
using detail::d1::ets_no_key;
#if __TBB_RESUMABLE_TASKS
using detail::d1::ets_suspend_aware;
#endif
} // inline namespace v1

} // namespace tbb

#endif // __TBB_enumerable_thread_specific_H