1 /* 2 Copyright (c) 2005-2022 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 #ifndef __TBB_enumerable_thread_specific_H 18 #define __TBB_enumerable_thread_specific_H 19 20 #include "detail/_config.h" 21 #include "detail/_namespace_injection.h" 22 #include "detail/_assert.h" 23 #include "detail/_template_helpers.h" 24 #include "detail/_aligned_space.h" 25 26 #include "concurrent_vector.h" 27 #include "tbb_allocator.h" 28 #include "cache_aligned_allocator.h" 29 #include "profiling.h" 30 31 #include <atomic> 32 #include <thread> 33 #include <cstring> // memcpy 34 #include <cstddef> // std::ptrdiff_t 35 36 #include "task.h" // for task::suspend_point 37 38 #if _WIN32 || _WIN64 39 #include <windows.h> 40 #else 41 #include <pthread.h> 42 #endif 43 44 namespace tbb { 45 namespace detail { 46 namespace d1 { 47 48 //! enum for selecting between single key and key-per-instance versions 49 enum ets_key_usage_type { 50 ets_key_per_instance 51 , ets_no_key 52 #if __TBB_RESUMABLE_TASKS 53 , ets_suspend_aware 54 #endif 55 }; 56 57 // Forward declaration to use in internal classes 58 template <typename T, typename Allocator, ets_key_usage_type ETS_key_type> 59 class enumerable_thread_specific; 60 61 template <std::size_t ThreadIDSize> 62 struct internal_ets_key_selector { 63 using key_type = std::thread::id; 64 static key_type current_key() { 65 return std::this_thread::get_id(); 66 } 67 }; 68 69 // Intel Compiler on OSX cannot create atomics objects that instantiated from non-fundamental types 70 #if __INTEL_COMPILER && __APPLE__ 71 template<> 72 struct internal_ets_key_selector<sizeof(std::size_t)> { 73 using key_type = std::size_t; 74 static key_type current_key() { 75 auto id = std::this_thread::get_id(); 76 return reinterpret_cast<key_type&>(id); 77 } 78 }; 79 #endif 80 81 template <ets_key_usage_type ETS_key_type> 82 struct ets_key_selector : internal_ets_key_selector<sizeof(std::thread::id)> {}; 83 84 #if __TBB_RESUMABLE_TASKS 85 template <> 86 struct ets_key_selector<ets_suspend_aware> { 87 using key_type = suspend_point; 88 static key_type current_key() { 89 return r1::current_suspend_point(); 90 } 91 }; 92 #endif 93 94 template<ets_key_usage_type ETS_key_type> 95 class ets_base : detail::no_copy { 96 protected: 97 using key_type = typename ets_key_selector<ETS_key_type>::key_type; 98 99 public: 100 struct slot; 101 struct array { 102 array* next; 103 std::size_t lg_size; 104 slot& at( std::size_t k ) { 105 return (reinterpret_cast<slot*>(reinterpret_cast<void*>(this+1)))[k]; 106 } 107 std::size_t size() const { return std::size_t(1) << lg_size; } 108 std::size_t mask() const { return size() - 1; } 109 std::size_t start( std::size_t h ) const { 110 return h >> (8 * sizeof(std::size_t) - lg_size); 111 } 112 }; 113 struct slot { 114 std::atomic<key_type> key; 115 void* ptr; 116 bool empty() const { return key.load(std::memory_order_relaxed) == key_type(); } 117 bool match( key_type k ) const { return key.load(std::memory_order_relaxed) == k; } 118 bool claim( key_type k ) { 119 
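// claim() tries to atomically install k into an empty slot with a single
// compare_exchange_strong: at most one thread can succeed for a given slot,
// so the winner may then publish its ptr without further synchronization.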
// TODO: maybe claim ptr, because key_type is not guaranteed to fit into word size 120 key_type expected = key_type(); 121 return key.compare_exchange_strong(expected, k); 122 } 123 }; 124 125 protected: 126 //! Root of linked list of arrays of decreasing size. 127 /** nullptr if and only if my_count==0. 128 Each array in the list is half the size of its predecessor. */ 129 std::atomic<array*> my_root; 130 std::atomic<std::size_t> my_count; 131 132 virtual void* create_local() = 0; 133 virtual void* create_array(std::size_t _size) = 0; // _size in bytes 134 virtual void free_array(void* ptr, std::size_t _size) = 0; // _size in bytes 135 136 array* allocate( std::size_t lg_size ) { 137 std::size_t n = std::size_t(1) << lg_size; 138 array* a = static_cast<array*>(create_array(sizeof(array) + n * sizeof(slot))); 139 a->lg_size = lg_size; 140 std::memset( a + 1, 0, n * sizeof(slot) ); 141 return a; 142 } 143 void deallocate(array* a) { 144 std::size_t n = std::size_t(1) << (a->lg_size); 145 free_array( static_cast<void*>(a), std::size_t(sizeof(array) + n * sizeof(slot)) ); 146 } 147 148 ets_base() : my_root{nullptr}, my_count{0} {} 149 virtual ~ets_base(); // g++ complains if this is not virtual 150 151 void* table_lookup( bool& exists ); 152 void table_clear(); 153 // The following functions are not used in concurrent context, 154 // so we don't need synchronization and ITT annotations there. 155 template <ets_key_usage_type E2> 156 void table_elementwise_copy( const ets_base& other, 157 void*(*add_element)(ets_base<E2>&, void*) ) { 158 __TBB_ASSERT(!my_root.load(std::memory_order_relaxed), nullptr); 159 __TBB_ASSERT(!my_count.load(std::memory_order_relaxed), nullptr); 160 if( !other.my_root.load(std::memory_order_relaxed) ) return; 161 array* root = allocate(other.my_root.load(std::memory_order_relaxed)->lg_size); 162 my_root.store(root, std::memory_order_relaxed); 163 root->next = nullptr; 164 my_count.store(other.my_count.load(std::memory_order_relaxed), std::memory_order_relaxed); 165 std::size_t mask = root->mask(); 166 for( array* r = other.my_root.load(std::memory_order_relaxed); r; r = r->next ) { 167 for( std::size_t i = 0; i < r->size(); ++i ) { 168 slot& s1 = r->at(i); 169 if( !s1.empty() ) { 170 for( std::size_t j = root->start(std::hash<key_type>{}(s1.key.load(std::memory_order_relaxed))); ; j = (j+1)&mask ) { 171 slot& s2 = root->at(j); 172 if( s2.empty() ) { 173 s2.ptr = add_element(static_cast<ets_base<E2>&>(*this), s1.ptr); 174 s2.key.store(s1.key.load(std::memory_order_relaxed), std::memory_order_relaxed); 175 break; 176 } 177 else if( s2.match(s1.key.load(std::memory_order_relaxed)) ) 178 break; 179 } 180 } 181 } 182 } 183 } 184 void table_swap( ets_base& other ) { 185 __TBB_ASSERT(this!=&other, "Don't swap an instance with itself"); 186 swap_atomics_relaxed(my_root, other.my_root); 187 swap_atomics_relaxed(my_count, other.my_count); 188 } 189 }; 190 191 template<ets_key_usage_type ETS_key_type> 192 ets_base<ETS_key_type>::~ets_base() { 193 __TBB_ASSERT(!my_root.load(std::memory_order_relaxed), nullptr); 194 } 195 196 template<ets_key_usage_type ETS_key_type> 197 void ets_base<ETS_key_type>::table_clear() { 198 while ( array* r = my_root.load(std::memory_order_relaxed) ) { 199 my_root.store(r->next, std::memory_order_relaxed); 200 deallocate(r); 201 } 202 my_count.store(0, std::memory_order_relaxed); 203 } 204 205 template<ets_key_usage_type ETS_key_type> 206 void* ets_base<ETS_key_type>::table_lookup( bool& exists ) { 207 const key_type k = 
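// The lookup key identifies the calling thread: std::this_thread::get_id()
// by default, or the current suspend point for ets_suspend_aware instances
// (see ets_key_selector above).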
ets_key_selector<ETS_key_type>::current_key(); 208 209 __TBB_ASSERT(k != key_type(), nullptr); 210 void* found; 211 std::size_t h = std::hash<key_type>{}(k); 212 for( array* r = my_root.load(std::memory_order_acquire); r; r = r->next ) { 213 call_itt_notify(acquired,r); 214 std::size_t mask=r->mask(); 215 for(std::size_t i = r->start(h); ;i=(i+1)&mask) { 216 slot& s = r->at(i); 217 if( s.empty() ) break; 218 if( s.match(k) ) { 219 if( r == my_root.load(std::memory_order_acquire) ) { 220 // Success at top level 221 exists = true; 222 return s.ptr; 223 } else { 224 // Success at some other level. Need to insert at top level. 225 exists = true; 226 found = s.ptr; 227 goto insert; 228 } 229 } 230 } 231 } 232 // Key does not yet exist. The density of slots in the table does not exceed 0.5, 233 // for if this will occur a new table is allocated with double the current table 234 // size, which is swapped in as the new root table. So an empty slot is guaranteed. 235 exists = false; 236 found = create_local(); 237 { 238 std::size_t c = ++my_count; 239 array* r = my_root.load(std::memory_order_acquire); 240 call_itt_notify(acquired,r); 241 if( !r || c > r->size()/2 ) { 242 std::size_t s = r ? r->lg_size : 2; 243 while( c > std::size_t(1)<<(s-1) ) ++s; 244 array* a = allocate(s); 245 for(;;) { 246 a->next = r; 247 call_itt_notify(releasing,a); 248 array* new_r = r; 249 if( my_root.compare_exchange_strong(new_r, a) ) break; 250 call_itt_notify(acquired, new_r); 251 __TBB_ASSERT(new_r != nullptr, nullptr); 252 if( new_r->lg_size >= s ) { 253 // Another thread inserted an equal or bigger array, so our array is superfluous. 254 deallocate(a); 255 break; 256 } 257 r = new_r; 258 } 259 } 260 } 261 insert: 262 // Whether a slot has been found in an older table, or if it has been inserted at this level, 263 // it has already been accounted for in the total. Guaranteed to be room for it, and it is 264 // not present, so search for empty slot and use it. 265 array* ir = my_root.load(std::memory_order_acquire); 266 call_itt_notify(acquired, ir); 267 std::size_t mask = ir->mask(); 268 for(std::size_t i = ir->start(h);; i = (i+1)&mask) { 269 slot& s = ir->at(i); 270 if( s.empty() ) { 271 if( s.claim(k) ) { 272 s.ptr = found; 273 return found; 274 } 275 } 276 } 277 } 278 279 //! 
Specialization that exploits native TLS 280 template <> 281 class ets_base<ets_key_per_instance>: public ets_base<ets_no_key> { 282 using super = ets_base<ets_no_key>; 283 #if _WIN32||_WIN64 284 #if __TBB_WIN8UI_SUPPORT 285 using tls_key_t = DWORD; 286 void create_key() { my_key = FlsAlloc(nullptr); } 287 void destroy_key() { FlsFree(my_key); } 288 void set_tls(void * value) { FlsSetValue(my_key, (LPVOID)value); } 289 void* get_tls() { return (void *)FlsGetValue(my_key); } 290 #else 291 using tls_key_t = DWORD; 292 void create_key() { my_key = TlsAlloc(); } 293 void destroy_key() { TlsFree(my_key); } 294 void set_tls(void * value) { TlsSetValue(my_key, (LPVOID)value); } 295 void* get_tls() { return (void *)TlsGetValue(my_key); } 296 #endif 297 #else 298 using tls_key_t = pthread_key_t; 299 void create_key() { pthread_key_create(&my_key, nullptr); } 300 void destroy_key() { pthread_key_delete(my_key); } 301 void set_tls( void * value ) const { pthread_setspecific(my_key, value); } 302 void* get_tls() const { return pthread_getspecific(my_key); } 303 #endif 304 tls_key_t my_key; 305 virtual void* create_local() override = 0; 306 virtual void* create_array(std::size_t _size) override = 0; // _size in bytes 307 virtual void free_array(void* ptr, std::size_t _size) override = 0; // size in bytes 308 protected: 309 ets_base() {create_key();} 310 ~ets_base() {destroy_key();} 311 void* table_lookup( bool& exists ) { 312 void* found = get_tls(); 313 if( found ) { 314 exists=true; 315 } else { 316 found = super::table_lookup(exists); 317 set_tls(found); 318 } 319 return found; 320 } 321 void table_clear() { 322 destroy_key(); 323 create_key(); 324 super::table_clear(); 325 } 326 void table_swap( ets_base& other ) { 327 using std::swap; 328 __TBB_ASSERT(this!=&other, "Don't swap an instance with itself"); 329 swap(my_key, other.my_key); 330 super::table_swap(other); 331 } 332 }; 333 334 //! Random access iterator for traversing the thread local copies. 335 template< typename Container, typename Value > 336 class enumerable_thread_specific_iterator 337 { 338 //! current position in the concurrent_vector 339 340 Container *my_container; 341 typename Container::size_type my_index; 342 mutable Value *my_value; 343 344 template<typename C, typename T, typename U> 345 friend bool operator==( const enumerable_thread_specific_iterator<C, T>& i, 346 const enumerable_thread_specific_iterator<C, U>& j ); 347 348 template<typename C, typename T, typename U> 349 friend bool operator<( const enumerable_thread_specific_iterator<C,T>& i, 350 const enumerable_thread_specific_iterator<C,U>& j ); 351 352 template<typename C, typename T, typename U> 353 friend std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<C,T>& i, 354 const enumerable_thread_specific_iterator<C,U>& j ); 355 356 template<typename C, typename U> 357 friend class enumerable_thread_specific_iterator; 358 359 public: 360 //! STL support 361 using difference_type = std::ptrdiff_t; 362 using value_type = Value; 363 using pointer = Value*; 364 using reference = Value&; 365 using iterator_category = std::random_access_iterator_tag; 366 367 enumerable_thread_specific_iterator( const Container &container, typename Container::size_type index ) : 368 my_container(&const_cast<Container &>(container)), my_index(index), my_value(nullptr) {} 369 370 //! 
Default constructor 371 enumerable_thread_specific_iterator() : my_container(nullptr), my_index(0), my_value(nullptr) {} 372 373 template<typename U> 374 enumerable_thread_specific_iterator( const enumerable_thread_specific_iterator<Container, U>& other ) : 375 my_container( other.my_container ), my_index( other.my_index), my_value( const_cast<Value *>(other.my_value) ) {} 376 377 enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset ) const { 378 return enumerable_thread_specific_iterator(*my_container, my_index + offset); 379 } 380 381 friend enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset, enumerable_thread_specific_iterator v ) { 382 return enumerable_thread_specific_iterator(*v.my_container, v.my_index + offset); 383 } 384 385 enumerable_thread_specific_iterator &operator+=( std::ptrdiff_t offset ) { 386 my_index += offset; 387 my_value = nullptr; 388 return *this; 389 } 390 391 enumerable_thread_specific_iterator operator-( std::ptrdiff_t offset ) const { 392 return enumerable_thread_specific_iterator( *my_container, my_index-offset ); 393 } 394 395 enumerable_thread_specific_iterator &operator-=( std::ptrdiff_t offset ) { 396 my_index -= offset; 397 my_value = nullptr; 398 return *this; 399 } 400 401 Value& operator*() const { 402 Value* value = my_value; 403 if( !value ) { 404 value = my_value = (*my_container)[my_index].value(); 405 } 406 __TBB_ASSERT( value==(*my_container)[my_index].value(), "corrupt cache" ); 407 return *value; 408 } 409 410 Value& operator[]( std::ptrdiff_t k ) const { 411 return *(*my_container)[my_index + k].value(); 412 } 413 414 Value* operator->() const {return &operator*();} 415 416 enumerable_thread_specific_iterator& operator++() { 417 ++my_index; 418 my_value = nullptr; 419 return *this; 420 } 421 422 enumerable_thread_specific_iterator& operator--() { 423 --my_index; 424 my_value = nullptr; 425 return *this; 426 } 427 428 //! Post increment 429 enumerable_thread_specific_iterator operator++(int) { 430 enumerable_thread_specific_iterator result = *this; 431 ++my_index; 432 my_value = nullptr; 433 return result; 434 } 435 436 //! 
Post decrement 437 enumerable_thread_specific_iterator operator--(int) { 438 enumerable_thread_specific_iterator result = *this; 439 --my_index; 440 my_value = nullptr; 441 return result; 442 } 443 }; 444 445 template<typename Container, typename T, typename U> 446 bool operator==( const enumerable_thread_specific_iterator<Container, T>& i, 447 const enumerable_thread_specific_iterator<Container, U>& j ) { 448 return i.my_index == j.my_index && i.my_container == j.my_container; 449 } 450 451 template<typename Container, typename T, typename U> 452 bool operator!=( const enumerable_thread_specific_iterator<Container,T>& i, 453 const enumerable_thread_specific_iterator<Container,U>& j ) { 454 return !(i==j); 455 } 456 457 template<typename Container, typename T, typename U> 458 bool operator<( const enumerable_thread_specific_iterator<Container,T>& i, 459 const enumerable_thread_specific_iterator<Container,U>& j ) { 460 return i.my_index<j.my_index; 461 } 462 463 template<typename Container, typename T, typename U> 464 bool operator>( const enumerable_thread_specific_iterator<Container,T>& i, 465 const enumerable_thread_specific_iterator<Container,U>& j ) { 466 return j<i; 467 } 468 469 template<typename Container, typename T, typename U> 470 bool operator>=( const enumerable_thread_specific_iterator<Container,T>& i, 471 const enumerable_thread_specific_iterator<Container,U>& j ) { 472 return !(i<j); 473 } 474 475 template<typename Container, typename T, typename U> 476 bool operator<=( const enumerable_thread_specific_iterator<Container,T>& i, 477 const enumerable_thread_specific_iterator<Container,U>& j ) { 478 return !(j<i); 479 } 480 481 template<typename Container, typename T, typename U> 482 std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<Container,T>& i, 483 const enumerable_thread_specific_iterator<Container,U>& j ) { 484 return i.my_index-j.my_index; 485 } 486 487 template<typename SegmentedContainer, typename Value > 488 class segmented_iterator 489 { 490 template<typename C, typename T, typename U> 491 friend bool operator==(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j); 492 493 template<typename C, typename T, typename U> 494 friend bool operator!=(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j); 495 496 template<typename C, typename U> 497 friend class segmented_iterator; 498 499 public: 500 segmented_iterator() {my_segcont = nullptr;} 501 502 segmented_iterator( const SegmentedContainer& _segmented_container ) : 503 my_segcont(const_cast<SegmentedContainer*>(&_segmented_container)), 504 outer_iter(my_segcont->end()) { } 505 506 ~segmented_iterator() {} 507 508 using InnerContainer = typename SegmentedContainer::value_type; 509 using inner_iterator = typename InnerContainer::iterator; 510 using outer_iterator = typename SegmentedContainer::iterator; 511 512 // STL support 513 // TODO: inherit all types from segmented container? 514 using difference_type = std::ptrdiff_t; 515 using value_type = Value; 516 using size_type = typename SegmentedContainer::size_type; 517 using pointer = Value*; 518 using reference = Value&; 519 using iterator_category = std::input_iterator_tag; 520 521 // Copy Constructor 522 template<typename U> 523 segmented_iterator(const segmented_iterator<SegmentedContainer, U>& other) : 524 my_segcont(other.my_segcont), 525 outer_iter(other.outer_iter), 526 // can we assign a default-constructed iterator to inner if we're at the end? 
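// Note: unlike operator=() below, this copy constructor copies inner_iter
// unconditionally, even when other.outer_iter is at end().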
527 inner_iter(other.inner_iter) 528 {} 529 530 // assignment 531 template<typename U> 532 segmented_iterator& operator=( const segmented_iterator<SegmentedContainer, U>& other) { 533 my_segcont = other.my_segcont; 534 outer_iter = other.outer_iter; 535 if(outer_iter != my_segcont->end()) inner_iter = other.inner_iter; 536 return *this; 537 } 538 539 // allow assignment of outer iterator to segmented iterator. Once it is 540 // assigned, move forward until a non-empty inner container is found or 541 // the end of the outer container is reached. 542 segmented_iterator& operator=(const outer_iterator& new_outer_iter) { 543 __TBB_ASSERT(my_segcont != nullptr, nullptr); 544 // check that this iterator points to something inside the segmented container 545 for(outer_iter = new_outer_iter ;outer_iter!=my_segcont->end(); ++outer_iter) { 546 if( !outer_iter->empty() ) { 547 inner_iter = outer_iter->begin(); 548 break; 549 } 550 } 551 return *this; 552 } 553 554 // pre-increment 555 segmented_iterator& operator++() { 556 advance_me(); 557 return *this; 558 } 559 560 // post-increment 561 segmented_iterator operator++(int) { 562 segmented_iterator tmp = *this; 563 operator++(); 564 return tmp; 565 } 566 567 bool operator==(const outer_iterator& other_outer) const { 568 __TBB_ASSERT(my_segcont != nullptr, nullptr); 569 return (outer_iter == other_outer && 570 (outer_iter == my_segcont->end() || inner_iter == outer_iter->begin())); 571 } 572 573 bool operator!=(const outer_iterator& other_outer) const { 574 return !operator==(other_outer); 575 576 } 577 578 // (i)* RHS 579 reference operator*() const { 580 __TBB_ASSERT(my_segcont != nullptr, nullptr); 581 __TBB_ASSERT(outer_iter != my_segcont->end(), "Dereferencing a pointer at end of container"); 582 __TBB_ASSERT(inner_iter != outer_iter->end(), nullptr); // should never happen 583 return *inner_iter; 584 } 585 586 // i-> 587 pointer operator->() const { return &operator*();} 588 589 private: 590 SegmentedContainer* my_segcont; 591 outer_iterator outer_iter; 592 inner_iterator inner_iter; 593 594 void advance_me() { 595 __TBB_ASSERT(my_segcont != nullptr, nullptr); 596 __TBB_ASSERT(outer_iter != my_segcont->end(), nullptr); // not true if there are no inner containers 597 __TBB_ASSERT(inner_iter != outer_iter->end(), nullptr); // not true if the inner containers are all empty. 598 ++inner_iter; 599 while(inner_iter == outer_iter->end() && ++outer_iter != my_segcont->end()) { 600 inner_iter = outer_iter->begin(); 601 } 602 } 603 }; // segmented_iterator 604 605 template<typename SegmentedContainer, typename T, typename U> 606 bool operator==( const segmented_iterator<SegmentedContainer,T>& i, 607 const segmented_iterator<SegmentedContainer,U>& j ) { 608 if(i.my_segcont != j.my_segcont) return false; 609 if(i.my_segcont == nullptr) return true; 610 if(i.outer_iter != j.outer_iter) return false; 611 if(i.outer_iter == i.my_segcont->end()) return true; 612 return i.inner_iter == j.inner_iter; 613 } 614 615 // != 616 template<typename SegmentedContainer, typename T, typename U> 617 bool operator!=( const segmented_iterator<SegmentedContainer,T>& i, 618 const segmented_iterator<SegmentedContainer,U>& j ) { 619 return !(i==j); 620 } 621 622 template<typename T> 623 struct construct_by_default: no_assign { 624 void construct(void*where) {new(where) T();} // C++ note: the () in T() ensure zero initialization. 
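// The int parameter below is a dummy used only to give this functor a
// non-default constructor; callers pass 0 (see callback_leaf<...>::make(/*dummy argument*/0)
// in the default constructor of enumerable_thread_specific).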
625 construct_by_default( int ) {} 626 }; 627 628 template<typename T> 629 struct construct_by_exemplar: no_assign { 630 const T exemplar; 631 void construct(void*where) {new(where) T(exemplar);} 632 construct_by_exemplar( const T& t ) : exemplar(t) {} 633 construct_by_exemplar( T&& t ) : exemplar(std::move(t)) {} 634 }; 635 636 template<typename T, typename Finit> 637 struct construct_by_finit: no_assign { 638 Finit f; 639 void construct(void* where) {new(where) T(f());} 640 construct_by_finit( Finit&& f_ ) : f(std::move(f_)) {} 641 }; 642 643 template<typename T, typename... P> 644 struct construct_by_args: no_assign { 645 stored_pack<P...> pack; 646 void construct(void* where) { 647 call( [where](const typename std::decay<P>::type&... args ){ 648 new(where) T(args...); 649 }, pack ); 650 } 651 construct_by_args( P&& ... args ) : pack(std::forward<P>(args)...) {} 652 }; 653 654 // storage for initialization function pointer 655 // TODO: consider removing the template parameter T here and in callback_leaf 656 class callback_base { 657 public: 658 // Clone *this 659 virtual callback_base* clone() const = 0; 660 // Destruct and free *this 661 virtual void destroy() = 0; 662 // Need virtual destructor to satisfy GCC compiler warning 663 virtual ~callback_base() { } 664 // Construct T at where 665 virtual void construct(void* where) = 0; 666 }; 667 668 template <typename Constructor> 669 class callback_leaf: public callback_base, Constructor { 670 template<typename... P> callback_leaf( P&& ... params ) : Constructor(std::forward<P>(params)...) {} 671 // TODO: make the construction/destruction consistent (use allocator.construct/destroy) 672 using my_allocator_type = typename tbb::tbb_allocator<callback_leaf>; 673 674 callback_base* clone() const override { 675 return make(*this); 676 } 677 678 void destroy() override { 679 my_allocator_type alloc; 680 tbb::detail::allocator_traits<my_allocator_type>::destroy(alloc, this); 681 tbb::detail::allocator_traits<my_allocator_type>::deallocate(alloc, this, 1); 682 } 683 684 void construct(void* where) override { 685 Constructor::construct(where); 686 } 687 688 public: 689 template<typename... P> 690 static callback_base* make( P&& ... params ) { 691 void* where = my_allocator_type().allocate(1); 692 return new(where) callback_leaf( std::forward<P>(params)... ); 693 } 694 }; 695 696 //! Template for recording construction of objects in table 697 /** All maintenance of the space will be done explicitly on push_back, 698 and all thread local copies must be destroyed before the concurrent 699 vector is deleted. 700 701 The flag is_built is initialized to false. When the local is 702 successfully-constructed, set the flag to true or call value_committed(). 703 If the constructor throws, the flag will be false. 704 */ 705 template<typename U> 706 struct ets_element { 707 detail::aligned_space<U> my_space; 708 bool is_built; 709 ets_element() { is_built = false; } // not currently-built 710 U* value() { return my_space.begin(); } 711 U* value_committed() { is_built = true; return my_space.begin(); } 712 ~ets_element() { 713 if(is_built) { 714 my_space.begin()->~U(); 715 is_built = false; 716 } 717 } 718 }; 719 720 // A predicate that can be used for a compile-time compatibility check of ETS instances 721 // Ideally, it should have been declared inside the ETS class, but unfortunately 722 // in that case VS2013 does not enable the variadic constructor. 
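// For example, is_compatible_ets<int, enumerable_thread_specific<int>>::value
// is true, while is_compatible_ets<int, enumerable_thread_specific<long>>::value
// and is_compatible_ets<int, int>::value are both false.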
723 template<typename T, typename ETS> struct is_compatible_ets : std::false_type {}; 724 template<typename T, typename U, typename A, ets_key_usage_type C> 725 struct is_compatible_ets< T, enumerable_thread_specific<U,A,C> > : std::is_same<T, U> {}; 726 727 // A predicate that checks whether, for a variable 'foo' of type T, foo() is a valid expression 728 template <typename T> using has_empty_braces_operator = decltype(std::declval<T>()()); 729 template <typename T> using is_callable_no_args = supports<T, has_empty_braces_operator>; 730 731 //! The enumerable_thread_specific container 732 /** enumerable_thread_specific has the following properties: 733 - thread-local copies are lazily created, with default, exemplar or function initialization. 734 - thread-local copies do not move (during lifetime, and excepting clear()) so the address of a copy is invariant. 735 - the contained objects need not have operator=() defined if combine is not used. 736 - enumerable_thread_specific containers may be copy-constructed or assigned. 737 - thread-local copies can be managed by hash-table, or can be accessed via TLS storage for speed. 738 - outside of parallel contexts, the contents of all thread-local copies are accessible by iterator or using combine or combine_each methods 739 740 @par Segmented iterator 741 When the thread-local objects are containers with input_iterators defined, a segmented iterator may 742 be used to iterate over all the elements of all thread-local copies. 743 744 @par combine and combine_each 745 - Both methods are defined for enumerable_thread_specific. 746 - combine() requires the type T have operator=() defined. 747 - neither method modifies the contents of the object (though there is no guarantee that the applied methods do not modify the object.) 748 - Both are evaluated in serial context (the methods are assumed to be non-benign.) 749 750 @ingroup containers */ 751 template <typename T, typename Allocator=cache_aligned_allocator<T>, 752 ets_key_usage_type ETS_key_type=ets_no_key > 753 class enumerable_thread_specific: ets_base<ETS_key_type> { 754 755 template<typename U, typename A, ets_key_usage_type C> friend class enumerable_thread_specific; 756 757 using padded_element = padded<ets_element<T>>; 758 759 //! 
A generic range, used to create range objects from the iterators 760 template<typename I> 761 class generic_range_type: public blocked_range<I> { 762 public: 763 using value_type = T; 764 using reference = T&; 765 using const_reference = const T&; 766 using iterator = I; 767 using difference_type = std::ptrdiff_t; 768 769 generic_range_type( I begin_, I end_, std::size_t grainsize_ = 1) : blocked_range<I>(begin_,end_,grainsize_) {} 770 template<typename U> 771 generic_range_type( const generic_range_type<U>& r) : blocked_range<I>(r.begin(),r.end(),r.grainsize()) {} 772 generic_range_type( generic_range_type& r, split ) : blocked_range<I>(r,split()) {} 773 }; 774 775 using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; 776 777 using padded_allocator_type = typename allocator_traits_type::template rebind_alloc<padded_element>; 778 using internal_collection_type = tbb::concurrent_vector< padded_element, padded_allocator_type >; 779 780 callback_base *my_construct_callback; 781 782 internal_collection_type my_locals; 783 784 // TODO: consider unifying the callback mechanism for all create_local* methods below 785 // (likely non-compatible and requires interface version increase) 786 void* create_local() override { 787 padded_element& lref = *my_locals.grow_by(1); 788 my_construct_callback->construct(lref.value()); 789 return lref.value_committed(); 790 } 791 792 static void* create_local_by_copy( ets_base<ETS_key_type>& base, void* p ) { 793 enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base); 794 padded_element& lref = *ets.my_locals.grow_by(1); 795 new(lref.value()) T(*static_cast<T*>(p)); 796 return lref.value_committed(); 797 } 798 799 static void* create_local_by_move( ets_base<ETS_key_type>& base, void* p ) { 800 enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base); 801 padded_element& lref = *ets.my_locals.grow_by(1); 802 new(lref.value()) T(std::move(*static_cast<T*>(p))); 803 return lref.value_committed(); 804 } 805 806 using array_allocator_type = typename allocator_traits_type::template rebind_alloc<uintptr_t>; 807 808 // _size is in bytes 809 void* create_array(std::size_t _size) override { 810 std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t); 811 return array_allocator_type().allocate(nelements); 812 } 813 814 void free_array( void* _ptr, std::size_t _size) override { 815 std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t); 816 array_allocator_type().deallocate( reinterpret_cast<uintptr_t *>(_ptr),nelements); 817 } 818 819 public: 820 821 //! Basic types 822 using value_type = T; 823 using allocator_type = Allocator; 824 using size_type = typename internal_collection_type::size_type; 825 using difference_type = typename internal_collection_type::difference_type; 826 using reference = value_type&; 827 using const_reference = const value_type&; 828 829 using pointer = typename allocator_traits_type::pointer; 830 using const_pointer = typename allocator_traits_type::const_pointer; 831 832 // Iterator types 833 using iterator = enumerable_thread_specific_iterator<internal_collection_type, value_type>; 834 using const_iterator = enumerable_thread_specific_iterator<internal_collection_type, const value_type>; 835 836 // Parallel range types 837 using range_type = generic_range_type<iterator>; 838 using const_range_type = generic_range_type<const_iterator>; 839 840 //! Default constructor. Each local instance of T is default constructed. 
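//! A minimal usage sketch (assumes the enclosing code also includes
//! oneapi/tbb/parallel_for.h and <functional>, which this header does not pull in):
//! \code
//! tbb::enumerable_thread_specific<int> counters;               // each local starts as int(), i.e. 0
//! tbb::parallel_for(0, 1000, [&]( int ) { ++counters.local(); });
//! int total = counters.combine( std::plus<int>{} );            // total == 1000
//! \endcode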
841 enumerable_thread_specific() : my_construct_callback( 842 callback_leaf<construct_by_default<T> >::make(/*dummy argument*/0) 843 ){} 844 845 //! Constructor with initializer functor. Each local instance of T is constructed by T(finit()). 846 template <typename Finit , typename = typename std::enable_if<is_callable_no_args<typename std::decay<Finit>::type>::value>::type> 847 explicit enumerable_thread_specific( Finit finit ) : my_construct_callback( 848 callback_leaf<construct_by_finit<T,Finit> >::make( std::move(finit) ) 849 ){} 850 851 //! Constructor with exemplar. Each local instance of T is copy-constructed from the exemplar. 852 explicit enumerable_thread_specific( const T& exemplar ) : my_construct_callback( 853 callback_leaf<construct_by_exemplar<T> >::make( exemplar ) 854 ){} 855 856 explicit enumerable_thread_specific( T&& exemplar ) : my_construct_callback( 857 callback_leaf<construct_by_exemplar<T> >::make( std::move(exemplar) ) 858 ){} 859 860 //! Variadic constructor with initializer arguments. Each local instance of T is constructed by T(args...) 861 template <typename P1, typename... P, 862 typename = typename std::enable_if<!is_callable_no_args<typename std::decay<P1>::type>::value 863 && !is_compatible_ets<T, typename std::decay<P1>::type>::value 864 && !std::is_same<T, typename std::decay<P1>::type>::value 865 >::type> 866 enumerable_thread_specific( P1&& arg1, P&& ... args ) : my_construct_callback( 867 callback_leaf<construct_by_args<T,P1,P...> >::make( std::forward<P1>(arg1), std::forward<P>(args)... ) 868 ){} 869 870 //! Destructor 871 ~enumerable_thread_specific() { 872 if(my_construct_callback) my_construct_callback->destroy(); 873 // Deallocate the hash table before overridden free_array() becomes inaccessible 874 this->ets_base<ETS_key_type>::table_clear(); 875 } 876 877 //! returns reference to local, discarding exists 878 reference local() { 879 bool exists; 880 return local(exists); 881 } 882 883 //! Returns reference to calling thread's local copy, creating one if necessary 884 reference local(bool& exists) { 885 void* ptr = this->table_lookup(exists); 886 return *(T*)ptr; 887 } 888 889 //! Get the number of local copies 890 size_type size() const { return my_locals.size(); } 891 892 //! true if there have been no local copies created 893 bool empty() const { return my_locals.empty(); } 894 895 //! begin iterator 896 iterator begin() { return iterator( my_locals, 0 ); } 897 //! end iterator 898 iterator end() { return iterator(my_locals, my_locals.size() ); } 899 900 //! begin const iterator 901 const_iterator begin() const { return const_iterator(my_locals, 0); } 902 903 //! end const iterator 904 const_iterator end() const { return const_iterator(my_locals, my_locals.size()); } 905 906 //! Get range for parallel algorithms 907 range_type range( std::size_t grainsize=1 ) { return range_type( begin(), end(), grainsize ); } 908 909 //! Get const range for parallel algorithms 910 const_range_type range( std::size_t grainsize=1 ) const { return const_range_type( begin(), end(), grainsize ); } 911 912 //! 
Destroys local copies 913 void clear() { 914 my_locals.clear(); 915 this->table_clear(); 916 // callback is not destroyed 917 } 918 919 private: 920 template<typename A2, ets_key_usage_type C2> 921 void internal_copy(const enumerable_thread_specific<T, A2, C2>& other) { 922 // this tests is_compatible_ets 923 static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" ); 924 // Initialize my_construct_callback first, so that it is valid even if rest of this routine throws an exception. 925 my_construct_callback = other.my_construct_callback->clone(); 926 __TBB_ASSERT(my_locals.size()==0, nullptr); 927 my_locals.reserve(other.size()); 928 this->table_elementwise_copy( other, create_local_by_copy ); 929 } 930 931 void internal_swap(enumerable_thread_specific& other) { 932 using std::swap; 933 __TBB_ASSERT( this!=&other, nullptr); 934 swap(my_construct_callback, other.my_construct_callback); 935 // concurrent_vector::swap() preserves storage space, 936 // so addresses to the vector kept in ETS hash table remain valid. 937 swap(my_locals, other.my_locals); 938 this->ets_base<ETS_key_type>::table_swap(other); 939 } 940 941 template<typename A2, ets_key_usage_type C2> 942 void internal_move(enumerable_thread_specific<T, A2, C2>&& other) { 943 static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" ); 944 my_construct_callback = other.my_construct_callback; 945 other.my_construct_callback = nullptr; 946 __TBB_ASSERT(my_locals.size()==0, nullptr); 947 my_locals.reserve(other.size()); 948 this->table_elementwise_copy( other, create_local_by_move ); 949 } 950 951 public: 952 enumerable_thread_specific( const enumerable_thread_specific& other ) 953 : ets_base<ETS_key_type>() /* prevents GCC warnings with -Wextra */ 954 { 955 internal_copy(other); 956 } 957 958 template<typename Alloc, ets_key_usage_type Cachetype> 959 enumerable_thread_specific( const enumerable_thread_specific<T, Alloc, Cachetype>& other ) 960 { 961 internal_copy(other); 962 } 963 964 enumerable_thread_specific( enumerable_thread_specific&& other ) : my_construct_callback() 965 { 966 // TODO: use internal_move correctly here 967 internal_swap(other); 968 } 969 970 template<typename Alloc, ets_key_usage_type Cachetype> 971 enumerable_thread_specific( enumerable_thread_specific<T, Alloc, Cachetype>&& other ) : my_construct_callback() 972 { 973 internal_move(std::move(other)); 974 } 975 976 enumerable_thread_specific& operator=( const enumerable_thread_specific& other ) 977 { 978 if( this != &other ) { 979 this->clear(); 980 my_construct_callback->destroy(); 981 internal_copy( other ); 982 } 983 return *this; 984 } 985 986 template<typename Alloc, ets_key_usage_type Cachetype> 987 enumerable_thread_specific& operator=( const enumerable_thread_specific<T, Alloc, Cachetype>& other ) 988 { 989 __TBB_ASSERT( static_cast<void*>(this)!=static_cast<const void*>(&other), nullptr); // Objects of different types 990 this->clear(); 991 my_construct_callback->destroy(); 992 internal_copy(other); 993 return *this; 994 } 995 996 enumerable_thread_specific& operator=( enumerable_thread_specific&& other ) 997 { 998 if( this != &other ) { 999 // TODO: use internal_move correctly here 1000 internal_swap(other); 1001 } 1002 return *this; 1003 } 1004 1005 template<typename Alloc, ets_key_usage_type Cachetype> 1006 enumerable_thread_specific& operator=( enumerable_thread_specific<T, Alloc, Cachetype>&& other ) 1007 { 1008 __TBB_ASSERT( 
static_cast<void*>(this)!=static_cast<const void*>(&other), nullptr); // Objects of different types 1009 this->clear(); 1010 my_construct_callback->destroy(); 1011 internal_move(std::move(other)); 1012 return *this; 1013 } 1014 1015 // CombineFunc has signature T(T,T) or T(const T&, const T&) 1016 template <typename CombineFunc> 1017 T combine(CombineFunc f_combine) { 1018 if(begin() == end()) { 1019 ets_element<T> location; 1020 my_construct_callback->construct(location.value()); 1021 return *location.value_committed(); 1022 } 1023 const_iterator ci = begin(); 1024 T my_result = *ci; 1025 while(++ci != end()) 1026 my_result = f_combine( my_result, *ci ); 1027 return my_result; 1028 } 1029 1030 // combine_func_t takes T by value or by [const] reference, and returns nothing 1031 template <typename CombineFunc> 1032 void combine_each(CombineFunc f_combine) { 1033 for(iterator ci = begin(); ci != end(); ++ci) { 1034 f_combine( *ci ); 1035 } 1036 } 1037 1038 }; // enumerable_thread_specific 1039 1040 template< typename Container > 1041 class flattened2d { 1042 // This intermediate typedef is to address issues with VC7.1 compilers 1043 using conval_type = typename Container::value_type; 1044 1045 public: 1046 //! Basic types 1047 using size_type = typename conval_type::size_type; 1048 using difference_type = typename conval_type::difference_type; 1049 using allocator_type = typename conval_type::allocator_type; 1050 using value_type = typename conval_type::value_type; 1051 using reference = typename conval_type::reference; 1052 using const_reference = typename conval_type::const_reference; 1053 using pointer = typename conval_type::pointer; 1054 using const_pointer = typename conval_type::const_pointer; 1055 1056 using iterator = segmented_iterator<Container, value_type>; 1057 using const_iterator = segmented_iterator<Container, const value_type>; 1058 1059 flattened2d( const Container &c, typename Container::const_iterator b, typename Container::const_iterator e ) : 1060 my_container(const_cast<Container*>(&c)), my_begin(b), my_end(e) { } 1061 1062 explicit flattened2d( const Container &c ) : 1063 my_container(const_cast<Container*>(&c)), my_begin(c.begin()), my_end(c.end()) { } 1064 1065 iterator begin() { return iterator(*my_container) = my_begin; } 1066 iterator end() { return iterator(*my_container) = my_end; } 1067 const_iterator begin() const { return const_iterator(*my_container) = my_begin; } 1068 const_iterator end() const { return const_iterator(*my_container) = my_end; } 1069 1070 size_type size() const { 1071 size_type tot_size = 0; 1072 for(typename Container::const_iterator i = my_begin; i != my_end; ++i) { 1073 tot_size += i->size(); 1074 } 1075 return tot_size; 1076 } 1077 1078 private: 1079 Container *my_container; 1080 typename Container::const_iterator my_begin; 1081 typename Container::const_iterator my_end; 1082 }; 1083 1084 template <typename Container> 1085 flattened2d<Container> flatten2d(const Container &c, const typename Container::const_iterator b, const typename Container::const_iterator e) { 1086 return flattened2d<Container>(c, b, e); 1087 } 1088 1089 template <typename Container> 1090 flattened2d<Container> flatten2d(const Container &c) { 1091 return flattened2d<Container>(c); 1092 } 1093 1094 } // namespace d1 1095 } // namespace detail 1096 1097 inline namespace v1 { 1098 using detail::d1::enumerable_thread_specific; 1099 using detail::d1::flattened2d; 1100 using detail::d1::flatten2d; 1101 // ets enum keys 1102 using detail::d1::ets_key_usage_type; 1103 using 
detail::d1::ets_key_per_instance; 1104 using detail::d1::ets_no_key; 1105 #if __TBB_RESUMABLE_TASKS 1106 using detail::d1::ets_suspend_aware; 1107 #endif 1108 } // inline namespace v1 1109 1110 } // namespace tbb 1111 1112 #endif // __TBB_enumerable_thread_specific_H 1113 1114
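// Usage sketch for flatten2d (an illustrative example, not part of the header proper;
// assumes the translation unit also includes <vector> and oneapi/tbb/parallel_for.h):
//
//   tbb::enumerable_thread_specific<std::vector<int>> buckets;
//   tbb::parallel_for(0, 100, [&]( int i ) { buckets.local().push_back(i); });
//   auto flat = tbb::flatten2d(buckets);   // single view over all per-thread vectors
//   std::size_t n = 0;
//   for (int v : flat) { (void)v; ++n; }   // visits every element exactly once; n == 100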