xref: /oneTBB/src/tbb/small_object_pool.cpp (revision b15aabb3)
151c0b2f7Stbbdev /*
2*b15aabb3Stbbdev     Copyright (c) 2020-2021 Intel Corporation
351c0b2f7Stbbdev 
451c0b2f7Stbbdev     Licensed under the Apache License, Version 2.0 (the "License");
551c0b2f7Stbbdev     you may not use this file except in compliance with the License.
651c0b2f7Stbbdev     You may obtain a copy of the License at
751c0b2f7Stbbdev 
851c0b2f7Stbbdev         http://www.apache.org/licenses/LICENSE-2.0
951c0b2f7Stbbdev 
1051c0b2f7Stbbdev     Unless required by applicable law or agreed to in writing, software
1151c0b2f7Stbbdev     distributed under the License is distributed on an "AS IS" BASIS,
1251c0b2f7Stbbdev     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1351c0b2f7Stbbdev     See the License for the specific language governing permissions and
1451c0b2f7Stbbdev     limitations under the License.
1551c0b2f7Stbbdev */
1651c0b2f7Stbbdev 
1749e08aacStbbdev #include "oneapi/tbb/cache_aligned_allocator.h"
1849e08aacStbbdev #include "oneapi/tbb/detail/_small_object_pool.h"
1949e08aacStbbdev #include "oneapi/tbb/detail/_task.h"
2051c0b2f7Stbbdev #include "governor.h"
2151c0b2f7Stbbdev #include "thread_data.h"
2251c0b2f7Stbbdev #include "task_dispatcher.h"
2351c0b2f7Stbbdev 
2451c0b2f7Stbbdev #include <cstddef>
2551c0b2f7Stbbdev 
2651c0b2f7Stbbdev namespace tbb {
2751c0b2f7Stbbdev namespace detail {
2851c0b2f7Stbbdev namespace r1 {
2951c0b2f7Stbbdev 
3051c0b2f7Stbbdev small_object_pool_impl::small_object* const small_object_pool_impl::dead_public_list =
3151c0b2f7Stbbdev                 reinterpret_cast<small_object_pool_impl::small_object*>(1);
3251c0b2f7Stbbdev 
allocate(d1::small_object_pool * & allocator,std::size_t number_of_bytes,const d1::execution_data & ed)3351c0b2f7Stbbdev void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& allocator, std::size_t number_of_bytes, const d1::execution_data& ed) {
3451c0b2f7Stbbdev     auto& tls = static_cast<const execution_data_ext&>(ed).task_disp->get_thread_data();
3551c0b2f7Stbbdev     auto pool = tls.my_small_object_pool;
3651c0b2f7Stbbdev     return pool->allocate_impl(allocator, number_of_bytes);
3751c0b2f7Stbbdev }
3851c0b2f7Stbbdev 
allocate(d1::small_object_pool * & allocator,std::size_t number_of_bytes)3951c0b2f7Stbbdev void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& allocator, std::size_t number_of_bytes) {
4051c0b2f7Stbbdev     // TODO: optimize if the allocator contains a valid pool.
4151c0b2f7Stbbdev     auto tls = governor::get_thread_data();
4251c0b2f7Stbbdev     auto pool = tls->my_small_object_pool;
4351c0b2f7Stbbdev     return pool->allocate_impl(allocator, number_of_bytes);
4451c0b2f7Stbbdev }
4551c0b2f7Stbbdev 
allocate_impl(d1::small_object_pool * & allocator,std::size_t number_of_bytes)4651c0b2f7Stbbdev void* small_object_pool_impl::allocate_impl(d1::small_object_pool*& allocator, std::size_t number_of_bytes)
4751c0b2f7Stbbdev {
4851c0b2f7Stbbdev     small_object* obj{nullptr};
4951c0b2f7Stbbdev 
5051c0b2f7Stbbdev     if (number_of_bytes <= small_object_size) {
5151c0b2f7Stbbdev         if (m_private_list) {
5251c0b2f7Stbbdev             obj = m_private_list;
5351c0b2f7Stbbdev             m_private_list = m_private_list->next;
5451c0b2f7Stbbdev         } else if (m_public_list.load(std::memory_order_relaxed)) {
5551c0b2f7Stbbdev             // No fence required for read of my_public_list above, because std::atomic::exchange() has a fence.
5651c0b2f7Stbbdev             obj = m_public_list.exchange(nullptr);
5751c0b2f7Stbbdev             __TBB_ASSERT( obj, "another thread emptied the my_public_list" );
5851c0b2f7Stbbdev             m_private_list = obj->next;
5951c0b2f7Stbbdev         } else {
6051c0b2f7Stbbdev             obj = new (cache_aligned_allocate(small_object_size)) small_object{nullptr};
6151c0b2f7Stbbdev             ++m_private_counter;
6251c0b2f7Stbbdev         }
6351c0b2f7Stbbdev     } else {
6451c0b2f7Stbbdev         obj = new (cache_aligned_allocate(number_of_bytes)) small_object{nullptr};
6551c0b2f7Stbbdev     }
6651c0b2f7Stbbdev     allocator = this;
6751c0b2f7Stbbdev 
6851c0b2f7Stbbdev     // Return uninitialized memory for further construction on user side.
6951c0b2f7Stbbdev     obj->~small_object();
7051c0b2f7Stbbdev     return obj;
7151c0b2f7Stbbdev }
7251c0b2f7Stbbdev 
deallocate(d1::small_object_pool & allocator,void * ptr,std::size_t number_of_bytes)7351c0b2f7Stbbdev void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& allocator, void* ptr, std::size_t number_of_bytes) {
7451c0b2f7Stbbdev     auto pool = static_cast<small_object_pool_impl*>(&allocator);
7551c0b2f7Stbbdev     auto tls = governor::get_thread_data();
7651c0b2f7Stbbdev     pool->deallocate_impl(ptr, number_of_bytes, *tls);
7751c0b2f7Stbbdev }
7851c0b2f7Stbbdev 
deallocate(d1::small_object_pool & allocator,void * ptr,std::size_t number_of_bytes,const d1::execution_data & ed)7951c0b2f7Stbbdev void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& allocator, void* ptr, std::size_t number_of_bytes, const d1::execution_data& ed) {
8051c0b2f7Stbbdev     auto& tls = static_cast<const execution_data_ext&>(ed).task_disp->get_thread_data();
8151c0b2f7Stbbdev     auto pool = static_cast<small_object_pool_impl*>(&allocator);
8251c0b2f7Stbbdev     pool->deallocate_impl(ptr, number_of_bytes, tls);
8351c0b2f7Stbbdev }
8451c0b2f7Stbbdev 
deallocate_impl(void * ptr,std::size_t number_of_bytes,thread_data & td)8551c0b2f7Stbbdev void small_object_pool_impl::deallocate_impl(void* ptr, std::size_t number_of_bytes, thread_data& td) {
8651c0b2f7Stbbdev     __TBB_ASSERT(ptr != nullptr, "pointer to deallocate should not be null");
8751c0b2f7Stbbdev     __TBB_ASSERT(number_of_bytes >= sizeof(small_object), "number of bytes should be at least sizeof(small_object)");
8851c0b2f7Stbbdev 
8951c0b2f7Stbbdev     if (number_of_bytes <= small_object_size) {
9051c0b2f7Stbbdev         auto obj = new (ptr) small_object{nullptr};
9151c0b2f7Stbbdev         if (td.my_small_object_pool == this) {
9251c0b2f7Stbbdev             obj->next = m_private_list;
9351c0b2f7Stbbdev             m_private_list = obj;
9451c0b2f7Stbbdev         } else {
9551c0b2f7Stbbdev             auto old_public_list = m_public_list.load(std::memory_order_relaxed);
9651c0b2f7Stbbdev 
9751c0b2f7Stbbdev             for (;;) {
9851c0b2f7Stbbdev                 if (old_public_list == dead_public_list) {
9951c0b2f7Stbbdev                     obj->~small_object();
10051c0b2f7Stbbdev                     cache_aligned_deallocate(obj);
10151c0b2f7Stbbdev                     if (++m_public_counter == 0)
10251c0b2f7Stbbdev                     {
10351c0b2f7Stbbdev                         this->~small_object_pool_impl();
10451c0b2f7Stbbdev                         cache_aligned_deallocate(this);
10551c0b2f7Stbbdev                     }
10651c0b2f7Stbbdev                     break;
10751c0b2f7Stbbdev                 }
10851c0b2f7Stbbdev                 obj->next = old_public_list;
10951c0b2f7Stbbdev                 if (m_public_list.compare_exchange_strong(old_public_list, obj)) {
11051c0b2f7Stbbdev                     break;
11151c0b2f7Stbbdev                 }
11251c0b2f7Stbbdev             }
11351c0b2f7Stbbdev         }
11451c0b2f7Stbbdev     } else {
11551c0b2f7Stbbdev         cache_aligned_deallocate(ptr);
11651c0b2f7Stbbdev     }
11751c0b2f7Stbbdev }
11851c0b2f7Stbbdev 
cleanup_list(small_object * list)11951c0b2f7Stbbdev std::int64_t small_object_pool_impl::cleanup_list(small_object* list)
12051c0b2f7Stbbdev {
12151c0b2f7Stbbdev     std::int64_t removed_count{};
12251c0b2f7Stbbdev 
12351c0b2f7Stbbdev     while (list) {
12451c0b2f7Stbbdev         small_object* current = list;
12551c0b2f7Stbbdev         list = list->next;
12651c0b2f7Stbbdev         current->~small_object();
12751c0b2f7Stbbdev         cache_aligned_deallocate(current);
12851c0b2f7Stbbdev         ++removed_count;
12951c0b2f7Stbbdev     }
13051c0b2f7Stbbdev     return removed_count;
13151c0b2f7Stbbdev }
13251c0b2f7Stbbdev 
destroy()13351c0b2f7Stbbdev void small_object_pool_impl::destroy()
13451c0b2f7Stbbdev {
13551c0b2f7Stbbdev     // clean up private list and subtract the removed count from private counter
13651c0b2f7Stbbdev     m_private_counter -= cleanup_list(m_private_list);
13751c0b2f7Stbbdev     // Grab public list and place dead mark
13851c0b2f7Stbbdev     small_object* public_list = m_public_list.exchange(dead_public_list);
13951c0b2f7Stbbdev     // clean up public list and subtract from private (intentionally) counter
14051c0b2f7Stbbdev     m_private_counter -= cleanup_list(public_list);
14151c0b2f7Stbbdev     __TBB_ASSERT(m_private_counter >= 0, "Private counter may not be less than 0");
142d86ed7fbStbbdev     // Equivalent to fetch_sub(m_private_counter) - m_private_counter. But we need to do it
143d86ed7fbStbbdev     // atomically with operator-= not to access m_private_counter after the subtraction.
144d86ed7fbStbbdev     auto new_value = m_public_counter -= m_private_counter;
14551c0b2f7Stbbdev     // check if this method is responsible to clean up the resources
146d86ed7fbStbbdev     if (new_value == 0) {
14751c0b2f7Stbbdev         this->~small_object_pool_impl();
14851c0b2f7Stbbdev         cache_aligned_deallocate(this);
14951c0b2f7Stbbdev     }
15051c0b2f7Stbbdev }
15151c0b2f7Stbbdev 
15251c0b2f7Stbbdev } // namespace r1
15351c0b2f7Stbbdev } // namespace detail
15451c0b2f7Stbbdev } // namespace tbb
155