/*
    Copyright (c) 2020-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "oneapi/tbb/cache_aligned_allocator.h"
#include "oneapi/tbb/detail/_small_object_pool.h"
#include "oneapi/tbb/detail/_task.h"
#include "governor.h"
#include "thread_data.h"
#include "task_dispatcher.h"

#include <cstddef>

namespace tbb {
namespace detail {
namespace r1 {

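// Sentinel stored into m_public_list when the owning thread destroys the pool:
// a foreign thread that observes it releases a returned object immediately
// instead of pushing it back onto the list (see deallocate_impl below).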
small_object_pool_impl::small_object* const small_object_pool_impl::dead_public_list =
    reinterpret_cast<small_object_pool_impl::small_object*>(1);

void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& allocator, std::size_t number_of_bytes, const d1::execution_data& ed) {
    auto& tls = static_cast<const execution_data_ext&>(ed).task_disp->get_thread_data();
    auto pool = tls.my_small_object_pool;
    return pool->allocate_impl(allocator, number_of_bytes);
}

void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& allocator, std::size_t number_of_bytes) {
    // TODO: optimize if the allocator contains a valid pool.
    auto tls = governor::get_thread_data();
    auto pool = tls->my_small_object_pool;
    return pool->allocate_impl(allocator, number_of_bytes);
}

void* small_object_pool_impl::allocate_impl(d1::small_object_pool*& allocator, std::size_t number_of_bytes)
{
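    // Small requests are served from per-thread free lists: first the private list, then the
    // whole public list (objects returned by other threads) is claimed in one exchange; only
    // when both are empty is a fresh cache-aligned block allocated.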
    small_object* obj{nullptr};

    if (number_of_bytes <= small_object_size) {
        if (m_private_list) {
            obj = m_private_list;
            m_private_list = m_private_list->next;
        } else if (m_public_list.load(std::memory_order_relaxed)) {
            // No fence is required for the relaxed read of m_public_list above, because std::atomic::exchange() provides one.
            obj = m_public_list.exchange(nullptr);
            __TBB_ASSERT( obj, "another thread emptied m_public_list" );
            m_private_list = obj->next;
        } else {
            obj = new (cache_aligned_allocate(small_object_size)) small_object{nullptr};
            ++m_private_counter;
        }
    } else {
        obj = new (cache_aligned_allocate(number_of_bytes)) small_object{nullptr};
    }
    allocator = this;

    // Return uninitialized memory for further construction on the user side.
    obj->~small_object();
    return obj;
}

void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& allocator, void* ptr, std::size_t number_of_bytes) {
    auto pool = static_cast<small_object_pool_impl*>(&allocator);
    auto tls = governor::get_thread_data();
    pool->deallocate_impl(ptr, number_of_bytes, *tls);
}

void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& allocator, void* ptr, std::size_t number_of_bytes, const d1::execution_data& ed) {
    auto& tls = static_cast<const execution_data_ext&>(ed).task_disp->get_thread_data();
    auto pool = static_cast<small_object_pool_impl*>(&allocator);
    pool->deallocate_impl(ptr, number_of_bytes, tls);
}

void small_object_pool_impl::deallocate_impl(void* ptr, std::size_t number_of_bytes, thread_data& td) {
    __TBB_ASSERT(ptr != nullptr, "pointer to deallocate should not be null");
    __TBB_ASSERT(number_of_bytes >= sizeof(small_object), "number of bytes should be at least sizeof(small_object)");

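    // Small objects are recycled: onto the private list when the calling thread owns this pool,
    // otherwise onto the owner's public list via a CAS loop (or released at once if the owner
    // has already marked the list dead). Large objects bypass the free lists entirely.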
    if (number_of_bytes <= small_object_size) {
        auto obj = new (ptr) small_object{nullptr};
        if (td.my_small_object_pool == this) {
            obj->next = m_private_list;
            m_private_list = obj;
        } else {
            auto old_public_list = m_public_list.load(std::memory_order_relaxed);

            for (;;) {
                if (old_public_list == dead_public_list) {
                    obj->~small_object();
                    cache_aligned_deallocate(obj);
                    if (++m_public_counter == 0)
                    {
                        this->~small_object_pool_impl();
                        cache_aligned_deallocate(this);
                    }
                    break;
                }
                obj->next = old_public_list;
                if (m_public_list.compare_exchange_strong(old_public_list, obj)) {
                    break;
                }
            }
        }
    } else {
        cache_aligned_deallocate(ptr);
    }
}

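// Walks the given free list, destroying and releasing every node.
// Returns the number of nodes removed so the caller can adjust its counters.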
std::int64_t small_object_pool_impl::cleanup_list(small_object* list)
{
    std::int64_t removed_count{};

    while (list) {
        small_object* current = list;
        list = list->next;
        current->~small_object();
        cache_aligned_deallocate(current);
        ++removed_count;
    }
    return removed_count;
}

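// Tears the pool down on behalf of its owner: everything cached in the free lists is released,
// the public list is marked dead, and the count of objects still in use is folded into
// m_public_counter. Whichever thread brings that counter to zero (this call, or a later
// deallocate_impl that observes dead_public_list) frees the pool object itself.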
void small_object_pool_impl::destroy()
{
    // Clean up the private list and subtract the removed count from the private counter.
    m_private_counter -= cleanup_list(m_private_list);
    // Grab the public list and put the dead mark in its place.
    small_object* public_list = m_public_list.exchange(dead_public_list);
    // Clean up the public list and subtract from the private (intentionally) counter.
    m_private_counter -= cleanup_list(public_list);
    __TBB_ASSERT(m_private_counter >= 0, "Private counter must not be less than 0");
    // Equivalent to m_public_counter.fetch_sub(m_private_counter) - m_private_counter, but the atomic
    // operator-= returns the new value directly, so m_private_counter is not read again after the
    // subtraction: once the counter may have reached zero, another thread is free to destroy *this.
    auto new_value = m_public_counter -= m_private_counter;
    // Check whether this call is responsible for cleaning up the resources.
    if (new_value == 0) {
        this->~small_object_pool_impl();
        cache_aligned_deallocate(this);
    }
}

} // namespace r1
} // namespace detail
} // namespace tbb