xref: /oneTBB/src/tbb/allocator.cpp (revision 8e7f9e14)
151c0b2f7Stbbdev /*
2b15aabb3Stbbdev     Copyright (c) 2005-2021 Intel Corporation
351c0b2f7Stbbdev 
451c0b2f7Stbbdev     Licensed under the Apache License, Version 2.0 (the "License");
551c0b2f7Stbbdev     you may not use this file except in compliance with the License.
651c0b2f7Stbbdev     You may obtain a copy of the License at
751c0b2f7Stbbdev 
851c0b2f7Stbbdev         http://www.apache.org/licenses/LICENSE-2.0
951c0b2f7Stbbdev 
1051c0b2f7Stbbdev     Unless required by applicable law or agreed to in writing, software
1151c0b2f7Stbbdev     distributed under the License is distributed on an "AS IS" BASIS,
1251c0b2f7Stbbdev     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1351c0b2f7Stbbdev     See the License for the specific language governing permissions and
1451c0b2f7Stbbdev     limitations under the License.
1551c0b2f7Stbbdev */
1651c0b2f7Stbbdev 
#include "oneapi/tbb/version.h"

#include "oneapi/tbb/detail/_exception.h"
#include "oneapi/tbb/detail/_assert.h"
#include "oneapi/tbb/detail/_utils.h"

#include "dynamic_link.h"
#include "misc.h"

#include <atomic>
#include <cstdint>
#include <cstdlib>
#include <mutex>
2751c0b2f7Stbbdev 
2851c0b2f7Stbbdev #if _WIN32 || _WIN64
2951c0b2f7Stbbdev #include <Windows.h>
3051c0b2f7Stbbdev #else
3151c0b2f7Stbbdev #include <dlfcn.h>
3251c0b2f7Stbbdev #endif /* _WIN32||_WIN64 */
3351c0b2f7Stbbdev 
3451c0b2f7Stbbdev #if __TBB_WEAK_SYMBOLS_PRESENT
3551c0b2f7Stbbdev 
3651c0b2f7Stbbdev #pragma weak scalable_malloc
3751c0b2f7Stbbdev #pragma weak scalable_free
3851c0b2f7Stbbdev #pragma weak scalable_aligned_malloc
3951c0b2f7Stbbdev #pragma weak scalable_aligned_free
4051c0b2f7Stbbdev 
4151c0b2f7Stbbdev extern "C" {
4251c0b2f7Stbbdev     void* scalable_malloc(std::size_t);
4351c0b2f7Stbbdev     void  scalable_free(void*);
4451c0b2f7Stbbdev     void* scalable_aligned_malloc(std::size_t, std::size_t);
4551c0b2f7Stbbdev     void  scalable_aligned_free(void*);
4651c0b2f7Stbbdev }
4751c0b2f7Stbbdev 
4851c0b2f7Stbbdev #endif /* __TBB_WEAK_SYMBOLS_PRESENT */
4951c0b2f7Stbbdev 
5051c0b2f7Stbbdev namespace tbb {
5151c0b2f7Stbbdev namespace detail {
5251c0b2f7Stbbdev namespace r1 {
5351c0b2f7Stbbdev 
5451c0b2f7Stbbdev //! Initialization routine used for first indirect call via allocate_handler.
5551c0b2f7Stbbdev static void* initialize_allocate_handler(std::size_t size);
5651c0b2f7Stbbdev 
5751c0b2f7Stbbdev //! Handler for memory allocation
58*8e7f9e14SAlex using allocate_handler_type = void* (*)(std::size_t size);
59*8e7f9e14SAlex static std::atomic<allocate_handler_type> allocate_handler{ &initialize_allocate_handler };
60*8e7f9e14SAlex allocate_handler_type allocate_handler_unsafe = nullptr;
6151c0b2f7Stbbdev 
6251c0b2f7Stbbdev //! Handler for memory deallocation
6351c0b2f7Stbbdev static void  (*deallocate_handler)(void* pointer) = nullptr;
6451c0b2f7Stbbdev 
6551c0b2f7Stbbdev //! Initialization routine used for first indirect call via cache_aligned_allocate_handler.
6651c0b2f7Stbbdev static void* initialize_cache_aligned_allocate_handler(std::size_t n, std::size_t alignment);
6751c0b2f7Stbbdev 
6851c0b2f7Stbbdev //! Allocates memory using standard malloc. It is used when scalable_allocator is not available
6951c0b2f7Stbbdev static void* std_cache_aligned_allocate(std::size_t n, std::size_t alignment);
7051c0b2f7Stbbdev 
7151c0b2f7Stbbdev //! Allocates memory using standard free. It is used when scalable_allocator is not available
7251c0b2f7Stbbdev static void  std_cache_aligned_deallocate(void* p);
7351c0b2f7Stbbdev 
7451c0b2f7Stbbdev //! Handler for padded memory allocation
75*8e7f9e14SAlex using cache_aligned_allocate_handler_type = void* (*)(std::size_t n, std::size_t alignment);
76*8e7f9e14SAlex static std::atomic<cache_aligned_allocate_handler_type> cache_aligned_allocate_handler{ &initialize_cache_aligned_allocate_handler };
77*8e7f9e14SAlex cache_aligned_allocate_handler_type cache_aligned_allocate_handler_unsafe = nullptr;
7851c0b2f7Stbbdev 
7951c0b2f7Stbbdev //! Handler for padded memory deallocation
8051c0b2f7Stbbdev static void (*cache_aligned_deallocate_handler)(void* p) = nullptr;
8151c0b2f7Stbbdev 
8251c0b2f7Stbbdev //! Table describing how to link the handlers.
8351c0b2f7Stbbdev static const dynamic_link_descriptor MallocLinkTable[] = {
84*8e7f9e14SAlex     DLD(scalable_malloc, allocate_handler_unsafe),
8551c0b2f7Stbbdev     DLD(scalable_free, deallocate_handler),
86*8e7f9e14SAlex     DLD(scalable_aligned_malloc, cache_aligned_allocate_handler_unsafe),
8751c0b2f7Stbbdev     DLD(scalable_aligned_free, cache_aligned_deallocate_handler),
8851c0b2f7Stbbdev };
8951c0b2f7Stbbdev 
9051c0b2f7Stbbdev 
9151c0b2f7Stbbdev #if TBB_USE_DEBUG
9251c0b2f7Stbbdev #define DEBUG_SUFFIX "_debug"
9351c0b2f7Stbbdev #else
9451c0b2f7Stbbdev #define DEBUG_SUFFIX
9551c0b2f7Stbbdev #endif /* TBB_USE_DEBUG */
9651c0b2f7Stbbdev 
9751c0b2f7Stbbdev // MALLOCLIB_NAME is the name of the oneTBB memory allocator library.
9851c0b2f7Stbbdev #if _WIN32||_WIN64
9951c0b2f7Stbbdev #define MALLOCLIB_NAME "tbbmalloc" DEBUG_SUFFIX ".dll"
10051c0b2f7Stbbdev #elif __APPLE__
10151c0b2f7Stbbdev #define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".dylib"
10251c0b2f7Stbbdev #elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX || __ANDROID__
10351c0b2f7Stbbdev #define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so"
104734f0bc0SPablo Romero #elif __unix__  // Note that order of these #elif's is important!
10551c0b2f7Stbbdev #define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so.2"
10651c0b2f7Stbbdev #else
10751c0b2f7Stbbdev #error Unknown OS
10851c0b2f7Stbbdev #endif
10951c0b2f7Stbbdev 
11051c0b2f7Stbbdev //! Initialize the allocation/free handler pointers.
11151c0b2f7Stbbdev /** Caller is responsible for ensuring this routine is called exactly once.
11251c0b2f7Stbbdev     The routine attempts to dynamically link with the TBB memory allocator.
11351c0b2f7Stbbdev     If that allocator is not found, it links to malloc and free. */
11451c0b2f7Stbbdev void initialize_handler_pointers() {
11551c0b2f7Stbbdev     __TBB_ASSERT(allocate_handler == &initialize_allocate_handler, NULL);
11651c0b2f7Stbbdev     bool success = dynamic_link(MALLOCLIB_NAME, MallocLinkTable, 4);
11751c0b2f7Stbbdev     if(!success) {
11851c0b2f7Stbbdev         // If unsuccessful, set the handlers to the default routines.
11951c0b2f7Stbbdev         // This must be done now, and not before FillDynamicLinks runs, because if other
12051c0b2f7Stbbdev         // threads call the handlers, we want them to go through the DoOneTimeInitializations logic,
12151c0b2f7Stbbdev         // which forces them to wait.
122*8e7f9e14SAlex         allocate_handler_unsafe = &std::malloc;
12351c0b2f7Stbbdev         deallocate_handler = &std::free;
124*8e7f9e14SAlex         cache_aligned_allocate_handler_unsafe = &std_cache_aligned_allocate;
12551c0b2f7Stbbdev         cache_aligned_deallocate_handler = &std_cache_aligned_deallocate;
12651c0b2f7Stbbdev     }
12751c0b2f7Stbbdev 
128*8e7f9e14SAlex     allocate_handler.store(allocate_handler_unsafe, std::memory_order_release);
129*8e7f9e14SAlex     cache_aligned_allocate_handler.store(cache_aligned_allocate_handler_unsafe, std::memory_order_release);
130*8e7f9e14SAlex 
13151c0b2f7Stbbdev     PrintExtraVersionInfo( "ALLOCATOR", success?"scalable_malloc":"malloc" );
13251c0b2f7Stbbdev }
13351c0b2f7Stbbdev 
13451c0b2f7Stbbdev static std::once_flag initialization_state;
13551c0b2f7Stbbdev void initialize_cache_aligned_allocator() {
13651c0b2f7Stbbdev     std::call_once(initialization_state, &initialize_handler_pointers);
13751c0b2f7Stbbdev }
13851c0b2f7Stbbdev 
13951c0b2f7Stbbdev //! Executed on very first call through allocate_handler
14051c0b2f7Stbbdev static void* initialize_allocate_handler(std::size_t size) {
14151c0b2f7Stbbdev     initialize_cache_aligned_allocator();
14251c0b2f7Stbbdev     __TBB_ASSERT(allocate_handler != &initialize_allocate_handler, NULL);
14351c0b2f7Stbbdev     return (*allocate_handler)(size);
14451c0b2f7Stbbdev }
14551c0b2f7Stbbdev 
14651c0b2f7Stbbdev //! Executed on very first call through cache_aligned_allocate_handler
14751c0b2f7Stbbdev static void* initialize_cache_aligned_allocate_handler(std::size_t bytes, std::size_t alignment) {
14851c0b2f7Stbbdev     initialize_cache_aligned_allocator();
14951c0b2f7Stbbdev     __TBB_ASSERT(cache_aligned_allocate_handler != &initialize_cache_aligned_allocate_handler, NULL);
15051c0b2f7Stbbdev     return (*cache_aligned_allocate_handler)(bytes, alignment);
15151c0b2f7Stbbdev }
15251c0b2f7Stbbdev 
15351c0b2f7Stbbdev // TODO: use CPUID to find actual line size, though consider backward compatibility
15451c0b2f7Stbbdev // nfs - no false sharing
15551c0b2f7Stbbdev static constexpr std::size_t nfs_size = 128;
15651c0b2f7Stbbdev 
15751c0b2f7Stbbdev std::size_t __TBB_EXPORTED_FUNC cache_line_size() {
15851c0b2f7Stbbdev     return nfs_size;
15951c0b2f7Stbbdev }
16051c0b2f7Stbbdev 
16151c0b2f7Stbbdev void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size) {
16251c0b2f7Stbbdev     const std::size_t cache_line_size = nfs_size;
16351c0b2f7Stbbdev     __TBB_ASSERT(is_power_of_two(cache_line_size), "must be power of two");
16451c0b2f7Stbbdev 
16551c0b2f7Stbbdev     // Check for overflow
16651c0b2f7Stbbdev     if (size + cache_line_size < size) {
16751c0b2f7Stbbdev         throw_exception(exception_id::bad_alloc);
16851c0b2f7Stbbdev     }
16951c0b2f7Stbbdev     // scalable_aligned_malloc considers zero size request an error, and returns NULL
17051c0b2f7Stbbdev     if (size == 0) size = 1;
17151c0b2f7Stbbdev 
172*8e7f9e14SAlex     void* result = cache_aligned_allocate_handler.load(std::memory_order_acquire)(size, cache_line_size);
17351c0b2f7Stbbdev     if (!result) {
17451c0b2f7Stbbdev         throw_exception(exception_id::bad_alloc);
17551c0b2f7Stbbdev     }
17651c0b2f7Stbbdev     __TBB_ASSERT(is_aligned(result, cache_line_size), "The returned address isn't aligned");
17751c0b2f7Stbbdev     return result;
17851c0b2f7Stbbdev }
17951c0b2f7Stbbdev 
18051c0b2f7Stbbdev void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p) {
18151c0b2f7Stbbdev     __TBB_ASSERT(cache_aligned_deallocate_handler, "Initialization has not been yet.");
18251c0b2f7Stbbdev     (*cache_aligned_deallocate_handler)(p);
18351c0b2f7Stbbdev }
18451c0b2f7Stbbdev 
18551c0b2f7Stbbdev static void* std_cache_aligned_allocate(std::size_t bytes, std::size_t alignment) {
18651c0b2f7Stbbdev     // TODO: make it common with cache_aligned_resource
18751c0b2f7Stbbdev     std::size_t space = alignment + bytes;
18851c0b2f7Stbbdev     std::uintptr_t base = reinterpret_cast<std::uintptr_t>(std::malloc(space));
18951c0b2f7Stbbdev     if (!base) {
19051c0b2f7Stbbdev         return nullptr;
19151c0b2f7Stbbdev     }
19251c0b2f7Stbbdev     std::uintptr_t result = (base + nfs_size) & ~(nfs_size - 1);
19351c0b2f7Stbbdev     // Round up to the next cache line (align the base address)
19451c0b2f7Stbbdev     __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Cannot store a base pointer to the header");
19551c0b2f7Stbbdev     __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage");
19651c0b2f7Stbbdev 
19751c0b2f7Stbbdev     // Record where block actually starts.
19851c0b2f7Stbbdev     (reinterpret_cast<std::uintptr_t*>(result))[-1] = base;
19951c0b2f7Stbbdev     return reinterpret_cast<void*>(result);
20051c0b2f7Stbbdev }
20151c0b2f7Stbbdev 
20251c0b2f7Stbbdev static void std_cache_aligned_deallocate(void* p) {
20351c0b2f7Stbbdev     if (p) {
20451c0b2f7Stbbdev         __TBB_ASSERT(reinterpret_cast<std::uintptr_t>(p) >= 0x4096, "attempt to free block not obtained from cache_aligned_allocator");
20551c0b2f7Stbbdev         // Recover where block actually starts
20651c0b2f7Stbbdev         std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(p))[-1];
20751c0b2f7Stbbdev         __TBB_ASSERT(((base + nfs_size) & ~(nfs_size - 1)) == reinterpret_cast<std::uintptr_t>(p), "Incorrect alignment or not allocated by std_cache_aligned_deallocate?");
20851c0b2f7Stbbdev         std::free(reinterpret_cast<void*>(base));
20951c0b2f7Stbbdev     }
21051c0b2f7Stbbdev }
21151c0b2f7Stbbdev 
21251c0b2f7Stbbdev void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size) {
213*8e7f9e14SAlex     void* result = allocate_handler.load(std::memory_order_acquire)(size);
21451c0b2f7Stbbdev     if (!result) {
21551c0b2f7Stbbdev         throw_exception(exception_id::bad_alloc);
21651c0b2f7Stbbdev     }
21751c0b2f7Stbbdev     return result;
21851c0b2f7Stbbdev }
21951c0b2f7Stbbdev 
22051c0b2f7Stbbdev void __TBB_EXPORTED_FUNC deallocate_memory(void* p) {
22151c0b2f7Stbbdev     if (p) {
22251c0b2f7Stbbdev         __TBB_ASSERT(deallocate_handler, "Initialization has not been yet.");
22351c0b2f7Stbbdev         (*deallocate_handler)(p);
22451c0b2f7Stbbdev     }
22551c0b2f7Stbbdev }
22651c0b2f7Stbbdev 
22751c0b2f7Stbbdev bool __TBB_EXPORTED_FUNC is_tbbmalloc_used() {
228*8e7f9e14SAlex     auto handler_snapshot = allocate_handler.load(std::memory_order_acquire);
229*8e7f9e14SAlex     if (handler_snapshot == &initialize_allocate_handler) {
230*8e7f9e14SAlex         initialize_cache_aligned_allocator();
23151c0b2f7Stbbdev     }
232*8e7f9e14SAlex     handler_snapshot = allocate_handler.load(std::memory_order_relaxed);
233*8e7f9e14SAlex     __TBB_ASSERT(handler_snapshot != &initialize_allocate_handler && deallocate_handler != nullptr, NULL);
23451c0b2f7Stbbdev     // Cast to void avoids type mismatch errors on some compilers (e.g. __IBMCPP__)
235*8e7f9e14SAlex     __TBB_ASSERT((reinterpret_cast<void*>(handler_snapshot) == reinterpret_cast<void*>(&std::malloc)) == (reinterpret_cast<void*>(deallocate_handler) == reinterpret_cast<void*>(&std::free)),
23651c0b2f7Stbbdev                   "Both shim pointers must refer to routines from the same package (either TBB or CRT)");
237*8e7f9e14SAlex     return reinterpret_cast<void*>(handler_snapshot) == reinterpret_cast<void*>(&std::malloc);
23851c0b2f7Stbbdev }
23951c0b2f7Stbbdev 
24051c0b2f7Stbbdev } // namespace r1
24151c0b2f7Stbbdev } // namespace detail
24251c0b2f7Stbbdev } // namespace tbb
243