/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "oneapi/tbb/version.h"

#include "oneapi/tbb/detail/_exception.h"
#include "oneapi/tbb/detail/_assert.h"
#include "oneapi/tbb/detail/_utils.h"
#include "oneapi/tbb/tbb_allocator.h" // Is this OK?
#include "oneapi/tbb/cache_aligned_allocator.h"

#include "dynamic_link.h"
#include "misc.h"

#include <cstdlib>
#include <cstdint> // std::uintptr_t
#include <atomic>  // std::atomic, std::memory_order
#include <mutex>   // std::once_flag, std::call_once

#ifdef _WIN32
#include <windows.h>
#else
#include <dlfcn.h>
#endif

#if (!defined(_WIN32) && !defined(_WIN64)) || defined(__CYGWIN__)
#include <stdlib.h> // posix_memalign, free
// With glibc, uClibc and musl on Linux, and bionic on Android, it is safe to use memalign(), as the allocated
// memory can be freed with free(). memalign() is also preferable because posix_memalign() is just a wrapper on
// top of it whose less convenient interface adds nothing but overhead. This is likely the case with other
// standard libraries as well, and more libraries can be added to the preprocessor check below. Unfortunately,
// musl cannot be detected at compile time, so memalign() is simply enabled on Linux and Android in general.
#if defined(linux) || defined(__linux) || defined(__linux__) || defined(__ANDROID__)
#include <malloc.h> // memalign
#define __TBB_USE_MEMALIGN
#else
#define __TBB_USE_POSIX_MEMALIGN
#endif
#elif defined(_MSC_VER) || defined(__MINGW32__)
#include <malloc.h> // _aligned_malloc, _aligned_free
#define __TBB_USE_MSVC_ALIGNED_MALLOC
#endif
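
// Illustrative sketch (not part of this translation unit): how each aligned-allocation
// flavor selected above pairs with its deallocation routine. The size/alignment values
// are arbitrary examples.
//
//   #if defined(__TBB_USE_MEMALIGN)
//       void* p = memalign(64, 1024);                        // freed with plain free()
//       free(p);
//   #elif defined(__TBB_USE_POSIX_MEMALIGN)
//       void* p = nullptr;
//       if (posix_memalign(&p, 64, 1024) != 0) p = nullptr;
//       free(p);                                             // also freed with plain free()
//   #elif defined(__TBB_USE_MSVC_ALIGNED_MALLOC)
//       void* p = _aligned_malloc(1024, 64);                 // must be freed with _aligned_free()
//       _aligned_free(p);
//   #endif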

#if __TBB_WEAK_SYMBOLS_PRESENT

#pragma weak scalable_malloc
#pragma weak scalable_free
#pragma weak scalable_aligned_malloc
#pragma weak scalable_aligned_free

extern "C" {
    void* scalable_malloc(std::size_t);
    void  scalable_free(void*);
    void* scalable_aligned_malloc(std::size_t, std::size_t);
    void  scalable_aligned_free(void*);
}

#endif /* __TBB_WEAK_SYMBOLS_PRESENT */
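
// What the weak declarations buy us (a sketch of the mechanism; exact behavior is
// toolchain-dependent): if tbbmalloc is not present at load time, the weak symbols
// resolve to a null address instead of producing a link error, so availability can
// be tested at run time, e.g.:
//
//   if (scalable_malloc != nullptr) {   // symbol resolved: tbbmalloc is linked in
//       void* p = scalable_malloc(64);
//       scalable_free(p);
//   }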

namespace tbb {
namespace detail {
namespace r1 {

//! Initialization routine used for the first indirect call via allocate_handler.
static void* initialize_allocate_handler(std::size_t size);

//! Handler for memory allocation
using allocate_handler_type = void* (*)(std::size_t size);
static std::atomic<allocate_handler_type> allocate_handler{ &initialize_allocate_handler };
allocate_handler_type allocate_handler_unsafe = nullptr;

//! Handler for memory deallocation
static void  (*deallocate_handler)(void* pointer) = nullptr;

//! Initialization routine used for the first indirect call via cache_aligned_allocate_handler.
static void* initialize_cache_aligned_allocate_handler(std::size_t n, std::size_t alignment);

//! Allocates over-aligned memory using the standard memory allocator. Used when the scalable allocator is not available.
static void* std_cache_aligned_allocate(std::size_t n, std::size_t alignment);

//! Deallocates over-aligned memory using the standard memory allocator. Used when the scalable allocator is not available.
static void  std_cache_aligned_deallocate(void* p);

//! Handler for padded memory allocation
using cache_aligned_allocate_handler_type = void* (*)(std::size_t n, std::size_t alignment);
static std::atomic<cache_aligned_allocate_handler_type> cache_aligned_allocate_handler{ &initialize_cache_aligned_allocate_handler };
cache_aligned_allocate_handler_type cache_aligned_allocate_handler_unsafe = nullptr;

//! Handler for padded memory deallocation
static void (*cache_aligned_deallocate_handler)(void* p) = nullptr;

//! Table describing how to link the handlers.
static const dynamic_link_descriptor MallocLinkTable[] = {
    DLD(scalable_malloc, allocate_handler_unsafe),
    DLD(scalable_free, deallocate_handler),
    DLD(scalable_aligned_malloc, cache_aligned_allocate_handler_unsafe),
    DLD(scalable_aligned_free, cache_aligned_deallocate_handler),
};
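
// Each DLD entry above pairs an exported tbbmalloc symbol with the local pointer that
// dynamic_link() fills on success; conceptually (only a sketch, see dynamic_link.h for
// the actual descriptor layout):
//
//   { "scalable_malloc", (pointer_to_handler*)&allocate_handler_unsafe }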


#if TBB_USE_DEBUG
#define DEBUG_SUFFIX "_debug"
#else
#define DEBUG_SUFFIX
#endif /* TBB_USE_DEBUG */

// MALLOCLIB_NAME is the name of the oneTBB memory allocator library.
#if _WIN32||_WIN64
#define MALLOCLIB_NAME "tbbmalloc" DEBUG_SUFFIX ".dll"
#elif __APPLE__
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".2.dylib"
#elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX || __ANDROID__
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so"
#elif __unix__  // Note that the order of these #elif's is important!
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so.2"
#else
#error Unknown OS
#endif

//! Initialize the allocation/free handler pointers.
/** The caller is responsible for ensuring this routine is called exactly once.
    The routine attempts to dynamically link with the TBB memory allocator.
    If that allocator is not found, it links to malloc and free. */
void initialize_handler_pointers() {
    __TBB_ASSERT(allocate_handler == &initialize_allocate_handler, nullptr);
    bool success = dynamic_link(MALLOCLIB_NAME, MallocLinkTable, 4);
    if(!success) {
        // If unsuccessful, set the handlers to the default routines.
        // This must be done now, and not before dynamic_link runs, because if other
        // threads call the handlers in the meantime, we want them to go through the
        // call_once initialization logic, which forces them to wait.
        allocate_handler_unsafe = &std::malloc;
        deallocate_handler = &std::free;
        cache_aligned_allocate_handler_unsafe = &std_cache_aligned_allocate;
        cache_aligned_deallocate_handler = &std_cache_aligned_deallocate;
    }

    allocate_handler.store(allocate_handler_unsafe, std::memory_order_release);
    cache_aligned_allocate_handler.store(cache_aligned_allocate_handler_unsafe, std::memory_order_release);

    PrintExtraVersionInfo( "ALLOCATOR", success ? "scalable_malloc" : "malloc" );
}
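
// The two-phase publication used above, in miniature (an illustrative sketch, not code
// from this file): dynamic_link() fills the *_unsafe raw pointers through the link table,
// and only then is each atomic handler published with a release store, so a thread that
// acquire-loads a handler observes a fully resolved pointer.
//
//   using fn_t = void* (*)(std::size_t);
//   std::atomic<fn_t> handler{&first_call_stub};
//   fn_t handler_unsafe = nullptr;
//   // writer (runs exactly once):
//   handler_unsafe = resolved_symbol_or_fallback;            // plain store, single writer
//   handler.store(handler_unsafe, std::memory_order_release);
//   // any reader:
//   void* p = handler.load(std::memory_order_acquire)(bytes);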

static std::once_flag initialization_state;
void initialize_cache_aligned_allocator() {
    std::call_once(initialization_state, &initialize_handler_pointers);
}

//! Executed on the very first call through allocate_handler
static void* initialize_allocate_handler(std::size_t size) {
    initialize_cache_aligned_allocator();
    __TBB_ASSERT(allocate_handler != &initialize_allocate_handler, nullptr);
    return (*allocate_handler)(size);
}

//! Executed on the very first call through cache_aligned_allocate_handler
static void* initialize_cache_aligned_allocate_handler(std::size_t bytes, std::size_t alignment) {
    initialize_cache_aligned_allocator();
    __TBB_ASSERT(cache_aligned_allocate_handler != &initialize_cache_aligned_allocate_handler, nullptr);
    return (*cache_aligned_allocate_handler)(bytes, alignment);
}

// TODO: use CPUID to find the actual cache line size, though consider backward compatibility
// nfs - no false sharing
static constexpr std::size_t nfs_size = 128;

std::size_t __TBB_EXPORTED_FUNC cache_line_size() {
    return nfs_size;
}

void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size) {
    const std::size_t cache_line_size = nfs_size;
    __TBB_ASSERT(is_power_of_two(cache_line_size), "must be power of two");

    // Check for overflow
    if (size + cache_line_size < size) {
        throw_exception(exception_id::bad_alloc);
    }
    // scalable_aligned_malloc considers a zero-size request an error and returns nullptr
    if (size == 0) size = 1;

    void* result = cache_aligned_allocate_handler.load(std::memory_order_acquire)(size, cache_line_size);
    if (!result) {
        throw_exception(exception_id::bad_alloc);
    }
    __TBB_ASSERT(is_aligned(result, cache_line_size), "The returned address isn't aligned");
    return result;
}
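
// Minimal usage sketch for the two exported routines above and below (illustrative;
// client code normally reaches them through tbb::cache_aligned_allocator rather than
// calling them directly):
//
//   void* p = cache_aligned_allocate(40);   // padded to a cache line, throws bad_alloc on failure
//   // ... use p; the address is nfs_size-aligned ...
//   cache_aligned_deallocate(p);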

void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p) {
    __TBB_ASSERT(cache_aligned_deallocate_handler, "Initialization has not been done yet.");
    (*cache_aligned_deallocate_handler)(p);
}

static void* std_cache_aligned_allocate(std::size_t bytes, std::size_t alignment) {
#if defined(__TBB_USE_MEMALIGN)
    return memalign(alignment, bytes);
#elif defined(__TBB_USE_POSIX_MEMALIGN)
    void* p = nullptr;
    int res = posix_memalign(&p, alignment, bytes);
    if (res != 0)
        p = nullptr;
    return p;
#elif defined(__TBB_USE_MSVC_ALIGNED_MALLOC)
    return _aligned_malloc(bytes, alignment);
#else
    // TODO: make it common with cache_aligned_resource
    std::size_t space = alignment + bytes;
    std::uintptr_t base = reinterpret_cast<std::uintptr_t>(std::malloc(space));
    if (!base) {
        return nullptr;
    }
    // Round up to the next cache line (align the base address)
    std::uintptr_t result = (base + nfs_size) & ~(nfs_size - 1);
    __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Cannot store a base pointer to the header");
    __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage");

    // Record where the block actually starts.
    (reinterpret_cast<std::uintptr_t*>(result))[-1] = base;
    return reinterpret_cast<void*>(result);
#endif
}
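
// Worked example for the generic fallback path above (illustrative numbers, assuming
// nfs_size == 128): malloc(128 + bytes) returns base = 0x1008; rounding up gives
// result = (0x1008 + 128) & ~127 = 0x1080, which is 128-byte aligned and leaves
// 0x78 bytes of header room, enough to stash `base` in the word at result[-1]:
//
//   std::uintptr_t base   = 0x1008;
//   std::uintptr_t result = (base + 128) & ~std::uintptr_t(127);   // == 0x1080
//   // result - base == 0x78 >= sizeof(std::uintptr_t), so the header fits,
//   // and space - (result - base) == bytes + 8 >= bytes, so the payload fits.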

static void std_cache_aligned_deallocate(void* p) {
#if defined(__TBB_USE_MEMALIGN) || defined(__TBB_USE_POSIX_MEMALIGN)
    free(p);
#elif defined(__TBB_USE_MSVC_ALIGNED_MALLOC)
    _aligned_free(p);
#else
    if (p) {
        __TBB_ASSERT(reinterpret_cast<std::uintptr_t>(p) >= 0x4096, "attempt to free block not obtained from cache_aligned_allocator");
        // Recover where the block actually starts
        std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(p))[-1];
        __TBB_ASSERT(((base + nfs_size) & ~(nfs_size - 1)) == reinterpret_cast<std::uintptr_t>(p), "Incorrect alignment or not allocated by std_cache_aligned_allocate?");
        std::free(reinterpret_cast<void*>(base));
    }
#endif
}

void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size) {
    void* result = allocate_handler.load(std::memory_order_acquire)(size);
    if (!result) {
        throw_exception(exception_id::bad_alloc);
    }
    return result;
}

void __TBB_EXPORTED_FUNC deallocate_memory(void* p) {
    if (p) {
        __TBB_ASSERT(deallocate_handler, "Initialization has not been done yet.");
        (*deallocate_handler)(p);
    }
}

bool __TBB_EXPORTED_FUNC is_tbbmalloc_used() {
    auto handler_snapshot = allocate_handler.load(std::memory_order_acquire);
    if (handler_snapshot == &initialize_allocate_handler) {
        initialize_cache_aligned_allocator();
    }
    handler_snapshot = allocate_handler.load(std::memory_order_relaxed);
    __TBB_ASSERT(handler_snapshot != &initialize_allocate_handler && deallocate_handler != nullptr, nullptr);
    // Casting to void* avoids type mismatch errors on some compilers (e.g. __IBMCPP__)
    __TBB_ASSERT((reinterpret_cast<void*>(handler_snapshot) == reinterpret_cast<void*>(&std::malloc)) == (reinterpret_cast<void*>(deallocate_handler) == reinterpret_cast<void*>(&std::free)),
                  "Both shim pointers must refer to routines from the same package (either TBB or CRT)");
    return reinterpret_cast<void*>(handler_snapshot) == reinterpret_cast<void*>(&std::malloc);
}
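
// Note (illustrative): the query above is safe to call before any allocation has
// happened, because it triggers the one-time initialization itself and so never
// observes the bootstrap stub, e.g.:
//
//   bool b = is_tbbmalloc_used();   // forces handler initialization if still pending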

} // namespace r1
} // namespace detail
} // namespace tbb