151c0b2f7Stbbdev /* 2*b15aabb3Stbbdev Copyright (c) 2005-2021 Intel Corporation 351c0b2f7Stbbdev 451c0b2f7Stbbdev Licensed under the Apache License, Version 2.0 (the "License"); 551c0b2f7Stbbdev you may not use this file except in compliance with the License. 651c0b2f7Stbbdev You may obtain a copy of the License at 751c0b2f7Stbbdev 851c0b2f7Stbbdev http://www.apache.org/licenses/LICENSE-2.0 951c0b2f7Stbbdev 1051c0b2f7Stbbdev Unless required by applicable law or agreed to in writing, software 1151c0b2f7Stbbdev distributed under the License is distributed on an "AS IS" BASIS, 1251c0b2f7Stbbdev WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1351c0b2f7Stbbdev See the License for the specific language governing permissions and 1451c0b2f7Stbbdev limitations under the License. 1551c0b2f7Stbbdev */ 1651c0b2f7Stbbdev 1749e08aacStbbdev #include "oneapi/tbb/version.h" 1851c0b2f7Stbbdev 1949e08aacStbbdev #include "oneapi/tbb/detail/_exception.h" 2049e08aacStbbdev #include "oneapi/tbb/detail/_assert.h" 2149e08aacStbbdev #include "oneapi/tbb/detail/_utils.h" 2251c0b2f7Stbbdev 2351c0b2f7Stbbdev #include "dynamic_link.h" 2451c0b2f7Stbbdev #include "misc.h" 2551c0b2f7Stbbdev 2651c0b2f7Stbbdev #include <cstdlib> 2751c0b2f7Stbbdev 2851c0b2f7Stbbdev #if _WIN32 || _WIN64 2951c0b2f7Stbbdev #include <Windows.h> 3051c0b2f7Stbbdev #else 3151c0b2f7Stbbdev #include <dlfcn.h> 3251c0b2f7Stbbdev #endif /* _WIN32||_WIN64 */ 3351c0b2f7Stbbdev 3451c0b2f7Stbbdev #if __TBB_WEAK_SYMBOLS_PRESENT 3551c0b2f7Stbbdev 3651c0b2f7Stbbdev #pragma weak scalable_malloc 3751c0b2f7Stbbdev #pragma weak scalable_free 3851c0b2f7Stbbdev #pragma weak scalable_aligned_malloc 3951c0b2f7Stbbdev #pragma weak scalable_aligned_free 4051c0b2f7Stbbdev 4151c0b2f7Stbbdev extern "C" { 4251c0b2f7Stbbdev void* scalable_malloc(std::size_t); 4351c0b2f7Stbbdev void scalable_free(void*); 4451c0b2f7Stbbdev void* scalable_aligned_malloc(std::size_t, std::size_t); 4551c0b2f7Stbbdev void scalable_aligned_free(void*); 4651c0b2f7Stbbdev } 4751c0b2f7Stbbdev 4851c0b2f7Stbbdev #endif /* __TBB_WEAK_SYMBOLS_PRESENT */ 4951c0b2f7Stbbdev 5051c0b2f7Stbbdev namespace tbb { 5151c0b2f7Stbbdev namespace detail { 5251c0b2f7Stbbdev namespace r1 { 5351c0b2f7Stbbdev 5451c0b2f7Stbbdev //! Initialization routine used for first indirect call via allocate_handler. 5551c0b2f7Stbbdev static void* initialize_allocate_handler(std::size_t size); 5651c0b2f7Stbbdev 5751c0b2f7Stbbdev //! Handler for memory allocation 5851c0b2f7Stbbdev static void* (*allocate_handler)(std::size_t size) = &initialize_allocate_handler; 5951c0b2f7Stbbdev 6051c0b2f7Stbbdev //! Handler for memory deallocation 6151c0b2f7Stbbdev static void (*deallocate_handler)(void* pointer) = nullptr; 6251c0b2f7Stbbdev 6351c0b2f7Stbbdev //! Initialization routine used for first indirect call via cache_aligned_allocate_handler. 6451c0b2f7Stbbdev static void* initialize_cache_aligned_allocate_handler(std::size_t n, std::size_t alignment); 6551c0b2f7Stbbdev 6651c0b2f7Stbbdev //! Allocates memory using standard malloc. It is used when scalable_allocator is not available 6751c0b2f7Stbbdev static void* std_cache_aligned_allocate(std::size_t n, std::size_t alignment); 6851c0b2f7Stbbdev 6951c0b2f7Stbbdev //! Allocates memory using standard free. It is used when scalable_allocator is not available 7051c0b2f7Stbbdev static void std_cache_aligned_deallocate(void* p); 7151c0b2f7Stbbdev 7251c0b2f7Stbbdev //! Handler for padded memory allocation 7351c0b2f7Stbbdev static void* (*cache_aligned_allocate_handler)(std::size_t n, std::size_t alignment) = &initialize_cache_aligned_allocate_handler; 7451c0b2f7Stbbdev 7551c0b2f7Stbbdev //! Handler for padded memory deallocation 7651c0b2f7Stbbdev static void (*cache_aligned_deallocate_handler)(void* p) = nullptr; 7751c0b2f7Stbbdev 7851c0b2f7Stbbdev //! Table describing how to link the handlers. 7951c0b2f7Stbbdev static const dynamic_link_descriptor MallocLinkTable[] = { 8051c0b2f7Stbbdev DLD(scalable_malloc, allocate_handler), 8151c0b2f7Stbbdev DLD(scalable_free, deallocate_handler), 8251c0b2f7Stbbdev DLD(scalable_aligned_malloc, cache_aligned_allocate_handler), 8351c0b2f7Stbbdev DLD(scalable_aligned_free, cache_aligned_deallocate_handler), 8451c0b2f7Stbbdev }; 8551c0b2f7Stbbdev 8651c0b2f7Stbbdev 8751c0b2f7Stbbdev #if TBB_USE_DEBUG 8851c0b2f7Stbbdev #define DEBUG_SUFFIX "_debug" 8951c0b2f7Stbbdev #else 9051c0b2f7Stbbdev #define DEBUG_SUFFIX 9151c0b2f7Stbbdev #endif /* TBB_USE_DEBUG */ 9251c0b2f7Stbbdev 9351c0b2f7Stbbdev // MALLOCLIB_NAME is the name of the oneTBB memory allocator library. 9451c0b2f7Stbbdev #if _WIN32||_WIN64 9551c0b2f7Stbbdev #define MALLOCLIB_NAME "tbbmalloc" DEBUG_SUFFIX ".dll" 9651c0b2f7Stbbdev #elif __APPLE__ 9751c0b2f7Stbbdev #define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".dylib" 9851c0b2f7Stbbdev #elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX || __ANDROID__ 9951c0b2f7Stbbdev #define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so" 10051c0b2f7Stbbdev #elif __linux__ // Note that order of these #elif's is important! 10151c0b2f7Stbbdev #define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so.2" 10251c0b2f7Stbbdev #else 10351c0b2f7Stbbdev #error Unknown OS 10451c0b2f7Stbbdev #endif 10551c0b2f7Stbbdev 10651c0b2f7Stbbdev //! Initialize the allocation/free handler pointers. 10751c0b2f7Stbbdev /** Caller is responsible for ensuring this routine is called exactly once. 10851c0b2f7Stbbdev The routine attempts to dynamically link with the TBB memory allocator. 10951c0b2f7Stbbdev If that allocator is not found, it links to malloc and free. */ 11051c0b2f7Stbbdev void initialize_handler_pointers() { 11151c0b2f7Stbbdev __TBB_ASSERT(allocate_handler == &initialize_allocate_handler, NULL); 11251c0b2f7Stbbdev bool success = dynamic_link(MALLOCLIB_NAME, MallocLinkTable, 4); 11351c0b2f7Stbbdev if(!success) { 11451c0b2f7Stbbdev // If unsuccessful, set the handlers to the default routines. 11551c0b2f7Stbbdev // This must be done now, and not before FillDynamicLinks runs, because if other 11651c0b2f7Stbbdev // threads call the handlers, we want them to go through the DoOneTimeInitializations logic, 11751c0b2f7Stbbdev // which forces them to wait. 11851c0b2f7Stbbdev allocate_handler = &std::malloc; 11951c0b2f7Stbbdev deallocate_handler = &std::free; 12051c0b2f7Stbbdev cache_aligned_allocate_handler = &std_cache_aligned_allocate; 12151c0b2f7Stbbdev cache_aligned_deallocate_handler = &std_cache_aligned_deallocate; 12251c0b2f7Stbbdev } 12351c0b2f7Stbbdev 12451c0b2f7Stbbdev PrintExtraVersionInfo( "ALLOCATOR", success?"scalable_malloc":"malloc" ); 12551c0b2f7Stbbdev } 12651c0b2f7Stbbdev 12751c0b2f7Stbbdev static std::once_flag initialization_state; 12851c0b2f7Stbbdev void initialize_cache_aligned_allocator() { 12951c0b2f7Stbbdev std::call_once(initialization_state, &initialize_handler_pointers); 13051c0b2f7Stbbdev } 13151c0b2f7Stbbdev 13251c0b2f7Stbbdev //! Executed on very first call through allocate_handler 13351c0b2f7Stbbdev static void* initialize_allocate_handler(std::size_t size) { 13451c0b2f7Stbbdev initialize_cache_aligned_allocator(); 13551c0b2f7Stbbdev __TBB_ASSERT(allocate_handler != &initialize_allocate_handler, NULL); 13651c0b2f7Stbbdev return (*allocate_handler)(size); 13751c0b2f7Stbbdev } 13851c0b2f7Stbbdev 13951c0b2f7Stbbdev //! Executed on very first call through cache_aligned_allocate_handler 14051c0b2f7Stbbdev static void* initialize_cache_aligned_allocate_handler(std::size_t bytes, std::size_t alignment) { 14151c0b2f7Stbbdev initialize_cache_aligned_allocator(); 14251c0b2f7Stbbdev __TBB_ASSERT(cache_aligned_allocate_handler != &initialize_cache_aligned_allocate_handler, NULL); 14351c0b2f7Stbbdev return (*cache_aligned_allocate_handler)(bytes, alignment); 14451c0b2f7Stbbdev } 14551c0b2f7Stbbdev 14651c0b2f7Stbbdev // TODO: use CPUID to find actual line size, though consider backward compatibility 14751c0b2f7Stbbdev // nfs - no false sharing 14851c0b2f7Stbbdev static constexpr std::size_t nfs_size = 128; 14951c0b2f7Stbbdev 15051c0b2f7Stbbdev std::size_t __TBB_EXPORTED_FUNC cache_line_size() { 15151c0b2f7Stbbdev return nfs_size; 15251c0b2f7Stbbdev } 15351c0b2f7Stbbdev 15451c0b2f7Stbbdev void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size) { 15551c0b2f7Stbbdev const std::size_t cache_line_size = nfs_size; 15651c0b2f7Stbbdev __TBB_ASSERT(is_power_of_two(cache_line_size), "must be power of two"); 15751c0b2f7Stbbdev 15851c0b2f7Stbbdev // Check for overflow 15951c0b2f7Stbbdev if (size + cache_line_size < size) { 16051c0b2f7Stbbdev throw_exception(exception_id::bad_alloc); 16151c0b2f7Stbbdev } 16251c0b2f7Stbbdev // scalable_aligned_malloc considers zero size request an error, and returns NULL 16351c0b2f7Stbbdev if (size == 0) size = 1; 16451c0b2f7Stbbdev 16551c0b2f7Stbbdev void* result = cache_aligned_allocate_handler(size, cache_line_size); 16651c0b2f7Stbbdev if (!result) { 16751c0b2f7Stbbdev throw_exception(exception_id::bad_alloc); 16851c0b2f7Stbbdev } 16951c0b2f7Stbbdev __TBB_ASSERT(is_aligned(result, cache_line_size), "The returned address isn't aligned"); 17051c0b2f7Stbbdev return result; 17151c0b2f7Stbbdev } 17251c0b2f7Stbbdev 17351c0b2f7Stbbdev void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p) { 17451c0b2f7Stbbdev __TBB_ASSERT(cache_aligned_deallocate_handler, "Initialization has not been yet."); 17551c0b2f7Stbbdev (*cache_aligned_deallocate_handler)(p); 17651c0b2f7Stbbdev } 17751c0b2f7Stbbdev 17851c0b2f7Stbbdev static void* std_cache_aligned_allocate(std::size_t bytes, std::size_t alignment) { 17951c0b2f7Stbbdev // TODO: make it common with cache_aligned_resource 18051c0b2f7Stbbdev std::size_t space = alignment + bytes; 18151c0b2f7Stbbdev std::uintptr_t base = reinterpret_cast<std::uintptr_t>(std::malloc(space)); 18251c0b2f7Stbbdev if (!base) { 18351c0b2f7Stbbdev return nullptr; 18451c0b2f7Stbbdev } 18551c0b2f7Stbbdev std::uintptr_t result = (base + nfs_size) & ~(nfs_size - 1); 18651c0b2f7Stbbdev // Round up to the next cache line (align the base address) 18751c0b2f7Stbbdev __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Cannot store a base pointer to the header"); 18851c0b2f7Stbbdev __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage"); 18951c0b2f7Stbbdev 19051c0b2f7Stbbdev // Record where block actually starts. 19151c0b2f7Stbbdev (reinterpret_cast<std::uintptr_t*>(result))[-1] = base; 19251c0b2f7Stbbdev return reinterpret_cast<void*>(result); 19351c0b2f7Stbbdev } 19451c0b2f7Stbbdev 19551c0b2f7Stbbdev static void std_cache_aligned_deallocate(void* p) { 19651c0b2f7Stbbdev if (p) { 19751c0b2f7Stbbdev __TBB_ASSERT(reinterpret_cast<std::uintptr_t>(p) >= 0x4096, "attempt to free block not obtained from cache_aligned_allocator"); 19851c0b2f7Stbbdev // Recover where block actually starts 19951c0b2f7Stbbdev std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(p))[-1]; 20051c0b2f7Stbbdev __TBB_ASSERT(((base + nfs_size) & ~(nfs_size - 1)) == reinterpret_cast<std::uintptr_t>(p), "Incorrect alignment or not allocated by std_cache_aligned_deallocate?"); 20151c0b2f7Stbbdev std::free(reinterpret_cast<void*>(base)); 20251c0b2f7Stbbdev } 20351c0b2f7Stbbdev } 20451c0b2f7Stbbdev 20551c0b2f7Stbbdev void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size) { 20651c0b2f7Stbbdev void* result = (*allocate_handler)(size); 20751c0b2f7Stbbdev if (!result) { 20851c0b2f7Stbbdev throw_exception(exception_id::bad_alloc); 20951c0b2f7Stbbdev } 21051c0b2f7Stbbdev return result; 21151c0b2f7Stbbdev } 21251c0b2f7Stbbdev 21351c0b2f7Stbbdev void __TBB_EXPORTED_FUNC deallocate_memory(void* p) { 21451c0b2f7Stbbdev if (p) { 21551c0b2f7Stbbdev __TBB_ASSERT(deallocate_handler, "Initialization has not been yet."); 21651c0b2f7Stbbdev (*deallocate_handler)(p); 21751c0b2f7Stbbdev } 21851c0b2f7Stbbdev } 21951c0b2f7Stbbdev 22051c0b2f7Stbbdev bool __TBB_EXPORTED_FUNC is_tbbmalloc_used() { 22151c0b2f7Stbbdev if (allocate_handler == &initialize_allocate_handler) { 22251c0b2f7Stbbdev void* void_ptr = allocate_handler(1); 22351c0b2f7Stbbdev deallocate_handler(void_ptr); 22451c0b2f7Stbbdev } 22551c0b2f7Stbbdev __TBB_ASSERT(allocate_handler != &initialize_allocate_handler && deallocate_handler != nullptr, NULL); 22651c0b2f7Stbbdev // Cast to void avoids type mismatch errors on some compilers (e.g. __IBMCPP__) 22751c0b2f7Stbbdev __TBB_ASSERT((reinterpret_cast<void*>(allocate_handler) == reinterpret_cast<void*>(&std::malloc)) == (reinterpret_cast<void*>(deallocate_handler) == reinterpret_cast<void*>(&std::free)), 22851c0b2f7Stbbdev "Both shim pointers must refer to routines from the same package (either TBB or CRT)"); 22951c0b2f7Stbbdev return reinterpret_cast<void*>(allocate_handler) == reinterpret_cast<void*>(&std::malloc); 23051c0b2f7Stbbdev } 23151c0b2f7Stbbdev 23251c0b2f7Stbbdev } // namespace r1 23351c0b2f7Stbbdev } // namespace detail 23451c0b2f7Stbbdev } // namespace tbb 235