/*
    Copyright (c) 2005-2020 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbb/version.h"

#include "tbb/detail/_exception.h"
#include "tbb/detail/_assert.h"
#include "tbb/detail/_utils.h"

#include "dynamic_link.h"
#include "misc.h"

#include <cstdlib>
#include <cstdint>  // std::uintptr_t
#include <mutex>    // std::once_flag, std::call_once

#if _WIN32 || _WIN64
#include <Windows.h>
#else
#include <dlfcn.h>
#endif /* _WIN32||_WIN64 */

#if __TBB_WEAK_SYMBOLS_PRESENT

#pragma weak scalable_malloc
#pragma weak scalable_free
#pragma weak scalable_aligned_malloc
#pragma weak scalable_aligned_free

extern "C" {
    void* scalable_malloc(std::size_t);
    void  scalable_free(void*);
    void* scalable_aligned_malloc(std::size_t, std::size_t);
    void  scalable_aligned_free(void*);
}

#endif /* __TBB_WEAK_SYMBOLS_PRESENT */

namespace tbb {
namespace detail {
namespace r1 {

//! Initialization routine used for the first indirect call via allocate_handler.
static void* initialize_allocate_handler(std::size_t size);

//! Handler for memory allocation
static void* (*allocate_handler)(std::size_t size) = &initialize_allocate_handler;

//! Handler for memory deallocation
static void (*deallocate_handler)(void* pointer) = nullptr;

//! Initialization routine used for the first indirect call via cache_aligned_allocate_handler.
static void* initialize_cache_aligned_allocate_handler(std::size_t n, std::size_t alignment);

//! Allocates memory using standard malloc. It is used when the scalable allocator is not available.
static void* std_cache_aligned_allocate(std::size_t n, std::size_t alignment);

//! Deallocates memory using standard free. It is used when the scalable allocator is not available.
static void std_cache_aligned_deallocate(void* p);

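// The handler pointers above (and the two padded-allocation handlers below) form a
// lazy-binding trampoline: each pointer initially refers to an initialize_* routine
// that performs the dynamic link (or falls back to the CRT) and then forwards the
// original request. An illustrative sketch of the first-call flow, not additional API:
//
//     void* p = (*allocate_handler)(n);  // first call: runs initialize_allocate_handler,
//                                        // which binds the real routine, then allocates
//     void* q = (*allocate_handler)(n);  // later calls: go straight to the bound routine
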
//! Handler for padded memory allocation
static void* (*cache_aligned_allocate_handler)(std::size_t n, std::size_t alignment) = &initialize_cache_aligned_allocate_handler;

//! Handler for padded memory deallocation
static void (*cache_aligned_deallocate_handler)(void* p) = nullptr;

//! Table describing how to link the handlers.
static const dynamic_link_descriptor MallocLinkTable[] = {
    DLD(scalable_malloc, allocate_handler),
    DLD(scalable_free, deallocate_handler),
    DLD(scalable_aligned_malloc, cache_aligned_allocate_handler),
    DLD(scalable_aligned_free, cache_aligned_deallocate_handler),
};


#if TBB_USE_DEBUG
#define DEBUG_SUFFIX "_debug"
#else
#define DEBUG_SUFFIX
#endif /* TBB_USE_DEBUG */

// MALLOCLIB_NAME is the name of the oneTBB memory allocator library.
#if _WIN32||_WIN64
#define MALLOCLIB_NAME "tbbmalloc" DEBUG_SUFFIX ".dll"
#elif __APPLE__
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".dylib"
#elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX || __ANDROID__
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so"
#elif __linux__  // Note that the order of these #elif's is important!
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so.2"
#else
#error Unknown OS
#endif

//! Initialize the allocation/free handler pointers.
/** The caller is responsible for ensuring this routine is called exactly once.
    The routine attempts to dynamically link with the TBB memory allocator.
    If that allocator is not found, it links to malloc and free. */
void initialize_handler_pointers() {
    __TBB_ASSERT(allocate_handler == &initialize_allocate_handler, NULL);
    bool success = dynamic_link(MALLOCLIB_NAME, MallocLinkTable, 4);
    if (!success) {
        // If unsuccessful, set the handlers to the default routines.
        // This must be done now, and not before dynamic_link runs, because if other
        // threads call the handlers, we want them to go through the call_once logic below,
        // which forces them to wait until initialization completes.
        allocate_handler = &std::malloc;
        deallocate_handler = &std::free;
        cache_aligned_allocate_handler = &std_cache_aligned_allocate;
        cache_aligned_deallocate_handler = &std_cache_aligned_deallocate;
    }

    PrintExtraVersionInfo("ALLOCATOR", success ? "scalable_malloc" : "malloc");
}

static std::once_flag initialization_state;
void initialize_cache_aligned_allocator() {
    std::call_once(initialization_state, &initialize_handler_pointers);
}

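// A minimal sketch of the once-initialization idiom used above (names here are
// illustrative only, not part of this file): concurrent first callers all funnel
// through std::call_once, which blocks them until the chosen initializer returns,
// so no thread can observe half-bound handler pointers.
//
//     static std::once_flag example_flag;
//     void example_ensure_initialized() {
//         std::call_once(example_flag, [] { /* bind the handler pointers exactly once */ });
//     }
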
//! Executed on the very first call through allocate_handler
static void* initialize_allocate_handler(std::size_t size) {
    initialize_cache_aligned_allocator();
    __TBB_ASSERT(allocate_handler != &initialize_allocate_handler, NULL);
    return (*allocate_handler)(size);
}

//! Executed on the very first call through cache_aligned_allocate_handler
static void* initialize_cache_aligned_allocate_handler(std::size_t bytes, std::size_t alignment) {
    initialize_cache_aligned_allocator();
    __TBB_ASSERT(cache_aligned_allocate_handler != &initialize_cache_aligned_allocate_handler, NULL);
    return (*cache_aligned_allocate_handler)(bytes, alignment);
}

// TODO: use CPUID to find the actual cache line size, though consider backward compatibility
// nfs - no false sharing
static constexpr std::size_t nfs_size = 128;

std::size_t __TBB_EXPORTED_FUNC cache_line_size() {
    return nfs_size;
}

void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size) {
    const std::size_t cache_line_size = nfs_size;
    __TBB_ASSERT(is_power_of_two(cache_line_size), "must be power of two");

    // Check for overflow
    if (size + cache_line_size < size) {
        throw_exception(exception_id::bad_alloc);
    }
    // scalable_aligned_malloc considers a zero-size request an error and returns NULL
    if (size == 0) size = 1;

    void* result = cache_aligned_allocate_handler(size, cache_line_size);
    if (!result) {
        throw_exception(exception_id::bad_alloc);
    }
    __TBB_ASSERT(is_aligned(result, cache_line_size), "The returned address isn't aligned");
    return result;
}

void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p) {
    __TBB_ASSERT(cache_aligned_deallocate_handler, "Initialization has not been done yet.");
    (*cache_aligned_deallocate_handler)(p);
}

static void* std_cache_aligned_allocate(std::size_t bytes, std::size_t alignment) {
    // TODO: make it common with cache_aligned_resource
    std::size_t space = alignment + bytes;
    std::uintptr_t base = reinterpret_cast<std::uintptr_t>(std::malloc(space));
    if (!base) {
        return nullptr;
    }
    // Round up to the next cache line (align the base address)
    std::uintptr_t result = (base + nfs_size) & ~(nfs_size - 1);
    __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Cannot store a base pointer to the header");
    __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage");
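
    // Illustrative layout (an explanatory sketch, not normative): std::malloc returns `base`;
    // `result` is `base` rounded up to the next nfs_size boundary, which, given malloc's
    // alignment guarantee, leaves at least sizeof(std::uintptr_t) bytes of padding in front.
    // The word immediately below `result` stores `base` so std_cache_aligned_deallocate
    // can recover the original pointer:
    //
    //   base ............ result[-1] | result ................ result + bytes
    //   (padding)        (== base)   | (user data, nfs_size-aligned)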
    // Record where the block actually starts.
    (reinterpret_cast<std::uintptr_t*>(result))[-1] = base;
    return reinterpret_cast<void*>(result);
}

static void std_cache_aligned_deallocate(void* p) {
    if (p) {
        __TBB_ASSERT(reinterpret_cast<std::uintptr_t>(p) >= 0x4096, "attempt to free block not obtained from cache_aligned_allocator");
        // Recover where the block actually starts
        std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(p))[-1];
        __TBB_ASSERT(((base + nfs_size) & ~(nfs_size - 1)) == reinterpret_cast<std::uintptr_t>(p), "Incorrect alignment or not allocated by std_cache_aligned_allocate?");
        std::free(reinterpret_cast<void*>(base));
    }
}

void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size) {
    void* result = (*allocate_handler)(size);
    if (!result) {
        throw_exception(exception_id::bad_alloc);
    }
    return result;
}

void __TBB_EXPORTED_FUNC deallocate_memory(void* p) {
    if (p) {
        __TBB_ASSERT(deallocate_handler, "Initialization has not been done yet.");
        (*deallocate_handler)(p);
    }
}

bool __TBB_EXPORTED_FUNC is_tbbmalloc_used() {
    if (allocate_handler == &initialize_allocate_handler) {
        void* void_ptr = allocate_handler(1);
        deallocate_handler(void_ptr);
    }
    __TBB_ASSERT(allocate_handler != &initialize_allocate_handler && deallocate_handler != nullptr, NULL);
    // Cast to void* avoids type mismatch errors on some compilers (e.g. __IBMCPP__).
    __TBB_ASSERT((reinterpret_cast<void*>(allocate_handler) == reinterpret_cast<void*>(&std::malloc)) == (reinterpret_cast<void*>(deallocate_handler) == reinterpret_cast<void*>(&std::free)),
                 "Both shim pointers must refer to routines from the same package (either TBB or CRT)");
    // tbbmalloc is in use exactly when the bound handler is not the CRT malloc.
    return reinterpret_cast<void*>(allocate_handler) != reinterpret_cast<void*>(&std::malloc);
}

} // namespace r1
} // namespace detail
} // namespace tbb
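
/* Usage sketch (illustrative only; client code normally reaches these entry points
   through the public tbb::cache_aligned_allocator and the tbb::detail forwarding
   layer rather than calling the r1 routines directly):

       void* buf = tbb::detail::r1::cache_aligned_allocate(256);  // nfs_size-aligned; throws on failure
       tbb::detail::r1::cache_aligned_deallocate(buf);

       void* raw = tbb::detail::r1::allocate_memory(64);          // unaligned counterpart
       tbb::detail::r1::deallocate_memory(raw);
*/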