1 /* 2 Copyright (c) 2005-2023 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 #include "oneapi/tbb/version.h" 18 19 #include "oneapi/tbb/detail/_exception.h" 20 #include "oneapi/tbb/detail/_assert.h" 21 #include "oneapi/tbb/detail/_utils.h" 22 #include "oneapi/tbb/tbb_allocator.h" // Is this OK? 23 #include "oneapi/tbb/cache_aligned_allocator.h" 24 25 #include "dynamic_link.h" 26 #include "misc.h" 27 28 #include <cstdlib> 29 30 #ifdef _WIN32 31 #include <windows.h> 32 #else 33 #include <dlfcn.h> 34 #endif 35 36 #if (!defined(_WIN32) && !defined(_WIN64)) || defined(__CYGWIN__) 37 #include <stdlib.h> // posix_memalign, free 38 // With glibc, uClibc and musl on Linux and bionic on Android it is safe to use memalign(), as the allocated memory 39 // can be freed with free(). It is also better to use memalign() since posix_memalign() is just a wrapper on top of 40 // memalign() and it offers nothing but overhead due to inconvenient interface. This is likely the case with other 41 // standard libraries as well, and more libraries can be added to the preprocessor check below. Unfortunately, we 42 // can't detect musl, so we simply enable memalign() on Linux and Android in general. 
#if defined(linux) || defined(__linux) || defined(__linux__) || defined(__ANDROID__)
#include <malloc.h> // memalign
#define __TBB_USE_MEMALIGN
#else
#define __TBB_USE_POSIX_MEMALIGN
#endif
#elif defined(_MSC_VER) || defined(__MINGW32__)
#include <malloc.h> // _aligned_malloc, _aligned_free
#define __TBB_USE_MSVC_ALIGNED_MALLOC
#endif

#if __TBB_WEAK_SYMBOLS_PRESENT

// Declare the TBB scalable allocator entry points as weak symbols so that the
// binary links even when the tbbmalloc library is absent; at run time the
// addresses are either resolved to tbbmalloc or left null and replaced by the
// standard-library fallbacks below.
#pragma weak scalable_malloc
#pragma weak scalable_free
#pragma weak scalable_aligned_malloc
#pragma weak scalable_aligned_free

extern "C" {
    void* scalable_malloc(std::size_t);
    void scalable_free(void*);
    void* scalable_aligned_malloc(std::size_t, std::size_t);
    void scalable_aligned_free(void*);
}

#endif /* __TBB_WEAK_SYMBOLS_PRESENT */

namespace tbb {
namespace detail {
namespace r1 {

//! Initialization routine used for first indirect call via allocate_handler.
static void* initialize_allocate_handler(std::size_t size);

//! Handler for memory allocation
// NOTE(review): std::atomic is used below but <atomic> is not included directly in
// this file — presumably pulled in transitively by the TBB headers; consider
// including it explicitly.
using allocate_handler_type = void* (*)(std::size_t size);
// First call through the atomic pointer lands in the initializer, which performs
// one-time linking and then re-dispatches through the updated pointer.
static std::atomic<allocate_handler_type> allocate_handler{ &initialize_allocate_handler };
// Non-atomic alias written by dynamic_link (via MallocLinkTable) before being
// published into the atomic handler with release semantics.
allocate_handler_type allocate_handler_unsafe = nullptr;

//! Handler for memory deallocation
static void (*deallocate_handler)(void* pointer) = nullptr;

//! Initialization routine used for first indirect call via cache_aligned_allocate_handler.
static void* initialize_cache_aligned_allocate_handler(std::size_t n, std::size_t alignment);

//! Allocates overaligned memory using standard memory allocator. It is used when scalable_allocator is not available.
static void* std_cache_aligned_allocate(std::size_t n, std::size_t alignment);

//! Deallocates overaligned memory using standard memory allocator. It is used when scalable_allocator is not available.
static void std_cache_aligned_deallocate(void* p);

//! Handler for padded memory allocation
using cache_aligned_allocate_handler_type = void* (*)(std::size_t n, std::size_t alignment);
static std::atomic<cache_aligned_allocate_handler_type> cache_aligned_allocate_handler{ &initialize_cache_aligned_allocate_handler };
cache_aligned_allocate_handler_type cache_aligned_allocate_handler_unsafe = nullptr;

//! Handler for padded memory deallocation
static void (*cache_aligned_deallocate_handler)(void* p) = nullptr;

//! Table describing how to link the handlers.
// The entry order pairs each scalable_* symbol with the *_unsafe/raw pointer it
// fills in; keep the order consistent with the fallback assignments in
// initialize_handler_pointers.
static const dynamic_link_descriptor MallocLinkTable[] = {
    DLD(scalable_malloc, allocate_handler_unsafe),
    DLD(scalable_free, deallocate_handler),
    DLD(scalable_aligned_malloc, cache_aligned_allocate_handler_unsafe),
    DLD(scalable_aligned_free, cache_aligned_deallocate_handler),
};


#if TBB_USE_DEBUG
#define DEBUG_SUFFIX "_debug"
#else
#define DEBUG_SUFFIX
#endif /* TBB_USE_DEBUG */

// MALLOCLIB_NAME is the name of the oneTBB memory allocator library.
#if _WIN32||_WIN64
#define MALLOCLIB_NAME "tbbmalloc" DEBUG_SUFFIX ".dll"
#elif __APPLE__
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".2.dylib"
#elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX || __ANDROID__
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so"
#elif __unix__  // Note that order of these #elif's is important!
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so.2"
#else
#error Unknown OS
#endif

//! Initialize the allocation/free handler pointers.
/** Caller is responsible for ensuring this routine is called exactly once.
    The routine attempts to dynamically link with the TBB memory allocator.
    If that allocator is not found, it links to malloc and free.
*/ 134 void initialize_handler_pointers() { 135 __TBB_ASSERT(allocate_handler == &initialize_allocate_handler, nullptr); 136 bool success = dynamic_link(MALLOCLIB_NAME, MallocLinkTable, 4); 137 if(!success) { 138 // If unsuccessful, set the handlers to the default routines. 139 // This must be done now, and not before FillDynamicLinks runs, because if other 140 // threads call the handlers, we want them to go through the DoOneTimeInitializations logic, 141 // which forces them to wait. 142 allocate_handler_unsafe = &std::malloc; 143 deallocate_handler = &std::free; 144 cache_aligned_allocate_handler_unsafe = &std_cache_aligned_allocate; 145 cache_aligned_deallocate_handler = &std_cache_aligned_deallocate; 146 } 147 148 allocate_handler.store(allocate_handler_unsafe, std::memory_order_release); 149 cache_aligned_allocate_handler.store(cache_aligned_allocate_handler_unsafe, std::memory_order_release); 150 151 PrintExtraVersionInfo( "ALLOCATOR", success?"scalable_malloc":"malloc" ); 152 } 153 154 static std::once_flag initialization_state; 155 void initialize_cache_aligned_allocator() { 156 std::call_once(initialization_state, &initialize_handler_pointers); 157 } 158 159 //! Executed on very first call through allocate_handler 160 static void* initialize_allocate_handler(std::size_t size) { 161 initialize_cache_aligned_allocator(); 162 __TBB_ASSERT(allocate_handler != &initialize_allocate_handler, nullptr); 163 return (*allocate_handler)(size); 164 } 165 166 //! 
Executed on very first call through cache_aligned_allocate_handler 167 static void* initialize_cache_aligned_allocate_handler(std::size_t bytes, std::size_t alignment) { 168 initialize_cache_aligned_allocator(); 169 __TBB_ASSERT(cache_aligned_allocate_handler != &initialize_cache_aligned_allocate_handler, nullptr); 170 return (*cache_aligned_allocate_handler)(bytes, alignment); 171 } 172 173 // TODO: use CPUID to find actual line size, though consider backward compatibility 174 // nfs - no false sharing 175 static constexpr std::size_t nfs_size = 128; 176 177 std::size_t __TBB_EXPORTED_FUNC cache_line_size() { 178 return nfs_size; 179 } 180 181 void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size) { 182 const std::size_t cache_line_size = nfs_size; 183 __TBB_ASSERT(is_power_of_two(cache_line_size), "must be power of two"); 184 185 // Check for overflow 186 if (size + cache_line_size < size) { 187 throw_exception(exception_id::bad_alloc); 188 } 189 // scalable_aligned_malloc considers zero size request an error, and returns nullptr 190 if (size == 0) size = 1; 191 192 void* result = cache_aligned_allocate_handler.load(std::memory_order_acquire)(size, cache_line_size); 193 if (!result) { 194 throw_exception(exception_id::bad_alloc); 195 } 196 __TBB_ASSERT(is_aligned(result, cache_line_size), "The returned address isn't aligned"); 197 return result; 198 } 199 200 void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p) { 201 __TBB_ASSERT(cache_aligned_deallocate_handler, "Initialization has not been yet."); 202 (*cache_aligned_deallocate_handler)(p); 203 } 204 205 static void* std_cache_aligned_allocate(std::size_t bytes, std::size_t alignment) { 206 #if defined(__TBB_USE_MEMALIGN) 207 return memalign(alignment, bytes); 208 #elif defined(__TBB_USE_POSIX_MEMALIGN) 209 void* p = nullptr; 210 int res = posix_memalign(&p, alignment, bytes); 211 if (res != 0) 212 p = nullptr; 213 return p; 214 #elif defined(__TBB_USE_MSVC_ALIGNED_MALLOC) 215 return 
_aligned_malloc(bytes, alignment); 216 #else 217 // TODO: make it common with cache_aligned_resource 218 std::size_t space = alignment + bytes; 219 std::uintptr_t base = reinterpret_cast<std::uintptr_t>(std::malloc(space)); 220 if (!base) { 221 return nullptr; 222 } 223 std::uintptr_t result = (base + nfs_size) & ~(nfs_size - 1); 224 // Round up to the next cache line (align the base address) 225 __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Cannot store a base pointer to the header"); 226 __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage"); 227 228 // Record where block actually starts. 229 (reinterpret_cast<std::uintptr_t*>(result))[-1] = base; 230 return reinterpret_cast<void*>(result); 231 #endif 232 } 233 234 static void std_cache_aligned_deallocate(void* p) { 235 #if defined(__TBB_USE_MEMALIGN) || defined(__TBB_USE_POSIX_MEMALIGN) 236 free(p); 237 #elif defined(__TBB_USE_MSVC_ALIGNED_MALLOC) 238 _aligned_free(p); 239 #else 240 if (p) { 241 __TBB_ASSERT(reinterpret_cast<std::uintptr_t>(p) >= 0x4096, "attempt to free block not obtained from cache_aligned_allocator"); 242 // Recover where block actually starts 243 std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(p))[-1]; 244 __TBB_ASSERT(((base + nfs_size) & ~(nfs_size - 1)) == reinterpret_cast<std::uintptr_t>(p), "Incorrect alignment or not allocated by std_cache_aligned_deallocate?"); 245 std::free(reinterpret_cast<void*>(base)); 246 } 247 #endif 248 } 249 250 void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size) { 251 void* result = allocate_handler.load(std::memory_order_acquire)(size); 252 if (!result) { 253 throw_exception(exception_id::bad_alloc); 254 } 255 return result; 256 } 257 258 void __TBB_EXPORTED_FUNC deallocate_memory(void* p) { 259 if (p) { 260 __TBB_ASSERT(deallocate_handler, "Initialization has not been yet."); 261 (*deallocate_handler)(p); 262 } 263 } 264 265 bool __TBB_EXPORTED_FUNC is_tbbmalloc_used() { 266 auto handler_snapshot = 
allocate_handler.load(std::memory_order_acquire); 267 if (handler_snapshot == &initialize_allocate_handler) { 268 initialize_cache_aligned_allocator(); 269 } 270 handler_snapshot = allocate_handler.load(std::memory_order_relaxed); 271 __TBB_ASSERT(handler_snapshot != &initialize_allocate_handler && deallocate_handler != nullptr, nullptr); 272 // Cast to void avoids type mismatch errors on some compilers (e.g. __IBMCPP__) 273 __TBB_ASSERT((reinterpret_cast<void*>(handler_snapshot) == reinterpret_cast<void*>(&std::malloc)) == (reinterpret_cast<void*>(deallocate_handler) == reinterpret_cast<void*>(&std::free)), 274 "Both shim pointers must refer to routines from the same package (either TBB or CRT)"); 275 return reinterpret_cast<void*>(handler_snapshot) == reinterpret_cast<void*>(&std::malloc); 276 } 277 278 } // namespace r1 279 } // namespace detail 280 } // namespace tbb 281