xref: /oneTBB/src/tbb/allocator.cpp (revision 5f1ae0dc)
1 /*
2     Copyright (c) 2005-2023 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #include "oneapi/tbb/version.h"
18 
19 #include "oneapi/tbb/detail/_exception.h"
20 #include "oneapi/tbb/detail/_assert.h"
21 #include "oneapi/tbb/detail/_utils.h"
22 #include "oneapi/tbb/tbb_allocator.h" // Is this OK?
23 #include "oneapi/tbb/cache_aligned_allocator.h"
24 
25 #include "dynamic_link.h"
26 #include "misc.h"
27 
28 #include <cstdlib>
29 
30 #ifdef _WIN32
31 #include <windows.h>
32 #else
33 #include <dlfcn.h>
34 #endif
35 
36 #if (!defined(_WIN32) && !defined(_WIN64)) || defined(__CYGWIN__)
37 #include <stdlib.h> // posix_memalign, free
38 // With glibc, uClibc and musl on Linux and bionic on Android it is safe to use memalign(), as the allocated memory
39 // can be freed with free(). It is also better to use memalign() since posix_memalign() is just a wrapper on top of
40 // memalign() and it offers nothing but overhead due to inconvenient interface. This is likely the case with other
41 // standard libraries as well, and more libraries can be added to the preprocessor check below. Unfortunately, we
42 // can't detect musl, so we simply enable memalign() on Linux and Android in general.
43 #if defined(linux) || defined(__linux) || defined(__linux__) || defined(__ANDROID__)
44 #include <malloc.h> // memalign
45 #define __TBB_USE_MEMALIGN
46 #else
47 #define __TBB_USE_POSIX_MEMALIGN
48 #endif
49 #elif defined(_MSC_VER) || defined(__MINGW32__)
50 #include <malloc.h> // _aligned_malloc, _aligned_free
51 #define __TBB_USE_MSVC_ALIGNED_MALLOC
52 #endif
53 
54 #if __TBB_WEAK_SYMBOLS_PRESENT
55 
56 #pragma weak scalable_malloc
57 #pragma weak scalable_free
58 #pragma weak scalable_aligned_malloc
59 #pragma weak scalable_aligned_free
60 
61 extern "C" {
62     void* scalable_malloc(std::size_t);
63     void  scalable_free(void*);
64     void* scalable_aligned_malloc(std::size_t, std::size_t);
65     void  scalable_aligned_free(void*);
66 }
67 
68 #endif /* __TBB_WEAK_SYMBOLS_PRESENT */
69 
70 namespace tbb {
71 namespace detail {
72 namespace r1 {
73 
//! Initialization routine used for first indirect call via allocate_handler.
static void* initialize_allocate_handler(std::size_t size);

//! Handler for memory allocation
using allocate_handler_type = void* (*)(std::size_t size);
// Starts as the bootstrap initializer; the first call through it binds the
// real allocator (scalable_malloc if tbbmalloc loads, std::malloc otherwise).
static std::atomic<allocate_handler_type> allocate_handler{ &initialize_allocate_handler };
// Raw (non-atomic) slot filled by dynamic_link via MallocLinkTable and then
// published into allocate_handler with a release store; only touched during
// one-time initialization, not safe for concurrent reads on its own.
allocate_handler_type allocate_handler_unsafe = nullptr;

//! Handler for memory deallocation
static void  (*deallocate_handler)(void* pointer) = nullptr;

//! Initialization routine used for first indirect call via cache_aligned_allocate_handler.
static void* initialize_cache_aligned_allocate_handler(std::size_t n, std::size_t alignment);

//! Allocates overaligned memory using standard memory allocator. It is used when scalable_allocator is not available.
static void* std_cache_aligned_allocate(std::size_t n, std::size_t alignment);

//! Deallocates overaligned memory using standard memory allocator. It is used when scalable_allocator is not available.
static void  std_cache_aligned_deallocate(void* p);

//! Handler for padded memory allocation
using cache_aligned_allocate_handler_type = void* (*)(std::size_t n, std::size_t alignment);
// Same bootstrap scheme as allocate_handler, for the aligned-allocation path.
static std::atomic<cache_aligned_allocate_handler_type> cache_aligned_allocate_handler{ &initialize_cache_aligned_allocate_handler };
// Raw slot for the aligned allocator, published by initialize_handler_pointers.
cache_aligned_allocate_handler_type cache_aligned_allocate_handler_unsafe = nullptr;

//! Handler for padded memory deallocation
static void (*cache_aligned_deallocate_handler)(void* p) = nullptr;
101 
//! Table describing how to link the handlers.
// dynamic_link resolves each scalable_* entry point from the tbbmalloc
// library (MALLOCLIB_NAME) and stores its address into the paired pointer.
// The non-atomic *_unsafe slots are used here; they are published into the
// atomic handlers afterwards by initialize_handler_pointers.
static const dynamic_link_descriptor MallocLinkTable[] = {
    DLD(scalable_malloc, allocate_handler_unsafe),
    DLD(scalable_free, deallocate_handler),
    DLD(scalable_aligned_malloc, cache_aligned_allocate_handler_unsafe),
    DLD(scalable_aligned_free, cache_aligned_deallocate_handler),
};
109 
110 
// Suffix appended to the allocator library name when this is a debug build,
// so debug TBB loads the matching debug tbbmalloc.
#if TBB_USE_DEBUG
#define DEBUG_SUFFIX "_debug"
#else
#define DEBUG_SUFFIX
#endif /* TBB_USE_DEBUG */

// MALLOCLIB_NAME is the name of the oneTBB memory allocator library.
#if _WIN32||_WIN64
#define MALLOCLIB_NAME "tbbmalloc" DEBUG_SUFFIX ".dll"
#elif __APPLE__
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".2.dylib"
#elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX || __ANDROID__
// These platforms use an unversioned shared-object name.
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so"
#elif __unix__  // Note that order of these #elif's is important!
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so.2"
#else
#error Unknown OS
#endif
129 
130 //! Initialize the allocation/free handler pointers.
131 /** Caller is responsible for ensuring this routine is called exactly once.
132     The routine attempts to dynamically link with the TBB memory allocator.
133     If that allocator is not found, it links to malloc and free. */
134 void initialize_handler_pointers() {
135     __TBB_ASSERT(allocate_handler == &initialize_allocate_handler, nullptr);
136     bool success = dynamic_link(MALLOCLIB_NAME, MallocLinkTable, 4);
137     if(!success) {
138         // If unsuccessful, set the handlers to the default routines.
139         // This must be done now, and not before FillDynamicLinks runs, because if other
140         // threads call the handlers, we want them to go through the DoOneTimeInitializations logic,
141         // which forces them to wait.
142         allocate_handler_unsafe = &std::malloc;
143         deallocate_handler = &std::free;
144         cache_aligned_allocate_handler_unsafe = &std_cache_aligned_allocate;
145         cache_aligned_deallocate_handler = &std_cache_aligned_deallocate;
146     }
147 
148     allocate_handler.store(allocate_handler_unsafe, std::memory_order_release);
149     cache_aligned_allocate_handler.store(cache_aligned_allocate_handler_unsafe, std::memory_order_release);
150 
151     PrintExtraVersionInfo( "ALLOCATOR", success?"scalable_malloc":"malloc" );
152 }
153 
154 static std::once_flag initialization_state;
155 void initialize_cache_aligned_allocator() {
156     std::call_once(initialization_state, &initialize_handler_pointers);
157 }
158 
159 //! Executed on very first call through allocate_handler
160 static void* initialize_allocate_handler(std::size_t size) {
161     initialize_cache_aligned_allocator();
162     __TBB_ASSERT(allocate_handler != &initialize_allocate_handler, nullptr);
163     return (*allocate_handler)(size);
164 }
165 
166 //! Executed on very first call through cache_aligned_allocate_handler
167 static void* initialize_cache_aligned_allocate_handler(std::size_t bytes, std::size_t alignment) {
168     initialize_cache_aligned_allocator();
169     __TBB_ASSERT(cache_aligned_allocate_handler != &initialize_cache_aligned_allocate_handler, nullptr);
170     return (*cache_aligned_allocate_handler)(bytes, alignment);
171 }
172 
// TODO: use CPUID to find actual line size, though consider backward compatibility
// nfs - no false sharing
// Fixed padding/alignment granularity (in bytes) used throughout this file.
static constexpr std::size_t nfs_size = 128;

//! Returns the cache-line size the library pads to. This is the compile-time
//! constant nfs_size, not a value queried from the hardware.
std::size_t __TBB_EXPORTED_FUNC cache_line_size() {
    return nfs_size;
}
180 
181 void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size) {
182     const std::size_t cache_line_size = nfs_size;
183     __TBB_ASSERT(is_power_of_two(cache_line_size), "must be power of two");
184 
185     // Check for overflow
186     if (size + cache_line_size < size) {
187         throw_exception(exception_id::bad_alloc);
188     }
189     // scalable_aligned_malloc considers zero size request an error, and returns nullptr
190     if (size == 0) size = 1;
191 
192     void* result = cache_aligned_allocate_handler.load(std::memory_order_acquire)(size, cache_line_size);
193     if (!result) {
194         throw_exception(exception_id::bad_alloc);
195     }
196     __TBB_ASSERT(is_aligned(result, cache_line_size), "The returned address isn't aligned");
197     return result;
198 }
199 
200 void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p) {
201     __TBB_ASSERT(cache_aligned_deallocate_handler, "Initialization has not been yet.");
202     (*cache_aligned_deallocate_handler)(p);
203 }
204 
205 static void* std_cache_aligned_allocate(std::size_t bytes, std::size_t alignment) {
206 #if defined(__TBB_USE_MEMALIGN)
207     return memalign(alignment, bytes);
208 #elif defined(__TBB_USE_POSIX_MEMALIGN)
209     void* p = nullptr;
210     int res = posix_memalign(&p, alignment, bytes);
211     if (res != 0)
212         p = nullptr;
213     return p;
214 #elif defined(__TBB_USE_MSVC_ALIGNED_MALLOC)
215     return _aligned_malloc(bytes, alignment);
216 #else
217     // TODO: make it common with cache_aligned_resource
218     std::size_t space = alignment + bytes;
219     std::uintptr_t base = reinterpret_cast<std::uintptr_t>(std::malloc(space));
220     if (!base) {
221         return nullptr;
222     }
223     std::uintptr_t result = (base + nfs_size) & ~(nfs_size - 1);
224     // Round up to the next cache line (align the base address)
225     __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Cannot store a base pointer to the header");
226     __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage");
227 
228     // Record where block actually starts.
229     (reinterpret_cast<std::uintptr_t*>(result))[-1] = base;
230     return reinterpret_cast<void*>(result);
231 #endif
232 }
233 
//! Frees a block produced by std_cache_aligned_allocate, undoing whichever
//! allocation strategy was selected for this platform at compile time.
static void std_cache_aligned_deallocate(void* p) {
#if defined(__TBB_USE_MEMALIGN) || defined(__TBB_USE_POSIX_MEMALIGN)
    free(p);
#elif defined(__TBB_USE_MSVC_ALIGNED_MALLOC)
    _aligned_free(p);
#else
    if (p) {
        // NOTE(review): 0x4096 equals 16534, not the page size 4096 — the hex
        // prefix looks accidental; confirm the intended lower bound.
        __TBB_ASSERT(reinterpret_cast<std::uintptr_t>(p) >= 0x4096, "attempt to free block not obtained from cache_aligned_allocator");
        // Recover where block actually starts
        std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(p))[-1];
        // Debug check: re-derive the aligned address from the recovered base;
        // assumes the allocate path rounded up to an nfs_size boundary.
        __TBB_ASSERT(((base + nfs_size) & ~(nfs_size - 1)) == reinterpret_cast<std::uintptr_t>(p), "Incorrect alignment or not allocated by std_cache_aligned_deallocate?");
        std::free(reinterpret_cast<void*>(base));
    }
#endif
}
249 
250 void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size) {
251     void* result = allocate_handler.load(std::memory_order_acquire)(size);
252     if (!result) {
253         throw_exception(exception_id::bad_alloc);
254     }
255     return result;
256 }
257 
258 void __TBB_EXPORTED_FUNC deallocate_memory(void* p) {
259     if (p) {
260         __TBB_ASSERT(deallocate_handler, "Initialization has not been yet.");
261         (*deallocate_handler)(p);
262     }
263 }
264 
//! Reports whether allocations are served by the C runtime's malloc/free
//! (returns true) rather than tbbmalloc, forcing initialization if needed.
bool __TBB_EXPORTED_FUNC is_tbbmalloc_used() {
    auto handler_snapshot = allocate_handler.load(std::memory_order_acquire);
    // First use: run the one-time dynamic-link initialization so the answer
    // reflects the final binding rather than the bootstrap stub.
    if (handler_snapshot == &initialize_allocate_handler) {
        initialize_cache_aligned_allocator();
    }
    // Relaxed reload suffices: call_once above synchronizes with the
    // initializing thread, so the published handler is already visible.
    handler_snapshot = allocate_handler.load(std::memory_order_relaxed);
    __TBB_ASSERT(handler_snapshot != &initialize_allocate_handler && deallocate_handler != nullptr, nullptr);
    // Cast to void avoids type mismatch errors on some compilers (e.g. __IBMCPP__)
    __TBB_ASSERT((reinterpret_cast<void*>(handler_snapshot) == reinterpret_cast<void*>(&std::malloc)) == (reinterpret_cast<void*>(deallocate_handler) == reinterpret_cast<void*>(&std::free)),
                  "Both shim pointers must refer to routines from the same package (either TBB or CRT)");
    return reinterpret_cast<void*>(handler_snapshot) == reinterpret_cast<void*>(&std::malloc);
}
277 
278 } // namespace r1
279 } // namespace detail
280 } // namespace tbb
281