/*
Copyright (c) 2005-2023 Intel Corporation

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

#include "oneapi/tbb/version.h"

#include "oneapi/tbb/detail/_exception.h"
#include "oneapi/tbb/detail/_assert.h"
#include "oneapi/tbb/detail/_utils.h"
#include "oneapi/tbb/tbb_allocator.h" // Is this OK?
#include "oneapi/tbb/cache_aligned_allocator.h"

#include "dynamic_link.h"
#include "misc.h"

#include <atomic>  // std::atomic
#include <cstdlib>
#include <mutex>   // std::once_flag, std::call_once

#ifdef _WIN32
#include <windows.h>
#else
#include <dlfcn.h>
#endif

#if (!defined(_WIN32) && !defined(_WIN64)) || defined(__CYGWIN__)
#include <stdlib.h> // posix_memalign, free
// With glibc, uClibc and musl on Linux, and bionic on Android, it is safe to use memalign(), as the allocated
// memory can be freed with free(). It is also preferable to use memalign(), since posix_memalign() is just a
// wrapper on top of memalign() and offers nothing but overhead due to its less convenient interface. This is
// likely the case with other standard libraries as well, and more libraries can be added to the preprocessor
// check below. Unfortunately, we cannot detect musl, so we simply enable memalign() on Linux and Android in
// general.
#if defined(linux) || defined(__linux) || defined(__linux__) || defined(__ANDROID__)
#include <malloc.h> // memalign
#define __TBB_USE_MEMALIGN
#else
#define __TBB_USE_POSIX_MEMALIGN
#endif
#elif defined(_MSC_VER) || defined(__MINGW32__)
#include <malloc.h> // _aligned_malloc, _aligned_free
#define __TBB_USE_MSVC_ALIGNED_MALLOC
#endif
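
// A minimal sketch (kept as a comment, not compiled) of how the back end selected
// above is used; the 64-byte alignment and 1 KiB size are assumptions for the
// example only. Each variant notes the matching free routine:
//
//     void* p = nullptr;
//     #if defined(__TBB_USE_MEMALIGN)
//         p = memalign(64, 1024);                              // freed with free()
//     #elif defined(__TBB_USE_POSIX_MEMALIGN)
//         if (posix_memalign(&p, 64, 1024) != 0) p = nullptr;  // freed with free()
//     #elif defined(__TBB_USE_MSVC_ALIGNED_MALLOC)
//         p = _aligned_malloc(1024, 64);                       // freed with _aligned_free()
//     #endif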

#if __TBB_WEAK_SYMBOLS_PRESENT

#pragma weak scalable_malloc
#pragma weak scalable_free
#pragma weak scalable_aligned_malloc
#pragma weak scalable_aligned_free

extern "C" {
void* scalable_malloc(std::size_t);
void scalable_free(void*);
void* scalable_aligned_malloc(std::size_t, std::size_t);
void scalable_aligned_free(void*);
}

#endif /* __TBB_WEAK_SYMBOLS_PRESENT */
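
// Why the weak declarations above matter, in a minimal sketch (assuming a toolchain
// where __TBB_WEAK_SYMBOLS_PRESENT is defined): a weak undefined symbol resolves to
// a null address instead of failing the link when tbbmalloc is absent, so its
// availability can be tested at run time:
//
//     if (&scalable_malloc != nullptr) {   // non-null only if tbbmalloc is present
//         void* p = scalable_malloc(42);
//         scalable_free(p);
//     }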

namespace tbb {
namespace detail {
namespace r1 {

//! Initialization routine used for first indirect call via allocate_handler.
static void* initialize_allocate_handler(std::size_t size);

//! Handler for memory allocation
using allocate_handler_type = void* (*)(std::size_t size);
static std::atomic<allocate_handler_type> allocate_handler{ &initialize_allocate_handler };
allocate_handler_type allocate_handler_unsafe = nullptr;

//! Handler for memory deallocation
static void (*deallocate_handler)(void* pointer) = nullptr;

//! Initialization routine used for first indirect call via cache_aligned_allocate_handler.
static void* initialize_cache_aligned_allocate_handler(std::size_t n, std::size_t alignment);

//! Allocates overaligned memory using the standard memory allocator. Used when the scalable allocator is not available.
static void* std_cache_aligned_allocate(std::size_t n, std::size_t alignment);

//! Deallocates overaligned memory using the standard memory allocator. Used when the scalable allocator is not available.
static void std_cache_aligned_deallocate(void* p);

//! Handler for padded memory allocation
using cache_aligned_allocate_handler_type = void* (*)(std::size_t n, std::size_t alignment);
static std::atomic<cache_aligned_allocate_handler_type> cache_aligned_allocate_handler{ &initialize_cache_aligned_allocate_handler };
cache_aligned_allocate_handler_type cache_aligned_allocate_handler_unsafe = nullptr;

//! Handler for padded memory deallocation
static void (*cache_aligned_deallocate_handler)(void* p) = nullptr;

//! Table describing how to link the handlers.
static const dynamic_link_descriptor MallocLinkTable[] = {
    DLD(scalable_malloc, allocate_handler_unsafe),
    DLD(scalable_free, deallocate_handler),
    DLD(scalable_aligned_malloc, cache_aligned_allocate_handler_unsafe),
    DLD(scalable_aligned_free, cache_aligned_deallocate_handler),
};
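
// A self-contained sketch (illustrative names, not part of this file) of the
// lazy-initialization pattern used by the handlers above: the atomic pointer
// starts out aimed at a bootstrap routine that installs the real target and then
// forwards the first call. The real code below serializes the setup with
// std::call_once; this sketch relies on the store being idempotent instead.
//
//     #include <atomic>
//     #include <cstdlib>
//
//     static void* bootstrap(std::size_t);
//     static std::atomic<void* (*)(std::size_t)> handler{ &bootstrap };
//
//     static void* bootstrap(std::size_t n) {
//         handler.store(&std::malloc, std::memory_order_release); // install real target
//         return handler.load(std::memory_order_acquire)(n);      // forward first call
//     }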

#if TBB_USE_DEBUG
#define DEBUG_SUFFIX "_debug"
#else
#define DEBUG_SUFFIX
#endif /* TBB_USE_DEBUG */

// MALLOCLIB_NAME is the name of the oneTBB memory allocator library.
#if _WIN32||_WIN64
#define MALLOCLIB_NAME "tbbmalloc" DEBUG_SUFFIX ".dll"
#elif __APPLE__
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".2.dylib"
#elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX || __ANDROID__
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so"
#elif __unix__ // Note that the order of these #elif's is important!
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so.2"
#else
#error Unknown OS
#endif

//! Initialize the allocation/free handler pointers.
/** The caller is responsible for ensuring this routine is called exactly once.
    The routine attempts to dynamically link with the TBB memory allocator.
    If that allocator is not found, it links to malloc and free. */
void initialize_handler_pointers() {
    __TBB_ASSERT(allocate_handler == &initialize_allocate_handler, nullptr);
    bool success = dynamic_link(MALLOCLIB_NAME, MallocLinkTable, 4); // 4 == number of entries in MallocLinkTable
    if (!success) {
        // If unsuccessful, set the handlers to the default routines.
        // This must be done now, and not before dynamic_link runs, because if other
        // threads call the handlers, we want them to go through the call_once logic
        // below, which forces them to wait until initialization completes.
        allocate_handler_unsafe = &std::malloc;
        deallocate_handler = &std::free;
        cache_aligned_allocate_handler_unsafe = &std_cache_aligned_allocate;
        cache_aligned_deallocate_handler = &std_cache_aligned_deallocate;
    }

    allocate_handler.store(allocate_handler_unsafe, std::memory_order_release);
    cache_aligned_allocate_handler.store(cache_aligned_allocate_handler_unsafe, std::memory_order_release);

    PrintExtraVersionInfo("ALLOCATOR", success ? "scalable_malloc" : "malloc");
}

static std::once_flag initialization_state;
void initialize_cache_aligned_allocator() {
    std::call_once(initialization_state, &initialize_handler_pointers);
}
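
// The std::call_once idiom used above, as a minimal sketch with hypothetical
// names: every thread that races into demo_entry_point blocks until the winner
// finishes demo_setup, so no caller can observe half-initialized handlers.
//
//     #include <mutex>
//
//     static std::once_flag demo_once;
//     static void demo_setup() { /* runs exactly once */ }
//
//     void demo_entry_point() {
//         std::call_once(demo_once, &demo_setup);  // safe from any number of threads
//     }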

//! Executed on the very first call through allocate_handler
static void* initialize_allocate_handler(std::size_t size) {
    initialize_cache_aligned_allocator();
    __TBB_ASSERT(allocate_handler != &initialize_allocate_handler, nullptr);
    return (*allocate_handler)(size);
}

//! Executed on the very first call through cache_aligned_allocate_handler
static void* initialize_cache_aligned_allocate_handler(std::size_t bytes, std::size_t alignment) {
    initialize_cache_aligned_allocator();
    __TBB_ASSERT(cache_aligned_allocate_handler != &initialize_cache_aligned_allocate_handler, nullptr);
    return (*cache_aligned_allocate_handler)(bytes, alignment);
}

// TODO: use CPUID to find the actual cache line size, though consider backward compatibility
// nfs - no false sharing
static constexpr std::size_t nfs_size = 128;

std::size_t __TBB_EXPORTED_FUNC cache_line_size() {
    return nfs_size;
}
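
// For comparison, C++17 exposes a related constant in <new>; a hedged sketch (the
// value is implementation-defined and may differ from the conservative 128 above):
//
//     #include <new>
//     #if defined(__cpp_lib_hardware_interference_size)
//     constexpr std::size_t line = std::hardware_destructive_interference_size;
//     #else
//     constexpr std::size_t line = 128;  // fallback, matching nfs_size
//     #endif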

void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size) {
    const std::size_t cache_line_size = nfs_size;
    __TBB_ASSERT(is_power_of_two(cache_line_size), "must be power of two");

    // Check for overflow: padding by one cache line must not wrap around
    if (size + cache_line_size < size) {
        throw_exception(exception_id::bad_alloc);
    }
    // scalable_aligned_malloc considers a zero-size request an error and returns nullptr
    if (size == 0) size = 1;

    void* result = cache_aligned_allocate_handler.load(std::memory_order_acquire)(size, cache_line_size);
    if (!result) {
        throw_exception(exception_id::bad_alloc);
    }
    __TBB_ASSERT(is_aligned(result, cache_line_size), "The returned address isn't aligned");
    return result;
}
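
// Typical usage, as a minimal sketch (the struct and its use are hypothetical):
// giving a hot counter its own cache line to avoid false sharing. Requires <new>
// for placement new.
//
//     struct alignas(128) padded_counter { long value; };
//     void* raw = tbb::detail::r1::cache_aligned_allocate(sizeof(padded_counter));
//     auto* counter = new (raw) padded_counter{0};
//     // ... use *counter ...
//     counter->~padded_counter();
//     tbb::detail::r1::cache_aligned_deallocate(raw);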

void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p) {
    __TBB_ASSERT(cache_aligned_deallocate_handler, "Initialization has not completed yet.");
    (*cache_aligned_deallocate_handler)(p);
}

static void* std_cache_aligned_allocate(std::size_t bytes, std::size_t alignment) {
#if defined(__TBB_USE_MEMALIGN)
    return memalign(alignment, bytes);
#elif defined(__TBB_USE_POSIX_MEMALIGN)
    void* p = nullptr;
    int res = posix_memalign(&p, alignment, bytes);
    if (res != 0)
        p = nullptr;
    return p;
#elif defined(__TBB_USE_MSVC_ALIGNED_MALLOC)
    return _aligned_malloc(bytes, alignment);
#else
    // TODO: make it common with cache_aligned_resource
    // Note: the rounding below assumes alignment == nfs_size, which holds for every caller in this file.
    std::size_t space = alignment + bytes;
    std::uintptr_t base = reinterpret_cast<std::uintptr_t>(std::malloc(space));
    if (!base) {
        return nullptr;
    }
    // Round up to the next cache line boundary (align the address returned to the user)
    std::uintptr_t result = (base + nfs_size) & ~(nfs_size - 1);
    __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Cannot store a base pointer to the header");
    __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage");

    // Record where the block actually starts.
    (reinterpret_cast<std::uintptr_t*>(result))[-1] = base;
    return reinterpret_cast<void*>(result);
#endif
}
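
// Layout produced by the generic fallback above (addresses increase to the right;
// padding is between sizeof(std::uintptr_t) and nfs_size bytes, since std::malloc
// returns word-aligned storage):
//
//     base                              result                    result + bytes
//      |<----------- padding ----------->|<------- user data ------->|
//                                ^ the word at result[-1] holds base, so
//                                  std_cache_aligned_deallocate can recover
//                                  and free the original block.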

static void std_cache_aligned_deallocate(void* p) {
#if defined(__TBB_USE_MEMALIGN) || defined(__TBB_USE_POSIX_MEMALIGN)
    free(p);
#elif defined(__TBB_USE_MSVC_ALIGNED_MALLOC)
    _aligned_free(p);
#else
    if (p) {
        __TBB_ASSERT(reinterpret_cast<std::uintptr_t>(p) >= 0x4096, "attempt to free block not obtained from cache_aligned_allocator");
        // Recover where the block actually starts
        std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(p))[-1];
        __TBB_ASSERT(((base + nfs_size) & ~(nfs_size - 1)) == reinterpret_cast<std::uintptr_t>(p), "Incorrect alignment or not allocated by std_cache_aligned_allocate?");
        std::free(reinterpret_cast<void*>(base));
    }
#endif
}

void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size) {
    void* result = allocate_handler.load(std::memory_order_acquire)(size);
    if (!result) {
        throw_exception(exception_id::bad_alloc);
    }
    return result;
}

void __TBB_EXPORTED_FUNC deallocate_memory(void* p) {
    if (p) {
        __TBB_ASSERT(deallocate_handler, "Initialization has not completed yet.");
        (*deallocate_handler)(p);
    }
}

bool __TBB_EXPORTED_FUNC is_tbbmalloc_used() {
    auto handler_snapshot = allocate_handler.load(std::memory_order_acquire);
    if (handler_snapshot == &initialize_allocate_handler) {
        initialize_cache_aligned_allocator();
    }
    handler_snapshot = allocate_handler.load(std::memory_order_relaxed);
    __TBB_ASSERT(handler_snapshot != &initialize_allocate_handler && deallocate_handler != nullptr, nullptr);
    // The casts to void* avoid type mismatch errors on some compilers (e.g. __IBMCPP__)
    __TBB_ASSERT((reinterpret_cast<void*>(handler_snapshot) == reinterpret_cast<void*>(&std::malloc)) == (reinterpret_cast<void*>(deallocate_handler) == reinterpret_cast<void*>(&std::free)),
                 "Both shim pointers must refer to routines from the same package (either TBB or CRT)");
    // The handlers point at std::malloc only when linking tbbmalloc failed, so tbbmalloc
    // is in use exactly when the snapshot differs from std::malloc.
    return reinterpret_cast<void*>(handler_snapshot) != reinterpret_cast<void*>(&std::malloc);
}
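
// An illustrative caller (hypothetical; requires <cstdio>):
//
//     if (tbb::detail::r1::is_tbbmalloc_used())
//         std::printf("allocations go through tbbmalloc (scalable_malloc)\n");
//     else
//         std::printf("allocations fall back to the CRT malloc/free\n");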

} // namespace r1
} // namespace detail
} // namespace tbb