xref: /oneTBB/src/tbb/allocator.cpp (revision 51c0b2f7)
1*51c0b2f7Stbbdev /*
2*51c0b2f7Stbbdev     Copyright (c) 2005-2020 Intel Corporation
3*51c0b2f7Stbbdev 
4*51c0b2f7Stbbdev     Licensed under the Apache License, Version 2.0 (the "License");
5*51c0b2f7Stbbdev     you may not use this file except in compliance with the License.
6*51c0b2f7Stbbdev     You may obtain a copy of the License at
7*51c0b2f7Stbbdev 
8*51c0b2f7Stbbdev         http://www.apache.org/licenses/LICENSE-2.0
9*51c0b2f7Stbbdev 
10*51c0b2f7Stbbdev     Unless required by applicable law or agreed to in writing, software
11*51c0b2f7Stbbdev     distributed under the License is distributed on an "AS IS" BASIS,
12*51c0b2f7Stbbdev     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*51c0b2f7Stbbdev     See the License for the specific language governing permissions and
14*51c0b2f7Stbbdev     limitations under the License.
15*51c0b2f7Stbbdev */
16*51c0b2f7Stbbdev 
17*51c0b2f7Stbbdev #include "tbb/version.h"
18*51c0b2f7Stbbdev 
19*51c0b2f7Stbbdev #include "tbb/detail/_exception.h"
20*51c0b2f7Stbbdev #include "tbb/detail/_assert.h"
21*51c0b2f7Stbbdev #include "tbb/detail/_utils.h"
22*51c0b2f7Stbbdev 
23*51c0b2f7Stbbdev #include "dynamic_link.h"
24*51c0b2f7Stbbdev #include "misc.h"
25*51c0b2f7Stbbdev 
26*51c0b2f7Stbbdev #include <cstdlib>
27*51c0b2f7Stbbdev 
28*51c0b2f7Stbbdev #if _WIN32 || _WIN64
29*51c0b2f7Stbbdev #include <Windows.h>
30*51c0b2f7Stbbdev #else
31*51c0b2f7Stbbdev #include <dlfcn.h>
32*51c0b2f7Stbbdev #endif /* _WIN32||_WIN64 */
33*51c0b2f7Stbbdev 
#if __TBB_WEAK_SYMBOLS_PRESENT

// Declare the scalable allocator entry points as weak so that, when the
// tbbmalloc library is absent, the program still links and the symbols
// simply remain unresolved instead of causing a hard link error.
#pragma weak scalable_malloc
#pragma weak scalable_free
#pragma weak scalable_aligned_malloc
#pragma weak scalable_aligned_free

// C-linkage prototypes matching the exports of the tbbmalloc library.
extern "C" {
    void* scalable_malloc(std::size_t);
    void  scalable_free(void*);
    void* scalable_aligned_malloc(std::size_t, std::size_t);
    void  scalable_aligned_free(void*);
}

#endif /* __TBB_WEAK_SYMBOLS_PRESENT */
49*51c0b2f7Stbbdev 
50*51c0b2f7Stbbdev namespace tbb {
51*51c0b2f7Stbbdev namespace detail {
52*51c0b2f7Stbbdev namespace r1 {
53*51c0b2f7Stbbdev 
//! Initialization routine used for first indirect call via allocate_handler.
static void* initialize_allocate_handler(std::size_t size);

//! Handler for memory allocation.
//! Starts as the lazy initializer and is rebound on first use to either
//! scalable_malloc (if tbbmalloc is found) or std::malloc.
static void* (*allocate_handler)(std::size_t size) = &initialize_allocate_handler;

//! Handler for memory deallocation; bound during one-time initialization.
static void  (*deallocate_handler)(void* pointer) = nullptr;

//! Initialization routine used for first indirect call via cache_aligned_allocate_handler.
static void* initialize_cache_aligned_allocate_handler(std::size_t n, std::size_t alignment);

//! Allocates memory using standard malloc. It is used when scalable_allocator is not available
static void* std_cache_aligned_allocate(std::size_t n, std::size_t alignment);

//! Deallocates memory using standard free. It is used when scalable_allocator is not available
static void  std_cache_aligned_deallocate(void* p);

//! Handler for padded (cache-aligned) memory allocation.
//! Starts as the lazy initializer, like allocate_handler above.
static void* (*cache_aligned_allocate_handler)(std::size_t n, std::size_t alignment) = &initialize_cache_aligned_allocate_handler;

//! Handler for padded memory deallocation; bound during one-time initialization.
static void (*cache_aligned_deallocate_handler)(void* p) = nullptr;

//! Table describing how to link the handlers to the tbbmalloc exports.
//! On success dynamic_link stores the resolved entry points into the
//! handler pointers referenced here.
static const dynamic_link_descriptor MallocLinkTable[] = {
    DLD(scalable_malloc, allocate_handler),
    DLD(scalable_free, deallocate_handler),
    DLD(scalable_aligned_malloc, cache_aligned_allocate_handler),
    DLD(scalable_aligned_free, cache_aligned_deallocate_handler),
};
85*51c0b2f7Stbbdev 
86*51c0b2f7Stbbdev 
// Debug builds dynamically link against the debug variant of tbbmalloc.
#if TBB_USE_DEBUG
#define DEBUG_SUFFIX "_debug"
#else
#define DEBUG_SUFFIX
#endif /* TBB_USE_DEBUG */

// MALLOCLIB_NAME is the name of the oneTBB memory allocator library.
#if _WIN32||_WIN64
#define MALLOCLIB_NAME "tbbmalloc" DEBUG_SUFFIX ".dll"
#elif __APPLE__
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".dylib"
#elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX || __ANDROID__
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so"
#elif __linux__  // Note that order of these #elif's is important!
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so.2"
#else
#error Unknown OS
#endif
105*51c0b2f7Stbbdev 
106*51c0b2f7Stbbdev //! Initialize the allocation/free handler pointers.
107*51c0b2f7Stbbdev /** Caller is responsible for ensuring this routine is called exactly once.
108*51c0b2f7Stbbdev     The routine attempts to dynamically link with the TBB memory allocator.
109*51c0b2f7Stbbdev     If that allocator is not found, it links to malloc and free. */
110*51c0b2f7Stbbdev void initialize_handler_pointers() {
111*51c0b2f7Stbbdev     __TBB_ASSERT(allocate_handler == &initialize_allocate_handler, NULL);
112*51c0b2f7Stbbdev     bool success = dynamic_link(MALLOCLIB_NAME, MallocLinkTable, 4);
113*51c0b2f7Stbbdev     if(!success) {
114*51c0b2f7Stbbdev         // If unsuccessful, set the handlers to the default routines.
115*51c0b2f7Stbbdev         // This must be done now, and not before FillDynamicLinks runs, because if other
116*51c0b2f7Stbbdev         // threads call the handlers, we want them to go through the DoOneTimeInitializations logic,
117*51c0b2f7Stbbdev         // which forces them to wait.
118*51c0b2f7Stbbdev         allocate_handler = &std::malloc;
119*51c0b2f7Stbbdev         deallocate_handler = &std::free;
120*51c0b2f7Stbbdev         cache_aligned_allocate_handler = &std_cache_aligned_allocate;
121*51c0b2f7Stbbdev         cache_aligned_deallocate_handler = &std_cache_aligned_deallocate;
122*51c0b2f7Stbbdev     }
123*51c0b2f7Stbbdev 
124*51c0b2f7Stbbdev     PrintExtraVersionInfo( "ALLOCATOR", success?"scalable_malloc":"malloc" );
125*51c0b2f7Stbbdev }
126*51c0b2f7Stbbdev 
127*51c0b2f7Stbbdev static std::once_flag initialization_state;
128*51c0b2f7Stbbdev void initialize_cache_aligned_allocator() {
129*51c0b2f7Stbbdev     std::call_once(initialization_state, &initialize_handler_pointers);
130*51c0b2f7Stbbdev }
131*51c0b2f7Stbbdev 
132*51c0b2f7Stbbdev //! Executed on very first call through allocate_handler
133*51c0b2f7Stbbdev static void* initialize_allocate_handler(std::size_t size) {
134*51c0b2f7Stbbdev     initialize_cache_aligned_allocator();
135*51c0b2f7Stbbdev     __TBB_ASSERT(allocate_handler != &initialize_allocate_handler, NULL);
136*51c0b2f7Stbbdev     return (*allocate_handler)(size);
137*51c0b2f7Stbbdev }
138*51c0b2f7Stbbdev 
139*51c0b2f7Stbbdev //! Executed on very first call through cache_aligned_allocate_handler
140*51c0b2f7Stbbdev static void* initialize_cache_aligned_allocate_handler(std::size_t bytes, std::size_t alignment) {
141*51c0b2f7Stbbdev     initialize_cache_aligned_allocator();
142*51c0b2f7Stbbdev     __TBB_ASSERT(cache_aligned_allocate_handler != &initialize_cache_aligned_allocate_handler, NULL);
143*51c0b2f7Stbbdev     return (*cache_aligned_allocate_handler)(bytes, alignment);
144*51c0b2f7Stbbdev }
145*51c0b2f7Stbbdev 
146*51c0b2f7Stbbdev // TODO: use CPUID to find actual line size, though consider backward compatibility
147*51c0b2f7Stbbdev // nfs - no false sharing
148*51c0b2f7Stbbdev static constexpr std::size_t nfs_size = 128;
149*51c0b2f7Stbbdev 
150*51c0b2f7Stbbdev std::size_t __TBB_EXPORTED_FUNC cache_line_size() {
151*51c0b2f7Stbbdev     return nfs_size;
152*51c0b2f7Stbbdev }
153*51c0b2f7Stbbdev 
154*51c0b2f7Stbbdev void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size) {
155*51c0b2f7Stbbdev     const std::size_t cache_line_size = nfs_size;
156*51c0b2f7Stbbdev     __TBB_ASSERT(is_power_of_two(cache_line_size), "must be power of two");
157*51c0b2f7Stbbdev 
158*51c0b2f7Stbbdev     // Check for overflow
159*51c0b2f7Stbbdev     if (size + cache_line_size < size) {
160*51c0b2f7Stbbdev         throw_exception(exception_id::bad_alloc);
161*51c0b2f7Stbbdev     }
162*51c0b2f7Stbbdev     // scalable_aligned_malloc considers zero size request an error, and returns NULL
163*51c0b2f7Stbbdev     if (size == 0) size = 1;
164*51c0b2f7Stbbdev 
165*51c0b2f7Stbbdev     void* result = cache_aligned_allocate_handler(size, cache_line_size);
166*51c0b2f7Stbbdev     if (!result) {
167*51c0b2f7Stbbdev         throw_exception(exception_id::bad_alloc);
168*51c0b2f7Stbbdev     }
169*51c0b2f7Stbbdev     __TBB_ASSERT(is_aligned(result, cache_line_size), "The returned address isn't aligned");
170*51c0b2f7Stbbdev     return result;
171*51c0b2f7Stbbdev }
172*51c0b2f7Stbbdev 
//! Frees memory obtained from cache_aligned_allocate.
/** The pointer is forwarded to the bound handler; the std_* fallback treats
    null as a no-op. */
void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p) {
    // By the time anything is freed, an allocation must already have bound
    // a concrete handler (scalable or CRT fallback).
    __TBB_ASSERT(cache_aligned_deallocate_handler, "Initialization has not been yet.");
    (*cache_aligned_deallocate_handler)(p);
}
177*51c0b2f7Stbbdev 
178*51c0b2f7Stbbdev static void* std_cache_aligned_allocate(std::size_t bytes, std::size_t alignment) {
179*51c0b2f7Stbbdev     // TODO: make it common with cache_aligned_resource
180*51c0b2f7Stbbdev     std::size_t space = alignment + bytes;
181*51c0b2f7Stbbdev     std::uintptr_t base = reinterpret_cast<std::uintptr_t>(std::malloc(space));
182*51c0b2f7Stbbdev     if (!base) {
183*51c0b2f7Stbbdev         return nullptr;
184*51c0b2f7Stbbdev     }
185*51c0b2f7Stbbdev     std::uintptr_t result = (base + nfs_size) & ~(nfs_size - 1);
186*51c0b2f7Stbbdev     // Round up to the next cache line (align the base address)
187*51c0b2f7Stbbdev     __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Cannot store a base pointer to the header");
188*51c0b2f7Stbbdev     __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage");
189*51c0b2f7Stbbdev 
190*51c0b2f7Stbbdev     // Record where block actually starts.
191*51c0b2f7Stbbdev     (reinterpret_cast<std::uintptr_t*>(result))[-1] = base;
192*51c0b2f7Stbbdev     return reinterpret_cast<void*>(result);
193*51c0b2f7Stbbdev }
194*51c0b2f7Stbbdev 
//! Frees a block obtained from std_cache_aligned_allocate; null is a no-op.
static void std_cache_aligned_deallocate(void* p) {
    if (p) {
        // NOTE(review): 0x4096 is hexadecimal (16534), not 4096; presumably a
        // coarse "pointer is not in the first pages" sanity bound — confirm
        // whether the decimal value 4096 was intended.
        __TBB_ASSERT(reinterpret_cast<std::uintptr_t>(p) >= 0x4096, "attempt to free block not obtained from cache_aligned_allocator");
        // Recover where block actually starts: the malloc'ed base address was
        // stored in the word immediately before the aligned pointer.
        std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(p))[-1];
        // Sanity check: re-deriving the aligned address from the stored base
        // must reproduce `p`, otherwise the block was not produced by
        // std_cache_aligned_allocate (or the header was corrupted).
        __TBB_ASSERT(((base + nfs_size) & ~(nfs_size - 1)) == reinterpret_cast<std::uintptr_t>(p), "Incorrect alignment or not allocated by std_cache_aligned_deallocate?");
        std::free(reinterpret_cast<void*>(base));
    }
}
204*51c0b2f7Stbbdev 
205*51c0b2f7Stbbdev void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size) {
206*51c0b2f7Stbbdev     void* result = (*allocate_handler)(size);
207*51c0b2f7Stbbdev     if (!result) {
208*51c0b2f7Stbbdev         throw_exception(exception_id::bad_alloc);
209*51c0b2f7Stbbdev     }
210*51c0b2f7Stbbdev     return result;
211*51c0b2f7Stbbdev }
212*51c0b2f7Stbbdev 
213*51c0b2f7Stbbdev void __TBB_EXPORTED_FUNC deallocate_memory(void* p) {
214*51c0b2f7Stbbdev     if (p) {
215*51c0b2f7Stbbdev         __TBB_ASSERT(deallocate_handler, "Initialization has not been yet.");
216*51c0b2f7Stbbdev         (*deallocate_handler)(p);
217*51c0b2f7Stbbdev     }
218*51c0b2f7Stbbdev }
219*51c0b2f7Stbbdev 
//! Reports which allocator package the handlers are bound to.
/** NOTE(review): as written this returns true when allocate_handler is
    std::malloc — i.e. when tbbmalloc is NOT used — which contradicts the
    function's name. Verify the expected polarity against callers before
    changing anything. */
bool __TBB_EXPORTED_FUNC is_tbbmalloc_used() {
    if (allocate_handler == &initialize_allocate_handler) {
        // Force one-time initialization via a dummy allocation so the handler
        // pointers below are bound to their final targets.
        void* void_ptr = allocate_handler(1);
        deallocate_handler(void_ptr);
    }
    __TBB_ASSERT(allocate_handler != &initialize_allocate_handler && deallocate_handler != nullptr, NULL);
    // Cast to void avoids type mismatch errors on some compilers (e.g. __IBMCPP__)
    __TBB_ASSERT((reinterpret_cast<void*>(allocate_handler) == reinterpret_cast<void*>(&std::malloc)) == (reinterpret_cast<void*>(deallocate_handler) == reinterpret_cast<void*>(&std::free)),
                  "Both shim pointers must refer to routines from the same package (either TBB or CRT)");
    return reinterpret_cast<void*>(allocate_handler) == reinterpret_cast<void*>(&std::malloc);
}
231*51c0b2f7Stbbdev 
232*51c0b2f7Stbbdev } // namespace r1
233*51c0b2f7Stbbdev } // namespace detail
234*51c0b2f7Stbbdev } // namespace tbb
235