xref: /oneTBB/src/tbbmalloc/tbbmalloc_internal.h (revision e1b24895)
1 /*
2     Copyright (c) 2005-2021 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #ifndef __TBB_tbbmalloc_internal_H
18 #define __TBB_tbbmalloc_internal_H
19 
20 #include "TypeDefinitions.h" /* Also includes customization layer Customize.h */
21 
22 #if USE_PTHREAD
23     // Some pthreads documentation says that <pthread.h> must be the first header.
24     #include <pthread.h>
25     typedef pthread_key_t tls_key_t;
26 #elif USE_WINTHREAD
27     #include <windows.h>
28     typedef DWORD tls_key_t;
29 #else
30     #error Must define USE_PTHREAD or USE_WINTHREAD
31 #endif
32 
33 #include <atomic>
34 
35 // TODO: *BSD also has it
36 #define BACKEND_HAS_MREMAP __linux__
37 #define CHECK_ALLOCATION_RANGE MALLOC_DEBUG || MALLOC_ZONE_OVERLOAD_ENABLED || MALLOC_UNIXLIKE_OVERLOAD_ENABLED
38 
39 #include "oneapi/tbb/detail/_config.h" // for __TBB_LIBSTDCPP_EXCEPTION_HEADERS_BROKEN
40 #include "oneapi/tbb/detail/_template_helpers.h"
41 #if __TBB_LIBSTDCPP_EXCEPTION_HEADERS_BROKEN
42   #define _EXCEPTION_PTR_H /* prevents exception_ptr.h inclusion */
43   #define _GLIBCXX_NESTED_EXCEPTION_H /* prevents nested_exception.h inclusion */
44 #endif
45 
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <limits.h> // for CHAR_BIT
49 #include <string.h> // for memset
50 #if MALLOC_CHECK_RECURSION
51 #include <new>        /* for placement new */
52 #endif
53 #include "oneapi/tbb/scalable_allocator.h"
54 #include "tbbmalloc_internal_api.h"
55 
56 /********* Various compile-time options        **************/
57 
58 #if !__TBB_DEFINE_MIC && __TBB_MIC_NATIVE
59  #error Intel(R) Many Integrated Core Compiler does not define __MIC__ anymore.
60 #endif
61 
62 #define MALLOC_TRACE 0
63 
64 #if MALLOC_TRACE
65 #define TRACEF(x) printf x
66 #else
67 #define TRACEF(x) ((void)0)
68 #endif /* MALLOC_TRACE */
69 
70 #define ASSERT_TEXT NULL
71 
72 #define COLLECT_STATISTICS ( MALLOC_DEBUG && MALLOCENV_COLLECT_STATISTICS )
73 #ifndef USE_INTERNAL_TID
74 #define USE_INTERNAL_TID COLLECT_STATISTICS || MALLOC_TRACE
75 #endif
76 
77 #include "Statistics.h"
78 
79 // Call yield for whitebox testing; skip it in the real library.
80 #ifndef WhiteboxTestingYield
81 #define WhiteboxTestingYield() ((void)0)
82 #endif
83 
84 
85 /********* End compile-time options        **************/
86 
87 namespace rml {
88 
89 namespace internal {
90 
91 #if __TBB_MALLOC_LOCACHE_STAT
92 extern intptr_t mallocCalls, cacheHits;
93 extern intptr_t memAllocKB, memHitKB;
94 #endif
95 
96 //! Utility template function to prevent "unused" warnings by various compilers.
97 template<typename T>
98 void suppress_unused_warning( const T& ) {}
99 
100 /********** Various global default constants ********/
101 
102 /*
103  * Default huge page size
104  */
105 static const size_t HUGE_PAGE_SIZE = 2 * 1024 * 1024;
106 
107 /********** End of global default constants *********/
108 
109 /********** Various numeric parameters controlling allocations ********/
110 
111 /*
112  * slabSize - the size of a block for allocation of small objects;
113  * it must be larger than maxSegregatedObjectSize.
114  */
115 const uintptr_t slabSize = 16*1024;
116 
117 /*
118  * Large blocks cache cleanup frequency.
119  * It should be a power of 2 for fast checking.
120  */
121 const unsigned cacheCleanupFreq = 256;
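// Illustrative sketch (not in the original source): because cacheCleanupFreq is a
// power of 2, a cheap bit mask can stand in for a modulo when deciding whether a
// given operation count should trigger cleanup (opCount is a hypothetical counter):
//
//     static_assert((cacheCleanupFreq & (cacheCleanupFreq - 1)) == 0,
//                   "cacheCleanupFreq must be a power of 2");
//     bool timeToClean = (opCount & (cacheCleanupFreq - 1)) == 0;  // same as opCount % cacheCleanupFreq == 0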
122 
123 /*
124  * Alignment of large (>= minLargeObjectSize) objects.
125  */
126 const size_t largeObjectAlignment = estimatedCacheLineSize;
127 
128 /*
129  * The number of bins in the TLS that lead to blocks we can allocate in.
130  */
131 const uint32_t numBlockBinLimit = 31;
132 
133 /********** End of numeric parameters controlling allocations *********/
134 
135 class BlockI;
136 class Block;
137 struct LargeMemoryBlock;
138 struct ExtMemoryPool;
139 struct MemRegion;
140 class FreeBlock;
141 class TLSData;
142 class Backend;
143 class MemoryPool;
144 struct CacheBinOperation;
145 extern const uint32_t minLargeObjectSize;
146 
147 enum DecreaseOrIncrease {
148     decrease, increase
149 };
150 
151 class TLSKey {
152     tls_key_t TLS_pointer_key;
153 public:
154     bool init();
155     bool destroy();
156     TLSData* getThreadMallocTLS() const;
157     void setThreadMallocTLS( TLSData * newvalue );
158     TLSData* createTLS(MemoryPool *memPool, Backend *backend);
159 };
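// Illustrative usage sketch (not part of the original header): the typical
// lookup-or-create pattern for per-thread data, assuming a pool that owns a TLSKey
// named tlsPointerKey (as ExtMemoryPool below does); extPool and memPool are
// hypothetical instances:
//
//     TLSData *tls = extPool.tlsPointerKey.getThreadMallocTLS();
//     if (!tls)  // first allocation on this thread for this pool
//         tls = extPool.tlsPointerKey.createTLS(memPool, &extPool.backend);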
160 
161 template<typename Arg, typename Compare>
162 inline void AtomicUpdate(std::atomic<Arg>& location, Arg newVal, const Compare &cmp)
163 {
164     static_assert(sizeof(Arg) == sizeof(intptr_t), "Type of argument must match AtomicCompareExchange type.");
165     Arg old = location.load(std::memory_order_acquire);
166     for (; cmp(old, newVal); ) {
167         if (location.compare_exchange_strong(old, newVal))
168             break;
169         // TODO: do we need backoff after unsuccessful CAS?
170         // (compare_exchange_strong reloads `old` on failure, so no explicit reload is needed)
171     }
172 }
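// Illustrative usage sketch (not from this header): AtomicUpdate() applies newVal
// only while cmp(current, newVal) holds, so it can maintain a monotonic value under
// contention. For example, tracking a high-water mark (hypothetical variable names):
//
//     std::atomic<intptr_t> highWater{0};
//     AtomicUpdate(highWater, (intptr_t)currentUsage,
//                  [](intptr_t cur, intptr_t candidate) { return cur < candidate; });
//
// Concurrent larger updates win the CAS; smaller ones simply stop retrying.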
173 
174 // TODO: make BitMaskBasic more general
175 // TODO: check that BitMaskBasic is not used for synchronization
176 // (currently, it fits BitMaskMin well, but is not as suitable for BitMaskMax)
177 template<unsigned NUM>
178 class BitMaskBasic {
179     static const unsigned SZ = (NUM-1)/(CHAR_BIT*sizeof(uintptr_t))+1;
180     static const unsigned WORD_LEN = CHAR_BIT*sizeof(uintptr_t);
181 
182     std::atomic<uintptr_t> mask[SZ];
183 
184 protected:
185     void set(size_t idx, bool val) {
186         MALLOC_ASSERT(idx<NUM, ASSERT_TEXT);
187 
188         size_t i = idx / WORD_LEN;
189         int pos = WORD_LEN - idx % WORD_LEN - 1;
190         if (val) {
191             mask[i].fetch_or(1ULL << pos);
192         } else {
193             mask[i].fetch_and(~(1ULL << pos));
194         }
195     }
196     int getMinTrue(unsigned startIdx) const {
197         unsigned idx = startIdx / WORD_LEN;
198         int pos;
199 
200         if (startIdx % WORD_LEN) {
201             // only interested in part of a word, clear bits before startIdx
202             pos = WORD_LEN - startIdx % WORD_LEN;
203             uintptr_t actualMask = mask[idx].load(std::memory_order_relaxed) & (((uintptr_t)1<<pos) - 1);
204             idx++;
205             if (-1 != (pos = BitScanRev(actualMask)))
206                 return idx*WORD_LEN - pos - 1;
207         }
208 
209         while (idx<SZ)
210             if (-1 != (pos = BitScanRev(mask[idx++].load(std::memory_order_relaxed))))
211                 return idx*WORD_LEN - pos - 1;
212         return -1;
213     }
214 public:
215     void reset() { for (unsigned i=0; i<SZ; i++) mask[i].store(0, std::memory_order_relaxed); }
216 };
217 
218 template<unsigned NUM>
219 class BitMaskMin : public BitMaskBasic<NUM> {
220 public:
221     void set(size_t idx, bool val) { BitMaskBasic<NUM>::set(idx, val); }
222     int getMinTrue(unsigned startIdx) const {
223         return BitMaskBasic<NUM>::getMinTrue(startIdx);
224     }
225 };
226 
227 template<unsigned NUM>
228 class BitMaskMax : public BitMaskBasic<NUM> {
229 public:
230     void set(size_t idx, bool val) {
231         BitMaskBasic<NUM>::set(NUM - 1 - idx, val);
232     }
233     int getMaxTrue(unsigned startIdx) const {
234         int p = BitMaskBasic<NUM>::getMinTrue(NUM-startIdx-1);
235         return -1==p? -1 : (int)NUM - 1 - p;
236     }
237 };
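// Illustrative sketch (not part of the original header): BitMaskMin/BitMaskMax can
// serve as occupancy maps over bins; set() marks a bin non-empty and getMinTrue()/
// getMaxTrue() find the closest candidate at or after a starting index. A minimal
// example with a hypothetical mask:
//
//     BitMaskMin<numBlockBinLimit> nonEmptyBins;
//     nonEmptyBins.reset();
//     nonEmptyBins.set(/*idx=*/5, true);        // bin 5 now has free blocks
//     int bin = nonEmptyBins.getMinTrue(0);     // == 5
//     if (bin != -1) { /* take a block from that bin */ }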
238 
239 
240 // The part of thread-specific data that can be modified by other threads.
241 // Such modifications must be protected by AllLocalCaches::listLock.
242 struct TLSRemote {
243     TLSRemote *next,
244               *prev;
245 };
246 
247 // The list of all thread-local data; supports cleanup of thread caches
248 class AllLocalCaches {
249     TLSRemote  *head;
250     MallocMutex listLock; // protects operations in the list
251 public:
252     void registerThread(TLSRemote *tls);
253     void unregisterThread(TLSRemote *tls);
254     bool cleanup(bool cleanOnlyUnused);
255     void markUnused();
256     void reset() { head = NULL; }
257 };
258 
259 class LifoList {
260 public:
261     inline LifoList();
262     inline void push(Block *block);
263     inline Block *pop();
264     inline Block *grab();
265 
266 private:
267     std::atomic<Block*> top;
268     MallocMutex lock;
269 };
270 
271 /*
272  * When a block that is not completely free is returned for reuse by other threads,
273  * this is where the block goes.
274  *
275  * LifoList assumes zero initialization, so constructors below are omitted
276  * to avoid linking with C++ libraries on Linux.
277  */
278 
279 class OrphanedBlocks {
280     LifoList bins[numBlockBinLimit];
281 public:
282     Block *get(TLSData *tls, unsigned int size);
283     void put(intptr_t binTag, Block *block);
284     void reset();
285     bool cleanup(Backend* backend);
286 };
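// Illustrative sketch (not part of the original header): an owner thread that is
// exiting can publish a partially used slab via put(), and another thread asking
// for the same size class may later adopt it via get():
//
//     orphaned.put(binTag, partiallyFreeBlock);        // publishing side
//     ...
//     if (Block *b = orphaned.get(tls, objectSize))    // adopting side
//         { /* continue allocating from b */ }
//
// (orphaned, binTag, partiallyFreeBlock, tls and objectSize are hypothetical names.)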
287 
288 /* Large objects entities */
289 #include "large_objects.h"
290 
291 // select index size for BackRefMaster based on word size: default is uint32_t,
292 // uint16_t for 32-bit platforms
293 template<bool>
294 struct MasterIndexSelect {
295     typedef uint32_t master_type;
296 };
297 
298 template<>
299 struct MasterIndexSelect<false> {
300     typedef uint16_t master_type;
301 };
302 
303 class BackRefIdx { // composite index to backreference array
304 public:
305     typedef MasterIndexSelect<4 < sizeof(uintptr_t)>::master_type master_t;
306 private:
307     static const master_t invalid = ~master_t(0);
308     master_t master;      // index in BackRefMaster
309     uint16_t largeObj:1;  // is this object "large"?
310     uint16_t offset  :15; // offset from beginning of BackRefBlock
311 public:
312     BackRefIdx() : master(invalid), largeObj(0), offset(0) {}
313     bool isInvalid() const { return master == invalid; }
314     bool isLargeObject() const { return largeObj; }
315     master_t getMaster() const { return master; }
316     uint16_t getOffset() const { return offset; }
317 
318 #if __TBB_USE_THREAD_SANITIZER
319     friend
320     __attribute__((no_sanitize("thread")))
321      BackRefIdx dereference(const BackRefIdx* ptr) {
322         BackRefIdx idx;
323         idx.master = ptr->master;
324         idx.largeObj = ptr->largeObj;
325         idx.offset = ptr->offset;
326         return idx;
327     }
328 #else
329     friend
330     BackRefIdx dereference(const BackRefIdx* ptr) {
331         return *ptr;
332     }
333 #endif
334 
335     // only newBackRef can modify BackRefIdx
336     static BackRefIdx newBackRef(bool largeObj);
337 };
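// Illustrative usage sketch (not part of the original header), based on the
// back-reference helpers declared at the end of this file; blockPtr is a
// hypothetical pointer to the block that owns the back reference:
//
//     BackRefIdx idx = BackRefIdx::newBackRef(/*largeObj=*/false);
//     if (!idx.isInvalid()) {
//         setBackRef(idx, blockPtr);                   // publish the pointer for this index
//         MALLOC_ASSERT(getBackRef(idx) == blockPtr, ASSERT_TEXT);
//         removeBackRef(idx);                          // free the slot when the block is released
//     }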
338 
339 // Block header is used during block coalescing
340 // and must be preserved in used blocks.
341 class BlockI {
342     intptr_t     blockState[2];
343 };
344 
345 struct LargeMemoryBlock : public BlockI {
346     MemoryPool       *pool;          // owner pool
347     LargeMemoryBlock *next,          // ptrs in list of cached blocks
348                      *prev,
349     // 2-linked list of pool's large objects
350     // Used to destroy backrefs on pool destroy (backrefs are global)
351     // and for object releasing during pool reset.
352                      *gPrev,
353                      *gNext;
354     uintptr_t         age;           // age of block while in cache
355     size_t            objectSize;    // the size requested by a client
356     size_t            unalignedSize; // the size requested from backend
357     BackRefIdx        backRefIdx;    // cached here, used copy is in LargeObjectHdr
358 };
359 
360 // Classes and methods for backend.cpp
361 #include "backend.h"
362 
363 // A TBB allocator mode that can be controlled by the user
364 // via API/environment variable. Must be placed in zero-initialized memory.
365 // External synchronization assumed.
366 // TODO: TBB_VERSION support
367 class AllocControlledMode {
368     intptr_t val;
369     bool     setDone;
370 
371 public:
372     intptr_t get() const {
373         MALLOC_ASSERT(setDone, ASSERT_TEXT);
374         return val;
375     }
376 
377     // Note: set() can be called before init()
378     void set(intptr_t newVal) {
379         val = newVal;
380         setDone = true;
381     }
382 
383     bool ready() const {
384         return setDone;
385     }
386 
387     // envName - environment variable to get controlled mode
388     void initReadEnv(const char *envName, intptr_t defaultVal) {
389         if (!setDone) {
390             // unreferenced formal parameter warning
391             tbb::detail::suppress_unused_warning(envName);
392 #if !__TBB_WIN8UI_SUPPORT
393         // TODO: use strtol to get the actual value of the environment variable
394             const char *envVal = getenv(envName);
395             if (envVal && !strcmp(envVal, "1"))
396                 val = 1;
397             else
398 #endif
399                 val = defaultVal;
400             setDone = true;
401         }
402     }
403 };
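// Illustrative usage sketch (not part of the original header); HugePagesStatus
// below uses a similar pattern for its requestedMode member:
//
//     AllocControlledMode hugePagesMode;               // must live in zero-initialized memory
//     hugePagesMode.initReadEnv("TBB_MALLOC_USE_HUGE_PAGES", /*defaultVal=*/0);
//     if (hugePagesMode.ready() && hugePagesMode.get())
//         { /* the user requested huge pages */ }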
404 
405 // Page type to be used inside MapMemory.
406 // Regular (4KB aligned), Huge and Transparent Huge Pages (2MB aligned).
407 enum PageType {
408     REGULAR = 0,
409     PREALLOCATED_HUGE_PAGE,
410     TRANSPARENT_HUGE_PAGE
411 };
412 
413 // init() and printStatus() are called only under the global initialization lock.
414 // A race is possible between registerAllocation() and registerReleasing();
415 // the harm is that at most one huge page release is missed (because a failure
416 // to get a huge page is registered only the 1st time), which is negligible.
417 // setMode() can also be called concurrently.
418 // The object must reside in zero-initialized memory.
419 // TODO: can we check for huge page presence during every 10th mmap() call
420 // in case a huge page is released by another process?
421 class HugePagesStatus {
422 private:
423     AllocControlledMode requestedMode; // changed only by user
424                                        // to keep enabled and requestedMode consistent
425     MallocMutex setModeLock;
426     size_t      pageSize;
427     std::atomic<intptr_t> needActualStatusPrint;
428 
429     static void doPrintStatus(bool state, const char *stateName) {
430         // Under macOS* fprintf/snprintf acquires an internal lock, so if the
431         // 1st allocation is done under that lock, we get a deadlock.
432         // Do not use fprintf etc. during initialization.
433         fputs("TBBmalloc: huge pages\t", stderr);
434         if (!state)
435             fputs("not ", stderr);
436         fputs(stateName, stderr);
437         fputs("\n", stderr);
438     }
439 
440     void parseSystemMemInfo() {
441         bool hpAvailable  = false;
442         bool thpAvailable = false;
443         unsigned long long hugePageSize = 0;
444 
445 #if __linux__
446         // Check huge pages existence
447         unsigned long long meminfoHugePagesTotal = 0;
448 
449         parseFileItem meminfoItems[] = {
450             // Parse system huge page size
451             { "Hugepagesize: %llu kB", hugePageSize },
452             // Check if there are preallocated huge pages on the system
453             // https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt
454             { "HugePages_Total: %llu", meminfoHugePagesTotal } };
455 
456         parseFile</*BUFF_SIZE=*/100>("/proc/meminfo", meminfoItems);
457 
458         // Double-check other system information regarding preallocated
459         // huge pages if there is no information in /proc/meminfo
460         unsigned long long vmHugePagesTotal = 0;
461 
462         parseFileItem vmItem[] = { { "%llu", vmHugePagesTotal } };
463 
464         // We parse a counter number; it can't be huge
465         parseFile</*BUFF_SIZE=*/100>("/proc/sys/vm/nr_hugepages", vmItem);
466 
467         if (meminfoHugePagesTotal > 0 || vmHugePagesTotal > 0) {
468             MALLOC_ASSERT(hugePageSize != 0, "Huge page size can't be zero if preallocated huge pages were found.");
469 
470             // Any non-zero value clearly states that there are preallocated
471             // huge pages on the system
472             hpAvailable = true;
473         }
474 
475         // Check if there is transparent huge pages support on the system
476         unsigned long long thpPresent = 'n';
477         parseFileItem thpItem[] = { { "[alwa%cs] madvise never\n", thpPresent } };
478         parseFile</*BUFF_SIZE=*/100>("/sys/kernel/mm/transparent_hugepage/enabled", thpItem);
479 
480         if (thpPresent == 'y') {
481             MALLOC_ASSERT(hugePageSize != 0, "Huge page size can't be zero if THP support was found.");
482             thpAvailable = true;
483         }
484 #endif
485         MALLOC_ASSERT(!pageSize, "Huge page size can't be set twice. Double initialization.");
486 
487         // Initialize object variables
488         pageSize       = hugePageSize * 1024; // was read in KB from meminfo
489         isHPAvailable  = hpAvailable;
490         isTHPAvailable = thpAvailable;
491     }
492 
493 public:
494 
495     // System information
496     bool isHPAvailable;
497     bool isTHPAvailable;
498 
499     // User defined value
500     bool isEnabled;
501 
502     void init() {
503         parseSystemMemInfo();
504         MallocMutex::scoped_lock lock(setModeLock);
505         requestedMode.initReadEnv("TBB_MALLOC_USE_HUGE_PAGES", 0);
506         isEnabled = (isHPAvailable || isTHPAvailable) && requestedMode.get();
507     }
508 
509     // Can be set from user code at any point.
510     // If init() has not been called by this point, isEnabled will be false.
511     void setMode(intptr_t newVal) {
512         MallocMutex::scoped_lock lock(setModeLock);
513         requestedMode.set(newVal);
514         isEnabled = (isHPAvailable || isTHPAvailable) && newVal;
515     }
516 
517     void reset() {
518         needActualStatusPrint.store(0, std::memory_order_relaxed);
519         pageSize = 0;
520         isEnabled = isHPAvailable = isTHPAvailable = false;
521     }
522 
523     // If memory mapping size is a multiple of huge page size, some OS kernels
524     // can use huge pages transparently. Use this when huge pages are requested.
525     size_t getGranularity() const {
526         if (requestedMode.ready())
527             return requestedMode.get() ? pageSize : 0;
528         else
529             return HUGE_PAGE_SIZE; // the mode is not yet known; assume typical 2MB huge pages
530     }
531 
532     void printStatus() {
533         doPrintStatus(requestedMode.get(), "requested");
534         if (requestedMode.get()) { // report actual status iff requested
535             if (pageSize)
536                 needActualStatusPrint.store(1, std::memory_order_release);
537             else
538                 doPrintStatus(/*state=*/false, "available");
539         }
540     }
541 };
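// Illustrative sketch (not part of the original header): a mapping layer can round
// request sizes up to the huge-page granularity so the kernel may back the region
// with huge pages (hugePages and mapSize are hypothetical names):
//
//     size_t gran = hugePages.getGranularity();        // 0 means no rounding is needed
//     if (gran)
//         mapSize = ((mapSize + gran - 1) / gran) * gran;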
542 
543 class AllLargeBlocksList {
544     MallocMutex       largeObjLock;
545     LargeMemoryBlock *loHead;
546 public:
547     void add(LargeMemoryBlock *lmb);
548     void remove(LargeMemoryBlock *lmb);
549     template<bool poolDestroy> void releaseAll(Backend *backend);
550 };
551 
552 struct ExtMemoryPool {
553     Backend           backend;
554     LargeObjectCache  loc;
555     AllLocalCaches    allLocalCaches;
556     OrphanedBlocks    orphanedBlocks;
557 
558     intptr_t          poolId;
559     // To find all large objects. Used during user pool destruction,
560     // to release all backreferences in large blocks (slab blocks do not have them).
561     AllLargeBlocksList lmbList;
562     // Callbacks to be used instead of MapMemory/UnmapMemory.
563     rawAllocType      rawAlloc;
564     rawFreeType       rawFree;
565     size_t            granularity;
566     bool              keepAllMemory,
567                       delayRegsReleasing,
568     // TODO: implement fixedPool by calling rawFree on destruction
569                       fixedPool;
570     TLSKey            tlsPointerKey;  // per-pool TLS key
571 
572     bool init(intptr_t poolId, rawAllocType rawAlloc, rawFreeType rawFree,
573               size_t granularity, bool keepAllMemory, bool fixedPool);
574     bool initTLS();
575 
576     // i.e., not the system default pool for scalable_malloc/scalable_free
577     bool userPool() const { return rawAlloc; }
578 
579      // true if something has been released
580     bool softCachesCleanup();
581     bool releaseAllLocalCaches();
582     bool hardCachesCleanup();
583     void *remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment);
584     bool reset() {
585         loc.reset();
586         allLocalCaches.reset();
587         orphanedBlocks.reset();
588         bool ret = tlsPointerKey.destroy();
589         backend.reset();
590         return ret;
591     }
592     bool destroy() {
593         MALLOC_ASSERT(isPoolValid(),
594                       "Possible double pool_destroy or heap corruption");
595         if (!userPool()) {
596             loc.reset();
597             allLocalCaches.reset();
598         }
599         // pthread_key_dtors must be disabled before memory unmapping
600         // TODO: race-free solution
601         bool ret = tlsPointerKey.destroy();
602         if (rawFree || !userPool())
603             ret &= backend.destroy();
604         // pool is not valid after this point
605         granularity = 0;
606         return ret;
607     }
608     void delayRegionsReleasing(bool mode) { delayRegsReleasing = mode; }
609     inline bool regionsAreReleaseable() const;
610 
611     LargeMemoryBlock *mallocLargeObject(MemoryPool *pool, size_t allocationSize);
612     void freeLargeObject(LargeMemoryBlock *lmb);
613     void freeLargeObjectList(LargeMemoryBlock *head);
614 #if MALLOC_DEBUG
615     // use granularity as a marker for pool validity
616     bool isPoolValid() const { return granularity; }
617 #endif
618 };
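// Illustrative sketch (not part of the original header): a plausible caller-side
// cleanup cascade when the backend runs out of memory, trying the cheap pass before
// dropping everything cacheable back to the backend (extPool is hypothetical):
//
//     if (!extPool.softCachesCleanup())   // per-thread/large-object caches, cheap pass
//         extPool.hardCachesCleanup();    // aggressive pass; both return true if memory was released
//     // retry the allocation afterwards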
619 
620 inline bool Backend::inUserPool() const { return extMemPool->userPool(); }
621 
622 struct LargeObjectHdr {
623     LargeMemoryBlock *memoryBlock;
624     /* Backreference points to LargeObjectHdr.
625        Duplicated in LargeMemoryBlock to reuse in subsequent allocations. */
626     BackRefIdx       backRefIdx;
627 };
628 
629 struct FreeObject {
630     FreeObject  *next;
631 };
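// Illustrative sketch (not part of the original header): FreeObject is an intrusive
// singly-linked free-list node; free objects are threaded through the memory they
// occupy (ptr and freeListHead are hypothetical names):
//
//     FreeObject *obj = (FreeObject*)ptr;   // reuse the freed object's storage as a node
//     obj->next = freeListHead;
//     freeListHead = obj;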
632 
633 
634 /******* A helper class to support overriding malloc with scalable_malloc *******/
635 #if MALLOC_CHECK_RECURSION
636 
637 class RecursiveMallocCallProtector {
638     // pointer to automatic data of the holding thread
639     static std::atomic<void*> autoObjPtr;
640     static MallocMutex rmc_mutex;
641     static std::atomic<pthread_t> owner_thread;
642 /* Under FreeBSD 8.0 the 1st call to any pthread function, including pthread_self,
643    leads to pthread initialization, which causes malloc calls. As the 1st usage of
644    RecursiveMallocCallProtector can happen before pthread is initialized, pthread calls
645    can't be used in the 1st instance of RecursiveMallocCallProtector.
646    RecursiveMallocCallProtector is used the 1st time in checkInitialization(),
647    so there is a guarantee that by the 2nd usage pthread is initialized.
648    No such situation has been observed with other supported OSes.
649  */
650 #if __FreeBSD__
651     static bool        canUsePthread;
652 #else
653     static const bool  canUsePthread = true;
654 #endif
655 /*
656   The variable is modified in checkInitialization(),
657   so it can be read without memory barriers.
658  */
659     static bool mallocRecursionDetected;
660 
661     MallocMutex::scoped_lock* lock_acquired;
662     char scoped_lock_space[sizeof(MallocMutex::scoped_lock)+1];
663 
664     static uintptr_t absDiffPtr(void *x, void *y) {
665         uintptr_t xi = (uintptr_t)x, yi = (uintptr_t)y;
666         return xi > yi ? xi - yi : yi - xi;
667     }
668 public:
669 
670     RecursiveMallocCallProtector() : lock_acquired(NULL) {
671         lock_acquired = new (scoped_lock_space) MallocMutex::scoped_lock( rmc_mutex );
672         if (canUsePthread)
673             owner_thread.store(pthread_self(), std::memory_order_relaxed);
674         autoObjPtr.store(&scoped_lock_space, std::memory_order_relaxed);
675     }
676     ~RecursiveMallocCallProtector() {
677         if (lock_acquired) {
678             autoObjPtr.store(nullptr, std::memory_order_relaxed);
679             lock_acquired->~scoped_lock();
680         }
681     }
682     static bool sameThreadActive() {
683         if (!autoObjPtr.load(std::memory_order_relaxed)) // fast path
684             return false;
685         // Some thread has an active recursive call protector; check whether it is the current one.
686         // Exact pthread_self-based test
687         if (canUsePthread) {
688             if (pthread_equal( owner_thread.load(std::memory_order_relaxed), pthread_self() )) {
689                 mallocRecursionDetected = true;
690                 return true;
691             } else
692                 return false;
693         }
694         // inexact stack size based test
695         const uintptr_t threadStackSz = 2*1024*1024;
696         int dummy;
697         return absDiffPtr(autoObjPtr.load(std::memory_order_relaxed), &dummy)<threadStackSz;
698     }
699 
700 /* The function is called on the 1st scalable_malloc call to check whether malloc calls
701    scalable_malloc (a nested call must set mallocRecursionDetected). */
702     static void detectNaiveOverload() {
703         if (!malloc_proxy) {
704 #if __FreeBSD__
705 /* If !canUsePthread, we could not call pthread_self() earlier, but now pthread
706    is already initialized, so we can do it. */
707             if (!canUsePthread) {
708                 canUsePthread = true;
709                 owner_thread.store(pthread_self(), std::memory_order_relaxed);
710             }
711 #endif
712             free(malloc(1));
713         }
714     }
715 };
716 
717 #else
718 
719 class RecursiveMallocCallProtector {
720 public:
721     RecursiveMallocCallProtector() {}
722     ~RecursiveMallocCallProtector() {}
723 };
724 
725 #endif  /* MALLOC_CHECK_RECURSION */
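// Illustrative usage sketch (not part of the original header): code that might
// re-enter malloc while overriding it can guard itself with the protector, and the
// overriding entry point can detect the recursion (function names are hypothetical):
//
//     void *allocateFromOS(size_t sz) {
//         RecursiveMallocCallProtector scoped;         // marks this thread as "inside the allocator"
//         return mapMemoryViaLibc(sz);                 // may call malloc internally
//     }
//
//     // in the malloc override:
//     //     if (RecursiveMallocCallProtector::sameThreadActive())
//     //         /* serve the nested request from a bootstrap/raw path */;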
726 
727 unsigned int getThreadId();
728 
729 bool initBackRefMaster(Backend *backend);
730 void destroyBackRefMaster(Backend *backend);
731 void removeBackRef(BackRefIdx backRefIdx);
732 void setBackRef(BackRefIdx backRefIdx, void *newPtr);
733 void *getBackRef(BackRefIdx backRefIdx);
734 
735 } // namespace internal
736 } // namespace rml
737 
738 #endif // __TBB_tbbmalloc_internal_H
739