/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include <errno.h>
#include <new>        /* for placement new */
#include <string.h>   /* for memset */

#include "oneapi/tbb/version.h"
#include "../tbb/environment.h"
#include "../tbb/itt_notify.h" // for __TBB_load_ittnotify()

#if USE_PTHREAD
    #define TlsSetValue_func pthread_setspecific
    #define TlsGetValue_func pthread_getspecific
    #define GetMyTID() pthread_self()
    #include <sched.h>
    inline void do_yield() {sched_yield();}
    extern "C" { static void mallocThreadShutdownNotification(void*); }
    #if __sun || __SUNPRO_CC
    #define __asm__ asm
    #endif
    #include <unistd.h> // sysconf(_SC_PAGESIZE)
#elif USE_WINTHREAD
    #define GetMyTID() GetCurrentThreadId()
#if __TBB_WIN8UI_SUPPORT
    #include<thread>
    #define TlsSetValue_func FlsSetValue
    #define TlsGetValue_func FlsGetValue
    #define TlsAlloc() FlsAlloc(NULL)
    #define TLS_ALLOC_FAILURE FLS_OUT_OF_INDEXES
    #define TlsFree FlsFree
    inline void do_yield() {std::this_thread::yield();}
#else
    #define TlsSetValue_func TlsSetValue
    #define TlsGetValue_func TlsGetValue
    #define TLS_ALLOC_FAILURE TLS_OUT_OF_INDEXES
    inline void do_yield() {SwitchToThread();}
#endif
#else
    #error Must define USE_PTHREAD or USE_WINTHREAD
#endif

#define FREELIST_NONBLOCKING 1

namespace rml {
class MemoryPool;
namespace internal {

class Block;
class MemoryPool;

#if MALLOC_CHECK_RECURSION

inline bool isMallocInitialized();

bool RecursiveMallocCallProtector::noRecursion() {
    MALLOC_ASSERT(isMallocInitialized(),
                  "Recursion status can be checked only when initialization was done.");
    return !mallocRecursionDetected;
}

#endif // MALLOC_CHECK_RECURSION
/** Support for handling the special UNUSABLE pointer state **/
const intptr_t UNUSABLE = 0x1;
inline bool isSolidPtr( void* ptr ) {
    return (UNUSABLE|(intptr_t)ptr)!=UNUSABLE;
}
inline bool isNotForUse( void* ptr ) {
    return (intptr_t)ptr==UNUSABLE;
}
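// Illustration of the three pointer states handled above (not part of the
// allocator logic): for a NULL pointer both isSolidPtr() and isNotForUse()
// return false; for the UNUSABLE marker (0x1) isSolidPtr() is false and
// isNotForUse() is true; for a real object address isSolidPtr() is true and
// isNotForUse() is false.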

/*
 * Block::objectSize value used to mark blocks allocated by startupAlloc
 */
const uint16_t startupAllocObjSizeMark = ~(uint16_t)0;

/*
 * The following constant is used to define the size of struct Block, the block header.
 * The intent is to have the size of a Block be a multiple of the cache line size; this allows us to
 * get good alignment at the cost of some overhead equal to the amount of padding included in the Block.
 */
const int blockHeaderAlignment = estimatedCacheLineSize;

/********* The data structures and global objects        **************/

/*
 * The malloc routines themselves need to be able to occasionally malloc some space,
 * in order to set up the structures used by the thread-local structures. This
 * class provides that functionality.
 */
class BootStrapBlocks {
    MallocMutex bootStrapLock;
    Block      *bootStrapBlock;
    Block      *bootStrapBlockUsed;
    FreeObject *bootStrapObjectList;
public:
    void *allocate(MemoryPool *memPool, size_t size);
    void free(void* ptr);
    void reset();
};

#if USE_INTERNAL_TID
class ThreadId {
    static tls_key_t Tid_key;
    static std::atomic<intptr_t> ThreadCount;

    unsigned int id;

    static unsigned int tlsNumber() {
        unsigned int result = reinterpret_cast<intptr_t>(TlsGetValue_func(Tid_key));
        if( !result ) {
            RecursiveMallocCallProtector scoped;
            // Thread-local value is zero -> first call from this thread,
            // need to initialize with next ID value (IDs start from 1)
            result = ++ThreadCount; // returned new value!
            TlsSetValue_func( Tid_key, reinterpret_cast<void*>(result) );
        }
        return result;
    }
public:
    static bool init() {
#if USE_WINTHREAD
        Tid_key = TlsAlloc();
        if (Tid_key == TLS_ALLOC_FAILURE)
            return false;
#else
        int status = pthread_key_create( &Tid_key, NULL );
        if ( status ) {
            fprintf (stderr, "The memory manager cannot create tls key during initialization\n");
            return false;
        }
#endif /* USE_WINTHREAD */
        return true;
    }
    static void destroy() {
        if( Tid_key ) {
#if USE_WINTHREAD
            BOOL status = !(TlsFree( Tid_key ));  // fail is zero
#else
            int status = pthread_key_delete( Tid_key );
#endif /* USE_WINTHREAD */
            if ( status )
                fprintf (stderr, "The memory manager cannot delete tls key\n");
            Tid_key = 0;
        }
    }

    ThreadId() : id(ThreadId::tlsNumber()) {}
    bool isCurrentThreadId() const { return id == ThreadId::tlsNumber(); }

#if COLLECT_STATISTICS || MALLOC_TRACE
    friend unsigned int getThreadId() { return ThreadId::tlsNumber(); }
#endif
#if COLLECT_STATISTICS
    static unsigned getMaxThreadId() { return ThreadCount.load(std::memory_order_relaxed); }

    friend int STAT_increment(ThreadId tid, int bin, int ctr);
#endif
};

tls_key_t ThreadId::Tid_key;
std::atomic<intptr_t> ThreadId::ThreadCount;

#if COLLECT_STATISTICS
int STAT_increment(ThreadId tid, int bin, int ctr)
{
    return ::STAT_increment(tid.id, bin, ctr);
}
#endif

#else // USE_INTERNAL_TID

class ThreadId {
#if USE_PTHREAD
    pthread_t tid;
#else
    DWORD     tid;
#endif
public:
    ThreadId() : tid(GetMyTID()) {}
#if USE_PTHREAD
    bool isCurrentThreadId() const { return pthread_equal(pthread_self(), tid); }
#else
    bool isCurrentThreadId() const { return GetCurrentThreadId() == tid; }
#endif
    static bool init() { return true; }
    static void destroy() {}
};

#endif // USE_INTERNAL_TID

/*********** Code to provide thread ID and a thread-local void pointer **********/

bool TLSKey::init()
{
#if USE_WINTHREAD
    TLS_pointer_key = TlsAlloc();
    if (TLS_pointer_key == TLS_ALLOC_FAILURE)
        return false;
#else
    int status = pthread_key_create( &TLS_pointer_key, mallocThreadShutdownNotification );
    if ( status )
        return false;
#endif /* USE_WINTHREAD */
    return true;
}

bool TLSKey::destroy()
{
#if USE_WINTHREAD
    BOOL status1 = !(TlsFree(TLS_pointer_key)); // fail is zero
#else
    int status1 = pthread_key_delete(TLS_pointer_key);
#endif /* USE_WINTHREAD */
    MALLOC_ASSERT(!status1, "The memory manager cannot delete tls key.");
    return status1==0;
}

inline TLSData* TLSKey::getThreadMallocTLS() const
{
    return (TLSData *)TlsGetValue_func( TLS_pointer_key );
}

inline void TLSKey::setThreadMallocTLS( TLSData * newvalue ) {
    RecursiveMallocCallProtector scoped;
    TlsSetValue_func( TLS_pointer_key, newvalue );
}

/* The 'next' field in the block header has to maintain some invariants:
 *   it needs to be on a 16K boundary and the first field in the block.
 *   Any value stored there needs to have the lower 14 bits set to 0
 *   so that various asserts work. This means that if you want to smash this memory
 *   for debugging purposes you will need to obey this invariant.
 * The total size of the header needs to be a power of 2 to simplify
 * the alignment requirements. For now it is a 128 byte structure.
 * To avoid false sharing, the fields changed only locally are separated
 * from the fields changed by foreign threads.
 * Changing the size of the block header would require changing
 * some bin allocation sizes, in particular the "fitting" sizes (see below).
 */
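/* Illustration of the invariant above (not part of the allocator): a slab is
 * 16K-aligned, so for any Block* value stored in 'next'
 *     ((uintptr_t)blockPtr & ((1 << 14) - 1)) == 0
 * holds, i.e. its lower 14 bits are zero, which is what the asserts rely on. */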
class Bin;
class StartupBlock;

class MemoryPool {
    // if no explicit grainsize, expect to see malloc in user's pAlloc
    // and set reasonable low granularity
    static const size_t defaultGranularity = estimatedCacheLineSize;

    MemoryPool();                  // deny
public:
    static MallocMutex  memPoolListLock;

    // list of all active pools is used to release
    // all TLS data on thread termination or library unload
    MemoryPool    *next,
                  *prev;
    ExtMemoryPool  extMemPool;
    BootStrapBlocks bootStrapBlocks;

    static void initDefaultPool();

    bool init(intptr_t poolId, const MemPoolPolicy* memPoolPolicy);
    bool reset();
    bool destroy();
    void onThreadShutdown(TLSData *tlsData);

    inline TLSData *getTLS(bool create);
    void clearTLS() { extMemPool.tlsPointerKey.setThreadMallocTLS(NULL); }

    Block *getEmptyBlock(size_t size);
    void returnEmptyBlock(Block *block, bool poolTheBlock);

    // get/put large object to/from local large object cache
    void *getFromLLOCache(TLSData *tls, size_t size, size_t alignment);
    void putToLLOCache(TLSData *tls, void *object);
};

static intptr_t defaultMemPool_space[sizeof(MemoryPool)/sizeof(intptr_t) +
                                     (sizeof(MemoryPool)%sizeof(intptr_t)? 1 : 0)];
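// Note: the index expression above is a ceiling division, i.e. the array holds
// ceil(sizeof(MemoryPool)/sizeof(intptr_t)) intptr_t words -- at least
// sizeof(MemoryPool) bytes of statically allocated, zero-initialized storage
// for the default pool referenced just below.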
static MemoryPool *defaultMemPool = (MemoryPool*)defaultMemPool_space;
const size_t MemoryPool::defaultGranularity;
// zero-initialized
MallocMutex  MemoryPool::memPoolListLock;
// TODO: move huge page status to the default pool, because that's its state
HugePagesStatus hugePages;
static bool usedBySrcIncluded = false;

// Padding helpers
template<size_t padd>
struct PaddingImpl {
    size_t       __padding[padd];
};

template<>
struct PaddingImpl<0> {};

template<int N>
struct Padding : PaddingImpl<N/sizeof(size_t)> {};
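// Note on the idiom above (illustrative, not part of the allocator logic):
// Padding<0> resolves to the empty PaddingImpl<0> specialization, so when used
// as a base class the empty-base optimization adds no bytes; any non-zero N adds
// N/sizeof(size_t) words of padding.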

// Slab block is 16KB-aligned. To prevent false sharing, separate the locally-accessed
// fields from the fields commonly accessed by non-owner threads.
class GlobalBlockFields : public BlockI {
protected:
    std::atomic<FreeObject*> publicFreeList;
    Block       *nextPrivatizable;
    MemoryPool  *poolPtr;
};

class LocalBlockFields : public GlobalBlockFields, Padding<blockHeaderAlignment - sizeof(GlobalBlockFields)>  {
protected:
    Block       *next;
    Block       *previous;        /* Use double linked list to speed up removal */
    FreeObject  *bumpPtr;         /* Bump pointer moves from the end to the beginning of a block */
    FreeObject  *freeList;
    /* Pointer to local data for the owner thread. Used to quickly find the TLS
       when releasing an object from a block that the current thread owns.
       NULL for orphaned blocks. */
    TLSData     *tlsPtr;
    ThreadId     ownerTid;        /* the ID of the thread that owns or last owned the block */
    BackRefIdx   backRefIdx;
    uint16_t     allocatedCount;  /* Number of objects allocated (obviously by the owning thread) */
    uint16_t     objectSize;
    bool         isFull;

    friend class FreeBlockPool;
    friend class StartupBlock;
    friend class LifoList;
    friend void *BootStrapBlocks::allocate(MemoryPool *, size_t);
    friend bool OrphanedBlocks::cleanup(Backend*);
    friend Block *MemoryPool::getEmptyBlock(size_t);
};

// Use inheritance to guarantee that user data starts on the next cache line.
// A padding member cannot be used for this, because when LocalBlockFields already
// ends on a cache line, there must be no additional memory consumption with any compiler.
class Block : public LocalBlockFields,
              Padding<2*blockHeaderAlignment - sizeof(LocalBlockFields)> {
public:
    bool empty() const {
        if (allocatedCount > 0) return false;
        MALLOC_ASSERT(!isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT);
        return true;
    }
    inline FreeObject* allocate();
    inline FreeObject *allocateFromFreeList();

    inline bool adjustFullness();
    void adjustPositionInBin(Bin* bin = NULL);

    bool freeListNonNull() { return freeList; }
    void freePublicObject(FreeObject *objectToFree);
    inline void freeOwnObject(void *object);
    void reset();
    void privatizePublicFreeList( bool reset = true );
    void restoreBumpPtr();
    void privatizeOrphaned(TLSData *tls, unsigned index);
    bool readyToShare();
    void shareOrphaned(intptr_t binTag, unsigned index);
    unsigned int getSize() const {
        MALLOC_ASSERT(isStartupAllocObject() || objectSize<minLargeObjectSize,
                      "Invalid object size");
        return isStartupAllocObject()? 0 : objectSize;
    }
    const BackRefIdx *getBackRefIdx() const { return &backRefIdx; }
    inline bool isOwnedByCurrentThread() const;
    bool isStartupAllocObject() const { return objectSize == startupAllocObjSizeMark; }
    inline FreeObject *findObjectToFree(const void *object) const;
    void checkFreePrecond(const void *object) const {
#if MALLOC_DEBUG
        const char *msg = "Possible double free or heap corruption.";
        // small objects are always at least sizeof(size_t) bytes aligned;
        // try to check this before the dereference below, as for invalid objects
        // the memory may be unreadable
        MALLOC_ASSERT(isAligned(object, sizeof(size_t)), "Try to free invalid small object");
        // releasing to free slab
        MALLOC_ASSERT(allocatedCount>0, msg);
        // must not point to slab's header
        MALLOC_ASSERT((uintptr_t)object - (uintptr_t)this >= sizeof(Block), msg);
        if (startupAllocObjSizeMark == objectSize) // startup block
            MALLOC_ASSERT(object<=bumpPtr, msg);
        else {
            // non-startup objects are 8-byte aligned
            MALLOC_ASSERT(isAligned(object, 8), "Try to free invalid small object");
            MALLOC_ASSERT(allocatedCount <= (slabSize-sizeof(Block))/objectSize
                          && (!bumpPtr || object>bumpPtr), msg);
            FreeObject *toFree = findObjectToFree(object);
            // check against head of freeList, as this is mostly
            // expected after double free
            MALLOC_ASSERT(toFree != freeList, msg);
            // check against head of publicFreeList, to detect double free
            // involving foreign thread
            MALLOC_ASSERT(toFree != publicFreeList.load(std::memory_order_relaxed), msg);
        }
#else
        suppress_unused_warning(object);
#endif
    }
    void initEmptyBlock(TLSData *tls, size_t size);
    size_t findObjectSize(void *object) const;
    MemoryPool *getMemPool() const { return poolPtr; } // do not use on the hot path!

protected:
    void cleanBlockHeader();

private:
    static const float emptyEnoughRatio; /* Threshold on free space needed to "reactivate" a block */

    inline FreeObject *allocateFromBumpPtr();
    inline FreeObject *findAllocatedObject(const void *address) const;
    inline bool isProperlyPlaced(const void *object) const;
    inline void markOwned(TLSData *tls) {
        MALLOC_ASSERT(!tlsPtr, ASSERT_TEXT);
        ownerTid = ThreadId(); /* save the ID of the current thread */
        tlsPtr = tls;
    }
    inline void markOrphaned() {
        MALLOC_ASSERT(tlsPtr, ASSERT_TEXT);
        tlsPtr = NULL;
    }

    friend class Bin;
    friend class TLSData;
    friend bool MemoryPool::destroy();
};

const float Block::emptyEnoughRatio = 1.0 / 4.0;

static_assert(sizeof(Block) <= 2*estimatedCacheLineSize,
    "The class Block does not fit into 2 cache lines on this platform. "
    "Defining USE_INTERNAL_TID may help to fix it.");

class Bin {
private:
    Block *activeBlk;
    std::atomic<Block*> mailbox;
    MallocMutex mailLock;

public:
    inline Block* getActiveBlock() const { return activeBlk; }
    void resetActiveBlock() { activeBlk = NULL; }
    bool activeBlockUnused() const { return activeBlk && !activeBlk->allocatedCount; }
    inline void setActiveBlock(Block *block);
    inline Block* setPreviousBlockActive();
    Block* getPrivatizedFreeListBlock();
    void moveBlockToFront(Block *block);
    bool cleanPublicFreeLists();
    void processEmptyBlock(Block *block, bool poolTheBlock);
    void addPublicFreeListBlock(Block* block);

    void outofTLSBin(Block* block);
    void verifyTLSBin(size_t size) const;
    void pushTLSBin(Block* block);

    void verifyInitState() const {
        MALLOC_ASSERT( !activeBlk, ASSERT_TEXT );
        MALLOC_ASSERT( !mailbox.load(std::memory_order_relaxed), ASSERT_TEXT );
    }

    friend void Block::freePublicObject (FreeObject *objectToFree);
};

/********* End of the data structures                    **************/

/*
 * There are bins for all 8-byte aligned objects less than this segregated size; 8 bins in total
 */
const uint32_t minSmallObjectIndex = 0;
const uint32_t numSmallObjectBins = 8;
const uint32_t maxSmallObjectSize = 64;

/*
 * There are 4 bins between each pair of powers of 2 [64-128-256-...]
 * from maxSmallObjectSize up to this size; 16 bins in total
 */
const uint32_t minSegregatedObjectIndex = minSmallObjectIndex+numSmallObjectBins;
const uint32_t numSegregatedObjectBins = 16;
const uint32_t maxSegregatedObjectSize = 1024;

/*
 * And there are 5 bins with allocation sizes that are multiples of estimatedCacheLineSize
 * and selected to fit 9, 6, 4, 3, and 2 allocations in a block.
 */
const uint32_t minFittingIndex = minSegregatedObjectIndex+numSegregatedObjectBins;
const uint32_t numFittingBins = 5;

const uint32_t fittingAlignment = estimatedCacheLineSize;

#define SET_FITTING_SIZE(N) ( (slabSize-sizeof(Block))/N ) & ~(fittingAlignment-1)
// For blockSize=16*1024, sizeof(Block)=2*estimatedCacheLineSize and fittingAlignment=estimatedCacheLineSize,
// the comments show the fitting sizes and the amounts left unused for estimatedCacheLineSize=64/128:
const uint32_t fittingSize1 = SET_FITTING_SIZE(9); // 1792/1792 128/000
const uint32_t fittingSize2 = SET_FITTING_SIZE(6); // 2688/2688 128/000
const uint32_t fittingSize3 = SET_FITTING_SIZE(4); // 4032/3968 128/256
const uint32_t fittingSize4 = SET_FITTING_SIZE(3); // 5376/5376 128/000
const uint32_t fittingSize5 = SET_FITTING_SIZE(2); // 8128/8064 000/000
#undef SET_FITTING_SIZE
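// Worked example (estimatedCacheLineSize == 64, so sizeof(Block) == 128 and slabSize == 16*1024):
// SET_FITTING_SIZE(9) == ((16384 - 128)/9) & ~63 == 1806 & ~63 == 1792, and
// 16384 - 128 - 9*1792 == 128 bytes are left unused, matching the "1792 ... 128" figures above.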

/*
 * The total number of thread-specific Block-based bins
 */
const uint32_t numBlockBins = minFittingIndex+numFittingBins;
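// With the constant values above: 8 small + 16 segregated + 5 fitting == 29 bins.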

/*
 * Objects of this size and larger are considered large objects.
 */
const uint32_t minLargeObjectSize = fittingSize5 + 1;
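// E.g., for the 64-byte cache line figures above this is 8128 + 1 == 8129 bytes.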

/*
 * Per-thread pool of slab blocks. The idea behind it is to avoid sharing with other
 * threads memory that is likely in the local cache(s) of our CPU.
 */
class FreeBlockPool {
private:
    std::atomic<Block*> head;
    int         size;
    Backend    *backend;
    bool        lastAccessMiss;
public:
    static const int POOL_HIGH_MARK = 32;
    static const int POOL_LOW_MARK  = 8;

    class ResOfGet {
        ResOfGet();
    public:
        Block* block;
        bool   lastAccMiss;
        ResOfGet(Block *b, bool lastMiss) : block(b), lastAccMiss(lastMiss) {}
    };

    // allocated in zero-initialized memory
    FreeBlockPool(Backend *bknd) : backend(bknd) {}
    ResOfGet getBlock();
    void returnBlock(Block *block);
    bool externalCleanup(); // can be called by another thread
};

template<int LOW_MARK, int HIGH_MARK>
class LocalLOCImpl {
private:
    static const size_t MAX_TOTAL_SIZE = 4*1024*1024;
    // TODO: can a singly-linked list be faster here?
    LargeMemoryBlock *tail; // needed when releasing on overflow
    std::atomic<LargeMemoryBlock*> head;
    size_t            totalSize;
    int               numOfBlocks;
public:
    bool put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool);
    LargeMemoryBlock *get(size_t size);
    bool externalCleanup(ExtMemoryPool *extMemPool);
#if __TBB_MALLOC_WHITEBOX_TEST
    LocalLOCImpl() : head(NULL), tail(NULL), totalSize(0), numOfBlocks(0) {}
    static size_t getMaxSize() { return MAX_TOTAL_SIZE; }
    static const int LOC_HIGH_MARK = HIGH_MARK;
#else
    // no ctor, object must be created in zero-initialized memory
#endif
};

typedef LocalLOCImpl<8,32> LocalLOC; // set production code parameters

class TLSData : public TLSRemote {
    MemoryPool   *memPool;
public:
    Bin           bin[numBlockBinLimit];
    FreeBlockPool freeSlabBlocks;
    LocalLOC      lloc;
    unsigned      currCacheIdx;
private:
    bool unused;
public:
    TLSData(MemoryPool *mPool, Backend *bknd) : memPool(mPool), freeSlabBlocks(bknd) {}
    MemoryPool *getMemPool() const { return memPool; }
    Bin* getAllocationBin(size_t size);
    void release();
    bool externalCleanup(bool cleanOnlyUnused, bool cleanBins) {
        if (!unused && cleanOnlyUnused) return false;
        // Heavy operation in terms of synchronization complexity,
        // should be called only for the current thread
        bool released = cleanBins ? cleanupBlockBins() : false;
        // both cleanups to be called, and the order is not important
        return released | lloc.externalCleanup(&memPool->extMemPool) | freeSlabBlocks.externalCleanup();
    }
    bool cleanupBlockBins();
    void markUsed() { unused = false; } // called by the owner when the TLS is touched
    void markUnused() { unused =  true; } // can be called by a non-owner thread
};

TLSData *TLSKey::createTLS(MemoryPool *memPool, Backend *backend)
{
    MALLOC_ASSERT( sizeof(TLSData) >= sizeof(Bin) * numBlockBins + sizeof(FreeBlockPool), ASSERT_TEXT );
    TLSData* tls = (TLSData*) memPool->bootStrapBlocks.allocate(memPool, sizeof(TLSData));
    if ( !tls )
        return NULL;
    new(tls) TLSData(memPool, backend);
    /* the block contains zeroes after bootStrapMalloc, so bins are initialized */
#if MALLOC_DEBUG
    for (uint32_t i = 0; i < numBlockBinLimit; i++)
        tls->bin[i].verifyInitState();
#endif
    setThreadMallocTLS(tls);
    memPool->extMemPool.allLocalCaches.registerThread(tls);
    return tls;
}

bool TLSData::cleanupBlockBins()
{
    bool released = false;
    for (uint32_t i = 0; i < numBlockBinLimit; i++) {
        released |= bin[i].cleanPublicFreeLists();
        // After cleaning public free lists, only the active block might be empty.
        // Do not use processEmptyBlock because it will just restore bumpPtr.
        Block *block = bin[i].getActiveBlock();
        if (block && block->empty()) {
            bin[i].outofTLSBin(block);
            memPool->returnEmptyBlock(block, /*poolTheBlock=*/false);
            released = true;
        }
    }
    return released;
}

bool ExtMemoryPool::releaseAllLocalCaches()
{
    // Iterate all registered TLS data and clean LLOC and Slab pools
    bool released = allLocalCaches.cleanup(/*cleanOnlyUnused=*/false);

    // Bins privatization is done only for the current thread
    if (TLSData *tlsData = tlsPointerKey.getThreadMallocTLS())
        released |= tlsData->cleanupBlockBins();

    return released;
}

void AllLocalCaches::registerThread(TLSRemote *tls)
{
    tls->prev = NULL;
    MallocMutex::scoped_lock lock(listLock);
    MALLOC_ASSERT(head!=tls, ASSERT_TEXT);
    tls->next = head;
    if (head)
        head->prev = tls;
    head = tls;
    MALLOC_ASSERT(head->next!=head, ASSERT_TEXT);
}

void AllLocalCaches::unregisterThread(TLSRemote *tls)
{
    MallocMutex::scoped_lock lock(listLock);
    MALLOC_ASSERT(head, "Can't unregister thread: no threads are registered.");
    if (head == tls)
        head = tls->next;
    if (tls->next)
        tls->next->prev = tls->prev;
    if (tls->prev)
        tls->prev->next = tls->next;
    MALLOC_ASSERT(!tls->next || tls->next->next!=tls->next, ASSERT_TEXT);
}

bool AllLocalCaches::cleanup(bool cleanOnlyUnused)
{
    bool released = false;
    {
        MallocMutex::scoped_lock lock(listLock);
        for (TLSRemote *curr=head; curr; curr=curr->next)
            released |= static_cast<TLSData*>(curr)->externalCleanup(cleanOnlyUnused, /*cleanBins=*/false);
    }
    return released;
}

void AllLocalCaches::markUnused()
{
    bool locked;
    MallocMutex::scoped_lock lock(listLock, /*block=*/false, &locked);
    if (!locked) // do not wait to mark if another thread is doing something with the list
        return;

    for (TLSRemote *curr=head; curr; curr=curr->next)
        static_cast<TLSData*>(curr)->markUnused();
}

#if MALLOC_CHECK_RECURSION
MallocMutex RecursiveMallocCallProtector::rmc_mutex;
pthread_t   RecursiveMallocCallProtector::owner_thread;
void       *RecursiveMallocCallProtector::autoObjPtr;
bool        RecursiveMallocCallProtector::mallocRecursionDetected;
#if __FreeBSD__
bool        RecursiveMallocCallProtector::canUsePthread;
#endif

#endif

/*********** End code to provide thread ID and a TLS pointer **********/

// Parameter for isLargeObject, keeps our expectations on memory origin.
// Assertions must use unknownMem to reliably report object invalidity.
enum MemoryOrigin {
    ourMem,    // allocated by TBB allocator
    unknownMem // can be allocated by system allocator or TBB allocator
};

template<MemoryOrigin> bool isLargeObject(void *object);
static void *internalMalloc(size_t size);
static void internalFree(void *object);
static void *internalPoolMalloc(MemoryPool* mPool, size_t size);
static bool internalPoolFree(MemoryPool *mPool, void *object, size_t size);

#if !MALLOC_DEBUG
#if __INTEL_COMPILER || _MSC_VER
#define NOINLINE(decl) __declspec(noinline) decl
#define ALWAYSINLINE(decl) __forceinline decl
#elif __GNUC__
#define NOINLINE(decl) decl __attribute__ ((noinline))
#define ALWAYSINLINE(decl) decl __attribute__ ((always_inline))
#else
#define NOINLINE(decl) decl
#define ALWAYSINLINE(decl) decl
#endif

static NOINLINE( bool doInitialization() );
ALWAYSINLINE( bool isMallocInitialized() );

#undef ALWAYSINLINE
#undef NOINLINE
#endif /* !MALLOC_DEBUG */


/********* Now some rough utility code to deal with indexing the size bins. **************/

/*
 * Given a number, return the position of its highest non-zero bit. It is intended to work with 32-bit values only.
 * Moreover, on IPF, for the sake of simplicity and performance, it is narrowed to only serve 64 to 1023.
 * This is enough for the current algorithm of distributing sizes among bins.
 * __TBB_Log2 is not used here to minimize dependencies on TBB-specific sources.
 */
#if _WIN64 && _MSC_VER>=1400 && !__INTEL_COMPILER
extern "C" unsigned char _BitScanReverse( unsigned long* i, unsigned long w );
#pragma intrinsic(_BitScanReverse)
#endif
static inline unsigned int highestBitPos(unsigned int n)
{
    MALLOC_ASSERT( n>=64 && n<1024, ASSERT_TEXT ); // only needed for bsr array lookup, but always true
    unsigned int pos;
#if __ARCH_x86_32||__ARCH_x86_64

# if __linux__||__APPLE__||__FreeBSD__||__NetBSD__||__OpenBSD__||__sun||__MINGW32__
    __asm__ ("bsr %1,%0" : "=r"(pos) : "r"(n));
# elif (_WIN32 && (!_WIN64 || __INTEL_COMPILER))
    __asm
    {
        bsr eax, n
        mov pos, eax
    }
# elif _WIN64 && _MSC_VER>=1400
    _BitScanReverse((unsigned long*)&pos, (unsigned long)n);
# else
#   error highestBitPos() not implemented for this platform
# endif
#elif __arm__
    __asm__ __volatile__
    (
       "clz %0, %1\n"
       "rsb %0, %0, %2\n"
       :"=r" (pos) :"r" (n), "I" (31)
    );
#else
    static unsigned int bsr[16] = {0/*N/A*/,6,7,7,8,8,8,8,9,9,9,9,9,9,9,9};
    pos = bsr[ n>>6 ];
#endif /* __ARCH_* */
    return pos;
}
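// Illustration (not part of the allocator): highestBitPos(64) == 6, highestBitPos(127) == 6,
// highestBitPos(128) == 7, and highestBitPos(1023) == 9 -- exactly the 64..1023 range
// promised by the comment and the assertion above.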

template<bool Is32Bit>
unsigned int getSmallObjectIndex(unsigned int size)
{
    return (size-1)>>3;
}
template<>
unsigned int getSmallObjectIndex</*Is32Bit=*/false>(unsigned int size)
{
    // For 64-bit malloc, 16-byte alignment is needed except for bin 0.
    unsigned int result = (size-1)>>3;
    if (result) result |= 1; // 0,1,3,5,7; bins 2,4,6 are not aligned to 16 bytes
    return result;
}
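// Worked example (assuming a 64-bit build): sizes 17..24 give (size-1)>>3 == 2, which
// is then bumped to bin 3 (32-byte objects) to keep 16-byte alignment, whereas the
// generic 32-bit variant above keeps bin 2 (24-byte objects).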
/*
 * Depending on indexRequest, for a given size return either the index into the bin
 * for objects of this size, or the actual size of objects in this bin.
 */
template<bool indexRequest>
static unsigned int getIndexOrObjectSize (unsigned int size)
{
    if (size <= maxSmallObjectSize) { // selection from 8/16/24/32/40/48/56/64
        unsigned int index = getSmallObjectIndex</*Is32Bit=*/(sizeof(size_t)<=4)>( size );
         /* Bin 0 is for 8 bytes, bin 1 is for 16, and so forth */
        return indexRequest ? index : (index+1)<<3;
    }
    else if (size <= maxSegregatedObjectSize ) { // 80/96/112/128 / 160/192/224/256 / 320/384/448/512 / 640/768/896/1024
        unsigned int order = highestBitPos(size-1); // which group of bin sizes?
        MALLOC_ASSERT( 6<=order && order<=9, ASSERT_TEXT );
        if (indexRequest)
            return minSegregatedObjectIndex - (4*6) - 4 + (4*order) + ((size-1)>>(order-2));
        else {
            unsigned int alignment = 128 >> (9-order); // alignment in the group
            MALLOC_ASSERT( alignment==16 || alignment==32 || alignment==64 || alignment==128, ASSERT_TEXT );
            return alignUp(size,alignment);
        }
    }
    else {
        if( size <= fittingSize3 ) {
            if( size <= fittingSize2 ) {
                if( size <= fittingSize1 )
                    return indexRequest ? minFittingIndex : fittingSize1;
                else
                    return indexRequest ? minFittingIndex+1 : fittingSize2;
            } else
                return indexRequest ? minFittingIndex+2 : fittingSize3;
        } else {
            if( size <= fittingSize5 ) {
                if( size <= fittingSize4 )
                    return indexRequest ? minFittingIndex+3 : fittingSize4;
                else
                    return indexRequest ? minFittingIndex+4 : fittingSize5;
            } else {
                MALLOC_ASSERT( 0,ASSERT_TEXT ); // this should not happen
                return ~0U;
            }
        }
    }
}

static unsigned int getIndex (unsigned int size)
{
    return getIndexOrObjectSize</*indexRequest=*/true>(size);
}

static unsigned int getObjectSize (unsigned int size)
{
    return getIndexOrObjectSize</*indexRequest=*/false>(size);
}
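// Illustrative mapping derived from the constants above (assuming a 64-bit build;
// fitting size quoted from the estimatedCacheLineSize==64 comments):
//   getIndex(8)    == 0,  getObjectSize(8)    == 8
//   getIndex(64)   == 7,  getObjectSize(64)   == 64
//   getIndex(65)   == 8,  getObjectSize(65)   == 80
//   getIndex(1024) == 23, getObjectSize(1024) == 1024
//   getIndex(1025) == 24, getObjectSize(1025) == fittingSize1 (1792)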


void *BootStrapBlocks::allocate(MemoryPool *memPool, size_t size)
{
    FreeObject *result;

    MALLOC_ASSERT( size == sizeof(TLSData), ASSERT_TEXT );

    { // Lock with acquire
        MallocMutex::scoped_lock scoped_cs(bootStrapLock);

        if( bootStrapObjectList) {
            result = bootStrapObjectList;
            bootStrapObjectList = bootStrapObjectList->next;
        } else {
            if (!bootStrapBlock) {
                bootStrapBlock = memPool->getEmptyBlock(size);
                if (!bootStrapBlock) return NULL;
            }
            result = bootStrapBlock->bumpPtr;
            bootStrapBlock->bumpPtr = (FreeObject *)((uintptr_t)bootStrapBlock->bumpPtr - bootStrapBlock->objectSize);
            if ((uintptr_t)bootStrapBlock->bumpPtr < (uintptr_t)bootStrapBlock+sizeof(Block)) {
                bootStrapBlock->bumpPtr = NULL;
                bootStrapBlock->next = bootStrapBlockUsed;
                bootStrapBlockUsed = bootStrapBlock;
                bootStrapBlock = NULL;
            }
        }
    } // Unlock with release

    memset (result, 0, size);
    return (void*)result;
}

void BootStrapBlocks::free(void* ptr)
{
    MALLOC_ASSERT( ptr, ASSERT_TEXT );
    { // Lock with acquire
        MallocMutex::scoped_lock scoped_cs(bootStrapLock);
        ((FreeObject*)ptr)->next = bootStrapObjectList;
        bootStrapObjectList = (FreeObject*)ptr;
    } // Unlock with release
}

void BootStrapBlocks::reset()
{
    bootStrapBlock = bootStrapBlockUsed = NULL;
    bootStrapObjectList = NULL;
}

#if !(FREELIST_NONBLOCKING)
static MallocMutex publicFreeListLock; // lock for changes of publicFreeList
#endif

/********* End rough utility code  **************/

/* LifoList assumes zero initialization so a vector of it can be created
 * by just allocating some space with no call to constructor.
 * On Linux, it seems to be necessary to avoid linking with C++ libraries.
 *
 * By usage convention there is no race on the initialization. */
LifoList::LifoList( ) : top(NULL)
{
    // MallocMutex assumes zero initialization
    memset(&lock, 0, sizeof(MallocMutex));
}

void LifoList::push(Block *block)
{
    MallocMutex::scoped_lock scoped_cs(lock);
    block->next = top;
    top = block;
}

Block *LifoList::pop()
{
    Block *block=NULL;
    if (top) {
        MallocMutex::scoped_lock scoped_cs(lock);
        if (top) {
            block = top;
            top = block->next;
        }
    }
    return block;
}

Block *LifoList::grab()
{
    Block *block = NULL;
    if (top) {
        MallocMutex::scoped_lock scoped_cs(lock);
        block = top;
        top = NULL;
    }
    return block;
}

/********* Thread and block related code      *************/

template<bool poolDestroy> void AllLargeBlocksList::releaseAll(Backend *backend) {
     LargeMemoryBlock *next, *lmb = loHead;
     loHead = NULL;

     for (; lmb; lmb = next) {
         next = lmb->gNext;
         if (poolDestroy) {
             // as it's pool destruction, no need to return object to backend,
             // only remove backrefs, as they are global
             removeBackRef(lmb->backRefIdx);
         } else {
             // clean g(Next|Prev) to prevent removing lmb
             // from AllLargeBlocksList inside returnLargeObject
             lmb->gNext = lmb->gPrev = NULL;
             backend->returnLargeObject(lmb);
         }
     }
}

TLSData* MemoryPool::getTLS(bool create)
{
    TLSData* tls = extMemPool.tlsPointerKey.getThreadMallocTLS();
    if (create && !tls)
        tls = extMemPool.tlsPointerKey.createTLS(this, &extMemPool.backend);
    return tls;
}

/*
 * Return the bin for the given size.
 */
inline Bin* TLSData::getAllocationBin(size_t size)
{
    return bin + getIndex(size);
}

/* Return an empty uninitialized block in a non-blocking fashion. */
Block *MemoryPool::getEmptyBlock(size_t size)
{
    TLSData* tls = getTLS(/*create=*/false);
    // try to use per-thread cache, if TLS available
    FreeBlockPool::ResOfGet resOfGet = tls?
        tls->freeSlabBlocks.getBlock() : FreeBlockPool::ResOfGet(NULL, false);
    Block *result = resOfGet.block;

    if (!result) { // not found in the local cache, ask the backend for slabs
        int num = resOfGet.lastAccMiss? Backend::numOfSlabAllocOnMiss : 1;
        BackRefIdx backRefIdx[Backend::numOfSlabAllocOnMiss];

        result = static_cast<Block*>(extMemPool.backend.getSlabBlock(num));
        if (!result) return NULL;

        if (!extMemPool.userPool())
            for (int i=0; i<num; i++) {
                backRefIdx[i] = BackRefIdx::newBackRef(/*largeObj=*/false);
                if (backRefIdx[i].isInvalid()) {
                    // roll back resource allocation
                    for (int j=0; j<i; j++)
                        removeBackRef(backRefIdx[j]);
                    Block *b = result;
                    for (int j=0; j<num; b=(Block*)((uintptr_t)b+slabSize), j++)
                        extMemPool.backend.putSlabBlock(b);
                    return NULL;
                }
            }
        // resources were allocated, register blocks
        Block *b = result;
        for (int i=0; i<num; b=(Block*)((uintptr_t)b+slabSize), i++) {
            // slab block in user's pool must have invalid backRefIdx
            if (extMemPool.userPool()) {
                new (&b->backRefIdx) BackRefIdx();
            } else {
                setBackRef(backRefIdx[i], b);
                b->backRefIdx = backRefIdx[i];
            }
            b->tlsPtr = tls;
            b->poolPtr = this;
            // all but first one go to per-thread pool
            if (i > 0) {
                MALLOC_ASSERT(tls, ASSERT_TEXT);
                tls->freeSlabBlocks.returnBlock(b);
            }
        }
    }
    MALLOC_ASSERT(result, ASSERT_TEXT);
    result->initEmptyBlock(tls, size);
    STAT_increment(getThreadId(), getIndex(result->objectSize), allocBlockNew);
    return result;
}

void MemoryPool::returnEmptyBlock(Block *block, bool poolTheBlock)
{
    block->reset();
    if (poolTheBlock) {
        getTLS(/*create=*/false)->freeSlabBlocks.returnBlock(block);
    } else {
        // slab blocks in user's pools do not have valid backRefIdx
        if (!extMemPool.userPool())
            removeBackRef(*(block->getBackRefIdx()));
        extMemPool.backend.putSlabBlock(block);
    }
}

bool ExtMemoryPool::init(intptr_t poolId, rawAllocType rawAlloc,
                         rawFreeType rawFree, size_t granularity,
                         bool keepAllMemory, bool fixedPool)
{
    this->poolId = poolId;
    this->rawAlloc = rawAlloc;
    this->rawFree = rawFree;
    this->granularity = granularity;
    this->keepAllMemory = keepAllMemory;
    this->fixedPool = fixedPool;
    this->delayRegsReleasing = false;
    if (!initTLS())
        return false;
    loc.init(this);
    backend.init(this);
    MALLOC_ASSERT(isPoolValid(), NULL);
    return true;
}

bool ExtMemoryPool::initTLS() { return tlsPointerKey.init(); }

bool MemoryPool::init(intptr_t poolId, const MemPoolPolicy *policy)
{
    if (!extMemPool.init(poolId, policy->pAlloc, policy->pFree,
               policy->granularity? policy->granularity : defaultGranularity,
               policy->keepAllMemory, policy->fixedPool))
        return false;
    {
        MallocMutex::scoped_lock lock(memPoolListLock);
        next = defaultMemPool->next;
        defaultMemPool->next = this;
        prev = defaultMemPool;
        if (next)
            next->prev = this;
    }
    return true;
}

bool MemoryPool::reset()
{
    MALLOC_ASSERT(extMemPool.userPool(), "No reset for the system pool.");
109851c0b2f7Stbbdev     // memory is not released during pool reset
109951c0b2f7Stbbdev     // TODO: mark regions to release unused on next reset()
110051c0b2f7Stbbdev     extMemPool.delayRegionsReleasing(true);
110151c0b2f7Stbbdev 
110251c0b2f7Stbbdev     bootStrapBlocks.reset();
110351c0b2f7Stbbdev     extMemPool.lmbList.releaseAll</*poolDestroy=*/false>(&extMemPool.backend);
110451c0b2f7Stbbdev     if (!extMemPool.reset())
110551c0b2f7Stbbdev         return false;
110651c0b2f7Stbbdev 
110751c0b2f7Stbbdev     if (!extMemPool.initTLS())
110851c0b2f7Stbbdev         return false;
110951c0b2f7Stbbdev     extMemPool.delayRegionsReleasing(false);
111051c0b2f7Stbbdev     return true;
111151c0b2f7Stbbdev }
111251c0b2f7Stbbdev 
111351c0b2f7Stbbdev bool MemoryPool::destroy()
111451c0b2f7Stbbdev {
111551c0b2f7Stbbdev #if __TBB_MALLOC_LOCACHE_STAT
111651c0b2f7Stbbdev     extMemPool.loc.reportStat(stdout);
111751c0b2f7Stbbdev #endif
111851c0b2f7Stbbdev #if __TBB_MALLOC_BACKEND_STAT
111951c0b2f7Stbbdev     extMemPool.backend.reportStat(stdout);
112051c0b2f7Stbbdev #endif
112151c0b2f7Stbbdev     {
112251c0b2f7Stbbdev         MallocMutex::scoped_lock lock(memPoolListLock);
112351c0b2f7Stbbdev         // remove itself from global pool list
112451c0b2f7Stbbdev         if (prev)
112551c0b2f7Stbbdev             prev->next = next;
112651c0b2f7Stbbdev         if (next)
112751c0b2f7Stbbdev             next->prev = prev;
112851c0b2f7Stbbdev     }
112951c0b2f7Stbbdev     // slab blocks in non-default pool do not have backreferences,
113051c0b2f7Stbbdev     // only large objects do
113151c0b2f7Stbbdev     if (extMemPool.userPool())
113251c0b2f7Stbbdev         extMemPool.lmbList.releaseAll</*poolDestroy=*/true>(&extMemPool.backend);
113351c0b2f7Stbbdev     else {
113451c0b2f7Stbbdev         // only one non-userPool() is supported now
113551c0b2f7Stbbdev         MALLOC_ASSERT(this==defaultMemPool, NULL);
113651c0b2f7Stbbdev         // Here and below in extMemPool.destroy(), the initial state is not restored
113751c0b2f7Stbbdev         // for a user pool, because it is just about to be released. For the system
113851c0b2f7Stbbdev         // pool, however, it is restored so that it need not be zeroed on a subsequent reload.
113951c0b2f7Stbbdev         bootStrapBlocks.reset();
114051c0b2f7Stbbdev         extMemPool.orphanedBlocks.reset();
114151c0b2f7Stbbdev     }
114251c0b2f7Stbbdev     return extMemPool.destroy();
114351c0b2f7Stbbdev }
114451c0b2f7Stbbdev 
114551c0b2f7Stbbdev void MemoryPool::onThreadShutdown(TLSData *tlsData)
114651c0b2f7Stbbdev {
114751c0b2f7Stbbdev     if (tlsData) { // might be called for "empty" TLS
114851c0b2f7Stbbdev         tlsData->release();
114951c0b2f7Stbbdev         bootStrapBlocks.free(tlsData);
115051c0b2f7Stbbdev         clearTLS();
115151c0b2f7Stbbdev     }
115251c0b2f7Stbbdev }
115351c0b2f7Stbbdev 
115451c0b2f7Stbbdev #if MALLOC_DEBUG
115551c0b2f7Stbbdev void Bin::verifyTLSBin (size_t size) const
115651c0b2f7Stbbdev {
115751c0b2f7Stbbdev /* The debug version verifies the TLSBin as needed */
115851c0b2f7Stbbdev     uint32_t objSize = getObjectSize(size);
115951c0b2f7Stbbdev 
116051c0b2f7Stbbdev     if (activeBlk) {
116151c0b2f7Stbbdev         MALLOC_ASSERT( activeBlk->isOwnedByCurrentThread(), ASSERT_TEXT );
116251c0b2f7Stbbdev         MALLOC_ASSERT( activeBlk->objectSize == objSize, ASSERT_TEXT );
116351c0b2f7Stbbdev #if MALLOC_DEBUG>1
116451c0b2f7Stbbdev         for (Block* temp = activeBlk->next; temp; temp=temp->next) {
116551c0b2f7Stbbdev             MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT );
116651c0b2f7Stbbdev             MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT );
116751c0b2f7Stbbdev             MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
116851c0b2f7Stbbdev             MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
116951c0b2f7Stbbdev             if (temp->next) {
117051c0b2f7Stbbdev                 MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
117151c0b2f7Stbbdev             }
117251c0b2f7Stbbdev         }
117351c0b2f7Stbbdev         for (Block* temp = activeBlk->previous; temp; temp=temp->previous) {
117451c0b2f7Stbbdev             MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT );
117551c0b2f7Stbbdev             MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT );
117651c0b2f7Stbbdev             MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
117751c0b2f7Stbbdev             MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
117851c0b2f7Stbbdev             if (temp->previous) {
117951c0b2f7Stbbdev                 MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
118051c0b2f7Stbbdev             }
118151c0b2f7Stbbdev         }
118251c0b2f7Stbbdev #endif /* MALLOC_DEBUG>1 */
118351c0b2f7Stbbdev     }
118451c0b2f7Stbbdev }
118551c0b2f7Stbbdev #else /* MALLOC_DEBUG */
118651c0b2f7Stbbdev inline void Bin::verifyTLSBin (size_t) const { }
118751c0b2f7Stbbdev #endif /* MALLOC_DEBUG */
118851c0b2f7Stbbdev 
118951c0b2f7Stbbdev /*
119051c0b2f7Stbbdev  * Add a block to the start of this tls bin list.
119151c0b2f7Stbbdev  */
119251c0b2f7Stbbdev void Bin::pushTLSBin(Block* block)
119351c0b2f7Stbbdev {
119451c0b2f7Stbbdev     /* The objectSize is taken from the block rather than passed as a parameter
119551c0b2f7Stbbdev        because the function is applied to partially filled blocks as well */
119651c0b2f7Stbbdev     unsigned int size = block->objectSize;
119751c0b2f7Stbbdev 
119851c0b2f7Stbbdev     MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
119951c0b2f7Stbbdev     MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );
120051c0b2f7Stbbdev     MALLOC_ASSERT( block->next == NULL, ASSERT_TEXT );
120151c0b2f7Stbbdev     MALLOC_ASSERT( block->previous == NULL, ASSERT_TEXT );
120251c0b2f7Stbbdev 
120351c0b2f7Stbbdev     MALLOC_ASSERT( this, ASSERT_TEXT );
120451c0b2f7Stbbdev     verifyTLSBin(size);
120551c0b2f7Stbbdev 
120651c0b2f7Stbbdev     block->next = activeBlk;
120751c0b2f7Stbbdev     if( activeBlk ) {
120851c0b2f7Stbbdev         block->previous = activeBlk->previous;
120951c0b2f7Stbbdev         activeBlk->previous = block;
121051c0b2f7Stbbdev         if( block->previous )
121151c0b2f7Stbbdev             block->previous->next = block;
121251c0b2f7Stbbdev     } else {
121351c0b2f7Stbbdev         activeBlk = block;
121451c0b2f7Stbbdev     }
121551c0b2f7Stbbdev 
121651c0b2f7Stbbdev     verifyTLSBin(size);
121751c0b2f7Stbbdev }
121851c0b2f7Stbbdev 
121951c0b2f7Stbbdev /*
122051c0b2f7Stbbdev  * Take a block out of its tls bin (e.g. before removal).
122151c0b2f7Stbbdev  */
122251c0b2f7Stbbdev void Bin::outofTLSBin(Block* block)
122351c0b2f7Stbbdev {
122451c0b2f7Stbbdev     unsigned int size = block->objectSize;
122551c0b2f7Stbbdev 
122651c0b2f7Stbbdev     MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
122751c0b2f7Stbbdev     MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );
122851c0b2f7Stbbdev 
122951c0b2f7Stbbdev     MALLOC_ASSERT( this, ASSERT_TEXT );
123051c0b2f7Stbbdev     verifyTLSBin(size);
123151c0b2f7Stbbdev 
123251c0b2f7Stbbdev     if (block == activeBlk) {
123351c0b2f7Stbbdev         activeBlk = block->previous? block->previous : block->next;
123451c0b2f7Stbbdev     }
123551c0b2f7Stbbdev     /* Unlink the block */
123651c0b2f7Stbbdev     if (block->previous) {
123751c0b2f7Stbbdev         MALLOC_ASSERT( block->previous->next == block, ASSERT_TEXT );
123851c0b2f7Stbbdev         block->previous->next = block->next;
123951c0b2f7Stbbdev     }
124051c0b2f7Stbbdev     if (block->next) {
124151c0b2f7Stbbdev         MALLOC_ASSERT( block->next->previous == block, ASSERT_TEXT );
124251c0b2f7Stbbdev         block->next->previous = block->previous;
124351c0b2f7Stbbdev     }
124451c0b2f7Stbbdev     block->next = NULL;
124551c0b2f7Stbbdev     block->previous = NULL;
124651c0b2f7Stbbdev 
124751c0b2f7Stbbdev     verifyTLSBin(size);
124851c0b2f7Stbbdev }
124951c0b2f7Stbbdev 
125051c0b2f7Stbbdev Block* Bin::getPrivatizedFreeListBlock()
125151c0b2f7Stbbdev {
125251c0b2f7Stbbdev     Block* block;
125351c0b2f7Stbbdev     MALLOC_ASSERT( this, ASSERT_TEXT );
125451c0b2f7Stbbdev     // if this method is called, active block usage must be unsuccessful
125551c0b2f7Stbbdev     MALLOC_ASSERT( !activeBlk && !mailbox.load(std::memory_order_relaxed) || activeBlk && activeBlk->isFull, ASSERT_TEXT );
125651c0b2f7Stbbdev 
125751c0b2f7Stbbdev // the counter should be changed    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
125851c0b2f7Stbbdev     if (!mailbox.load(std::memory_order_acquire)) // hotpath is empty mailbox
125951c0b2f7Stbbdev         return NULL;
126051c0b2f7Stbbdev     else { // mailbox is not empty, take lock and inspect it
126151c0b2f7Stbbdev         MallocMutex::scoped_lock scoped_cs(mailLock);
126251c0b2f7Stbbdev         block = mailbox.load(std::memory_order_relaxed);
126351c0b2f7Stbbdev         if( block ) {
126451c0b2f7Stbbdev             MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
126551c0b2f7Stbbdev             MALLOC_ASSERT( !isNotForUse(block->nextPrivatizable), ASSERT_TEXT );
126651c0b2f7Stbbdev             mailbox.store(block->nextPrivatizable, std::memory_order_relaxed);
126751c0b2f7Stbbdev             block->nextPrivatizable = (Block*) this;
126851c0b2f7Stbbdev         }
126951c0b2f7Stbbdev     }
127051c0b2f7Stbbdev     if( block ) {
127151c0b2f7Stbbdev         MALLOC_ASSERT( isSolidPtr(block->publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
127251c0b2f7Stbbdev         block->privatizePublicFreeList();
127351c0b2f7Stbbdev         block->adjustPositionInBin(this);
127451c0b2f7Stbbdev     }
127551c0b2f7Stbbdev     return block;
127651c0b2f7Stbbdev }
127751c0b2f7Stbbdev 
127851c0b2f7Stbbdev void Bin::addPublicFreeListBlock(Block* block)
127951c0b2f7Stbbdev {
128051c0b2f7Stbbdev     MallocMutex::scoped_lock scoped_cs(mailLock);
128151c0b2f7Stbbdev     block->nextPrivatizable = mailbox.load(std::memory_order_relaxed);
128251c0b2f7Stbbdev     mailbox.store(block, std::memory_order_relaxed);
128351c0b2f7Stbbdev }
128451c0b2f7Stbbdev 
128551c0b2f7Stbbdev // Process publicly freed objects in all blocks and return empty blocks
128651c0b2f7Stbbdev // to the backend in order to reduce overall footprint.
128751c0b2f7Stbbdev bool Bin::cleanPublicFreeLists()
128851c0b2f7Stbbdev {
128951c0b2f7Stbbdev     Block* block;
129051c0b2f7Stbbdev     if (!mailbox.load(std::memory_order_acquire))
129151c0b2f7Stbbdev         return false;
129251c0b2f7Stbbdev     else {
129351c0b2f7Stbbdev         // Grab all the blocks in the mailbox
129451c0b2f7Stbbdev         MallocMutex::scoped_lock scoped_cs(mailLock);
129551c0b2f7Stbbdev         block = mailbox.load(std::memory_order_relaxed);
129651c0b2f7Stbbdev         mailbox.store(NULL, std::memory_order_relaxed);
129751c0b2f7Stbbdev     }
129851c0b2f7Stbbdev     bool released = false;
129951c0b2f7Stbbdev     while (block) {
130051c0b2f7Stbbdev         MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
130151c0b2f7Stbbdev         Block* tmp = block->nextPrivatizable;
130251c0b2f7Stbbdev         block->nextPrivatizable = (Block*) this;
130351c0b2f7Stbbdev         block->privatizePublicFreeList();
130451c0b2f7Stbbdev         if (block->empty()) {
130551c0b2f7Stbbdev             processEmptyBlock(block, /*poolTheBlock=*/false);
130651c0b2f7Stbbdev             released = true;
130751c0b2f7Stbbdev         } else
130851c0b2f7Stbbdev             block->adjustPositionInBin(this);
130951c0b2f7Stbbdev         block = tmp;
131051c0b2f7Stbbdev     }
131151c0b2f7Stbbdev     return released;
131251c0b2f7Stbbdev }
131351c0b2f7Stbbdev 
131451c0b2f7Stbbdev bool Block::adjustFullness()
131551c0b2f7Stbbdev {
131651c0b2f7Stbbdev     if (bumpPtr) {
131751c0b2f7Stbbdev         /* If we are still using a bump ptr for this block it is empty enough to use. */
131851c0b2f7Stbbdev         STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
131951c0b2f7Stbbdev         isFull = false;
132051c0b2f7Stbbdev     } else {
132151c0b2f7Stbbdev         const float threshold = (slabSize - sizeof(Block)) * (1 - emptyEnoughRatio);
132251c0b2f7Stbbdev         /* allocatedCount shows how many objects in the block are in use; however it still counts
132351c0b2f7Stbbdev          * objects freed by other threads; so a prior call to privatizePublicFreeList() is recommended */
132451c0b2f7Stbbdev         isFull = (allocatedCount*objectSize > threshold) ? true : false;
132551c0b2f7Stbbdev #if COLLECT_STATISTICS
132651c0b2f7Stbbdev         if (isFull)
132751c0b2f7Stbbdev             STAT_increment(getThreadId(), getIndex(objectSize), examineNotEmpty);
132851c0b2f7Stbbdev         else
132951c0b2f7Stbbdev             STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
133051c0b2f7Stbbdev #endif
133151c0b2f7Stbbdev     }
133251c0b2f7Stbbdev     return isFull;
133351c0b2f7Stbbdev }
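/* Illustrative worked example (values assumed for the sake of illustration, not taken
   from this listing): with a 16 KB slab, a 128-byte Block header, and emptyEnoughRatio
   of 1/4, the threshold above is (16384 - 128) * (1 - 0.25) = 12192 bytes. For
   64-byte objects the block is then marked full once allocatedCount*64 exceeds
   12192, i.e. at allocatedCount >= 191 (191*64 = 12224 > 12192). */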
133451c0b2f7Stbbdev 
133551c0b2f7Stbbdev // This method resides in class Block, and not in class Bin, in order to avoid
133651c0b2f7Stbbdev // calling getAllocationBin on a reasonably hot path in Block::freeOwnObject
133751c0b2f7Stbbdev void Block::adjustPositionInBin(Bin* bin/*=NULL*/)
133851c0b2f7Stbbdev {
133951c0b2f7Stbbdev     // If the block was full but has become empty enough to use,
134051c0b2f7Stbbdev     // move it to the front of the list
134151c0b2f7Stbbdev     if (isFull && !adjustFullness()) {
134251c0b2f7Stbbdev         if (!bin)
134351c0b2f7Stbbdev             bin = tlsPtr->getAllocationBin(objectSize);
134451c0b2f7Stbbdev         bin->moveBlockToFront(this);
134551c0b2f7Stbbdev     }
134651c0b2f7Stbbdev }
134751c0b2f7Stbbdev 
134851c0b2f7Stbbdev /* Restore the bump pointer for an empty block that is planned to be used */
134951c0b2f7Stbbdev void Block::restoreBumpPtr()
135051c0b2f7Stbbdev {
135151c0b2f7Stbbdev     MALLOC_ASSERT( allocatedCount == 0, ASSERT_TEXT );
135251c0b2f7Stbbdev     MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
135351c0b2f7Stbbdev     STAT_increment(getThreadId(), getIndex(objectSize), freeRestoreBumpPtr);
135451c0b2f7Stbbdev     bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);
135551c0b2f7Stbbdev     freeList = NULL;
135651c0b2f7Stbbdev     isFull = false;
135751c0b2f7Stbbdev }
135851c0b2f7Stbbdev 
135951c0b2f7Stbbdev void Block::freeOwnObject(void *object)
136051c0b2f7Stbbdev {
136151c0b2f7Stbbdev     tlsPtr->markUsed();
136251c0b2f7Stbbdev     allocatedCount--;
136351c0b2f7Stbbdev     MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
136451c0b2f7Stbbdev #if COLLECT_STATISTICS
136551c0b2f7Stbbdev     // Note that getAllocationBin is not called on the hottest path with statistics off.
136651c0b2f7Stbbdev     if (tlsPtr->getAllocationBin(objectSize)->getActiveBlock() != this)
136751c0b2f7Stbbdev         STAT_increment(getThreadId(), getIndex(objectSize), freeToInactiveBlock);
136851c0b2f7Stbbdev     else
136951c0b2f7Stbbdev         STAT_increment(getThreadId(), getIndex(objectSize), freeToActiveBlock);
137051c0b2f7Stbbdev #endif
137151c0b2f7Stbbdev     if (empty()) {
137251c0b2f7Stbbdev         // If the last object of a slab is freed, the slab cannot be marked full
137351c0b2f7Stbbdev         MALLOC_ASSERT(!isFull, ASSERT_TEXT);
137451c0b2f7Stbbdev         tlsPtr->getAllocationBin(objectSize)->processEmptyBlock(this, /*poolTheBlock=*/true);
137551c0b2f7Stbbdev     } else { // hot path
137651c0b2f7Stbbdev         FreeObject *objectToFree = findObjectToFree(object);
137751c0b2f7Stbbdev         objectToFree->next = freeList;
137851c0b2f7Stbbdev         freeList = objectToFree;
137951c0b2f7Stbbdev         adjustPositionInBin();
138051c0b2f7Stbbdev     }
138151c0b2f7Stbbdev }
138251c0b2f7Stbbdev 
138351c0b2f7Stbbdev void Block::freePublicObject (FreeObject *objectToFree)
138451c0b2f7Stbbdev {
138551c0b2f7Stbbdev     FreeObject *localPublicFreeList;
138651c0b2f7Stbbdev 
138751c0b2f7Stbbdev     MALLOC_ITT_SYNC_RELEASING(&publicFreeList);
138851c0b2f7Stbbdev #if FREELIST_NONBLOCKING
138951c0b2f7Stbbdev     // TBB_REVAMP_TODO: make it non atomic in non-blocking scenario
139051c0b2f7Stbbdev     FreeObject *temp = publicFreeList.load(std::memory_order_acquire);
139151c0b2f7Stbbdev     do {
139251c0b2f7Stbbdev         localPublicFreeList = objectToFree->next = temp;
139351c0b2f7Stbbdev         // no backoff necessary because we are trying to make a change, not waiting for one
139451c0b2f7Stbbdev     } while( !publicFreeList.compare_exchange_strong(temp, objectToFree) );
139551c0b2f7Stbbdev #else
139651c0b2f7Stbbdev     STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
139751c0b2f7Stbbdev     {
139851c0b2f7Stbbdev         MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
139951c0b2f7Stbbdev         localPublicFreeList = objectToFree->next = publicFreeList;
140051c0b2f7Stbbdev         publicFreeList = objectToFree;
140151c0b2f7Stbbdev     }
140251c0b2f7Stbbdev #endif
140351c0b2f7Stbbdev 
140451c0b2f7Stbbdev     if( localPublicFreeList==NULL ) {
140551c0b2f7Stbbdev         // if the block is abandoned, its nextPrivatizable pointer should be UNUSABLE;
140651c0b2f7Stbbdev         // otherwise, it should point to the bin the block belongs to.
140751c0b2f7Stbbdev         // Reading nextPrivatizable is thread-safe below, because:
140851c0b2f7Stbbdev         // 1) the executing thread atomically got publicFreeList==NULL and changed it to non-NULL;
140951c0b2f7Stbbdev         // 2) only the owning thread can change it back to NULL,
141051c0b2f7Stbbdev         // 3) but that cannot be done until the block is put into the mailbox.
141151c0b2f7Stbbdev         // So the executing thread is now the only one that can change nextPrivatizable.
141251c0b2f7Stbbdev         if( !isNotForUse(nextPrivatizable) ) {
141351c0b2f7Stbbdev             MALLOC_ASSERT( nextPrivatizable!=NULL, ASSERT_TEXT );
141451c0b2f7Stbbdev             Bin* theBin = (Bin*) nextPrivatizable;
141551c0b2f7Stbbdev #if MALLOC_DEBUG && TBB_REVAMP_TODO
141651c0b2f7Stbbdev             // FIXME: The thread that returns the block is not the block's owner.
141751c0b2f7Stbbdev             // The below assertion compares 'theBin' against the caller's local bin, thus, it always fails.
141851c0b2f7Stbbdev             // Need to find a way to get the correct remote bin for comparison.
141951c0b2f7Stbbdev             { // check that nextPrivatizable points to the bin the block belongs to
142051c0b2f7Stbbdev                 uint32_t index = getIndex( objectSize );
142151c0b2f7Stbbdev                 TLSData* tls = getThreadMallocTLS();
142251c0b2f7Stbbdev                 MALLOC_ASSERT( theBin==tls->bin+index, ASSERT_TEXT );
142351c0b2f7Stbbdev             }
142451c0b2f7Stbbdev #endif // MALLOC_DEBUG
142551c0b2f7Stbbdev             theBin->addPublicFreeListBlock(this);
142651c0b2f7Stbbdev         }
142751c0b2f7Stbbdev     }
142851c0b2f7Stbbdev     STAT_increment(getThreadId(), ThreadCommonCounters, freeToOtherThread);
142951c0b2f7Stbbdev     STAT_increment(ownerTid, getIndex(objectSize), freeByOtherThread);
143051c0b2f7Stbbdev }
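/* Summary sketch of the remote-free protocol implemented above (no new code, just a
   recap of the visible functions): a non-owning thread pushes the object onto
   publicFreeList with the CAS loop; the thread that observes the list going from
   NULL to non-NULL is responsible for notifying the owner, so unless the block is
   abandoned (nextPrivatizable is UNUSABLE) it posts the block to the owning bin's
   mailbox via addPublicFreeListBlock(). The owner later drains the mailbox in
   getPrivatizedFreeListBlock() or cleanPublicFreeLists() and merges the objects
   back into its local freeList with privatizePublicFreeList(). */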
143151c0b2f7Stbbdev 
143251c0b2f7Stbbdev // Make objects freed by other threads available for use again
143351c0b2f7Stbbdev void Block::privatizePublicFreeList( bool reset )
143451c0b2f7Stbbdev {
143551c0b2f7Stbbdev     FreeObject *localPublicFreeList;
143651c0b2f7Stbbdev     // If reset is false, publicFreeList should not be zeroed but set to UNUSABLE
143751c0b2f7Stbbdev     // to properly synchronize with other threads freeing objects to this slab.
143851c0b2f7Stbbdev     const intptr_t endMarker = reset ? 0 : UNUSABLE;
143951c0b2f7Stbbdev 
144051c0b2f7Stbbdev     // Only the owner thread may reset the pointer to NULL
144151c0b2f7Stbbdev     MALLOC_ASSERT( isOwnedByCurrentThread() || !reset, ASSERT_TEXT );
144251c0b2f7Stbbdev #if FREELIST_NONBLOCKING
144351c0b2f7Stbbdev     localPublicFreeList = publicFreeList.exchange((FreeObject*)endMarker);
144451c0b2f7Stbbdev #else
144551c0b2f7Stbbdev     STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
144651c0b2f7Stbbdev     {
144751c0b2f7Stbbdev         MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
144851c0b2f7Stbbdev         localPublicFreeList = publicFreeList;
144951c0b2f7Stbbdev         publicFreeList = endMarker;
145051c0b2f7Stbbdev     }
145151c0b2f7Stbbdev #endif
145251c0b2f7Stbbdev     MALLOC_ITT_SYNC_ACQUIRED(&publicFreeList);
145351c0b2f7Stbbdev     MALLOC_ASSERT( !(reset && isNotForUse(publicFreeList)), ASSERT_TEXT );
145451c0b2f7Stbbdev 
145551c0b2f7Stbbdev     // publicFreeList must have been UNUSABLE or valid, but not NULL
145651c0b2f7Stbbdev     MALLOC_ASSERT( localPublicFreeList!=NULL, ASSERT_TEXT );
145751c0b2f7Stbbdev     if( isSolidPtr(localPublicFreeList) ) {
145851c0b2f7Stbbdev         MALLOC_ASSERT( allocatedCount <= (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
145951c0b2f7Stbbdev         /* other threads did not decrement the counter when freeing our objects */
146051c0b2f7Stbbdev         allocatedCount--;
146151c0b2f7Stbbdev         FreeObject *temp = localPublicFreeList;
146251c0b2f7Stbbdev         while( isSolidPtr(temp->next) ){ // the list will end with either NULL or UNUSABLE
146351c0b2f7Stbbdev             temp = temp->next;
146451c0b2f7Stbbdev             allocatedCount--;
146551c0b2f7Stbbdev             MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
146651c0b2f7Stbbdev         }
146751c0b2f7Stbbdev         /* merge with local freeList */
146851c0b2f7Stbbdev         temp->next = freeList;
146951c0b2f7Stbbdev         freeList = localPublicFreeList;
147051c0b2f7Stbbdev         STAT_increment(getThreadId(), getIndex(objectSize), allocPrivatized);
147151c0b2f7Stbbdev     }
147251c0b2f7Stbbdev }
147351c0b2f7Stbbdev 
147451c0b2f7Stbbdev void Block::privatizeOrphaned(TLSData *tls, unsigned index)
147551c0b2f7Stbbdev {
147651c0b2f7Stbbdev     Bin* bin = tls->bin + index;
147751c0b2f7Stbbdev     STAT_increment(getThreadId(), index, allocBlockPublic);
147851c0b2f7Stbbdev     next = NULL;
147951c0b2f7Stbbdev     previous = NULL;
148051c0b2f7Stbbdev     MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) != NULL, ASSERT_TEXT );
148151c0b2f7Stbbdev     /* There is no race here since no other thread owns this block */
148251c0b2f7Stbbdev     markOwned(tls);
148351c0b2f7Stbbdev     // It is safe to change nextPrivatizable, as publicFreeList is not null
148451c0b2f7Stbbdev     MALLOC_ASSERT( isNotForUse(nextPrivatizable), ASSERT_TEXT );
148551c0b2f7Stbbdev     nextPrivatizable = (Block*)bin;
148651c0b2f7Stbbdev     // the next call is required to change publicFreeList to 0
148751c0b2f7Stbbdev     privatizePublicFreeList();
148851c0b2f7Stbbdev     if( empty() ) {
148951c0b2f7Stbbdev         restoreBumpPtr();
149051c0b2f7Stbbdev     } else {
149151c0b2f7Stbbdev         adjustFullness(); // check the block fullness and set isFull
149251c0b2f7Stbbdev     }
149351c0b2f7Stbbdev     MALLOC_ASSERT( !isNotForUse(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
149451c0b2f7Stbbdev }
149551c0b2f7Stbbdev 
149651c0b2f7Stbbdev 
149751c0b2f7Stbbdev bool Block::readyToShare()
149851c0b2f7Stbbdev {
149951c0b2f7Stbbdev     FreeObject* oldVal = NULL;
150051c0b2f7Stbbdev #if FREELIST_NONBLOCKING
150151c0b2f7Stbbdev     publicFreeList.compare_exchange_strong(oldVal, (FreeObject*)UNUSABLE);
150251c0b2f7Stbbdev #else
150351c0b2f7Stbbdev     STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
150451c0b2f7Stbbdev     {
150551c0b2f7Stbbdev         MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
150651c0b2f7Stbbdev         if ( (oldVal=publicFreeList)==NULL )
150751c0b2f7Stbbdev             (intptr_t&)(publicFreeList) = UNUSABLE;
150851c0b2f7Stbbdev     }
150951c0b2f7Stbbdev #endif
151051c0b2f7Stbbdev     return oldVal==NULL;
151151c0b2f7Stbbdev }
151251c0b2f7Stbbdev 
151351c0b2f7Stbbdev void Block::shareOrphaned(intptr_t binTag, unsigned index)
151451c0b2f7Stbbdev {
151551c0b2f7Stbbdev     MALLOC_ASSERT( binTag, ASSERT_TEXT );
151651c0b2f7Stbbdev     // suppress the unreferenced formal parameter warning
151751c0b2f7Stbbdev     tbb::detail::suppress_unused_warning(index);
151851c0b2f7Stbbdev     STAT_increment(getThreadId(), index, freeBlockPublic);
151951c0b2f7Stbbdev     markOrphaned();
152051c0b2f7Stbbdev     if ((intptr_t)nextPrivatizable==binTag) {
152151c0b2f7Stbbdev         // First check passed: the block is not in the mailbox yet.
152251c0b2f7Stbbdev         // Need to set publicFreeList to non-zero, so that other threads
152351c0b2f7Stbbdev         // will not change nextPrivatizable, and it can then be zeroed.
152451c0b2f7Stbbdev         if ( !readyToShare() ) {
152551c0b2f7Stbbdev             // another thread freed an object; we need to wait until it finishes.
152651c0b2f7Stbbdev             // There is no need for exponential backoff, as the wait here is not for a lock;
152751c0b2f7Stbbdev             // but we need to yield, so the thread we wait for has a chance to run.
152851c0b2f7Stbbdev             // TODO: add a pause to also be friendly to hyperthreads
152951c0b2f7Stbbdev             int count = 256;
153051c0b2f7Stbbdev             while( (intptr_t)const_cast<Block* volatile &>(nextPrivatizable)==binTag ) {
153151c0b2f7Stbbdev                 if (--count==0) {
153251c0b2f7Stbbdev                     do_yield();
153351c0b2f7Stbbdev                     count = 256;
153451c0b2f7Stbbdev                 }
153551c0b2f7Stbbdev             }
153651c0b2f7Stbbdev         }
153751c0b2f7Stbbdev     }
153851c0b2f7Stbbdev     MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) !=NULL, ASSERT_TEXT );
153951c0b2f7Stbbdev     // now it is safe to change our data
154051c0b2f7Stbbdev     previous = NULL;
154151c0b2f7Stbbdev     // it is the caller's responsibility to ensure that the list of blocks
154251c0b2f7Stbbdev     // formed by nextPrivatizable pointers is kept consistent if required.
154351c0b2f7Stbbdev     // If this is only called from thread shutdown code, it does not matter.
154451c0b2f7Stbbdev     (intptr_t&)(nextPrivatizable) = UNUSABLE;
154551c0b2f7Stbbdev }
154651c0b2f7Stbbdev 
154751c0b2f7Stbbdev void Block::cleanBlockHeader()
154851c0b2f7Stbbdev {
154951c0b2f7Stbbdev     next = NULL;
155051c0b2f7Stbbdev     previous = NULL;
155151c0b2f7Stbbdev     freeList = NULL;
155251c0b2f7Stbbdev     allocatedCount = 0;
155351c0b2f7Stbbdev     isFull = false;
155451c0b2f7Stbbdev     tlsPtr = NULL;
155551c0b2f7Stbbdev 
155651c0b2f7Stbbdev     publicFreeList.store(NULL, std::memory_order_relaxed);
155751c0b2f7Stbbdev }
155851c0b2f7Stbbdev 
155951c0b2f7Stbbdev void Block::initEmptyBlock(TLSData *tls, size_t size)
156051c0b2f7Stbbdev {
156151c0b2f7Stbbdev     // Having getIndex and getObjectSize called next to each other
156251c0b2f7Stbbdev     // allows better compiler optimization as they basically share the code.
156351c0b2f7Stbbdev     unsigned int index = getIndex(size);
156451c0b2f7Stbbdev     unsigned int objSz = getObjectSize(size);
156551c0b2f7Stbbdev 
156651c0b2f7Stbbdev     cleanBlockHeader();
156751c0b2f7Stbbdev     objectSize = objSz;
156851c0b2f7Stbbdev     markOwned(tls);
156951c0b2f7Stbbdev     // bump pointer should be prepared for the first allocation - thus move it down by objectSize
157051c0b2f7Stbbdev     bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);
157151c0b2f7Stbbdev 
157251c0b2f7Stbbdev     // each block should have the address where the head of the list of "privatizable" blocks is kept
157351c0b2f7Stbbdev     // the only exception is a bootstrap block, which is initialized while TLS is still NULL
157451c0b2f7Stbbdev     nextPrivatizable = tls? (Block*)(tls->bin + index) : NULL;
157551c0b2f7Stbbdev     TRACEF(( "[ScalableMalloc trace] Empty block %p is initialized, owner is %ld, objectSize is %d, bumpPtr is %p\n",
157651c0b2f7Stbbdev              this, tlsPtr ? getThreadId() : -1, objectSize, bumpPtr ));
157751c0b2f7Stbbdev }
157851c0b2f7Stbbdev 
157951c0b2f7Stbbdev Block *OrphanedBlocks::get(TLSData *tls, unsigned int size)
158051c0b2f7Stbbdev {
158151c0b2f7Stbbdev     // TODO: try to use index from getAllocationBin
158251c0b2f7Stbbdev     unsigned int index = getIndex(size);
158351c0b2f7Stbbdev     Block *block = bins[index].pop();
158451c0b2f7Stbbdev     if (block) {
158551c0b2f7Stbbdev         MALLOC_ITT_SYNC_ACQUIRED(bins+index);
158651c0b2f7Stbbdev         block->privatizeOrphaned(tls, index);
158751c0b2f7Stbbdev     }
158851c0b2f7Stbbdev     return block;
158951c0b2f7Stbbdev }
159051c0b2f7Stbbdev 
159151c0b2f7Stbbdev void OrphanedBlocks::put(intptr_t binTag, Block *block)
159251c0b2f7Stbbdev {
159351c0b2f7Stbbdev     unsigned int index = getIndex(block->getSize());
159451c0b2f7Stbbdev     block->shareOrphaned(binTag, index);
159551c0b2f7Stbbdev     MALLOC_ITT_SYNC_RELEASING(bins+index);
159651c0b2f7Stbbdev     bins[index].push(block);
159751c0b2f7Stbbdev }
159851c0b2f7Stbbdev 
159951c0b2f7Stbbdev void OrphanedBlocks::reset()
160051c0b2f7Stbbdev {
160151c0b2f7Stbbdev     for (uint32_t i=0; i<numBlockBinLimit; i++)
160251c0b2f7Stbbdev         new (bins+i) LifoList();
160351c0b2f7Stbbdev }
160451c0b2f7Stbbdev 
160551c0b2f7Stbbdev bool OrphanedBlocks::cleanup(Backend* backend)
160651c0b2f7Stbbdev {
160751c0b2f7Stbbdev     bool released = false;
160851c0b2f7Stbbdev     for (uint32_t i=0; i<numBlockBinLimit; i++) {
160951c0b2f7Stbbdev         Block* block = bins[i].grab();
161051c0b2f7Stbbdev         MALLOC_ITT_SYNC_ACQUIRED(bins+i);
161151c0b2f7Stbbdev         while (block) {
161251c0b2f7Stbbdev             Block* next = block->next;
161351c0b2f7Stbbdev             block->privatizePublicFreeList( /*reset=*/false ); // do not set publicFreeList to NULL
161451c0b2f7Stbbdev             if (block->empty()) {
161551c0b2f7Stbbdev                 block->reset();
161651c0b2f7Stbbdev                 // slab blocks in user's pools do not have valid backRefIdx
161751c0b2f7Stbbdev                 if (!backend->inUserPool())
161851c0b2f7Stbbdev                     removeBackRef(*(block->getBackRefIdx()));
161951c0b2f7Stbbdev                 backend->putSlabBlock(block);
162051c0b2f7Stbbdev                 released = true;
162151c0b2f7Stbbdev             } else {
162251c0b2f7Stbbdev                 MALLOC_ITT_SYNC_RELEASING(bins+i);
162351c0b2f7Stbbdev                 bins[i].push(block);
162451c0b2f7Stbbdev             }
162551c0b2f7Stbbdev             block = next;
162651c0b2f7Stbbdev         }
162751c0b2f7Stbbdev     }
162851c0b2f7Stbbdev     return released;
162951c0b2f7Stbbdev }
163051c0b2f7Stbbdev 
163151c0b2f7Stbbdev FreeBlockPool::ResOfGet FreeBlockPool::getBlock()
163251c0b2f7Stbbdev {
163351c0b2f7Stbbdev     Block *b = head.exchange(NULL);
163451c0b2f7Stbbdev 
163551c0b2f7Stbbdev     if (b) {
163651c0b2f7Stbbdev         size--;
163751c0b2f7Stbbdev         Block *newHead = b->next;
163851c0b2f7Stbbdev         lastAccessMiss = false;
163951c0b2f7Stbbdev         head.store(newHead, std::memory_order_release);
164051c0b2f7Stbbdev     } else {
164151c0b2f7Stbbdev         lastAccessMiss = true;
164251c0b2f7Stbbdev     }
164351c0b2f7Stbbdev     return ResOfGet(b, lastAccessMiss);
164451c0b2f7Stbbdev }
164551c0b2f7Stbbdev 
164651c0b2f7Stbbdev void FreeBlockPool::returnBlock(Block *block)
164751c0b2f7Stbbdev {
164851c0b2f7Stbbdev     MALLOC_ASSERT( size <= POOL_HIGH_MARK, ASSERT_TEXT );
164951c0b2f7Stbbdev     Block *localHead = head.exchange(NULL);
165051c0b2f7Stbbdev 
165151c0b2f7Stbbdev     if (!localHead) {
165251c0b2f7Stbbdev         size = 0; // head was stolen by externalCleanup, correct size accordingly
165351c0b2f7Stbbdev     } else if (size == POOL_HIGH_MARK) {
165451c0b2f7Stbbdev         // release cold blocks and add the hot one,
165551c0b2f7Stbbdev         // i.e. keep POOL_LOW_MARK-1 blocks and add the new block at the head
165651c0b2f7Stbbdev         Block *headToFree = localHead, *helper;
165751c0b2f7Stbbdev         for (int i=0; i<POOL_LOW_MARK-2; i++)
165851c0b2f7Stbbdev             headToFree = headToFree->next;
165951c0b2f7Stbbdev         Block *last = headToFree;
166051c0b2f7Stbbdev         headToFree = headToFree->next;
166151c0b2f7Stbbdev         last->next = NULL;
166251c0b2f7Stbbdev         size = POOL_LOW_MARK-1;
166351c0b2f7Stbbdev         for (Block *currBl = headToFree; currBl; currBl = helper) {
166451c0b2f7Stbbdev             helper = currBl->next;
166551c0b2f7Stbbdev             // slab blocks in user's pools do not have valid backRefIdx
166651c0b2f7Stbbdev             if (!backend->inUserPool())
166751c0b2f7Stbbdev                 removeBackRef(currBl->backRefIdx);
166851c0b2f7Stbbdev             backend->putSlabBlock(currBl);
166951c0b2f7Stbbdev         }
167051c0b2f7Stbbdev     }
167151c0b2f7Stbbdev     size++;
167251c0b2f7Stbbdev     block->next = localHead;
167351c0b2f7Stbbdev     head.store(block, std::memory_order_release);
167451c0b2f7Stbbdev }
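/* Illustrative example (POOL_HIGH_MARK and POOL_LOW_MARK values assumed for the
   sketch, e.g. 32 and 8): a returnBlock() call that finds the pool already holding
   32 cached blocks keeps the first POOL_LOW_MARK-1 == 7 hottest blocks, returns the
   remaining 25 cold blocks to the backend, and then pushes the newly returned block
   at the head, leaving 8 blocks in the pool. */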
167551c0b2f7Stbbdev 
167651c0b2f7Stbbdev bool FreeBlockPool::externalCleanup()
167751c0b2f7Stbbdev {
167851c0b2f7Stbbdev     Block *helper;
167951c0b2f7Stbbdev     bool released = false;
168051c0b2f7Stbbdev 
168151c0b2f7Stbbdev     for (Block *currBl=head.exchange(NULL); currBl; currBl=helper) {
168251c0b2f7Stbbdev         helper = currBl->next;
168351c0b2f7Stbbdev         // slab blocks in user's pools do not have valid backRefIdx
168451c0b2f7Stbbdev         if (!backend->inUserPool())
168551c0b2f7Stbbdev             removeBackRef(currBl->backRefIdx);
168651c0b2f7Stbbdev         backend->putSlabBlock(currBl);
168751c0b2f7Stbbdev         released = true;
168851c0b2f7Stbbdev     }
168951c0b2f7Stbbdev     return released;
169051c0b2f7Stbbdev }
169151c0b2f7Stbbdev 
169251c0b2f7Stbbdev /* Prepare the block for returning to FreeBlockPool */
169351c0b2f7Stbbdev void Block::reset()
169451c0b2f7Stbbdev {
169551c0b2f7Stbbdev     // it is the caller's responsibility to ensure no data is lost before calling this
169651c0b2f7Stbbdev     MALLOC_ASSERT( allocatedCount==0, ASSERT_TEXT );
169751c0b2f7Stbbdev     MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
169851c0b2f7Stbbdev     if (!isStartupAllocObject())
169951c0b2f7Stbbdev         STAT_increment(getThreadId(), getIndex(objectSize), freeBlockBack);
170051c0b2f7Stbbdev 
170151c0b2f7Stbbdev     cleanBlockHeader();
170251c0b2f7Stbbdev 
170351c0b2f7Stbbdev     nextPrivatizable = NULL;
170451c0b2f7Stbbdev 
170551c0b2f7Stbbdev     objectSize = 0;
170651c0b2f7Stbbdev     // for an empty block, bump pointer should point right after the end of the block
170751c0b2f7Stbbdev     bumpPtr = (FreeObject *)((uintptr_t)this + slabSize);
170851c0b2f7Stbbdev }
170951c0b2f7Stbbdev 
171051c0b2f7Stbbdev inline void Bin::setActiveBlock (Block *block)
171151c0b2f7Stbbdev {
171251c0b2f7Stbbdev //    MALLOC_ASSERT( bin, ASSERT_TEXT );
171351c0b2f7Stbbdev     MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
171451c0b2f7Stbbdev     // it is the caller's responsibility to keep the bin consistent (i.e. ensure this block is in the bin list)
171551c0b2f7Stbbdev     activeBlk = block;
171651c0b2f7Stbbdev }
171751c0b2f7Stbbdev 
171851c0b2f7Stbbdev inline Block* Bin::setPreviousBlockActive()
171951c0b2f7Stbbdev {
172051c0b2f7Stbbdev     MALLOC_ASSERT( activeBlk, ASSERT_TEXT );
172151c0b2f7Stbbdev     Block* temp = activeBlk->previous;
172251c0b2f7Stbbdev     if( temp ) {
172351c0b2f7Stbbdev         MALLOC_ASSERT( !(temp->isFull), ASSERT_TEXT );
172451c0b2f7Stbbdev         activeBlk = temp;
172551c0b2f7Stbbdev     }
172651c0b2f7Stbbdev     return temp;
172751c0b2f7Stbbdev }
172851c0b2f7Stbbdev 
172951c0b2f7Stbbdev inline bool Block::isOwnedByCurrentThread() const {
173051c0b2f7Stbbdev     return tlsPtr && ownerTid.isCurrentThreadId();
173151c0b2f7Stbbdev }
173251c0b2f7Stbbdev 
173351c0b2f7Stbbdev FreeObject *Block::findObjectToFree(const void *object) const
173451c0b2f7Stbbdev {
173551c0b2f7Stbbdev     FreeObject *objectToFree;
173651c0b2f7Stbbdev     // Due to aligned allocations, a pointer passed to scalable_free
173751c0b2f7Stbbdev     // might differ from the address of the internally allocated object.
173851c0b2f7Stbbdev     // Small objects, however, should always be fine.
173951c0b2f7Stbbdev     if (objectSize <= maxSegregatedObjectSize)
174051c0b2f7Stbbdev         objectToFree = (FreeObject*)object;
174151c0b2f7Stbbdev     // "Fitting size" allocations are suspicious if aligned higher than naturally
174251c0b2f7Stbbdev     else {
174351c0b2f7Stbbdev         if ( ! isAligned(object,2*fittingAlignment) )
174451c0b2f7Stbbdev             // TODO: the above check is questionable - it gives false negatives in ~50% of cases,
174551c0b2f7Stbbdev             //       so it might even be slower on average than unconditional use of findAllocatedObject.
174651c0b2f7Stbbdev             // here it should be a "real" object
174751c0b2f7Stbbdev             objectToFree = (FreeObject*)object;
174851c0b2f7Stbbdev         else
174951c0b2f7Stbbdev             // here object can be an aligned address, so additional checks are applied
175051c0b2f7Stbbdev             objectToFree = findAllocatedObject(object);
175151c0b2f7Stbbdev         MALLOC_ASSERT( isAligned(objectToFree,fittingAlignment), ASSERT_TEXT );
175251c0b2f7Stbbdev     }
175351c0b2f7Stbbdev     MALLOC_ASSERT( isProperlyPlaced(objectToFree), ASSERT_TEXT );
175451c0b2f7Stbbdev 
175551c0b2f7Stbbdev     return objectToFree;
175651c0b2f7Stbbdev }
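/* Hypothetical illustration of the aligned case handled above: a request such as
   scalable_aligned_malloc(24, 64) can be served from a larger "fitting size" object
   whose real start lies below the returned 64-byte-aligned address. Freeing that
   aligned pointer therefore goes through findAllocatedObject() to recover the true
   object start, while a pointer that is not aligned on 2*fittingAlignment can be
   treated as the object itself. */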
175751c0b2f7Stbbdev 
175851c0b2f7Stbbdev void TLSData::release()
175951c0b2f7Stbbdev {
176051c0b2f7Stbbdev     memPool->extMemPool.allLocalCaches.unregisterThread(this);
176151c0b2f7Stbbdev     externalCleanup(/*cleanOnlyUnused=*/false, /*cleanBins=*/false);
176251c0b2f7Stbbdev 
176351c0b2f7Stbbdev     for (unsigned index = 0; index < numBlockBins; index++) {
176451c0b2f7Stbbdev         Block *activeBlk = bin[index].getActiveBlock();
176551c0b2f7Stbbdev         if (!activeBlk)
176651c0b2f7Stbbdev             continue;
176751c0b2f7Stbbdev         Block *threadlessBlock = activeBlk->previous;
176851c0b2f7Stbbdev         while (threadlessBlock) {
176951c0b2f7Stbbdev             Block *threadBlock = threadlessBlock->previous;
177051c0b2f7Stbbdev             if (threadlessBlock->empty()) {
177151c0b2f7Stbbdev                 /* the thread is being destroyed, so do not use its block pool */
177251c0b2f7Stbbdev                 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false);
177351c0b2f7Stbbdev             } else {
177451c0b2f7Stbbdev                 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock);
177551c0b2f7Stbbdev             }
177651c0b2f7Stbbdev             threadlessBlock = threadBlock;
177751c0b2f7Stbbdev         }
177851c0b2f7Stbbdev         threadlessBlock = activeBlk;
177951c0b2f7Stbbdev         while (threadlessBlock) {
178051c0b2f7Stbbdev             Block *threadBlock = threadlessBlock->next;
178151c0b2f7Stbbdev             if (threadlessBlock->empty()) {
178251c0b2f7Stbbdev                 /* the thread is being destroyed, so do not use its block pool */
178351c0b2f7Stbbdev                 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false);
178451c0b2f7Stbbdev             } else {
178551c0b2f7Stbbdev                 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock);
178651c0b2f7Stbbdev             }
178751c0b2f7Stbbdev             threadlessBlock = threadBlock;
178851c0b2f7Stbbdev         }
178951c0b2f7Stbbdev         bin[index].resetActiveBlock();
179051c0b2f7Stbbdev     }
179151c0b2f7Stbbdev }
179251c0b2f7Stbbdev 
179351c0b2f7Stbbdev 
179451c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION
179551c0b2f7Stbbdev // TODO: Use dedicated heap for this
179651c0b2f7Stbbdev 
179751c0b2f7Stbbdev /*
179851c0b2f7Stbbdev  * It's a special kind of allocation that can be used when malloc is
179951c0b2f7Stbbdev  * not available (either during startup or when malloc was already called and
180051c0b2f7Stbbdev  * we are, say, inside pthread_setspecific's call).
180151c0b2f7Stbbdev  * A block can contain objects of different sizes;
180251c0b2f7Stbbdev  * allocations are performed by moving the bump pointer and incrementing the object counter,
180351c0b2f7Stbbdev  * and releasing is done via the counter of objects allocated in the block,
180451c0b2f7Stbbdev  * or by moving the bump pointer back if the released object is at the boundary.
180551c0b2f7Stbbdev  * TODO: make the bump pointer grow backward, in the same direction as all the others.
180651c0b2f7Stbbdev  */
180751c0b2f7Stbbdev 
180851c0b2f7Stbbdev class StartupBlock : public Block {
180951c0b2f7Stbbdev     size_t availableSize() const {
181051c0b2f7Stbbdev         return slabSize - ((uintptr_t)bumpPtr - (uintptr_t)this);
181151c0b2f7Stbbdev     }
181251c0b2f7Stbbdev     static StartupBlock *getBlock();
181351c0b2f7Stbbdev public:
181451c0b2f7Stbbdev     static FreeObject *allocate(size_t size);
181551c0b2f7Stbbdev     static size_t msize(void *ptr) { return *((size_t*)ptr - 1); }
181651c0b2f7Stbbdev     void free(void *ptr);
181751c0b2f7Stbbdev };
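/* Memory layout sketch for startup allocations (derived from msize() above and
   allocate() below): each object is preceded by its size stored in a size_t word,
   so for a returned pointer p the size is *((size_t*)p - 1):

       ... | size_t size | user data (size bytes) | size_t size | user data | ...
                         ^ p (returned pointer)

   This is what lets StartupBlock::msize(p) work without any per-object metadata
   kept elsewhere. */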
181851c0b2f7Stbbdev 
181951c0b2f7Stbbdev static MallocMutex startupMallocLock;
182051c0b2f7Stbbdev static StartupBlock *firstStartupBlock;
182151c0b2f7Stbbdev 
182251c0b2f7Stbbdev StartupBlock *StartupBlock::getBlock()
182351c0b2f7Stbbdev {
182451c0b2f7Stbbdev     BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/false);
182551c0b2f7Stbbdev     if (backRefIdx.isInvalid()) return NULL;
182651c0b2f7Stbbdev 
182751c0b2f7Stbbdev     StartupBlock *block = static_cast<StartupBlock*>(
182851c0b2f7Stbbdev         defaultMemPool->extMemPool.backend.getSlabBlock(1));
182951c0b2f7Stbbdev     if (!block) return NULL;
183051c0b2f7Stbbdev 
183151c0b2f7Stbbdev     block->cleanBlockHeader();
183251c0b2f7Stbbdev     setBackRef(backRefIdx, block);
183351c0b2f7Stbbdev     block->backRefIdx = backRefIdx;
183451c0b2f7Stbbdev     // use startupAllocObjSizeMark to mark objects from startup block marker
183551c0b2f7Stbbdev     // use startupAllocObjSizeMark to mark objects coming from a startup block
183651c0b2f7Stbbdev     block->bumpPtr = (FreeObject *)((uintptr_t)block + sizeof(StartupBlock));
183751c0b2f7Stbbdev     return block;
183851c0b2f7Stbbdev }
183951c0b2f7Stbbdev 
184051c0b2f7Stbbdev FreeObject *StartupBlock::allocate(size_t size)
184151c0b2f7Stbbdev {
184251c0b2f7Stbbdev     FreeObject *result;
184351c0b2f7Stbbdev     StartupBlock *newBlock = NULL;
184451c0b2f7Stbbdev     bool newBlockUnused = false;
184551c0b2f7Stbbdev 
184651c0b2f7Stbbdev     /* Objects must be aligned on their natural bounds,
184751c0b2f7Stbbdev        and objects bigger than a word on a word boundary. */
184851c0b2f7Stbbdev     size = alignUp(size, sizeof(size_t));
184951c0b2f7Stbbdev     // We need the size of an object to implement msize.
185051c0b2f7Stbbdev     size_t reqSize = size + sizeof(size_t);
185151c0b2f7Stbbdev     // speculatively allocate newBlock to try to avoid allocation while holding the lock
185251c0b2f7Stbbdev     /* TODO: The function is called when malloc nested call is detected,
185351c0b2f7Stbbdev              so simultaneous usage from different threads seems unlikely.
185451c0b2f7Stbbdev              If pre-allocation is found useless, the code might be simplified. */
185551c0b2f7Stbbdev     if (!firstStartupBlock || firstStartupBlock->availableSize() < reqSize) {
185651c0b2f7Stbbdev         newBlock = StartupBlock::getBlock();
185751c0b2f7Stbbdev         if (!newBlock) return NULL;
185851c0b2f7Stbbdev     }
185951c0b2f7Stbbdev     {
186051c0b2f7Stbbdev         MallocMutex::scoped_lock scoped_cs(startupMallocLock);
186151c0b2f7Stbbdev         // Re-check whether we need a new block (conditions might have changed)
186251c0b2f7Stbbdev         if (!firstStartupBlock || firstStartupBlock->availableSize() < reqSize) {
186351c0b2f7Stbbdev             if (!newBlock) {
186451c0b2f7Stbbdev                 newBlock = StartupBlock::getBlock();
186551c0b2f7Stbbdev                 if (!newBlock) return NULL;
186651c0b2f7Stbbdev             }
186751c0b2f7Stbbdev             newBlock->next = (Block*)firstStartupBlock;
186851c0b2f7Stbbdev             if (firstStartupBlock)
186951c0b2f7Stbbdev                 firstStartupBlock->previous = (Block*)newBlock;
187051c0b2f7Stbbdev             firstStartupBlock = newBlock;
187151c0b2f7Stbbdev         } else
187251c0b2f7Stbbdev             newBlockUnused = true;
187351c0b2f7Stbbdev         result = firstStartupBlock->bumpPtr;
187451c0b2f7Stbbdev         firstStartupBlock->allocatedCount++;
187551c0b2f7Stbbdev         firstStartupBlock->bumpPtr =
187651c0b2f7Stbbdev             (FreeObject *)((uintptr_t)firstStartupBlock->bumpPtr + reqSize);
187751c0b2f7Stbbdev     }
187851c0b2f7Stbbdev     if (newBlock && newBlockUnused)
187951c0b2f7Stbbdev         defaultMemPool->returnEmptyBlock(newBlock, /*poolTheBlock=*/false);
188051c0b2f7Stbbdev 
188151c0b2f7Stbbdev     // keep object size at the negative offset
188251c0b2f7Stbbdev     *((size_t*)result) = size;
188351c0b2f7Stbbdev     return (FreeObject*)((size_t*)result+1);
188451c0b2f7Stbbdev }
188551c0b2f7Stbbdev 
188651c0b2f7Stbbdev void StartupBlock::free(void *ptr)
188751c0b2f7Stbbdev {
188851c0b2f7Stbbdev     Block* blockToRelease = NULL;
188951c0b2f7Stbbdev     {
189051c0b2f7Stbbdev         MallocMutex::scoped_lock scoped_cs(startupMallocLock);
189151c0b2f7Stbbdev 
189251c0b2f7Stbbdev         MALLOC_ASSERT(firstStartupBlock, ASSERT_TEXT);
189351c0b2f7Stbbdev         MALLOC_ASSERT(startupAllocObjSizeMark==objectSize
189451c0b2f7Stbbdev                       && allocatedCount>0, ASSERT_TEXT);
189551c0b2f7Stbbdev         MALLOC_ASSERT((uintptr_t)ptr>=(uintptr_t)this+sizeof(StartupBlock)
189651c0b2f7Stbbdev                       && (uintptr_t)ptr+StartupBlock::msize(ptr)<=(uintptr_t)this+slabSize,
189751c0b2f7Stbbdev                       ASSERT_TEXT);
189851c0b2f7Stbbdev         if (0 == --allocatedCount) {
189951c0b2f7Stbbdev             if (this == firstStartupBlock)
190051c0b2f7Stbbdev                 firstStartupBlock = (StartupBlock*)firstStartupBlock->next;
190151c0b2f7Stbbdev             if (previous)
190251c0b2f7Stbbdev                 previous->next = next;
190351c0b2f7Stbbdev             if (next)
190451c0b2f7Stbbdev                 next->previous = previous;
190551c0b2f7Stbbdev             blockToRelease = this;
190651c0b2f7Stbbdev         } else if ((uintptr_t)ptr + StartupBlock::msize(ptr) == (uintptr_t)bumpPtr) {
190751c0b2f7Stbbdev             // last object in the block released
190851c0b2f7Stbbdev             FreeObject *newBump = (FreeObject*)((size_t*)ptr - 1);
190951c0b2f7Stbbdev             MALLOC_ASSERT((uintptr_t)newBump>(uintptr_t)this+sizeof(StartupBlock),
191051c0b2f7Stbbdev                           ASSERT_TEXT);
191151c0b2f7Stbbdev             bumpPtr = newBump;
191251c0b2f7Stbbdev         }
191351c0b2f7Stbbdev     }
191451c0b2f7Stbbdev     if (blockToRelease) {
191551c0b2f7Stbbdev         blockToRelease->previous = blockToRelease->next = NULL;
191651c0b2f7Stbbdev         defaultMemPool->returnEmptyBlock(blockToRelease, /*poolTheBlock=*/false);
191751c0b2f7Stbbdev     }
191851c0b2f7Stbbdev }
191951c0b2f7Stbbdev 
192051c0b2f7Stbbdev #endif /* MALLOC_CHECK_RECURSION */
192151c0b2f7Stbbdev 
192251c0b2f7Stbbdev /********* End thread related code  *************/
192351c0b2f7Stbbdev 
192451c0b2f7Stbbdev /********* Library initialization *************/
192551c0b2f7Stbbdev 
192651c0b2f7Stbbdev //! Value indicating the state of initialization.
192751c0b2f7Stbbdev /* 0 = initialization not started.
192851c0b2f7Stbbdev  * 1 = initialization started but not finished.
192951c0b2f7Stbbdev  * 2 = initialization finished.
193051c0b2f7Stbbdev  * In theory, we only need values 0 and 2. But value 1 is nonetheless
193151c0b2f7Stbbdev  * useful for detecting errors in the double-check pattern.
193251c0b2f7Stbbdev  */
193351c0b2f7Stbbdev static std::atomic<intptr_t> mallocInitialized{0};   // implicitly initialized to 0
193451c0b2f7Stbbdev static MallocMutex initMutex;
193551c0b2f7Stbbdev 
193651c0b2f7Stbbdev /** The leading "\0" is here so that applying "strings" to the binary
193751c0b2f7Stbbdev     delivers a clean result. */
193851c0b2f7Stbbdev static char VersionString[] = "\0" TBBMALLOC_VERSION_STRINGS;
193951c0b2f7Stbbdev 
194051c0b2f7Stbbdev #if USE_PTHREAD && (__TBB_SOURCE_DIRECTLY_INCLUDED || __TBB_USE_DLOPEN_REENTRANCY_WORKAROUND)
194151c0b2f7Stbbdev 
194251c0b2f7Stbbdev /* Decrease the race window between dynamic library unloading and the pthread key
194351c0b2f7Stbbdev    destructor. Only pthread implementations that support unloading are protected. */
194451c0b2f7Stbbdev class ShutdownSync {
194551c0b2f7Stbbdev /* flag is the number of threads in pthread key dtor body
194651c0b2f7Stbbdev    (i.e., between threadDtorStart() and threadDtorDone())
194751c0b2f7Stbbdev    or the signal to skip dtor, if flag < 0 */
194851c0b2f7Stbbdev     std::atomic<intptr_t> flag;
194951c0b2f7Stbbdev     static const intptr_t skipDtor = INTPTR_MIN/2;
195051c0b2f7Stbbdev public:
195151c0b2f7Stbbdev     void init() { flag.store(0, std::memory_order_release); }
195251c0b2f7Stbbdev /* Assume that fewer than 2*abs(skipDtor) threads ever call threadDtorStart()
195351c0b2f7Stbbdev    simultaneously, so flag never becomes negative because of that. */
195451c0b2f7Stbbdev     bool threadDtorStart() {
195551c0b2f7Stbbdev         if (flag.load(std::memory_order_acquire) < 0)
195651c0b2f7Stbbdev             return false;
195751c0b2f7Stbbdev         if (++flag <= 0) { // note that the new value is returned
195851c0b2f7Stbbdev             flag.fetch_sub(1); // flag is spoiled by us, restore it
195951c0b2f7Stbbdev             return false;
196051c0b2f7Stbbdev         }
196151c0b2f7Stbbdev         return true;
196251c0b2f7Stbbdev     }
196351c0b2f7Stbbdev     void threadDtorDone() {
196451c0b2f7Stbbdev         flag.fetch_sub(1);
196551c0b2f7Stbbdev     }
196651c0b2f7Stbbdev     void processExit() {
196751c0b2f7Stbbdev         if (flag.fetch_add(skipDtor) != 0) {
196851c0b2f7Stbbdev             SpinWaitUntilEq(flag, skipDtor);
196951c0b2f7Stbbdev         }
197051c0b2f7Stbbdev     }
197151c0b2f7Stbbdev };
197251c0b2f7Stbbdev 
197351c0b2f7Stbbdev #else
197451c0b2f7Stbbdev 
197551c0b2f7Stbbdev class ShutdownSync {
197651c0b2f7Stbbdev public:
197751c0b2f7Stbbdev     void init() { }
197851c0b2f7Stbbdev     bool threadDtorStart() { return true; }
197951c0b2f7Stbbdev     void threadDtorDone() { }
198051c0b2f7Stbbdev     void processExit() { }
198151c0b2f7Stbbdev };
198251c0b2f7Stbbdev 
198351c0b2f7Stbbdev #endif // USE_PTHREAD && (__TBB_SOURCE_DIRECTLY_INCLUDED || __TBB_USE_DLOPEN_REENTRANCY_WORKAROUND)
198451c0b2f7Stbbdev 
198551c0b2f7Stbbdev static ShutdownSync shutdownSync;
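/* Hedged, illustrative usage sketch of ShutdownSync (the callback name below is
   hypothetical, not the library's actual shutdown hook):

       static void threadShutdownCallback(void* arg) {   // e.g. a pthread key dtor
           if (!shutdownSync.threadDtorStart()) return;   // library is unloading, skip work
           // ... release per-thread caches ...
           shutdownSync.threadDtorDone();
       }

   At process shutdown, processExit() waits for all in-flight destructors to finish
   and then leaves the flag negative, so later destructor invocations are skipped. */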
198651c0b2f7Stbbdev 
198751c0b2f7Stbbdev inline bool isMallocInitialized() {
198851c0b2f7Stbbdev     // Load must have acquire fence; otherwise thread taking "initialized" path
198951c0b2f7Stbbdev     // might perform textually later loads *before* mallocInitialized becomes 2.
199051c0b2f7Stbbdev     return 2 == mallocInitialized.load(std::memory_order_acquire);
199151c0b2f7Stbbdev }
199251c0b2f7Stbbdev 
199351c0b2f7Stbbdev bool isMallocInitializedExt() {
199451c0b2f7Stbbdev     return isMallocInitialized();
199551c0b2f7Stbbdev }
199651c0b2f7Stbbdev 
199751c0b2f7Stbbdev /* Caller is responsible for ensuring this routine is called exactly once. */
199851c0b2f7Stbbdev extern "C" void MallocInitializeITT() {
199951c0b2f7Stbbdev #if __TBB_USE_ITT_NOTIFY
200051c0b2f7Stbbdev     if (!usedBySrcIncluded)
200151c0b2f7Stbbdev         tbb::detail::r1::__TBB_load_ittnotify();
200251c0b2f7Stbbdev #endif
200351c0b2f7Stbbdev }
200451c0b2f7Stbbdev 
200551c0b2f7Stbbdev void MemoryPool::initDefaultPool() {
200651c0b2f7Stbbdev     hugePages.init();
200751c0b2f7Stbbdev }
200851c0b2f7Stbbdev 
200951c0b2f7Stbbdev /*
201051c0b2f7Stbbdev  * Allocator initialization routine;
201151c0b2f7Stbbdev  * it is called lazily on the very first scalable_malloc call.
201251c0b2f7Stbbdev  */
201351c0b2f7Stbbdev static bool initMemoryManager()
201451c0b2f7Stbbdev {
201551c0b2f7Stbbdev     TRACEF(( "[ScalableMalloc trace] sizeof(Block) is %d (expected 128); sizeof(uintptr_t) is %d\n",
201651c0b2f7Stbbdev              sizeof(Block), sizeof(uintptr_t) ));
201751c0b2f7Stbbdev     MALLOC_ASSERT( 2*blockHeaderAlignment == sizeof(Block), ASSERT_TEXT );
201851c0b2f7Stbbdev     MALLOC_ASSERT( sizeof(FreeObject) == sizeof(void*), ASSERT_TEXT );
201951c0b2f7Stbbdev     MALLOC_ASSERT( isAligned(defaultMemPool, sizeof(intptr_t)),
202051c0b2f7Stbbdev                    "Memory pool must be void*-aligned for atomic to work over aligned arguments.");
202151c0b2f7Stbbdev 
202251c0b2f7Stbbdev #if USE_WINTHREAD
202351c0b2f7Stbbdev     const size_t granularity = 64*1024; // granularity of VirtualAlloc
202451c0b2f7Stbbdev #else
202551c0b2f7Stbbdev     // POSIX.1-2001-compliant way to get page size
202651c0b2f7Stbbdev     const size_t granularity = sysconf(_SC_PAGESIZE);
202751c0b2f7Stbbdev #endif
202851c0b2f7Stbbdev     if (!defaultMemPool) {
202951c0b2f7Stbbdev         // Do not rely on static constructors; do the assignment explicitly in case
203051c0b2f7Stbbdev         // the library's static section has not been initialized yet at this call.
203151c0b2f7Stbbdev         defaultMemPool = (MemoryPool*)defaultMemPool_space;
203251c0b2f7Stbbdev     }
203351c0b2f7Stbbdev     bool initOk = defaultMemPool->
203451c0b2f7Stbbdev         extMemPool.init(0, NULL, NULL, granularity,
203551c0b2f7Stbbdev                         /*keepAllMemory=*/false, /*fixedPool=*/false);
203651c0b2f7Stbbdev // TODO: make extMemPool.init() not allocate memory
203751c0b2f7Stbbdev     if (!initOk || !initBackRefMaster(&defaultMemPool->extMemPool.backend) || !ThreadId::init())
203851c0b2f7Stbbdev         return false;
203951c0b2f7Stbbdev     MemoryPool::initDefaultPool();
204051c0b2f7Stbbdev     // init() is required iff initMemoryManager() is called
204151c0b2f7Stbbdev     // after mallocProcessShutdownNotification()
204251c0b2f7Stbbdev     shutdownSync.init();
204351c0b2f7Stbbdev #if COLLECT_STATISTICS
204451c0b2f7Stbbdev     initStatisticsCollection();
204551c0b2f7Stbbdev #endif
204651c0b2f7Stbbdev     return true;
204751c0b2f7Stbbdev }
204851c0b2f7Stbbdev 
204951c0b2f7Stbbdev static bool GetBoolEnvironmentVariable(const char* name) {
205051c0b2f7Stbbdev     return tbb::detail::r1::GetBoolEnvironmentVariable(name);
205151c0b2f7Stbbdev }
205251c0b2f7Stbbdev 
205351c0b2f7Stbbdev //! Ensures that initMemoryManager() is called once and only once.
205451c0b2f7Stbbdev /** Does not return until initMemoryManager() has been completed by a thread.
205551c0b2f7Stbbdev     There is no need to call this routine if mallocInitialized==2. */
205651c0b2f7Stbbdev static bool doInitialization()
205751c0b2f7Stbbdev {
205851c0b2f7Stbbdev     MallocMutex::scoped_lock lock( initMutex );
205951c0b2f7Stbbdev     if (mallocInitialized.load(std::memory_order_relaxed)!=2) {
206051c0b2f7Stbbdev         MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==0, ASSERT_TEXT );
206151c0b2f7Stbbdev         mallocInitialized.store(1, std::memory_order_relaxed);
206251c0b2f7Stbbdev         RecursiveMallocCallProtector scoped;
206351c0b2f7Stbbdev         if (!initMemoryManager()) {
206451c0b2f7Stbbdev             mallocInitialized.store(0, std::memory_order_relaxed); // restore and out
206551c0b2f7Stbbdev             return false;
206651c0b2f7Stbbdev         }
206751c0b2f7Stbbdev #ifdef  MALLOC_EXTRA_INITIALIZATION
206851c0b2f7Stbbdev         MALLOC_EXTRA_INITIALIZATION;
206951c0b2f7Stbbdev #endif
207051c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION
207151c0b2f7Stbbdev         RecursiveMallocCallProtector::detectNaiveOverload();
207251c0b2f7Stbbdev #endif
207351c0b2f7Stbbdev         MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==1, ASSERT_TEXT );
207451c0b2f7Stbbdev         // Store must have release fence, otherwise mallocInitialized==2
207551c0b2f7Stbbdev         // might become remotely visible before side effects of
207651c0b2f7Stbbdev         // initMemoryManager() become remotely visible.
207751c0b2f7Stbbdev         mallocInitialized.store(2, std::memory_order_release);
207851c0b2f7Stbbdev         if( GetBoolEnvironmentVariable("TBB_VERSION") ) {
207951c0b2f7Stbbdev             fputs(VersionString+1,stderr);
208051c0b2f7Stbbdev             hugePages.printStatus();
208151c0b2f7Stbbdev         }
208251c0b2f7Stbbdev     }
208351c0b2f7Stbbdev     /* It can't be 0 or I would have initialized it */
208451c0b2f7Stbbdev     MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==2, ASSERT_TEXT );
208551c0b2f7Stbbdev     return true;
208651c0b2f7Stbbdev }
208751c0b2f7Stbbdev 
208851c0b2f7Stbbdev /********* End library initialization *************/
208951c0b2f7Stbbdev 
209051c0b2f7Stbbdev /********* The malloc show begins     *************/
209151c0b2f7Stbbdev 
209251c0b2f7Stbbdev 
209351c0b2f7Stbbdev FreeObject *Block::allocateFromFreeList()
209451c0b2f7Stbbdev {
209551c0b2f7Stbbdev     FreeObject *result;
209651c0b2f7Stbbdev 
209751c0b2f7Stbbdev     if (!freeList) return NULL;
209851c0b2f7Stbbdev 
209951c0b2f7Stbbdev     result = freeList;
210051c0b2f7Stbbdev     MALLOC_ASSERT( result, ASSERT_TEXT );
210151c0b2f7Stbbdev 
210251c0b2f7Stbbdev     freeList = result->next;
210351c0b2f7Stbbdev     MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
210451c0b2f7Stbbdev     allocatedCount++;
210551c0b2f7Stbbdev     STAT_increment(getThreadId(), getIndex(objectSize), allocFreeListUsed);
210651c0b2f7Stbbdev 
210751c0b2f7Stbbdev     return result;
210851c0b2f7Stbbdev }
210951c0b2f7Stbbdev 
211051c0b2f7Stbbdev FreeObject *Block::allocateFromBumpPtr()
211151c0b2f7Stbbdev {
211251c0b2f7Stbbdev     FreeObject *result = bumpPtr;
211351c0b2f7Stbbdev     if (result) {
211451c0b2f7Stbbdev         bumpPtr = (FreeObject *) ((uintptr_t) bumpPtr - objectSize);
211551c0b2f7Stbbdev         if ( (uintptr_t)bumpPtr < (uintptr_t)this+sizeof(Block) ) {
211651c0b2f7Stbbdev             bumpPtr = NULL;
211751c0b2f7Stbbdev         }
211851c0b2f7Stbbdev         MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
211951c0b2f7Stbbdev         allocatedCount++;
212051c0b2f7Stbbdev         STAT_increment(getThreadId(), getIndex(objectSize), allocBumpPtrUsed);
212151c0b2f7Stbbdev     }
212251c0b2f7Stbbdev     return result;
212351c0b2f7Stbbdev }
212451c0b2f7Stbbdev 
212551c0b2f7Stbbdev inline FreeObject* Block::allocate()
212651c0b2f7Stbbdev {
212751c0b2f7Stbbdev     MALLOC_ASSERT( isOwnedByCurrentThread(), ASSERT_TEXT );
212851c0b2f7Stbbdev 
212951c0b2f7Stbbdev     /* For better cache locality, look in the free list first. */
213051c0b2f7Stbbdev     if ( FreeObject *result = allocateFromFreeList() ) {
213151c0b2f7Stbbdev         return result;
213251c0b2f7Stbbdev     }
213351c0b2f7Stbbdev     MALLOC_ASSERT( !freeList, ASSERT_TEXT );
213451c0b2f7Stbbdev 
213551c0b2f7Stbbdev     /* If the free list is empty, try thread-local bump-pointer allocation. */
213651c0b2f7Stbbdev     if ( FreeObject *result = allocateFromBumpPtr() ) {
213751c0b2f7Stbbdev         return result;
213851c0b2f7Stbbdev     }
213951c0b2f7Stbbdev     MALLOC_ASSERT( !bumpPtr, ASSERT_TEXT );
214051c0b2f7Stbbdev 
214151c0b2f7Stbbdev     /* the block is considered full. */
214251c0b2f7Stbbdev     isFull = true;
214351c0b2f7Stbbdev     return NULL;
214451c0b2f7Stbbdev }
214551c0b2f7Stbbdev 
214651c0b2f7Stbbdev size_t Block::findObjectSize(void *object) const
214751c0b2f7Stbbdev {
214851c0b2f7Stbbdev     size_t blSize = getSize();
214951c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION
215051c0b2f7Stbbdev     // Currently, there are no aligned allocations from startup blocks,
215151c0b2f7Stbbdev     // so we can simply return StartupBlock::msize().
215251c0b2f7Stbbdev     // TODO: This must be extended if we add aligned allocation from startup blocks.
215351c0b2f7Stbbdev     if (!blSize)
215451c0b2f7Stbbdev         return StartupBlock::msize(object);
215551c0b2f7Stbbdev #endif
215651c0b2f7Stbbdev     // The object may have been aligned, so its real size can be less than the block's object size
215751c0b2f7Stbbdev     size_t size =
215851c0b2f7Stbbdev         blSize - ((uintptr_t)object - (uintptr_t)findObjectToFree(object));
215951c0b2f7Stbbdev     MALLOC_ASSERT(size>0 && size<minLargeObjectSize, ASSERT_TEXT);
216051c0b2f7Stbbdev     return size;
216151c0b2f7Stbbdev }
216251c0b2f7Stbbdev 
216351c0b2f7Stbbdev void Bin::moveBlockToFront(Block *block)
216451c0b2f7Stbbdev {
216551c0b2f7Stbbdev     /* move the block to the front of the bin */
216651c0b2f7Stbbdev     if (block == activeBlk) return;
216751c0b2f7Stbbdev     outofTLSBin(block);
216851c0b2f7Stbbdev     pushTLSBin(block);
216951c0b2f7Stbbdev }
217051c0b2f7Stbbdev 
217151c0b2f7Stbbdev void Bin::processEmptyBlock(Block *block, bool poolTheBlock)
217251c0b2f7Stbbdev {
217351c0b2f7Stbbdev     if (block != activeBlk) {
217451c0b2f7Stbbdev         /* We are not using this block; return it to the pool */
217551c0b2f7Stbbdev         outofTLSBin(block);
217651c0b2f7Stbbdev         block->getMemPool()->returnEmptyBlock(block, poolTheBlock);
217751c0b2f7Stbbdev     } else {
217851c0b2f7Stbbdev         /* all objects are free - let's restore the bump pointer */
217951c0b2f7Stbbdev         block->restoreBumpPtr();
218051c0b2f7Stbbdev     }
218151c0b2f7Stbbdev }
218251c0b2f7Stbbdev 
218351c0b2f7Stbbdev template<int LOW_MARK, int HIGH_MARK>
218451c0b2f7Stbbdev bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool)
218551c0b2f7Stbbdev {
218651c0b2f7Stbbdev     const size_t size = object->unalignedSize;
218751c0b2f7Stbbdev     // Do not pollute the cache with an overly large object, which could trigger a complete cache cleanup
218851c0b2f7Stbbdev     if (size > MAX_TOTAL_SIZE)
218951c0b2f7Stbbdev         return false;
219051c0b2f7Stbbdev     LargeMemoryBlock *localHead = head.exchange(NULL);
219151c0b2f7Stbbdev 
219251c0b2f7Stbbdev     object->prev = NULL;
219351c0b2f7Stbbdev     object->next = localHead;
219451c0b2f7Stbbdev     if (localHead)
219551c0b2f7Stbbdev         localHead->prev = object;
219651c0b2f7Stbbdev     else {
219751c0b2f7Stbbdev         // These fields might not have been reset when the local cache was stolen; correct them here
219851c0b2f7Stbbdev         totalSize = 0;
219951c0b2f7Stbbdev         numOfBlocks = 0;
220051c0b2f7Stbbdev         tail = object;
220151c0b2f7Stbbdev     }
220251c0b2f7Stbbdev     localHead = object;
220351c0b2f7Stbbdev     totalSize += size;
220451c0b2f7Stbbdev     numOfBlocks++;
220551c0b2f7Stbbdev     // must meet both the size and the number-of-cached-objects constraints
220651c0b2f7Stbbdev     if (totalSize > MAX_TOTAL_SIZE || numOfBlocks >= HIGH_MARK) {
220751c0b2f7Stbbdev         // scan from the tail until both constraints are satisfied
220851c0b2f7Stbbdev         while (totalSize > MAX_TOTAL_SIZE || numOfBlocks > LOW_MARK) {
220951c0b2f7Stbbdev             totalSize -= tail->unalignedSize;
221051c0b2f7Stbbdev             numOfBlocks--;
221151c0b2f7Stbbdev             tail = tail->prev;
221251c0b2f7Stbbdev         }
221351c0b2f7Stbbdev         LargeMemoryBlock *headToRelease = tail->next;
221451c0b2f7Stbbdev         tail->next = NULL;
221551c0b2f7Stbbdev 
221651c0b2f7Stbbdev         extMemPool->freeLargeObjectList(headToRelease);
221751c0b2f7Stbbdev     }
221851c0b2f7Stbbdev 
221951c0b2f7Stbbdev     head.store(localHead, std::memory_order_release);
222051c0b2f7Stbbdev     return true;
222151c0b2f7Stbbdev }
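// Illustrative note, not from the original sources: put() admits blocks until the
// cached total exceeds MAX_TOTAL_SIZE or the block count reaches HIGH_MARK, then trims
// from the tail down to LOW_MARK blocks (and further while the size bound still fails).
// For hypothetical marks LOW_MARK=2 and HIGH_MARK=4, caching the block that brings the
// count to 4 releases blocks from the tail until only 2 remain.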
222251c0b2f7Stbbdev 
222351c0b2f7Stbbdev template<int LOW_MARK, int HIGH_MARK>
222451c0b2f7Stbbdev LargeMemoryBlock *LocalLOCImpl<LOW_MARK, HIGH_MARK>::get(size_t size)
222551c0b2f7Stbbdev {
222651c0b2f7Stbbdev     LargeMemoryBlock *localHead, *res = NULL;
222751c0b2f7Stbbdev 
222851c0b2f7Stbbdev     if (size > MAX_TOTAL_SIZE)
222951c0b2f7Stbbdev         return NULL;
223051c0b2f7Stbbdev 
223151c0b2f7Stbbdev     // TBB_REVAMP_TODO: review this line
223251c0b2f7Stbbdev     if (!head.load(std::memory_order_acquire) || (localHead = head.exchange(NULL)) == NULL) {
223351c0b2f7Stbbdev         // do not restore totalSize, numOfBlocks and tail at this point,
223451c0b2f7Stbbdev         // as they are used only in put(), where they must be restored
223551c0b2f7Stbbdev         return NULL;
223651c0b2f7Stbbdev     }
223751c0b2f7Stbbdev 
223851c0b2f7Stbbdev     for (LargeMemoryBlock *curr = localHead; curr; curr=curr->next) {
223951c0b2f7Stbbdev         if (curr->unalignedSize == size) {
224051c0b2f7Stbbdev             res = curr;
224151c0b2f7Stbbdev             if (curr->next)
224251c0b2f7Stbbdev                 curr->next->prev = curr->prev;
224351c0b2f7Stbbdev             else
224451c0b2f7Stbbdev                 tail = curr->prev;
224551c0b2f7Stbbdev             if (curr != localHead)
224651c0b2f7Stbbdev                 curr->prev->next = curr->next;
224751c0b2f7Stbbdev             else
224851c0b2f7Stbbdev                 localHead = curr->next;
224951c0b2f7Stbbdev             totalSize -= size;
225051c0b2f7Stbbdev             numOfBlocks--;
225151c0b2f7Stbbdev             break;
225251c0b2f7Stbbdev         }
225351c0b2f7Stbbdev     }
225451c0b2f7Stbbdev 
225551c0b2f7Stbbdev     head.store(localHead, std::memory_order_release);
225651c0b2f7Stbbdev     return res;
225751c0b2f7Stbbdev }
225851c0b2f7Stbbdev 
225951c0b2f7Stbbdev template<int LOW_MARK, int HIGH_MARK>
226051c0b2f7Stbbdev bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::externalCleanup(ExtMemoryPool *extMemPool)
226151c0b2f7Stbbdev {
226251c0b2f7Stbbdev     if (LargeMemoryBlock *localHead = head.exchange(NULL)) {
226351c0b2f7Stbbdev         extMemPool->freeLargeObjectList(localHead);
226451c0b2f7Stbbdev         return true;
226551c0b2f7Stbbdev     }
226651c0b2f7Stbbdev     return false;
226751c0b2f7Stbbdev }
226851c0b2f7Stbbdev 
226951c0b2f7Stbbdev void *MemoryPool::getFromLLOCache(TLSData* tls, size_t size, size_t alignment)
227051c0b2f7Stbbdev {
227151c0b2f7Stbbdev     LargeMemoryBlock *lmb = NULL;
227251c0b2f7Stbbdev 
227351c0b2f7Stbbdev     size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr);
227451c0b2f7Stbbdev     size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+alignment);
227551c0b2f7Stbbdev     if (allocationSize < size) // allocationSize is wrapped around after alignToBin
227651c0b2f7Stbbdev         return NULL;
227751c0b2f7Stbbdev     MALLOC_ASSERT(allocationSize >= alignment, "Overflow must be checked before.");
227851c0b2f7Stbbdev 
227951c0b2f7Stbbdev     if (tls) {
228051c0b2f7Stbbdev         tls->markUsed();
228151c0b2f7Stbbdev         lmb = tls->lloc.get(allocationSize);
228251c0b2f7Stbbdev     }
228351c0b2f7Stbbdev     if (!lmb)
228451c0b2f7Stbbdev         lmb = extMemPool.mallocLargeObject(this, allocationSize);
228551c0b2f7Stbbdev 
228651c0b2f7Stbbdev     if (lmb) {
228751c0b2f7Stbbdev         // When shuffling, we assume that the alignment offset guarantees
228851c0b2f7Stbbdev         // that different cache lines are used
228951c0b2f7Stbbdev         MALLOC_ASSERT(alignment >= estimatedCacheLineSize, ASSERT_TEXT);
229051c0b2f7Stbbdev 
229151c0b2f7Stbbdev         void *alignedArea = (void*)alignUp((uintptr_t)lmb+headersSize, alignment);
229251c0b2f7Stbbdev         uintptr_t alignedRight =
229351c0b2f7Stbbdev             alignDown((uintptr_t)lmb+lmb->unalignedSize - size, alignment);
229451c0b2f7Stbbdev         // Is there room to shuffle the object between cache lines?
229551c0b2f7Stbbdev         // Note that alignedRight and alignedArea are aligned at alignment.
229651c0b2f7Stbbdev         unsigned ptrDelta = alignedRight - (uintptr_t)alignedArea;
229751c0b2f7Stbbdev         if (ptrDelta && tls) { // !tls is cold path
229851c0b2f7Stbbdev             // for the hot path of alignment==estimatedCacheLineSize,
229951c0b2f7Stbbdev             // allow compilers to use shift for division
230051c0b2f7Stbbdev             // (since estimatedCacheLineSize is a power-of-2 constant)
230151c0b2f7Stbbdev             unsigned numOfPossibleOffsets = alignment == estimatedCacheLineSize?
230251c0b2f7Stbbdev                   ptrDelta / estimatedCacheLineSize :
230351c0b2f7Stbbdev                   ptrDelta / alignment;
230451c0b2f7Stbbdev             unsigned myCacheIdx = ++tls->currCacheIdx;
230551c0b2f7Stbbdev             unsigned offset = myCacheIdx % numOfPossibleOffsets;
230651c0b2f7Stbbdev 
230751c0b2f7Stbbdev             // Move object to a cache line with an offset that is different from
230851c0b2f7Stbbdev             // previous allocation. This supposedly allows us to use cache
230951c0b2f7Stbbdev             // associativity more efficiently.
231051c0b2f7Stbbdev             alignedArea = (void*)((uintptr_t)alignedArea + offset*alignment);
231151c0b2f7Stbbdev         }
231251c0b2f7Stbbdev         MALLOC_ASSERT((uintptr_t)lmb+lmb->unalignedSize >=
231351c0b2f7Stbbdev                       (uintptr_t)alignedArea+size, "Object doesn't fit the block.");
231451c0b2f7Stbbdev         LargeObjectHdr *header = (LargeObjectHdr*)alignedArea-1;
231551c0b2f7Stbbdev         header->memoryBlock = lmb;
231651c0b2f7Stbbdev         header->backRefIdx = lmb->backRefIdx;
231751c0b2f7Stbbdev         setBackRef(header->backRefIdx, header);
231851c0b2f7Stbbdev 
231951c0b2f7Stbbdev         lmb->objectSize = size;
232051c0b2f7Stbbdev 
232151c0b2f7Stbbdev         MALLOC_ASSERT( isLargeObject<unknownMem>(alignedArea), ASSERT_TEXT );
232251c0b2f7Stbbdev         MALLOC_ASSERT( isAligned(alignedArea, alignment), ASSERT_TEXT );
232351c0b2f7Stbbdev 
232451c0b2f7Stbbdev         return alignedArea;
232551c0b2f7Stbbdev     }
232651c0b2f7Stbbdev     return NULL;
232751c0b2f7Stbbdev }
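// Illustrative arithmetic, not from the original sources: assume alignment ==
// estimatedCacheLineSize == 64 bytes and ptrDelta == 256 for some request. Then
// numOfPossibleOffsets == 256/64 == 4, and as currCacheIdx advances, successive
// allocations land at offsets 0, 64, 128 or 192 bytes past the minimal aligned
// address, spreading objects over different cache lines.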
232851c0b2f7Stbbdev 
232951c0b2f7Stbbdev void MemoryPool::putToLLOCache(TLSData *tls, void *object)
233051c0b2f7Stbbdev {
233151c0b2f7Stbbdev     LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
233251c0b2f7Stbbdev     // overwrite backRefIdx to simplify double free detection
233351c0b2f7Stbbdev     header->backRefIdx = BackRefIdx();
233451c0b2f7Stbbdev 
233551c0b2f7Stbbdev     if (tls) {
233651c0b2f7Stbbdev         tls->markUsed();
233751c0b2f7Stbbdev         if (tls->lloc.put(header->memoryBlock, &extMemPool))
233851c0b2f7Stbbdev             return;
233951c0b2f7Stbbdev     }
234051c0b2f7Stbbdev     extMemPool.freeLargeObject(header->memoryBlock);
234151c0b2f7Stbbdev }
234251c0b2f7Stbbdev 
234351c0b2f7Stbbdev /*
234451c0b2f7Stbbdev  * All aligned allocations fall into one of the following categories:
234551c0b2f7Stbbdev  * 1. if both request size and alignment are <= maxSegregatedObjectSize,
234651c0b2f7Stbbdev  *       we just align the size up, and request this amount, because for every size
234751c0b2f7Stbbdev  *       aligned to some power of 2, the allocated object is at least that aligned.
234851c0b2f7Stbbdev  * 2. for size<minLargeObjectSize, check whether the already guaranteed fittingAlignment is enough.
234951c0b2f7Stbbdev  * 3. if size+alignment<minLargeObjectSize, we take an object of fittingSizeN and align
235051c0b2f7Stbbdev  *       its address up; given such pointer, scalable_free could find the real object.
235151c0b2f7Stbbdev  *       Wrapping of size+alignment is impossible because maximal allowed
235251c0b2f7Stbbdev  *       alignment plus minLargeObjectSize can't lead to wrapping.
235351c0b2f7Stbbdev  * 4. otherwise, an aligned large object is allocated. (An illustrative mapping of sample requests to these categories is sketched after allocateAligned() below.)
235451c0b2f7Stbbdev  */
235551c0b2f7Stbbdev static void *allocateAligned(MemoryPool *memPool, size_t size, size_t alignment)
235651c0b2f7Stbbdev {
235751c0b2f7Stbbdev     MALLOC_ASSERT( isPowerOfTwo(alignment), ASSERT_TEXT );
235851c0b2f7Stbbdev 
235951c0b2f7Stbbdev     if (!isMallocInitialized())
236051c0b2f7Stbbdev         if (!doInitialization())
236151c0b2f7Stbbdev             return NULL;
236251c0b2f7Stbbdev 
236351c0b2f7Stbbdev     void *result;
236451c0b2f7Stbbdev     if (size<=maxSegregatedObjectSize && alignment<=maxSegregatedObjectSize)
236551c0b2f7Stbbdev         result = internalPoolMalloc(memPool, alignUp(size? size: sizeof(size_t), alignment));
236651c0b2f7Stbbdev     else if (size<minLargeObjectSize) {
236751c0b2f7Stbbdev         if (alignment<=fittingAlignment)
236851c0b2f7Stbbdev             result = internalPoolMalloc(memPool, size);
236951c0b2f7Stbbdev         else if (size+alignment < minLargeObjectSize) {
237051c0b2f7Stbbdev             void *unaligned = internalPoolMalloc(memPool, size+alignment);
237151c0b2f7Stbbdev             if (!unaligned) return NULL;
237251c0b2f7Stbbdev             result = alignUp(unaligned, alignment);
237351c0b2f7Stbbdev         } else
237451c0b2f7Stbbdev             goto LargeObjAlloc;
237551c0b2f7Stbbdev     } else {
237651c0b2f7Stbbdev     LargeObjAlloc:
237751c0b2f7Stbbdev         TLSData *tls = memPool->getTLS(/*create=*/true);
237851c0b2f7Stbbdev         // take into account only alignments higher than the natural one
237951c0b2f7Stbbdev         result =
238051c0b2f7Stbbdev             memPool->getFromLLOCache(tls, size, largeObjectAlignment>alignment?
238151c0b2f7Stbbdev                                                largeObjectAlignment: alignment);
238251c0b2f7Stbbdev     }
238351c0b2f7Stbbdev 
238451c0b2f7Stbbdev     MALLOC_ASSERT( isAligned(result, alignment), ASSERT_TEXT );
238551c0b2f7Stbbdev     return result;
238651c0b2f7Stbbdev }
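// Illustrative mapping of requests to the categories above (not from the original
// sources; the threshold names refer to the constants used in allocateAligned(),
// whose concrete values are not restated here):
//   size <= maxSegregatedObjectSize && alignment <= maxSegregatedObjectSize
//       -> category 1: round the size up to the alignment and allocate normally;
//   size < minLargeObjectSize && alignment <= fittingAlignment
//       -> category 2: the bin's natural alignment already suffices;
//   size + alignment < minLargeObjectSize
//       -> category 3: allocate size+alignment bytes and align the pointer up;
//   anything else
//       -> category 4: allocate an aligned large object via getFromLLOCache().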
238751c0b2f7Stbbdev 
238851c0b2f7Stbbdev static void *reallocAligned(MemoryPool *memPool, void *ptr,
238951c0b2f7Stbbdev                             size_t newSize, size_t alignment = 0)
239051c0b2f7Stbbdev {
239151c0b2f7Stbbdev     void *result;
239251c0b2f7Stbbdev     size_t copySize;
239351c0b2f7Stbbdev 
239451c0b2f7Stbbdev     if (isLargeObject<ourMem>(ptr)) {
239551c0b2f7Stbbdev         LargeMemoryBlock* lmb = ((LargeObjectHdr *)ptr - 1)->memoryBlock;
239651c0b2f7Stbbdev         copySize = lmb->unalignedSize-((uintptr_t)ptr-(uintptr_t)lmb);
239751c0b2f7Stbbdev 
239851c0b2f7Stbbdev         // Apply different strategies if size decreases
239951c0b2f7Stbbdev         if (newSize <= copySize && (0 == alignment || isAligned(ptr, alignment))) {
240051c0b2f7Stbbdev 
240151c0b2f7Stbbdev             // For huge objects (that do not fit in the backend cache), keep the same space unless
240251c0b2f7Stbbdev             // the new size is less than half of the old one
240351c0b2f7Stbbdev             bool isMemoryBlockHuge = copySize > memPool->extMemPool.backend.getMaxBinnedSize();
240451c0b2f7Stbbdev             size_t threshold = isMemoryBlockHuge ? copySize / 2 : 0;
240551c0b2f7Stbbdev             if (newSize > threshold) {
240651c0b2f7Stbbdev                 lmb->objectSize = newSize;
240751c0b2f7Stbbdev                 return ptr;
240851c0b2f7Stbbdev             }
240951c0b2f7Stbbdev             // TODO: For large objects suitable for the backend cache,
241051c0b2f7Stbbdev             // split out the excessive part and put it to the backend.
241151c0b2f7Stbbdev         }
241251c0b2f7Stbbdev         // Reallocate for real
241351c0b2f7Stbbdev         copySize = lmb->objectSize;
241451c0b2f7Stbbdev #if BACKEND_HAS_MREMAP
241551c0b2f7Stbbdev         if (void *r = memPool->extMemPool.remap(ptr, copySize, newSize,
241651c0b2f7Stbbdev                           alignment < largeObjectAlignment ? largeObjectAlignment : alignment))
241751c0b2f7Stbbdev             return r;
241851c0b2f7Stbbdev #endif
241951c0b2f7Stbbdev         result = alignment ? allocateAligned(memPool, newSize, alignment) :
242051c0b2f7Stbbdev             internalPoolMalloc(memPool, newSize);
242151c0b2f7Stbbdev 
242251c0b2f7Stbbdev     } else {
242351c0b2f7Stbbdev         Block* block = (Block *)alignDown(ptr, slabSize);
242451c0b2f7Stbbdev         copySize = block->findObjectSize(ptr);
242551c0b2f7Stbbdev 
242651c0b2f7Stbbdev         // TODO: Move object to another bin if size decreases and the current bin is "empty enough".
242751c0b2f7Stbbdev         // Currently, when the size decreases, the old pointer is returned
242851c0b2f7Stbbdev         if (newSize <= copySize && (0==alignment || isAligned(ptr, alignment))) {
242951c0b2f7Stbbdev             return ptr;
243051c0b2f7Stbbdev         } else {
243151c0b2f7Stbbdev             result = alignment ? allocateAligned(memPool, newSize, alignment) :
243251c0b2f7Stbbdev                 internalPoolMalloc(memPool, newSize);
243351c0b2f7Stbbdev         }
243451c0b2f7Stbbdev     }
243551c0b2f7Stbbdev     if (result) {
243651c0b2f7Stbbdev         memcpy(result, ptr, copySize < newSize ? copySize : newSize);
243751c0b2f7Stbbdev         internalPoolFree(memPool, ptr, 0);
243851c0b2f7Stbbdev     }
243951c0b2f7Stbbdev     return result;
244051c0b2f7Stbbdev }
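// Illustrative arithmetic, not from the original sources (the 100MB figure is purely
// hypothetical): for a huge block with copySize == 100MB the shrink threshold is
// copySize/2 == 50MB, so shrinking to 60MB merely updates lmb->objectSize and returns
// the same pointer, while shrinking to 40MB falls through to a real reallocation and copy.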
244151c0b2f7Stbbdev 
244251c0b2f7Stbbdev /* A predicate checks if an object is properly placed inside its block */
244351c0b2f7Stbbdev inline bool Block::isProperlyPlaced(const void *object) const
244451c0b2f7Stbbdev {
244551c0b2f7Stbbdev     return 0 == ((uintptr_t)this + slabSize - (uintptr_t)object) % objectSize;
244651c0b2f7Stbbdev }
244751c0b2f7Stbbdev 
244851c0b2f7Stbbdev /* Finds the real object inside the block */
244951c0b2f7Stbbdev FreeObject *Block::findAllocatedObject(const void *address) const
245051c0b2f7Stbbdev {
245151c0b2f7Stbbdev     // calculate offset from the end of the block space
245251c0b2f7Stbbdev     uint16_t offset = (uintptr_t)this + slabSize - (uintptr_t)address;
245351c0b2f7Stbbdev     MALLOC_ASSERT( offset<=slabSize-sizeof(Block), ASSERT_TEXT );
245451c0b2f7Stbbdev     // find offset difference from a multiple of allocation size
245551c0b2f7Stbbdev     offset %= objectSize;
245651c0b2f7Stbbdev     // and move the address down to where the real object starts.
245751c0b2f7Stbbdev     return (FreeObject*)((uintptr_t)address - (offset? objectSize-offset: 0));
245851c0b2f7Stbbdev }
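// Illustrative arithmetic, not from the original sources: suppose objectSize == 64 and
// the queried address lies 150 bytes before the end of the slab. Then offset == 150,
// offset % 64 == 22, and the address is moved down by 64 - 22 == 42 bytes, landing on
// the object boundary 192 bytes before the end of the block space.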
245951c0b2f7Stbbdev 
246051c0b2f7Stbbdev /*
246151c0b2f7Stbbdev  * A bad dereference caused by a foreign pointer is possible only here, not earlier in the call chain.
246251c0b2f7Stbbdev  * A separate function isolates the SEH code, as it has a bad influence on compiler optimization.
246351c0b2f7Stbbdev  */
246451c0b2f7Stbbdev static inline BackRefIdx safer_dereference (const BackRefIdx *ptr)
246551c0b2f7Stbbdev {
246651c0b2f7Stbbdev     BackRefIdx id;
246751c0b2f7Stbbdev #if _MSC_VER
246851c0b2f7Stbbdev     __try {
246951c0b2f7Stbbdev #endif
247051c0b2f7Stbbdev         id = *ptr;
247151c0b2f7Stbbdev #if _MSC_VER
247251c0b2f7Stbbdev     } __except( GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION?
247351c0b2f7Stbbdev                 EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ) {
247451c0b2f7Stbbdev         id = BackRefIdx();
247551c0b2f7Stbbdev     }
247651c0b2f7Stbbdev #endif
247751c0b2f7Stbbdev     return id;
247851c0b2f7Stbbdev }
247951c0b2f7Stbbdev 
248051c0b2f7Stbbdev template<MemoryOrigin memOrigin>
248151c0b2f7Stbbdev bool isLargeObject(void *object)
248251c0b2f7Stbbdev {
248351c0b2f7Stbbdev     if (!isAligned(object, largeObjectAlignment))
248451c0b2f7Stbbdev         return false;
248551c0b2f7Stbbdev     LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
248651c0b2f7Stbbdev     BackRefIdx idx = (memOrigin == unknownMem) ?
248751c0b2f7Stbbdev         safer_dereference(&header->backRefIdx) : header->backRefIdx;
248851c0b2f7Stbbdev 
248951c0b2f7Stbbdev     return idx.isLargeObject()
249051c0b2f7Stbbdev         // in valid LargeObjectHdr memoryBlock is not NULL
249151c0b2f7Stbbdev         && header->memoryBlock
249251c0b2f7Stbbdev         // in valid LargeObjectHdr memoryBlock points somewhere before header
249351c0b2f7Stbbdev         // TODO: more strict check
249451c0b2f7Stbbdev         && (uintptr_t)header->memoryBlock < (uintptr_t)header
249551c0b2f7Stbbdev         && getBackRef(idx) == header;
249651c0b2f7Stbbdev }
249751c0b2f7Stbbdev 
249851c0b2f7Stbbdev static inline bool isSmallObject (void *ptr)
249951c0b2f7Stbbdev {
250051c0b2f7Stbbdev     Block* expectedBlock = (Block*)alignDown(ptr, slabSize);
250151c0b2f7Stbbdev     const BackRefIdx* idx = expectedBlock->getBackRefIdx();
250251c0b2f7Stbbdev 
250351c0b2f7Stbbdev     bool isSmall = expectedBlock == getBackRef(safer_dereference(idx));
250451c0b2f7Stbbdev     if (isSmall)
250551c0b2f7Stbbdev         expectedBlock->checkFreePrecond(ptr);
250651c0b2f7Stbbdev     return isSmall;
250751c0b2f7Stbbdev }
250851c0b2f7Stbbdev 
250951c0b2f7Stbbdev /**** Check if an object was allocated by scalable_malloc ****/
251051c0b2f7Stbbdev static inline bool isRecognized (void* ptr)
251151c0b2f7Stbbdev {
251251c0b2f7Stbbdev     return defaultMemPool->extMemPool.backend.ptrCanBeValid(ptr) &&
251351c0b2f7Stbbdev         (isLargeObject<unknownMem>(ptr) || isSmallObject(ptr));
251451c0b2f7Stbbdev }
251551c0b2f7Stbbdev 
251651c0b2f7Stbbdev static inline void freeSmallObject(void *object)
251751c0b2f7Stbbdev {
251851c0b2f7Stbbdev     /* mask low bits to get the block */
251951c0b2f7Stbbdev     Block *block = (Block *)alignDown(object, slabSize);
252051c0b2f7Stbbdev     block->checkFreePrecond(object);
252151c0b2f7Stbbdev 
252251c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION
252351c0b2f7Stbbdev     if (block->isStartupAllocObject()) {
252451c0b2f7Stbbdev         ((StartupBlock *)block)->free(object);
252551c0b2f7Stbbdev         return;
252651c0b2f7Stbbdev     }
252751c0b2f7Stbbdev #endif
252851c0b2f7Stbbdev     if (block->isOwnedByCurrentThread()) {
252951c0b2f7Stbbdev         block->freeOwnObject(object);
253051c0b2f7Stbbdev     } else { /* Slower path to add to the shared list, the allocatedCount is updated by the owner thread in malloc. */
253151c0b2f7Stbbdev         FreeObject *objectToFree = block->findObjectToFree(object);
253251c0b2f7Stbbdev         block->freePublicObject(objectToFree);
253351c0b2f7Stbbdev     }
253451c0b2f7Stbbdev }
253551c0b2f7Stbbdev 
253651c0b2f7Stbbdev static void *internalPoolMalloc(MemoryPool* memPool, size_t size)
253751c0b2f7Stbbdev {
253851c0b2f7Stbbdev     Bin* bin;
253951c0b2f7Stbbdev     Block * mallocBlock;
254051c0b2f7Stbbdev 
254151c0b2f7Stbbdev     if (!memPool) return NULL;
254251c0b2f7Stbbdev 
254351c0b2f7Stbbdev     if (!size) size = sizeof(size_t);
254451c0b2f7Stbbdev 
254551c0b2f7Stbbdev     TLSData *tls = memPool->getTLS(/*create=*/true);
254651c0b2f7Stbbdev 
254751c0b2f7Stbbdev     /* Allocate a large object */
254851c0b2f7Stbbdev     if (size >= minLargeObjectSize)
254951c0b2f7Stbbdev         return memPool->getFromLLOCache(tls, size, largeObjectAlignment);
255051c0b2f7Stbbdev 
255151c0b2f7Stbbdev     if (!tls) return NULL;
255251c0b2f7Stbbdev 
255351c0b2f7Stbbdev     tls->markUsed();
255451c0b2f7Stbbdev     /*
255551c0b2f7Stbbdev      * Get an element in thread-local array corresponding to the given size;
255651c0b2f7Stbbdev      * It keeps ptr to the active block for allocations of this size
255751c0b2f7Stbbdev      */
255851c0b2f7Stbbdev     bin = tls->getAllocationBin(size);
255951c0b2f7Stbbdev     if ( !bin ) return NULL;
256051c0b2f7Stbbdev 
256151c0b2f7Stbbdev     /* Get a block to try to allocate in. */
256251c0b2f7Stbbdev     for( mallocBlock = bin->getActiveBlock(); mallocBlock;
256351c0b2f7Stbbdev          mallocBlock = bin->setPreviousBlockActive() ) // the previous block should be empty enough
256451c0b2f7Stbbdev     {
256551c0b2f7Stbbdev         if( FreeObject *result = mallocBlock->allocate() )
256651c0b2f7Stbbdev             return result;
256751c0b2f7Stbbdev     }
256851c0b2f7Stbbdev 
256951c0b2f7Stbbdev     /*
257051c0b2f7Stbbdev      * else privatize publicly freed objects in some block and allocate from it
257151c0b2f7Stbbdev      */
257251c0b2f7Stbbdev     mallocBlock = bin->getPrivatizedFreeListBlock();
257351c0b2f7Stbbdev     if (mallocBlock) {
257451c0b2f7Stbbdev         MALLOC_ASSERT( mallocBlock->freeListNonNull(), ASSERT_TEXT );
257551c0b2f7Stbbdev         if ( FreeObject *result = mallocBlock->allocateFromFreeList() )
257651c0b2f7Stbbdev             return result;
257751c0b2f7Stbbdev         /* Else something strange happened, need to retry from the beginning; */
257851c0b2f7Stbbdev         TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in public free list; reentering.\n" ));
257951c0b2f7Stbbdev         return internalPoolMalloc(memPool, size);
258051c0b2f7Stbbdev     }
258151c0b2f7Stbbdev 
258251c0b2f7Stbbdev     /*
258351c0b2f7Stbbdev      * no suitable own blocks, try to get a partial block that some other thread has discarded.
258451c0b2f7Stbbdev      */
258551c0b2f7Stbbdev     mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size);
258651c0b2f7Stbbdev     while (mallocBlock) {
258751c0b2f7Stbbdev         bin->pushTLSBin(mallocBlock);
258851c0b2f7Stbbdev         bin->setActiveBlock(mallocBlock); // TODO: move under the below condition?
258951c0b2f7Stbbdev         if( FreeObject *result = mallocBlock->allocate() )
259051c0b2f7Stbbdev             return result;
259151c0b2f7Stbbdev         mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size);
259251c0b2f7Stbbdev     }
259351c0b2f7Stbbdev 
259451c0b2f7Stbbdev     /*
259551c0b2f7Stbbdev      * else try to get a new empty block
259651c0b2f7Stbbdev      */
259751c0b2f7Stbbdev     mallocBlock = memPool->getEmptyBlock(size);
259851c0b2f7Stbbdev     if (mallocBlock) {
259951c0b2f7Stbbdev         bin->pushTLSBin(mallocBlock);
260051c0b2f7Stbbdev         bin->setActiveBlock(mallocBlock);
260151c0b2f7Stbbdev         if( FreeObject *result = mallocBlock->allocate() )
260251c0b2f7Stbbdev             return result;
260351c0b2f7Stbbdev         /* Else something strange happened, need to retry from the beginning; */
260451c0b2f7Stbbdev         TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in empty block; reentering.\n" ));
260551c0b2f7Stbbdev         return internalPoolMalloc(memPool, size);
260651c0b2f7Stbbdev     }
260751c0b2f7Stbbdev     /*
260851c0b2f7Stbbdev      * else nothing works so return NULL
260951c0b2f7Stbbdev      */
261051c0b2f7Stbbdev     TRACEF(( "[ScalableMalloc trace] No memory found, returning NULL.\n" ));
261151c0b2f7Stbbdev     return NULL;
261251c0b2f7Stbbdev }
261351c0b2f7Stbbdev 
261451c0b2f7Stbbdev // When size==0 (i.e. unknown), detect here whether the object is large.
261551c0b2f7Stbbdev // When the size is known and < minLargeObjectSize, we still need to check
261651c0b2f7Stbbdev // if the actual object is large, because large objects might be used
261751c0b2f7Stbbdev // for aligned small allocations.
261851c0b2f7Stbbdev static bool internalPoolFree(MemoryPool *memPool, void *object, size_t size)
261951c0b2f7Stbbdev {
262051c0b2f7Stbbdev     if (!memPool || !object) return false;
262151c0b2f7Stbbdev 
262251c0b2f7Stbbdev     // The library is initialized during an allocation call, so a release while
262351c0b2f7Stbbdev     // not initialized means a foreign object is being released.
262451c0b2f7Stbbdev     MALLOC_ASSERT(isMallocInitialized(), ASSERT_TEXT);
262551c0b2f7Stbbdev     MALLOC_ASSERT(memPool->extMemPool.userPool() || isRecognized(object),
262651c0b2f7Stbbdev                   "Invalid pointer detected while releasing an object.");
262751c0b2f7Stbbdev 
262851c0b2f7Stbbdev     if (size >= minLargeObjectSize || isLargeObject<ourMem>(object))
262951c0b2f7Stbbdev         memPool->putToLLOCache(memPool->getTLS(/*create=*/false), object);
263051c0b2f7Stbbdev     else
263151c0b2f7Stbbdev         freeSmallObject(object);
263251c0b2f7Stbbdev     return true;
263351c0b2f7Stbbdev }
263451c0b2f7Stbbdev 
263551c0b2f7Stbbdev static void *internalMalloc(size_t size)
263651c0b2f7Stbbdev {
263751c0b2f7Stbbdev     if (!size) size = sizeof(size_t);
263851c0b2f7Stbbdev 
263951c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION
264051c0b2f7Stbbdev     if (RecursiveMallocCallProtector::sameThreadActive())
264151c0b2f7Stbbdev         return size<minLargeObjectSize? StartupBlock::allocate(size) :
264251c0b2f7Stbbdev             // nested allocation, so skip tls
264351c0b2f7Stbbdev             (FreeObject*)defaultMemPool->getFromLLOCache(NULL, size, slabSize);
264451c0b2f7Stbbdev #endif
264551c0b2f7Stbbdev 
264651c0b2f7Stbbdev     if (!isMallocInitialized())
264751c0b2f7Stbbdev         if (!doInitialization())
264851c0b2f7Stbbdev             return NULL;
264951c0b2f7Stbbdev     return internalPoolMalloc(defaultMemPool, size);
265051c0b2f7Stbbdev }
265151c0b2f7Stbbdev 
265251c0b2f7Stbbdev static void internalFree(void *object)
265351c0b2f7Stbbdev {
265451c0b2f7Stbbdev     internalPoolFree(defaultMemPool, object, 0);
265551c0b2f7Stbbdev }
265651c0b2f7Stbbdev 
265751c0b2f7Stbbdev static size_t internalMsize(void* ptr)
265851c0b2f7Stbbdev {
265951c0b2f7Stbbdev     MALLOC_ASSERT(ptr, "Invalid pointer passed to internalMsize");
266051c0b2f7Stbbdev     if (isLargeObject<ourMem>(ptr)) {
266151c0b2f7Stbbdev         // TODO: return the maximum memory size, that can be written to this object
266251c0b2f7Stbbdev         LargeMemoryBlock* lmb = ((LargeObjectHdr*)ptr - 1)->memoryBlock;
266351c0b2f7Stbbdev         return lmb->objectSize;
266451c0b2f7Stbbdev     } else {
266551c0b2f7Stbbdev         Block *block = (Block*)alignDown(ptr, slabSize);
266651c0b2f7Stbbdev         return block->findObjectSize(ptr);
266751c0b2f7Stbbdev     }
266851c0b2f7Stbbdev }
266951c0b2f7Stbbdev 
267051c0b2f7Stbbdev } // namespace internal
267151c0b2f7Stbbdev 
267251c0b2f7Stbbdev using namespace rml::internal;
267351c0b2f7Stbbdev 
267451c0b2f7Stbbdev // legacy entry point saved for compatibility with binaries compiled
267551c0b2f7Stbbdev // with pre-6003 versions of TBB
267651c0b2f7Stbbdev rml::MemoryPool *pool_create(intptr_t pool_id, const MemPoolPolicy *policy)
267751c0b2f7Stbbdev {
267851c0b2f7Stbbdev     rml::MemoryPool *pool;
267951c0b2f7Stbbdev     MemPoolPolicy pol(policy->pAlloc, policy->pFree, policy->granularity);
268051c0b2f7Stbbdev 
268151c0b2f7Stbbdev     pool_create_v1(pool_id, &pol, &pool);
268251c0b2f7Stbbdev     return pool;
268351c0b2f7Stbbdev }
268451c0b2f7Stbbdev 
268551c0b2f7Stbbdev rml::MemPoolError pool_create_v1(intptr_t pool_id, const MemPoolPolicy *policy,
268651c0b2f7Stbbdev                                  rml::MemoryPool **pool)
268751c0b2f7Stbbdev {
268851c0b2f7Stbbdev     if ( !policy->pAlloc || policy->version<MemPoolPolicy::TBBMALLOC_POOL_VERSION
268951c0b2f7Stbbdev          // empty pFree allowed only for fixed pools
269051c0b2f7Stbbdev          || !(policy->fixedPool || policy->pFree)) {
269151c0b2f7Stbbdev         *pool = NULL;
269251c0b2f7Stbbdev         return INVALID_POLICY;
269351c0b2f7Stbbdev     }
269451c0b2f7Stbbdev     if ( policy->version>MemPoolPolicy::TBBMALLOC_POOL_VERSION // future versions are not supported
269551c0b2f7Stbbdev          // new flags can be added in place of reserved, but default
269651c0b2f7Stbbdev          // behaviour must be supported by this version
269751c0b2f7Stbbdev          || policy->reserved ) {
269851c0b2f7Stbbdev         *pool = NULL;
269951c0b2f7Stbbdev         return UNSUPPORTED_POLICY;
270051c0b2f7Stbbdev     }
270151c0b2f7Stbbdev     if (!isMallocInitialized())
270251c0b2f7Stbbdev         if (!doInitialization()) {
270351c0b2f7Stbbdev             *pool = NULL;
270451c0b2f7Stbbdev             return NO_MEMORY;
270551c0b2f7Stbbdev         }
270651c0b2f7Stbbdev     rml::internal::MemoryPool *memPool =
270751c0b2f7Stbbdev         (rml::internal::MemoryPool*)internalMalloc(sizeof(rml::internal::MemoryPool));
270851c0b2f7Stbbdev     if (!memPool) {
270951c0b2f7Stbbdev         *pool = NULL;
271051c0b2f7Stbbdev         return NO_MEMORY;
271151c0b2f7Stbbdev     }
271251c0b2f7Stbbdev     memset(memPool, 0, sizeof(rml::internal::MemoryPool));
271351c0b2f7Stbbdev     if (!memPool->init(pool_id, policy)) {
271451c0b2f7Stbbdev         internalFree(memPool);
271551c0b2f7Stbbdev         *pool = NULL;
271651c0b2f7Stbbdev         return NO_MEMORY;
271751c0b2f7Stbbdev     }
271851c0b2f7Stbbdev 
271951c0b2f7Stbbdev     *pool = (rml::MemoryPool*)memPool;
272051c0b2f7Stbbdev     return POOL_OK;
272151c0b2f7Stbbdev }
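// Minimal usage sketch for the pool API above; not part of the original sources and kept
// disabled. It assumes the rawAllocType/rawFreeType callback signatures declared in the
// public memory-pool header and <stdlib.h> for malloc/free; myRawAlloc, myRawFree and
// poolUsageSketch are hypothetical names.
#if 0
static void *myRawAlloc(intptr_t /*pool_id*/, size_t &bytes) {
    return malloc(bytes);                   // hand raw memory to the pool
}
static int myRawFree(intptr_t /*pool_id*/, void *raw_ptr, size_t /*raw_bytes*/) {
    free(raw_ptr);                          // give raw memory back to the system
    return 0;
}
static void poolUsageSketch() {
    MemPoolPolicy policy(myRawAlloc, myRawFree, /*granularity=*/0);
    rml::MemoryPool *pool = NULL;
    if (pool_create_v1(/*pool_id=*/0, &policy, &pool) == POOL_OK) {
        void *p = pool_malloc(pool, 1024);  // allocate from the user pool
        pool_free(pool, p);                 // release back to the same pool
        pool_destroy(pool);                 // tear the pool down
    }
}
#endif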
272251c0b2f7Stbbdev 
272351c0b2f7Stbbdev bool pool_destroy(rml::MemoryPool* memPool)
272451c0b2f7Stbbdev {
272551c0b2f7Stbbdev     if (!memPool) return false;
272651c0b2f7Stbbdev     bool ret = ((rml::internal::MemoryPool*)memPool)->destroy();
272751c0b2f7Stbbdev     internalFree(memPool);
272851c0b2f7Stbbdev 
272951c0b2f7Stbbdev     return ret;
273051c0b2f7Stbbdev }
273151c0b2f7Stbbdev 
273251c0b2f7Stbbdev bool pool_reset(rml::MemoryPool* memPool)
273351c0b2f7Stbbdev {
273451c0b2f7Stbbdev     if (!memPool) return false;
273551c0b2f7Stbbdev 
273651c0b2f7Stbbdev     return ((rml::internal::MemoryPool*)memPool)->reset();
273751c0b2f7Stbbdev }
273851c0b2f7Stbbdev 
273951c0b2f7Stbbdev void *pool_malloc(rml::MemoryPool* mPool, size_t size)
274051c0b2f7Stbbdev {
274151c0b2f7Stbbdev     return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size);
274251c0b2f7Stbbdev }
274351c0b2f7Stbbdev 
274451c0b2f7Stbbdev void *pool_realloc(rml::MemoryPool* mPool, void *object, size_t size)
274551c0b2f7Stbbdev {
274651c0b2f7Stbbdev     if (!object)
274751c0b2f7Stbbdev         return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size);
274851c0b2f7Stbbdev     if (!size) {
274951c0b2f7Stbbdev         internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0);
275051c0b2f7Stbbdev         return NULL;
275151c0b2f7Stbbdev     }
275251c0b2f7Stbbdev     return reallocAligned((rml::internal::MemoryPool*)mPool, object, size, 0);
275351c0b2f7Stbbdev }
275451c0b2f7Stbbdev 
275551c0b2f7Stbbdev void *pool_aligned_malloc(rml::MemoryPool* mPool, size_t size, size_t alignment)
275651c0b2f7Stbbdev {
275751c0b2f7Stbbdev     if (!isPowerOfTwo(alignment) || 0==size)
275851c0b2f7Stbbdev         return NULL;
275951c0b2f7Stbbdev 
276051c0b2f7Stbbdev     return allocateAligned((rml::internal::MemoryPool*)mPool, size, alignment);
276151c0b2f7Stbbdev }
276251c0b2f7Stbbdev 
276351c0b2f7Stbbdev void *pool_aligned_realloc(rml::MemoryPool* memPool, void *ptr, size_t size, size_t alignment)
276451c0b2f7Stbbdev {
276551c0b2f7Stbbdev     if (!isPowerOfTwo(alignment))
276651c0b2f7Stbbdev         return NULL;
276751c0b2f7Stbbdev     rml::internal::MemoryPool *mPool = (rml::internal::MemoryPool*)memPool;
276851c0b2f7Stbbdev     void *tmp;
276951c0b2f7Stbbdev 
277051c0b2f7Stbbdev     if (!ptr)
277151c0b2f7Stbbdev         tmp = allocateAligned(mPool, size, alignment);
277251c0b2f7Stbbdev     else if (!size) {
277351c0b2f7Stbbdev         internalPoolFree(mPool, ptr, 0);
277451c0b2f7Stbbdev         return NULL;
277551c0b2f7Stbbdev     } else
277651c0b2f7Stbbdev         tmp = reallocAligned(mPool, ptr, size, alignment);
277751c0b2f7Stbbdev 
277851c0b2f7Stbbdev     return tmp;
277951c0b2f7Stbbdev }
278051c0b2f7Stbbdev 
278151c0b2f7Stbbdev bool pool_free(rml::MemoryPool *mPool, void *object)
278251c0b2f7Stbbdev {
278351c0b2f7Stbbdev     return internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0);
278451c0b2f7Stbbdev }
278551c0b2f7Stbbdev 
278651c0b2f7Stbbdev rml::MemoryPool *pool_identify(void *object)
278751c0b2f7Stbbdev {
278851c0b2f7Stbbdev     rml::internal::MemoryPool *pool;
278951c0b2f7Stbbdev     if (isLargeObject<ourMem>(object)) {
279051c0b2f7Stbbdev         LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
279151c0b2f7Stbbdev         pool = header->memoryBlock->pool;
279251c0b2f7Stbbdev     } else {
279351c0b2f7Stbbdev         Block *block = (Block*)alignDown(object, slabSize);
279451c0b2f7Stbbdev         pool = block->getMemPool();
279551c0b2f7Stbbdev     }
279651c0b2f7Stbbdev     // do not return defaultMemPool, as it can't be used in pool_free() etc
279751c0b2f7Stbbdev     __TBB_ASSERT_RELEASE(pool!=defaultMemPool,
279851c0b2f7Stbbdev         "rml::pool_identify() can't be used for scalable_malloc() etc results.");
279951c0b2f7Stbbdev     return (rml::MemoryPool*)pool;
280051c0b2f7Stbbdev }
280151c0b2f7Stbbdev 
280251c0b2f7Stbbdev size_t pool_msize(rml::MemoryPool *mPool, void* object)
280351c0b2f7Stbbdev {
280451c0b2f7Stbbdev     if (object) {
280551c0b2f7Stbbdev         // No assert for object recognition, because objects allocated from a non-default
280651c0b2f7Stbbdev         // memory pool do not participate in range checking and do not have valid back references for
280751c0b2f7Stbbdev         // small objects. Instead, check that the object belongs to the specified memory pool.
280851c0b2f7Stbbdev         MALLOC_ASSERT_EX(mPool == pool_identify(object), "Object does not belong to the specified pool");
280951c0b2f7Stbbdev         return internalMsize(object);
281051c0b2f7Stbbdev     }
281151c0b2f7Stbbdev     errno = EINVAL;
281251c0b2f7Stbbdev     // Unlike _msize, return 0 in case of parameter error.
281351c0b2f7Stbbdev     // Returning size_t(-1) would be more likely to cause trouble.
281451c0b2f7Stbbdev     return 0;
281551c0b2f7Stbbdev }
281651c0b2f7Stbbdev 
281751c0b2f7Stbbdev } // namespace rml
281851c0b2f7Stbbdev 
281951c0b2f7Stbbdev using namespace rml::internal;
282051c0b2f7Stbbdev 
282151c0b2f7Stbbdev #if MALLOC_TRACE
282251c0b2f7Stbbdev static unsigned int threadGoingDownCount = 0;
282351c0b2f7Stbbdev #endif
282451c0b2f7Stbbdev 
282551c0b2f7Stbbdev /*
282651c0b2f7Stbbdev  * When a thread is shutting down, this routine should be called to remove all the thread ids
282751c0b2f7Stbbdev  * from the malloc blocks and replace them with a NULL thread id.
282851c0b2f7Stbbdev  *
282951c0b2f7Stbbdev  * For pthreads, the function is set as a callback in pthread_key_create for TLS bin.
283051c0b2f7Stbbdev  * It will be automatically called at thread exit with the key value as the argument,
283151c0b2f7Stbbdev  * unless that value is NULL.
283251c0b2f7Stbbdev  * For Windows, it is called from DllMain( DLL_THREAD_DETACH ).
283351c0b2f7Stbbdev  *
283451c0b2f7Stbbdev  * However, neither of the above is called for the main process thread, so the routine
283551c0b2f7Stbbdev  * also needs to be called during the process shutdown.
283651c0b2f7Stbbdev  *
283751c0b2f7Stbbdev */
283851c0b2f7Stbbdev // TODO: Consider making this function part of class MemoryPool.
283951c0b2f7Stbbdev void doThreadShutdownNotification(TLSData* tls, bool main_thread)
284051c0b2f7Stbbdev {
284151c0b2f7Stbbdev     TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return start %d\n",
284251c0b2f7Stbbdev              getThreadId(),  threadGoingDownCount++ ));
284351c0b2f7Stbbdev 
284451c0b2f7Stbbdev #if USE_PTHREAD
284551c0b2f7Stbbdev     if (tls) {
284651c0b2f7Stbbdev         if (!shutdownSync.threadDtorStart()) return;
284751c0b2f7Stbbdev         tls->getMemPool()->onThreadShutdown(tls);
284851c0b2f7Stbbdev         shutdownSync.threadDtorDone();
284951c0b2f7Stbbdev     } else
285051c0b2f7Stbbdev #endif
285151c0b2f7Stbbdev     {
285251c0b2f7Stbbdev         suppress_unused_warning(tls); // not used on Windows
285351c0b2f7Stbbdev         // The default pool is safe to use at this point:
285451c0b2f7Stbbdev         //   on Linux, only the main thread can go here before destroying defaultMemPool;
285551c0b2f7Stbbdev         //   on Windows, shutdown is synchronized via loader lock and isMallocInitialized().
285651c0b2f7Stbbdev         // See also __TBB_mallocProcessShutdownNotification()
285751c0b2f7Stbbdev         defaultMemPool->onThreadShutdown(defaultMemPool->getTLS(/*create=*/false));
285851c0b2f7Stbbdev         // Take lock to walk through other pools; but waiting might be dangerous at this point
285951c0b2f7Stbbdev         // (e.g. on Windows the main thread might deadlock)
286051c0b2f7Stbbdev         bool locked;
286151c0b2f7Stbbdev         MallocMutex::scoped_lock lock(MemoryPool::memPoolListLock, /*wait=*/!main_thread, &locked);
286251c0b2f7Stbbdev         if (locked) { // the list is safe to process
286351c0b2f7Stbbdev             for (MemoryPool *memPool = defaultMemPool->next; memPool; memPool = memPool->next)
286451c0b2f7Stbbdev                 memPool->onThreadShutdown(memPool->getTLS(/*create=*/false));
286551c0b2f7Stbbdev         }
286651c0b2f7Stbbdev     }
286751c0b2f7Stbbdev 
286851c0b2f7Stbbdev     TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return end\n", getThreadId() ));
286951c0b2f7Stbbdev }
287051c0b2f7Stbbdev 
287151c0b2f7Stbbdev #if USE_PTHREAD
287251c0b2f7Stbbdev void mallocThreadShutdownNotification(void* arg)
287351c0b2f7Stbbdev {
287451c0b2f7Stbbdev     // The routine is called for each pool (as TLS dtor) on each thread, except for the main thread
287551c0b2f7Stbbdev     if (!isMallocInitialized()) return;
287651c0b2f7Stbbdev     doThreadShutdownNotification((TLSData*)arg, false);
287751c0b2f7Stbbdev }
287851c0b2f7Stbbdev #else
287951c0b2f7Stbbdev extern "C" void __TBB_mallocThreadShutdownNotification()
288051c0b2f7Stbbdev {
288151c0b2f7Stbbdev     // The routine is called once per thread on Windows
288251c0b2f7Stbbdev     if (!isMallocInitialized()) return;
288351c0b2f7Stbbdev     doThreadShutdownNotification(NULL, false);
288451c0b2f7Stbbdev }
288551c0b2f7Stbbdev #endif
288651c0b2f7Stbbdev 
288751c0b2f7Stbbdev extern "C" void __TBB_mallocProcessShutdownNotification(bool windows_process_dying)
288851c0b2f7Stbbdev {
288951c0b2f7Stbbdev     if (!isMallocInitialized()) return;
289051c0b2f7Stbbdev 
289151c0b2f7Stbbdev     // Don't clean allocator internals if the entire process is exiting
289251c0b2f7Stbbdev     if (!windows_process_dying) {
289351c0b2f7Stbbdev         doThreadShutdownNotification(NULL, /*main_thread=*/true);
289451c0b2f7Stbbdev     }
289551c0b2f7Stbbdev #if  __TBB_MALLOC_LOCACHE_STAT
289651c0b2f7Stbbdev     printf("cache hit ratio %f, size hit %f\n",
289751c0b2f7Stbbdev            1.*cacheHits/mallocCalls, 1.*memHitKB/memAllocKB);
289851c0b2f7Stbbdev     defaultMemPool->extMemPool.loc.reportStat(stdout);
289951c0b2f7Stbbdev #endif
290051c0b2f7Stbbdev 
290151c0b2f7Stbbdev     shutdownSync.processExit();
290251c0b2f7Stbbdev #if __TBB_SOURCE_DIRECTLY_INCLUDED
290351c0b2f7Stbbdev /* Pthread keys must be deleted as soon as possible so that the key dtor is not called
290451c0b2f7Stbbdev    on thread termination, when the tbbmalloc code may already be unloaded.
290551c0b2f7Stbbdev */
290651c0b2f7Stbbdev     defaultMemPool->destroy();
290751c0b2f7Stbbdev     destroyBackRefMaster(&defaultMemPool->extMemPool.backend);
290851c0b2f7Stbbdev     ThreadId::destroy();      // Delete key for thread id
290951c0b2f7Stbbdev     hugePages.reset();
291051c0b2f7Stbbdev     // a completely new malloc initialization is possible after this point
291151c0b2f7Stbbdev     mallocInitialized.store(0, std::memory_order_release);
291251c0b2f7Stbbdev #elif __TBB_USE_DLOPEN_REENTRANCY_WORKAROUND
291351c0b2f7Stbbdev /* In most cases we prevent unloading tbbmalloc, and don't clean up memory
291451c0b2f7Stbbdev    on process shutdown. When impossible to prevent, library unload results
291551c0b2f7Stbbdev    on process shutdown. When unloading cannot be prevented, library unload results
291651c0b2f7Stbbdev    at that point (we can't release all memory because it's possible that
291751c0b2f7Stbbdev    it will be accessed after this point).
291851c0b2f7Stbbdev    TODO: better support systems where we can't prevent unloading by removing
291951c0b2f7Stbbdev    pthread destructors and releasing caches.
292051c0b2f7Stbbdev  */
292151c0b2f7Stbbdev     defaultMemPool->extMemPool.hardCachesCleanup();
292251c0b2f7Stbbdev #endif // __TBB_SOURCE_DIRECTLY_INCLUDED
292351c0b2f7Stbbdev 
292451c0b2f7Stbbdev #if COLLECT_STATISTICS
292551c0b2f7Stbbdev     unsigned nThreads = ThreadId::getMaxThreadId();
292651c0b2f7Stbbdev     for( int i=1; i<=nThreads && i<MAX_THREADS; ++i )
292751c0b2f7Stbbdev         STAT_print(i);
292851c0b2f7Stbbdev #endif
292951c0b2f7Stbbdev     if (!usedBySrcIncluded)
293051c0b2f7Stbbdev         MALLOC_ITT_FINI_ITTLIB();
293151c0b2f7Stbbdev }
293251c0b2f7Stbbdev 
293351c0b2f7Stbbdev extern "C" void * scalable_malloc(size_t size)
293451c0b2f7Stbbdev {
293551c0b2f7Stbbdev     void *ptr = internalMalloc(size);
293651c0b2f7Stbbdev     if (!ptr) errno = ENOMEM;
293751c0b2f7Stbbdev     return ptr;
293851c0b2f7Stbbdev }
293951c0b2f7Stbbdev 
294051c0b2f7Stbbdev extern "C" void scalable_free(void *object)
294151c0b2f7Stbbdev {
294251c0b2f7Stbbdev     internalFree(object);
294351c0b2f7Stbbdev }
294451c0b2f7Stbbdev 
294551c0b2f7Stbbdev #if MALLOC_ZONE_OVERLOAD_ENABLED
294651c0b2f7Stbbdev extern "C" void __TBB_malloc_free_definite_size(void *object, size_t size)
294751c0b2f7Stbbdev {
294851c0b2f7Stbbdev     internalPoolFree(defaultMemPool, object, size);
294951c0b2f7Stbbdev }
295051c0b2f7Stbbdev #endif
295151c0b2f7Stbbdev 
295251c0b2f7Stbbdev /*
295351c0b2f7Stbbdev  * A variant that provides additional memory safety, by checking whether the given address
295451c0b2f7Stbbdev  * was obtained with this allocator, and if not redirecting to the provided alternative call.
295551c0b2f7Stbbdev  */
295651c0b2f7Stbbdev extern "C" void __TBB_malloc_safer_free(void *object, void (*original_free)(void*))
295751c0b2f7Stbbdev {
295851c0b2f7Stbbdev     if (!object)
295951c0b2f7Stbbdev         return;
296051c0b2f7Stbbdev 
296151c0b2f7Stbbdev     // tbbmalloc can have allocated the object only if tbbmalloc has been initialized
296251c0b2f7Stbbdev     if (mallocInitialized.load(std::memory_order_acquire) && defaultMemPool->extMemPool.backend.ptrCanBeValid(object)) {
296351c0b2f7Stbbdev         if (isLargeObject<unknownMem>(object)) {
296451c0b2f7Stbbdev             // Must check for a large object first, because the small-object check touches 4 pages to the left,
296551c0b2f7Stbbdev             // and those pages can be inaccessible
296651c0b2f7Stbbdev             TLSData *tls = defaultMemPool->getTLS(/*create=*/false);
296751c0b2f7Stbbdev 
296851c0b2f7Stbbdev             defaultMemPool->putToLLOCache(tls, object);
296951c0b2f7Stbbdev             return;
297051c0b2f7Stbbdev         } else if (isSmallObject(object)) {
297151c0b2f7Stbbdev             freeSmallObject(object);
297251c0b2f7Stbbdev             return;
297351c0b2f7Stbbdev         }
297451c0b2f7Stbbdev     }
297551c0b2f7Stbbdev     if (original_free)
297651c0b2f7Stbbdev         original_free(object);
297751c0b2f7Stbbdev }
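// Illustrative use from a hypothetical free() interception layer; not part of the
// original sources and kept disabled. interceptedFree and the saved original_free_ptr
// are assumptions made for the sake of the example.
#if 0
static void (*original_free_ptr)(void*);    // the process's real free(), captured elsewhere

static void interceptedFree(void *ptr) {
    // Objects owned by tbbmalloc are released by tbbmalloc; anything else is
    // forwarded to the original free() implementation.
    __TBB_malloc_safer_free(ptr, original_free_ptr);
}
#endif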
297851c0b2f7Stbbdev 
297951c0b2f7Stbbdev /********* End the free code        *************/
298051c0b2f7Stbbdev 
298151c0b2f7Stbbdev /********* Code for scalable_realloc       ***********/
298251c0b2f7Stbbdev 
298351c0b2f7Stbbdev /*
298451c0b2f7Stbbdev  * From K&R
298551c0b2f7Stbbdev  * "realloc changes the size of the object pointed to by p to size. The contents will
298651c0b2f7Stbbdev  * be unchanged up to the minimum of the old and the new sizes. If the new size is larger,
298751c0b2f7Stbbdev  * the new space is uninitialized. realloc returns a pointer to the new space, or
298851c0b2f7Stbbdev  * NULL if the request cannot be satisfied, in which case *p is unchanged."
298951c0b2f7Stbbdev  *
299051c0b2f7Stbbdev  */
299151c0b2f7Stbbdev extern "C" void* scalable_realloc(void* ptr, size_t size)
299251c0b2f7Stbbdev {
299351c0b2f7Stbbdev     void *tmp;
299451c0b2f7Stbbdev 
299551c0b2f7Stbbdev     if (!ptr)
299651c0b2f7Stbbdev         tmp = internalMalloc(size);
299751c0b2f7Stbbdev     else if (!size) {
299851c0b2f7Stbbdev         internalFree(ptr);
299951c0b2f7Stbbdev         return NULL;
300051c0b2f7Stbbdev     } else
300151c0b2f7Stbbdev         tmp = reallocAligned(defaultMemPool, ptr, size, 0);
300251c0b2f7Stbbdev 
300351c0b2f7Stbbdev     if (!tmp) errno = ENOMEM;
300451c0b2f7Stbbdev     return tmp;
300551c0b2f7Stbbdev }
300651c0b2f7Stbbdev 
300751c0b2f7Stbbdev /*
300851c0b2f7Stbbdev  * A variant that provides additional memory safety, by checking whether the given address
300951c0b2f7Stbbdev  * was obtained with this allocator, and if not redirecting to the provided alternative call.
301051c0b2f7Stbbdev  */
301151c0b2f7Stbbdev extern "C" void* __TBB_malloc_safer_realloc(void* ptr, size_t sz, void* original_realloc)
301251c0b2f7Stbbdev {
301351c0b2f7Stbbdev     void *tmp; // TODO: fix warnings about uninitialized use of tmp
301451c0b2f7Stbbdev 
301551c0b2f7Stbbdev     if (!ptr) {
301651c0b2f7Stbbdev         tmp = internalMalloc(sz);
301751c0b2f7Stbbdev     } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) {
301851c0b2f7Stbbdev         if (!sz) {
301951c0b2f7Stbbdev             internalFree(ptr);
302051c0b2f7Stbbdev             return NULL;
302151c0b2f7Stbbdev         } else {
302251c0b2f7Stbbdev             tmp = reallocAligned(defaultMemPool, ptr, sz, 0);
302351c0b2f7Stbbdev         }
302451c0b2f7Stbbdev     }
302551c0b2f7Stbbdev #if USE_WINTHREAD
302651c0b2f7Stbbdev     else if (original_realloc && sz) {
302751c0b2f7Stbbdev         orig_ptrs *original_ptrs = static_cast<orig_ptrs*>(original_realloc);
302851c0b2f7Stbbdev         if ( original_ptrs->msize ){
302951c0b2f7Stbbdev             size_t oldSize = original_ptrs->msize(ptr);
303051c0b2f7Stbbdev             tmp = internalMalloc(sz);
303151c0b2f7Stbbdev             if (tmp) {
303251c0b2f7Stbbdev                 memcpy(tmp, ptr, sz<oldSize? sz : oldSize);
303351c0b2f7Stbbdev                 if ( original_ptrs->free ){
303451c0b2f7Stbbdev                     original_ptrs->free( ptr );
303551c0b2f7Stbbdev                 }
303651c0b2f7Stbbdev             }
303751c0b2f7Stbbdev         } else
303851c0b2f7Stbbdev             tmp = NULL;
303951c0b2f7Stbbdev     }
304051c0b2f7Stbbdev #else
304151c0b2f7Stbbdev     else if (original_realloc) {
304251c0b2f7Stbbdev         typedef void* (*realloc_ptr_t)(void*,size_t);
304351c0b2f7Stbbdev         realloc_ptr_t original_realloc_ptr;
304451c0b2f7Stbbdev         (void *&)original_realloc_ptr = original_realloc;
304551c0b2f7Stbbdev         tmp = original_realloc_ptr(ptr,sz);
304651c0b2f7Stbbdev     }
304751c0b2f7Stbbdev #endif
304851c0b2f7Stbbdev     else tmp = NULL;
304951c0b2f7Stbbdev 
305051c0b2f7Stbbdev     if (!tmp) errno = ENOMEM;
305151c0b2f7Stbbdev     return tmp;
305251c0b2f7Stbbdev }
305351c0b2f7Stbbdev 
305451c0b2f7Stbbdev /********* End code for scalable_realloc   ***********/
305551c0b2f7Stbbdev 
305651c0b2f7Stbbdev /********* Code for scalable_calloc   ***********/
305751c0b2f7Stbbdev 
305851c0b2f7Stbbdev /*
305951c0b2f7Stbbdev  * From K&R
306051c0b2f7Stbbdev  * calloc returns a pointer to space for an array of nobj objects,
306151c0b2f7Stbbdev  * each of size size, or NULL if the request cannot be satisfied.
306251c0b2f7Stbbdev  * The space is initialized to zero bytes.
306351c0b2f7Stbbdev  *
306451c0b2f7Stbbdev  */
306551c0b2f7Stbbdev 
306651c0b2f7Stbbdev extern "C" void * scalable_calloc(size_t nobj, size_t size)
306751c0b2f7Stbbdev {
306851c0b2f7Stbbdev     // it is the square root of (maximal size_t value + 1)
306951c0b2f7Stbbdev     const size_t mult_not_overflow = size_t(1) << (sizeof(size_t)*CHAR_BIT/2);
307051c0b2f7Stbbdev     const size_t arraySize = nobj * size;
307151c0b2f7Stbbdev 
307251c0b2f7Stbbdev     // check for overflow during multiplication:
307351c0b2f7Stbbdev     if (nobj>=mult_not_overflow || size>=mult_not_overflow) // 1) heuristic check
307451c0b2f7Stbbdev         if (nobj && arraySize / nobj != size) {             // 2) exact check
307551c0b2f7Stbbdev             errno = ENOMEM;
307651c0b2f7Stbbdev             return NULL;
307751c0b2f7Stbbdev         }
307851c0b2f7Stbbdev     void* result = internalMalloc(arraySize);
307951c0b2f7Stbbdev     if (result)
308051c0b2f7Stbbdev         memset(result, 0, arraySize);
308151c0b2f7Stbbdev     else
308251c0b2f7Stbbdev         errno = ENOMEM;
308351c0b2f7Stbbdev     return result;
308451c0b2f7Stbbdev }
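// Illustrative arithmetic, not from the original sources: on a 64-bit target
// mult_not_overflow == 2^32, so any pair with both factors below 2^32 cannot overflow
// and skips the exact check. For nobj == 2^33 and size == 2^33 the heuristic triggers,
// the wrapped product arraySize == 0, and arraySize / nobj != size, so the call fails
// with ENOMEM instead of allocating a tiny buffer.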
308551c0b2f7Stbbdev 
308651c0b2f7Stbbdev /********* End code for scalable_calloc   ***********/
308751c0b2f7Stbbdev 
308851c0b2f7Stbbdev /********* Code for aligned allocation API **********/
308951c0b2f7Stbbdev 
309051c0b2f7Stbbdev extern "C" int scalable_posix_memalign(void **memptr, size_t alignment, size_t size)
309151c0b2f7Stbbdev {
309251c0b2f7Stbbdev     if ( !isPowerOfTwoAtLeast(alignment, sizeof(void*)) )
309351c0b2f7Stbbdev         return EINVAL;
309451c0b2f7Stbbdev     void *result = allocateAligned(defaultMemPool, size, alignment);
309551c0b2f7Stbbdev     if (!result)
309651c0b2f7Stbbdev         return ENOMEM;
309751c0b2f7Stbbdev     *memptr = result;
309851c0b2f7Stbbdev     return 0;
309951c0b2f7Stbbdev }
310051c0b2f7Stbbdev 
310151c0b2f7Stbbdev extern "C" void * scalable_aligned_malloc(size_t size, size_t alignment)
310251c0b2f7Stbbdev {
310351c0b2f7Stbbdev     if (!isPowerOfTwo(alignment) || 0==size) {
310451c0b2f7Stbbdev         errno = EINVAL;
310551c0b2f7Stbbdev         return NULL;
310651c0b2f7Stbbdev     }
310751c0b2f7Stbbdev     void *tmp = allocateAligned(defaultMemPool, size, alignment);
310851c0b2f7Stbbdev     if (!tmp) errno = ENOMEM;
310951c0b2f7Stbbdev     return tmp;
311051c0b2f7Stbbdev }
311151c0b2f7Stbbdev 
311251c0b2f7Stbbdev extern "C" void * scalable_aligned_realloc(void *ptr, size_t size, size_t alignment)
311351c0b2f7Stbbdev {
311451c0b2f7Stbbdev     if (!isPowerOfTwo(alignment)) {
311551c0b2f7Stbbdev         errno = EINVAL;
311651c0b2f7Stbbdev         return NULL;
311751c0b2f7Stbbdev     }
311851c0b2f7Stbbdev     void *tmp;
311951c0b2f7Stbbdev 
312051c0b2f7Stbbdev     if (!ptr)
312151c0b2f7Stbbdev         tmp = allocateAligned(defaultMemPool, size, alignment);
312251c0b2f7Stbbdev     else if (!size) {
312351c0b2f7Stbbdev         scalable_free(ptr);
312451c0b2f7Stbbdev         return NULL;
312551c0b2f7Stbbdev     } else
312651c0b2f7Stbbdev         tmp = reallocAligned(defaultMemPool, ptr, size, alignment);
312751c0b2f7Stbbdev 
312851c0b2f7Stbbdev     if (!tmp) errno = ENOMEM;
312951c0b2f7Stbbdev     return tmp;
313051c0b2f7Stbbdev }
313151c0b2f7Stbbdev 
313251c0b2f7Stbbdev extern "C" void * __TBB_malloc_safer_aligned_realloc(void *ptr, size_t size, size_t alignment, void* orig_function)
313351c0b2f7Stbbdev {
313451c0b2f7Stbbdev     /* corner cases are handled here, rather than in reallocAligned, so that reallocAligned does not need to deal with errno; a caller-side sketch follows this function */
313551c0b2f7Stbbdev     if (!isPowerOfTwo(alignment)) {
313651c0b2f7Stbbdev         errno = EINVAL;
313751c0b2f7Stbbdev         return NULL;
313851c0b2f7Stbbdev     }
313951c0b2f7Stbbdev     void *tmp = NULL;
314051c0b2f7Stbbdev 
314151c0b2f7Stbbdev     if (!ptr) {
314251c0b2f7Stbbdev         tmp = allocateAligned(defaultMemPool, size, alignment);
314351c0b2f7Stbbdev     } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) {
314451c0b2f7Stbbdev         if (!size) {
314551c0b2f7Stbbdev             internalFree(ptr);
314651c0b2f7Stbbdev             return NULL;
314751c0b2f7Stbbdev         } else {
314851c0b2f7Stbbdev             tmp = reallocAligned(defaultMemPool, ptr, size, alignment);
314951c0b2f7Stbbdev         }
315051c0b2f7Stbbdev     }
315151c0b2f7Stbbdev #if USE_WINTHREAD
315251c0b2f7Stbbdev     else {
315351c0b2f7Stbbdev         orig_aligned_ptrs *original_ptrs = static_cast<orig_aligned_ptrs*>(orig_function);
315451c0b2f7Stbbdev         if (size) {
315551c0b2f7Stbbdev             // Without the original aligned_msize, the old block's size is unknown,
315651c0b2f7Stbbdev             // so nothing can be done safely; just keep the old pointer.
315751c0b2f7Stbbdev             if ( original_ptrs->aligned_msize ){
315851c0b2f7Stbbdev                 // pass the minimal alignment and a zero offset so that the reported oldSize is at least plausible
315951c0b2f7Stbbdev                 size_t oldSize = original_ptrs->aligned_msize(ptr, sizeof(void*), 0);
316051c0b2f7Stbbdev                 tmp = allocateAligned(defaultMemPool, size, alignment);
316151c0b2f7Stbbdev                 if (tmp) {
316251c0b2f7Stbbdev                     memcpy(tmp, ptr, size<oldSize? size : oldSize);
316351c0b2f7Stbbdev                     if ( original_ptrs->aligned_free ){
316451c0b2f7Stbbdev                         original_ptrs->aligned_free( ptr );
316551c0b2f7Stbbdev                     }
316651c0b2f7Stbbdev                 }
316751c0b2f7Stbbdev             }
316851c0b2f7Stbbdev         } else {
316951c0b2f7Stbbdev             if ( original_ptrs->aligned_free ){
317051c0b2f7Stbbdev                 original_ptrs->aligned_free( ptr );
317151c0b2f7Stbbdev             }
317251c0b2f7Stbbdev             return NULL;
317351c0b2f7Stbbdev         }
317451c0b2f7Stbbdev     }
317551c0b2f7Stbbdev #else
317651c0b2f7Stbbdev     // As original_realloc cannot align the result, and there is no way to find
317751c0b2f7Stbbdev     // the size of the object being reallocated, give up on foreign pointers.
317851c0b2f7Stbbdev     suppress_unused_warning(orig_function);
317951c0b2f7Stbbdev #endif
318051c0b2f7Stbbdev     if (!tmp) errno = ENOMEM;
318151c0b2f7Stbbdev     return tmp;
318251c0b2f7Stbbdev }
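/*
 * Illustrative caller-side sketch for the routine above (assumptions: the exact
 * definition of orig_aligned_ptrs is not shown in this file, so only the
 * aligned_msize and aligned_free members used above are presumed, in that order;
 * the CRT functions named below are the usual Windows fallbacks a proxy might pass):
 *
 *     orig_aligned_ptrs crt_ptrs = { &_aligned_msize, &_aligned_free };
 *     void *p = __TBB_malloc_safer_aligned_realloc(foreign_ptr, 256, 64, &crt_ptrs);
 *     // A pointer this allocator does not recognize is measured with _aligned_msize,
 *     // copied into a freshly aligned block, and released with _aligned_free.
 */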
318351c0b2f7Stbbdev 
318451c0b2f7Stbbdev extern "C" void scalable_aligned_free(void *ptr)
318551c0b2f7Stbbdev {
318651c0b2f7Stbbdev     internalFree(ptr);
318751c0b2f7Stbbdev }
318851c0b2f7Stbbdev 
318951c0b2f7Stbbdev /********* end code for aligned allocation API **********/
319051c0b2f7Stbbdev 
319151c0b2f7Stbbdev /********* Code for scalable_msize       ***********/
319251c0b2f7Stbbdev 
319351c0b2f7Stbbdev /*
319451c0b2f7Stbbdev  * Returns the size of a memory block allocated in the heap.
319551c0b2f7Stbbdev  */
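/*
 * Illustrative usage sketch; the request size is an arbitrary example value:
 *
 *     void *p = scalable_malloc(100);
 *     if (p) {
 *         size_t usable = scalable_msize(p);  // usable >= 100: the real block size
 *         scalable_free(p);
 *     }
 *     size_t none = scalable_msize(NULL);     // returns 0 and sets errno to EINVAL
 */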
319651c0b2f7Stbbdev extern "C" size_t scalable_msize(void* ptr)
319751c0b2f7Stbbdev {
319851c0b2f7Stbbdev     if (ptr) {
319951c0b2f7Stbbdev         MALLOC_ASSERT(isRecognized(ptr), "Invalid pointer in scalable_msize detected.");
320051c0b2f7Stbbdev         return internalMsize(ptr);
320151c0b2f7Stbbdev     }
320251c0b2f7Stbbdev     errno = EINVAL;
320351c0b2f7Stbbdev     // Unlike _msize, return 0 in case of a parameter error;
320451c0b2f7Stbbdev     // returning size_t(-1) would be more likely to lead to trouble.
320551c0b2f7Stbbdev     return 0;
320651c0b2f7Stbbdev }
320751c0b2f7Stbbdev 
320851c0b2f7Stbbdev /*
320951c0b2f7Stbbdev  * A variant that provides additional memory safety by checking whether the given address
321051c0b2f7Stbbdev  * was obtained from this allocator and, if not, redirecting to the provided alternative call.
321151c0b2f7Stbbdev  */
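/*
 * Illustrative sketch of how a replacement layer might use this entry point
 * (the CRT _msize is shown as one possible fallback, available with USE_WINTHREAD):
 *
 *     size_t n = __TBB_malloc_safer_msize(some_ptr, &_msize);
 *     // A pointer owned by the scalable allocator is measured by internalMsize;
 *     // any other pointer is forwarded to _msize, so mixed allocations are handled.
 */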
321251c0b2f7Stbbdev extern "C" size_t __TBB_malloc_safer_msize(void *object, size_t (*original_msize)(void*))
321351c0b2f7Stbbdev {
321451c0b2f7Stbbdev     if (object) {
321551c0b2f7Stbbdev         // Check if the memory was allocated by scalable_malloc
321651c0b2f7Stbbdev         if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object))
321751c0b2f7Stbbdev             return internalMsize(object);
321851c0b2f7Stbbdev         else if (original_msize)
321951c0b2f7Stbbdev             return original_msize(object);
322051c0b2f7Stbbdev     }
322151c0b2f7Stbbdev     // object is NULL or unknown, or foreign and no original_msize
322251c0b2f7Stbbdev #if USE_WINTHREAD
322351c0b2f7Stbbdev     errno = EINVAL; // errno expected to be set only on this platform
322451c0b2f7Stbbdev #endif
322551c0b2f7Stbbdev     return 0;
322651c0b2f7Stbbdev }
322751c0b2f7Stbbdev 
322851c0b2f7Stbbdev /*
322951c0b2f7Stbbdev  * The same as above, but for the _aligned_msize case.
323051c0b2f7Stbbdev  */
323151c0b2f7Stbbdev extern "C" size_t __TBB_malloc_safer_aligned_msize(void *object, size_t alignment, size_t offset, size_t (*orig_aligned_msize)(void*,size_t,size_t))
323251c0b2f7Stbbdev {
323351c0b2f7Stbbdev     if (object) {
323451c0b2f7Stbbdev         // Check if the memory was allocated by scalable_malloc
323551c0b2f7Stbbdev         if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object))
323651c0b2f7Stbbdev             return internalMsize(object);
323751c0b2f7Stbbdev         else if (orig_aligned_msize)
323851c0b2f7Stbbdev             return orig_aligned_msize(object,alignment,offset);
323951c0b2f7Stbbdev     }
324051c0b2f7Stbbdev     // object is NULL or unknown, or foreign and no orig_aligned_msize
324151c0b2f7Stbbdev     errno = EINVAL;
324251c0b2f7Stbbdev     return 0;
324351c0b2f7Stbbdev }
324451c0b2f7Stbbdev 
324551c0b2f7Stbbdev /********* End code for scalable_msize   ***********/
324651c0b2f7Stbbdev 
324751c0b2f7Stbbdev extern "C" int scalable_allocation_mode(int param, intptr_t value)
324851c0b2f7Stbbdev {
324951c0b2f7Stbbdev     if (param == TBBMALLOC_SET_SOFT_HEAP_LIMIT) {
325051c0b2f7Stbbdev         defaultMemPool->extMemPool.backend.setRecommendedMaxSize((size_t)value);
325151c0b2f7Stbbdev         return TBBMALLOC_OK;
325251c0b2f7Stbbdev     } else if (param == USE_HUGE_PAGES) {
325351c0b2f7Stbbdev #if __linux__
325451c0b2f7Stbbdev         switch (value) {
325551c0b2f7Stbbdev         case 0:
325651c0b2f7Stbbdev         case 1:
325751c0b2f7Stbbdev             hugePages.setMode(value);
325851c0b2f7Stbbdev             return TBBMALLOC_OK;
325951c0b2f7Stbbdev         default:
326051c0b2f7Stbbdev             return TBBMALLOC_INVALID_PARAM;
326151c0b2f7Stbbdev         }
326251c0b2f7Stbbdev #else
326351c0b2f7Stbbdev         return TBBMALLOC_NO_EFFECT;
326451c0b2f7Stbbdev #endif
326551c0b2f7Stbbdev #if __TBB_SOURCE_DIRECTLY_INCLUDED
326651c0b2f7Stbbdev     } else if (param == TBBMALLOC_INTERNAL_SOURCE_INCLUDED) {
326751c0b2f7Stbbdev         switch (value) {
326851c0b2f7Stbbdev         case 0: // used by dynamic library
326951c0b2f7Stbbdev         case 1: // used by static library or directly included sources
327051c0b2f7Stbbdev             usedBySrcIncluded = value;
327151c0b2f7Stbbdev             return TBBMALLOC_OK;
327251c0b2f7Stbbdev         default:
327351c0b2f7Stbbdev             return TBBMALLOC_INVALID_PARAM;
327451c0b2f7Stbbdev         }
327551c0b2f7Stbbdev #endif
327651c0b2f7Stbbdev     } else if (param == TBBMALLOC_SET_HUGE_SIZE_THRESHOLD) {
327751c0b2f7Stbbdev         defaultMemPool->extMemPool.loc.setHugeSizeThreshold((size_t)value);
327851c0b2f7Stbbdev         return TBBMALLOC_OK;
327951c0b2f7Stbbdev     }
328051c0b2f7Stbbdev     return TBBMALLOC_INVALID_PARAM;
328151c0b2f7Stbbdev }
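/*
 * Illustrative usage sketch; the limit and threshold values are arbitrary examples:
 *
 *     scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 512 * 1024 * 1024);
 *     int r = scalable_allocation_mode(USE_HUGE_PAGES, 1);
 *     // r is TBBMALLOC_OK on Linux, TBBMALLOC_NO_EFFECT on other platforms
 *     scalable_allocation_mode(TBBMALLOC_SET_HUGE_SIZE_THRESHOLD, 4 * 1024 * 1024);
 */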
328251c0b2f7Stbbdev 
328351c0b2f7Stbbdev extern "C" int scalable_allocation_command(int cmd, void *param)
328451c0b2f7Stbbdev {
328551c0b2f7Stbbdev     if (param)
328651c0b2f7Stbbdev         return TBBMALLOC_INVALID_PARAM;
328751c0b2f7Stbbdev 
328851c0b2f7Stbbdev     bool released = false;
328951c0b2f7Stbbdev     switch(cmd) {
329051c0b2f7Stbbdev     case TBBMALLOC_CLEAN_THREAD_BUFFERS:
329151c0b2f7Stbbdev         if (TLSData *tls = defaultMemPool->getTLS(/*create=*/false))
329251c0b2f7Stbbdev             released = tls->externalCleanup(/*cleanOnlyUnused*/false, /*cleanBins=*/true);
329351c0b2f7Stbbdev         break;
329451c0b2f7Stbbdev     case TBBMALLOC_CLEAN_ALL_BUFFERS:
329551c0b2f7Stbbdev         released = defaultMemPool->extMemPool.hardCachesCleanup();
329651c0b2f7Stbbdev         break;
329751c0b2f7Stbbdev     default:
329851c0b2f7Stbbdev         return TBBMALLOC_INVALID_PARAM;
329951c0b2f7Stbbdev     }
330051c0b2f7Stbbdev     return released ? TBBMALLOC_OK : TBBMALLOC_NO_EFFECT;
330151c0b2f7Stbbdev }
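/*
 * Illustrative usage sketch; note that the second argument is reserved and must be NULL:
 *
 *     int rc = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL);
 *     // rc is TBBMALLOC_OK if any cached memory was actually released,
 *     // TBBMALLOC_NO_EFFECT if there was nothing to release
 */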
3302