xref: /oneTBB/src/tbbmalloc/frontend.cpp (revision 57f524ca)
/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include <errno.h>
#include <new>        /* for placement new */
#include <string.h>   /* for memset */

#include "oneapi/tbb/version.h"
#include "../tbb/environment.h"
#include "../tbb/itt_notify.h" // for __TBB_load_ittnotify()

#if USE_PTHREAD
    #define TlsSetValue_func pthread_setspecific
    #define TlsGetValue_func pthread_getspecific
    #define GetMyTID() pthread_self()
    #include <sched.h>
    extern "C" { static void mallocThreadShutdownNotification(void*); }
    #if __sun || __SUNPRO_CC
    #define __asm__ asm
    #endif
    #include <unistd.h> // sysconf(_SC_PAGESIZE)
#elif USE_WINTHREAD
    #define GetMyTID() GetCurrentThreadId()
#if __TBB_WIN8UI_SUPPORT
    #include <thread>
    #define TlsSetValue_func FlsSetValue
    #define TlsGetValue_func FlsGetValue
    #define TlsAlloc() FlsAlloc(nullptr)
    #define TLS_ALLOC_FAILURE FLS_OUT_OF_INDEXES
    #define TlsFree FlsFree
#else
    #define TlsSetValue_func TlsSetValue
    #define TlsGetValue_func TlsGetValue
    #define TLS_ALLOC_FAILURE TLS_OUT_OF_INDEXES
#endif
#else
    #error Must define USE_PTHREAD or USE_WINTHREAD
#endif

#define FREELIST_NONBLOCKING 1

namespace rml {
class MemoryPool;
namespace internal {

class Block;
class MemoryPool;

#if MALLOC_CHECK_RECURSION

inline bool isMallocInitialized();

#endif // MALLOC_CHECK_RECURSION

/** Support for handling the special UNUSABLE pointer state **/
const intptr_t UNUSABLE = 0x1;
inline bool isSolidPtr( void* ptr ) {
    return (UNUSABLE|(intptr_t)ptr)!=UNUSABLE;
}
inline bool isNotForUse( void* ptr ) {
    return (intptr_t)ptr==UNUSABLE;
}
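// A "solid" pointer is one that is neither nullptr nor UNUSABLE: OR-ing with
// UNUSABLE collapses both 0x0 and 0x1 to UNUSABLE, while any real object
// address (at least 8-byte aligned, so with bits set above the tag bit)
// survives the test. For example:
//   isSolidPtr(nullptr)         == false
//   isSolidPtr((void*)UNUSABLE) == false
//   isSolidPtr(addrOfObject)    == true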

/*
 * Block::objectSize value used to mark blocks allocated by startupAlloc
 */
const uint16_t startupAllocObjSizeMark = ~(uint16_t)0;

/*
 * The following constant is used to define the size of struct Block, the block header.
 * The intent is to have the size of a Block be a multiple of the cache line size; this allows us to
 * get good alignment at the cost of some overhead equal to the amount of padding included in the Block.
 */
const int blockHeaderAlignment = estimatedCacheLineSize;

/********* The data structures and global objects        **************/

/*
 * The malloc routines themselves need to be able to occasionally malloc some space,
 * in order to set up the structures used by the thread-local structures. This
 * routine performs that function.
 */
class BootStrapBlocks {
    MallocMutex bootStrapLock;
    Block      *bootStrapBlock;
    Block      *bootStrapBlockUsed;
    FreeObject *bootStrapObjectList;
public:
    void *allocate(MemoryPool *memPool, size_t size);
    void free(void* ptr);
    void reset();
};

#if USE_INTERNAL_TID
class ThreadId {
    static tls_key_t Tid_key;
    static std::atomic<intptr_t> ThreadCount;

    unsigned int id;

    static unsigned int tlsNumber() {
        unsigned int result = reinterpret_cast<intptr_t>(TlsGetValue_func(Tid_key));
        if( !result ) {
            RecursiveMallocCallProtector scoped;
            // Thread-local value is zero -> first call from this thread,
            // need to initialize with next ID value (IDs start from 1)
            result = ++ThreadCount; // returned new value!
            TlsSetValue_func( Tid_key, reinterpret_cast<void*>(result) );
        }
        return result;
    }
public:
    static bool init() {
#if USE_WINTHREAD
        Tid_key = TlsAlloc();
        if (Tid_key == TLS_ALLOC_FAILURE)
            return false;
#else
        int status = pthread_key_create( &Tid_key, nullptr );
        if ( status ) {
            fprintf (stderr, "The memory manager cannot create tls key during initialization\n");
            return false;
        }
#endif /* USE_WINTHREAD */
        return true;
    }
#if __TBB_SOURCE_DIRECTLY_INCLUDED
    static void destroy() {
        if( Tid_key ) {
#if USE_WINTHREAD
            BOOL status = !(TlsFree( Tid_key ));  // fail is zero
#else
            int status = pthread_key_delete( Tid_key );
#endif /* USE_WINTHREAD */
            if ( status )
                fprintf (stderr, "The memory manager cannot delete tls key\n");
            Tid_key = 0;
        }
    }
#endif

    ThreadId() : id(ThreadId::tlsNumber()) {}
    bool isCurrentThreadId() const { return id == ThreadId::tlsNumber(); }

#if COLLECT_STATISTICS || MALLOC_TRACE
    friend unsigned int getThreadId() { return ThreadId::tlsNumber(); }
#endif
#if COLLECT_STATISTICS
    static unsigned getMaxThreadId() { return ThreadCount.load(std::memory_order_relaxed); }

    friend int STAT_increment(ThreadId tid, int bin, int ctr);
#endif
};

tls_key_t ThreadId::Tid_key;
std::atomic<intptr_t> ThreadId::ThreadCount;

#if COLLECT_STATISTICS
int STAT_increment(ThreadId tid, int bin, int ctr)
{
    return ::STAT_increment(tid.id, bin, ctr);
}
#endif

#else // USE_INTERNAL_TID

class ThreadId {
#if USE_PTHREAD
    std::atomic<pthread_t> tid;
#else
    std::atomic<DWORD>     tid;
#endif
public:
    ThreadId() : tid(GetMyTID()) {}
#if USE_PTHREAD
    bool isCurrentThreadId() const { return pthread_equal(pthread_self(), tid.load(std::memory_order_relaxed)); }
#else
    bool isCurrentThreadId() const { return GetCurrentThreadId() == tid.load(std::memory_order_relaxed); }
#endif
    ThreadId& operator=(const ThreadId& other) {
        tid.store(other.tid.load(std::memory_order_relaxed), std::memory_order_relaxed);
        return *this;
    }
    static bool init() { return true; }
#if __TBB_SOURCE_DIRECTLY_INCLUDED
    static void destroy() {}
#endif
};
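// Presumably, tid is stored atomically because block ownership may be tested
// (isCurrentThreadId) by a thread other than the one that last wrote
// ownerTid, e.g. when deciding between a local and a public free; relaxed
// ordering suffices since only the value itself is compared.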

#endif // USE_INTERNAL_TID

/*********** Code to provide thread ID and a thread-local void pointer **********/

bool TLSKey::init()
{
#if USE_WINTHREAD
    TLS_pointer_key = TlsAlloc();
    if (TLS_pointer_key == TLS_ALLOC_FAILURE)
        return false;
#else
    int status = pthread_key_create( &TLS_pointer_key, mallocThreadShutdownNotification );
    if ( status )
        return false;
#endif /* USE_WINTHREAD */
    return true;
}

bool TLSKey::destroy()
{
#if USE_WINTHREAD
    BOOL status1 = !(TlsFree(TLS_pointer_key)); // fail is zero
#else
    int status1 = pthread_key_delete(TLS_pointer_key);
#endif /* USE_WINTHREAD */
    MALLOC_ASSERT(!status1, "The memory manager cannot delete tls key.");
    return status1==0;
}

inline TLSData* TLSKey::getThreadMallocTLS() const
{
    return (TLSData *)TlsGetValue_func( TLS_pointer_key );
}

inline void TLSKey::setThreadMallocTLS( TLSData * newvalue ) {
    RecursiveMallocCallProtector scoped;
    TlsSetValue_func( TLS_pointer_key, newvalue );
}

/* The 'next' field in the block header has to maintain some invariants:
 *   it needs to be on a 16K boundary and the first field in the block.
 *   Any value stored there needs to have the lower 14 bits set to 0
 *   so that various asserts work. This means that if you want to smash this memory
 *   for debugging purposes you will need to obey this invariant.
 * The total size of the header needs to be a power of 2 to simplify
 * the alignment requirements. For now it is a 128 byte structure.
 * To avoid false sharing, the fields changed only locally are separated
 * from the fields changed by foreign threads.
 * Changing the size of the block header would require changing
 * some bin allocation sizes, in particular the "fitting" sizes (see below).
 */
class Bin;
class StartupBlock;

class MemoryPool {
    // if no explicit grainsize is given, expect to see malloc in the user's pAlloc
    // and set a reasonably low granularity
    static const size_t defaultGranularity = estimatedCacheLineSize;

    MemoryPool() = delete;                  // deny default construction
public:
    static MallocMutex  memPoolListLock;

    // the list of all active pools is used to release
    // all TLS data on thread termination or library unload
    MemoryPool    *next,
                  *prev;
    ExtMemoryPool  extMemPool;
    BootStrapBlocks bootStrapBlocks;

    static void initDefaultPool();

    bool init(intptr_t poolId, const MemPoolPolicy* memPoolPolicy);
    bool reset();
    bool destroy();
    void onThreadShutdown(TLSData *tlsData);

    inline TLSData *getTLS(bool create);
    void clearTLS() { extMemPool.tlsPointerKey.setThreadMallocTLS(nullptr); }

    Block *getEmptyBlock(size_t size);
    void returnEmptyBlock(Block *block, bool poolTheBlock);

    // get/put a large object to/from the local large object cache
    void *getFromLLOCache(TLSData *tls, size_t size, size_t alignment);
    void putToLLOCache(TLSData *tls, void *object);
};

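// The default pool is placed in static, zero-initialized storage and set up
// lazily (see initDefaultPool) instead of being constructed, so the allocator
// does not depend on static construction order.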
static intptr_t defaultMemPool_space[sizeof(MemoryPool)/sizeof(intptr_t) +
                                     (sizeof(MemoryPool)%sizeof(intptr_t)? 1 : 0)];
static MemoryPool *defaultMemPool = (MemoryPool*)defaultMemPool_space;
const size_t MemoryPool::defaultGranularity;
// zero-initialized
MallocMutex  MemoryPool::memPoolListLock;
// TODO: move huge page status to the default pool, because it is that pool's state
HugePagesStatus hugePages;
static bool usedBySrcIncluded = false;

// Padding helpers
template<size_t padd>
struct PaddingImpl {
    size_t       __padding[padd];
};

template<>
struct PaddingImpl<0> {};

template<int N>
struct Padding : PaddingImpl<N/sizeof(size_t)> {};
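// Padding<N> contributes N/sizeof(size_t) words of padding; Padding<0>
// resolves to the empty PaddingImpl<0> specialization, so no space is wasted
// when the preceding fields already fill the cache line exactly.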

// A slab block is 16KB-aligned. To prevent false sharing, keep locally-accessed
// fields separate from fields commonly accessed by non-owner threads.
class GlobalBlockFields : public BlockI {
protected:
    std::atomic<FreeObject*> publicFreeList;
    std::atomic<Block*> nextPrivatizable;
    MemoryPool  *poolPtr;
};

class LocalBlockFields : public GlobalBlockFields, Padding<blockHeaderAlignment - sizeof(GlobalBlockFields)> {
protected:
    Block       *next;
    Block       *previous;        /* Use a doubly linked list to speed up removal */
    FreeObject  *bumpPtr;         /* Bump pointer moves from the end to the beginning of a block */
    FreeObject  *freeList;
    /* Pointer to the local data of the owner thread. Used for fast lookup of the tls
       when releasing an object from a block that the current thread owned.
       nullptr for orphaned blocks. */
    std::atomic<TLSData*> tlsPtr;
    ThreadId     ownerTid;        /* the ID of the thread that owns or last owned the block */
    BackRefIdx   backRefIdx;
    uint16_t     allocatedCount;  /* Number of objects allocated (obviously by the owning thread) */
    uint16_t     objectSize;
    bool         isFull;

    friend class FreeBlockPool;
    friend class StartupBlock;
    friend class LifoList;
    friend void *BootStrapBlocks::allocate(MemoryPool *, size_t);
    friend bool OrphanedBlocks::cleanup(Backend*);
    friend Block *MemoryPool::getEmptyBlock(size_t);
};

// Use inheritance to guarantee that user data starts on the next cache line.
// Can't use a member for this: when LocalBlockFields already ends exactly on a
// cache line, there must be no additional memory consumption on any compiler.
class Block : public LocalBlockFields,
              Padding<2*blockHeaderAlignment - sizeof(LocalBlockFields)> {
public:
    bool empty() const {
        if (allocatedCount > 0) return false;
        MALLOC_ASSERT(!isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT);
        return true;
    }
    inline FreeObject* allocate();
    inline FreeObject *allocateFromFreeList();

    inline bool adjustFullness();
    void adjustPositionInBin(Bin* bin = nullptr);
#if MALLOC_DEBUG
    bool freeListNonNull() { return freeList; }
#endif
    void freePublicObject(FreeObject *objectToFree);
    inline void freeOwnObject(void *object);
    void reset();
    void privatizePublicFreeList( bool reset = true );
    void restoreBumpPtr();
    void privatizeOrphaned(TLSData *tls, unsigned index);
    bool readyToShare();
    void shareOrphaned(intptr_t binTag, unsigned index);
    unsigned int getSize() const {
        MALLOC_ASSERT(isStartupAllocObject() || objectSize<minLargeObjectSize,
                      "Invalid object size");
        return isStartupAllocObject()? 0 : objectSize;
    }
    const BackRefIdx *getBackRefIdx() const { return &backRefIdx; }
    inline bool isOwnedByCurrentThread() const;
    bool isStartupAllocObject() const { return objectSize == startupAllocObjSizeMark; }
    inline FreeObject *findObjectToFree(const void *object) const;
    void checkFreePrecond(const void *object) const {
#if MALLOC_DEBUG
        const char *msg = "Possible double free or heap corruption.";
        // small objects are always at least sizeof(size_t)-byte aligned;
        // check this before the dereference below, as for invalid objects
        // the memory may be unreadable
        MALLOC_ASSERT(isAligned(object, sizeof(size_t)), "Try to free invalid small object");
#if !__TBB_USE_THREAD_SANITIZER
        // releasing into an already-free slab
        MALLOC_ASSERT(allocatedCount>0, msg);
#endif
        // must not point to the slab's header
        MALLOC_ASSERT((uintptr_t)object - (uintptr_t)this >= sizeof(Block), msg);
        if (startupAllocObjSizeMark == objectSize) // startup block
            MALLOC_ASSERT(object<=bumpPtr, msg);
        else {
            // non-startup objects are 8-byte aligned
            MALLOC_ASSERT(isAligned(object, 8), "Try to free invalid small object");
            FreeObject *toFree = findObjectToFree(object);
#if !__TBB_USE_THREAD_SANITIZER
            MALLOC_ASSERT(allocatedCount <= (slabSize-sizeof(Block))/objectSize
                          && (!bumpPtr || object>bumpPtr), msg);
            // check against the head of freeList, as this is the most likely
            // spot after a double free
            MALLOC_ASSERT(toFree != freeList, msg);
#endif
            // check against the head of publicFreeList, to detect a double free
            // involving a foreign thread
            MALLOC_ASSERT(toFree != publicFreeList.load(std::memory_order_relaxed), msg);
        }
#else
        suppress_unused_warning(object);
#endif
    }
    void initEmptyBlock(TLSData *tls, size_t size);
    size_t findObjectSize(void *object) const;
    MemoryPool *getMemPool() const { return poolPtr; } // do not use on the hot path!

protected:
    void cleanBlockHeader();

private:
    static const float emptyEnoughRatio; /* Threshold on free space needed to "reactivate" a block */

    inline FreeObject *allocateFromBumpPtr();
    inline FreeObject *findAllocatedObject(const void *address) const;
#if MALLOC_DEBUG
    inline bool isProperlyPlaced(const void *object) const;
#endif
    inline void markOwned(TLSData *tls) {
        MALLOC_ASSERT(!tlsPtr.load(std::memory_order_relaxed), ASSERT_TEXT);
        ownerTid = ThreadId(); /* save the ID of the current thread */
        tlsPtr.store(tls, std::memory_order_relaxed);
    }
    inline void markOrphaned() {
        MALLOC_ASSERT(tlsPtr.load(std::memory_order_relaxed), ASSERT_TEXT);
        tlsPtr.store(nullptr, std::memory_order_relaxed);
    }

    friend class Bin;
    friend class TLSData;
    friend bool MemoryPool::destroy();
};

const float Block::emptyEnoughRatio = 1.0 / 4.0;

static_assert(sizeof(Block) <= 2*estimatedCacheLineSize,
    "The class Block does not fit into 2 cache lines on this platform. "
    "Defining USE_INTERNAL_TID may help to fix it.");

class Bin {
public:
    Block *activeBlk;
    std::atomic<Block*> mailbox;
    MallocMutex mailLock;

    inline Block* getActiveBlock() const { return activeBlk; }
    void resetActiveBlock() { activeBlk = nullptr; }
    inline void setActiveBlock(Block *block);
    inline Block* setPreviousBlockActive();
    Block* getPrivatizedFreeListBlock();
    void moveBlockToFront(Block *block);
    bool cleanPublicFreeLists();
    void processEmptyBlock(Block *block, bool poolTheBlock);
    void addPublicFreeListBlock(Block* block);

    void outofTLSBin(Block* block);
    void verifyTLSBin(size_t size) const;
    void pushTLSBin(Block* block);

#if MALLOC_DEBUG
    void verifyInitState() const {
        MALLOC_ASSERT( !activeBlk, ASSERT_TEXT );
        MALLOC_ASSERT( !mailbox.load(std::memory_order_relaxed), ASSERT_TEXT );
    }
#endif

    friend void Block::freePublicObject (FreeObject *objectToFree);
};

/********* End of the data structures                    **************/

/*
 * There are bins for all 8 byte aligned objects less than this segregated size; 8 bins in total
 */
const uint32_t minSmallObjectIndex = 0;
const uint32_t numSmallObjectBins = 8;
const uint32_t maxSmallObjectSize = 64;

/*
 * There are 4 bins between each pair of adjacent powers of 2 [64-128-256-...]
 * from maxSmallObjectSize up to this size; 16 bins in total
 */
const uint32_t minSegregatedObjectIndex = minSmallObjectIndex+numSmallObjectBins;
const uint32_t numSegregatedObjectBins = 16;
const uint32_t maxSegregatedObjectSize = 1024;

/*
 * And there are 5 bins with allocation sizes that are multiples of estimatedCacheLineSize
 * and selected to fit 9, 6, 4, 3, and 2 allocations in a block.
 */
const uint32_t minFittingIndex = minSegregatedObjectIndex+numSegregatedObjectBins;
const uint32_t numFittingBins = 5;

const uint32_t fittingAlignment = estimatedCacheLineSize;

#define SET_FITTING_SIZE(N) ( (slabSize-sizeof(Block))/N ) & ~(fittingAlignment-1)
// For blockSize=16*1024, sizeof(Block)=2*estimatedCacheLineSize and fittingAlignment=estimatedCacheLineSize,
// the comments show the fitting sizes and the amounts left unused for estimatedCacheLineSize=64/128:
const uint32_t fittingSize1 = SET_FITTING_SIZE(9); // 1792/1792 128/000
const uint32_t fittingSize2 = SET_FITTING_SIZE(6); // 2688/2688 128/000
const uint32_t fittingSize3 = SET_FITTING_SIZE(4); // 4032/3968 128/256
const uint32_t fittingSize4 = SET_FITTING_SIZE(3); // 5376/5376 128/000
const uint32_t fittingSize5 = SET_FITTING_SIZE(2); // 8128/8064 000/000
#undef SET_FITTING_SIZE
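// Worked example for fittingSize1 with estimatedCacheLineSize=64:
// slabSize - sizeof(Block) = 16384 - 128 = 16256 usable bytes; 16256/9 = 1806
// (integer division), rounded down to the 64-byte boundary: 1806 & ~63 = 1792.
// Nine 1792-byte objects occupy 16128 bytes, so 128 bytes of the slab stay
// unused, matching the table above.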

/*
 * The total number of thread-specific Block-based bins
 */
const uint32_t numBlockBins = minFittingIndex+numFittingBins;

/*
 * Objects of this size and larger are considered large objects.
 */
const uint32_t minLargeObjectSize = fittingSize5 + 1;

/*
 * Per-thread pool of slab blocks. The idea behind it is not to share with other
 * threads memory that is likely still in the local cache(s) of our CPU.
 */
class FreeBlockPool {
private:
    std::atomic<Block*> head;
    int         size;
    Backend    *backend;
    bool        lastAccessMiss;
public:
    static const int POOL_HIGH_MARK = 32;
    static const int POOL_LOW_MARK  = 8;

    class ResOfGet {
        ResOfGet() = delete;
    public:
        Block* block;
        bool   lastAccMiss;
        ResOfGet(Block *b, bool lastMiss) : block(b), lastAccMiss(lastMiss) {}
    };

    // allocated in zero-initialized memory
    FreeBlockPool(Backend *bknd) : backend(bknd) {}
    ResOfGet getBlock();
    void returnBlock(Block *block);
    bool externalCleanup(); // can be called by another thread
};

template<int LOW_MARK, int HIGH_MARK>
class LocalLOCImpl {
private:
    static const size_t MAX_TOTAL_SIZE = 4*1024*1024;
    // TODO: can a singly-linked list be faster here?
    LargeMemoryBlock *tail; // needed when releasing on overflow
    std::atomic<LargeMemoryBlock*> head;
    size_t            totalSize;
    int               numOfBlocks;
public:
    bool put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool);
    LargeMemoryBlock *get(size_t size);
    bool externalCleanup(ExtMemoryPool *extMemPool);
#if __TBB_MALLOC_WHITEBOX_TEST
    LocalLOCImpl() : tail(nullptr), head(nullptr), totalSize(0), numOfBlocks(0) {}
    static size_t getMaxSize() { return MAX_TOTAL_SIZE; }
    static const int LOC_HIGH_MARK = HIGH_MARK;
#else
    // no ctor; the object must be created in zero-initialized memory
#endif
};

typedef LocalLOCImpl<8,32> LocalLOC; // set production code parameters
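// A per-thread cache of freed large objects: put() stashes a block locally
// and get() satisfies a request of matching size without touching the shared
// cache; the cache is bounded both in block count (HIGH_MARK) and in total
// bytes (MAX_TOTAL_SIZE).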

class TLSData : public TLSRemote {
    MemoryPool   *memPool;
public:
    Bin           bin[numBlockBinLimit];
    FreeBlockPool freeSlabBlocks;
    LocalLOC      lloc;
    unsigned      currCacheIdx;
private:
    std::atomic<bool> unused;
public:
    TLSData(MemoryPool *mPool, Backend *bknd) : memPool(mPool), freeSlabBlocks(bknd) {}
    MemoryPool *getMemPool() const { return memPool; }
    Bin* getAllocationBin(size_t size);
    void release();
    bool externalCleanup(bool cleanOnlyUnused, bool cleanBins) {
        if (!unused.load(std::memory_order_relaxed) && cleanOnlyUnused) return false;
        // Heavy operation in terms of synchronization complexity;
        // should be called only for the current thread
        bool released = cleanBins ? cleanupBlockBins() : false;
        // both cleanups must be called, and the order is not important
        return released | lloc.externalCleanup(&memPool->extMemPool) | freeSlabBlocks.externalCleanup();
    }
    bool cleanupBlockBins();
    void markUsed() { unused.store(false, std::memory_order_relaxed); } // called by the owner when the TLS is touched
    void markUnused() { unused.store(true, std::memory_order_relaxed); } // can be called by a non-owner thread
};

TLSData *TLSKey::createTLS(MemoryPool *memPool, Backend *backend)
{
    MALLOC_ASSERT( sizeof(TLSData) >= sizeof(Bin) * numBlockBins + sizeof(FreeBlockPool), ASSERT_TEXT );
    TLSData* tls = (TLSData*) memPool->bootStrapBlocks.allocate(memPool, sizeof(TLSData));
    if ( !tls )
        return nullptr;
    new(tls) TLSData(memPool, backend);
    /* the block contains zeroes after bootStrapMalloc, so bins are initialized */
#if MALLOC_DEBUG
    for (uint32_t i = 0; i < numBlockBinLimit; i++)
        tls->bin[i].verifyInitState();
#endif
    setThreadMallocTLS(tls);
    memPool->extMemPool.allLocalCaches.registerThread(tls);
    return tls;
}

bool TLSData::cleanupBlockBins()
{
    bool released = false;
    for (uint32_t i = 0; i < numBlockBinLimit; i++) {
        released |= bin[i].cleanPublicFreeLists();
        // After cleaning public free lists, only the active block might be empty.
        // Do not use processEmptyBlock because it will just restore bumpPtr.
        Block *block = bin[i].getActiveBlock();
        if (block && block->empty()) {
            bin[i].outofTLSBin(block);
            memPool->returnEmptyBlock(block, /*poolTheBlock=*/false);
            released = true;
        }
    }
    return released;
}

bool ExtMemoryPool::releaseAllLocalCaches()
{
    // Iterate over all registered TLS data and clean the LLOC and slab pools
    bool released = allLocalCaches.cleanup(/*cleanOnlyUnused=*/false);

    // Bin privatization is done only for the current thread
    if (TLSData *tlsData = tlsPointerKey.getThreadMallocTLS())
        released |= tlsData->cleanupBlockBins();

    return released;
}

void AllLocalCaches::registerThread(TLSRemote *tls)
{
    tls->prev = nullptr;
    MallocMutex::scoped_lock lock(listLock);
    MALLOC_ASSERT(head!=tls, ASSERT_TEXT);
    tls->next = head;
    if (head)
        head->prev = tls;
    head = tls;
    MALLOC_ASSERT(head->next!=head, ASSERT_TEXT);
}

void AllLocalCaches::unregisterThread(TLSRemote *tls)
{
    MallocMutex::scoped_lock lock(listLock);
    MALLOC_ASSERT(head, "Can't unregister thread: no threads are registered.");
    if (head == tls)
        head = tls->next;
    if (tls->next)
        tls->next->prev = tls->prev;
    if (tls->prev)
        tls->prev->next = tls->next;
    MALLOC_ASSERT(!tls->next || tls->next->next!=tls->next, ASSERT_TEXT);
}

bool AllLocalCaches::cleanup(bool cleanOnlyUnused)
{
    bool released = false;
    {
        MallocMutex::scoped_lock lock(listLock);
        for (TLSRemote *curr=head; curr; curr=curr->next)
            released |= static_cast<TLSData*>(curr)->externalCleanup(cleanOnlyUnused, /*cleanBins=*/false);
    }
    return released;
}

void AllLocalCaches::markUnused()
{
    bool locked;
    MallocMutex::scoped_lock lock(listLock, /*block=*/false, &locked);
    if (!locked) // do not wait for marking if someone is doing something with the list
        return;

    for (TLSRemote *curr=head; curr; curr=curr->next)
        static_cast<TLSData*>(curr)->markUnused();
}

#if MALLOC_CHECK_RECURSION
MallocMutex RecursiveMallocCallProtector::rmc_mutex;
std::atomic<pthread_t> RecursiveMallocCallProtector::owner_thread;
std::atomic<void*> RecursiveMallocCallProtector::autoObjPtr;
bool        RecursiveMallocCallProtector::mallocRecursionDetected;
#if __FreeBSD__
bool        RecursiveMallocCallProtector::canUsePthread;
#endif

#endif

/*********** End code to provide thread ID and a TLS pointer **********/

// Parameter for isLargeObject, keeps our expectations on memory origin.
// Assertions must use unknownMem to reliably report object invalidity.
enum MemoryOrigin {
    ourMem,    // allocated by TBB allocator
    unknownMem // can be allocated by system allocator or TBB allocator
};

template<MemoryOrigin>
#if __TBB_USE_THREAD_SANITIZER
// We have a real race when accessing the large object header for
// non-large objects (e.g. small or foreign objects).
// Therefore, we need to hide this access from the thread sanitizer
__attribute__((no_sanitize("thread")))
#endif
bool isLargeObject(void *object);
static void *internalMalloc(size_t size);
static void internalFree(void *object);
static void *internalPoolMalloc(MemoryPool* mPool, size_t size);
static bool internalPoolFree(MemoryPool *mPool, void *object, size_t size);

#if !MALLOC_DEBUG
#if __INTEL_COMPILER || _MSC_VER
#define NOINLINE(decl) __declspec(noinline) decl
#define ALWAYSINLINE(decl) __forceinline decl
#elif __GNUC__
#define NOINLINE(decl) decl __attribute__ ((noinline))
#define ALWAYSINLINE(decl) decl __attribute__ ((always_inline))
#else
#define NOINLINE(decl) decl
#define ALWAYSINLINE(decl) decl
#endif

static NOINLINE( bool doInitialization() );
ALWAYSINLINE( bool isMallocInitialized() );

#undef ALWAYSINLINE
#undef NOINLINE
#endif /* !MALLOC_DEBUG */


/********* Now some rough utility code to deal with indexing the size bins. **************/

/*
 * Given a number, return the position of its highest non-zero bit. It is intended to work with 32-bit values only.
 * Moreover, on some platforms, for the sake of simplicity and performance, it is narrowed to only serve 64 to 1023.
 * This is enough for the current algorithm of distribution of sizes among bins.
 * __TBB_Log2 is not used here to minimize dependencies on TBB-specific sources.
 */
#if _WIN64 && _MSC_VER>=1400 && !__INTEL_COMPILER
extern "C" unsigned char _BitScanReverse( unsigned long* i, unsigned long w );
#pragma intrinsic(_BitScanReverse)
#endif
static inline unsigned int highestBitPos(unsigned int n)
{
    MALLOC_ASSERT( n>=64 && n<1024, ASSERT_TEXT ); // only needed for bsr array lookup, but always true
    unsigned int pos;
#if __ARCH_x86_32||__ARCH_x86_64

# if __unix__||__APPLE__||__MINGW32__
    __asm__ ("bsr %1,%0" : "=r"(pos) : "r"(n));
# elif (_WIN32 && (!_WIN64 || __INTEL_COMPILER))
    __asm
    {
        bsr eax, n
        mov pos, eax
    }
# elif _WIN64 && _MSC_VER>=1400
    _BitScanReverse((unsigned long*)&pos, (unsigned long)n);
# else
#   error highestBitPos() not implemented for this platform
# endif
#elif __arm__
    __asm__ __volatile__
    (
       "clz %0, %1\n"
       "rsb %0, %0, %2\n"
       :"=r" (pos) :"r" (n), "I" (31)
    );
#else
    static unsigned int bsr[16] = {0/*N/A*/,6,7,7,8,8,8,8,9,9,9,9,9,9,9,9};
    pos = bsr[ n>>6 ];
#endif /* __ARCH_* */
    return pos;
}

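// Maps a small request (1..64 bytes) to its bin: bins hold 8,16,24,...,64-byte
// objects, but on 64-bit platforms objects above bin 0 must keep 16-byte
// alignment, so only bins 0,1,3,5,7 are used:
//   1-8 -> bin 0 (8B), 9-16 -> bin 1 (16B), 17-32 -> bin 3 (32B),
//   33-48 -> bin 5 (48B), 49-64 -> bin 7 (64B).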
unsigned int getSmallObjectIndex(unsigned int size)
{
    unsigned int result = (size-1)>>3;
    if (sizeof(void*)==8) {
        // For 64-bit malloc, 16 byte alignment is needed except for bin 0.
        if (result) result |= 1; // 0,1,3,5,7; bins 2,4,6 are not aligned to 16 bytes
    }
    return result;
}

/*
 * Depending on indexRequest, for a given size return either the index into the bin
 * for objects of this size, or the actual size of objects in this bin.
 */
template<bool indexRequest>
static unsigned int getIndexOrObjectSize (unsigned int size)
{
    if (size <= maxSmallObjectSize) { // selection from 8/16/24/32/40/48/56/64
        unsigned int index = getSmallObjectIndex( size );
         /* Bin 0 is for 8 bytes, bin 1 is for 16, and so forth */
        return indexRequest ? index : (index+1)<<3;
    }
    else if (size <= maxSegregatedObjectSize ) { // 80/96/112/128 / 160/192/224/256 / 320/384/448/512 / 640/768/896/1024
        unsigned int order = highestBitPos(size-1); // which group of bin sizes?
        MALLOC_ASSERT( 6<=order && order<=9, ASSERT_TEXT );
        if (indexRequest)
            return minSegregatedObjectIndex - (4*6) - 4 + (4*order) + ((size-1)>>(order-2));
        else {
            unsigned int alignment = 128 >> (9-order); // alignment in the group
            MALLOC_ASSERT( alignment==16 || alignment==32 || alignment==64 || alignment==128, ASSERT_TEXT );
            return alignUp(size,alignment);
        }
    }
    else {
        if( size <= fittingSize3 ) {
            if( size <= fittingSize2 ) {
                if( size <= fittingSize1 )
                    return indexRequest ? minFittingIndex : fittingSize1;
                else
                    return indexRequest ? minFittingIndex+1 : fittingSize2;
            } else
                return indexRequest ? minFittingIndex+2 : fittingSize3;
        } else {
            if( size <= fittingSize5 ) {
                if( size <= fittingSize4 )
                    return indexRequest ? minFittingIndex+3 : fittingSize4;
                else
                    return indexRequest ? minFittingIndex+4 : fittingSize5;
            } else {
                MALLOC_ASSERT( 0,ASSERT_TEXT ); // this should not happen
                return ~0U;
            }
        }
    }
}
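// Worked example for the segregated-bin index formula above: for size=80,
// order = highestBitPos(79) = 6 and (size-1)>>(order-2) = 79>>4 = 4, so the
// index is 8 - 24 - 4 + 24 + 4 = 8 = minSegregatedObjectIndex (the 80-byte
// bin); for size=1024, order = 9 and 1023>>7 = 7, giving index 23, the last
// of the 16 segregated bins.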

static unsigned int getIndex (unsigned int size)
{
    return getIndexOrObjectSize</*indexRequest=*/true>(size);
}

static unsigned int getObjectSize (unsigned int size)
{
    return getIndexOrObjectSize</*indexRequest=*/false>(size);
}


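// Bootstrap allocation serves only TLSData objects: a freed object is
// recycled via bootStrapObjectList; otherwise space is carved from the
// current bootstrap block by moving bumpPtr downward, and an exhausted block
// is parked on the bootStrapBlockUsed list while a fresh one is fetched on
// demand.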
void *BootStrapBlocks::allocate(MemoryPool *memPool, size_t size)
{
    FreeObject *result;

    MALLOC_ASSERT( size == sizeof(TLSData), ASSERT_TEXT );

    { // Lock with acquire
        MallocMutex::scoped_lock scoped_cs(bootStrapLock);

        if( bootStrapObjectList ) {
            result = bootStrapObjectList;
            bootStrapObjectList = bootStrapObjectList->next;
        } else {
            if (!bootStrapBlock) {
                bootStrapBlock = memPool->getEmptyBlock(size);
                if (!bootStrapBlock) return nullptr;
            }
            result = bootStrapBlock->bumpPtr;
            bootStrapBlock->bumpPtr = (FreeObject *)((uintptr_t)bootStrapBlock->bumpPtr - bootStrapBlock->objectSize);
            if ((uintptr_t)bootStrapBlock->bumpPtr < (uintptr_t)bootStrapBlock+sizeof(Block)) {
                bootStrapBlock->bumpPtr = nullptr;
                bootStrapBlock->next = bootStrapBlockUsed;
                bootStrapBlockUsed = bootStrapBlock;
                bootStrapBlock = nullptr;
            }
        }
    } // Unlock with release
    memset (result, 0, size);
    return (void*)result;
}

void BootStrapBlocks::free(void* ptr)
{
    MALLOC_ASSERT( ptr, ASSERT_TEXT );
    { // Lock with acquire
        MallocMutex::scoped_lock scoped_cs(bootStrapLock);
        ((FreeObject*)ptr)->next = bootStrapObjectList;
        bootStrapObjectList = (FreeObject*)ptr;
    } // Unlock with release
}

void BootStrapBlocks::reset()
{
    bootStrapBlock = bootStrapBlockUsed = nullptr;
    bootStrapObjectList = nullptr;
}

#if !(FREELIST_NONBLOCKING)
static MallocMutex publicFreeListLock; // lock for changes of publicFreeList
#endif

/********* End rough utility code  **************/

/* LifoList assumes zero initialization, so a vector of them can be created
 * by just allocating some space with no call to a constructor.
 * On Linux, this seems to be necessary to avoid linking with C++ libraries.
 *
 * By usage convention there is no race on the initialization. */
LifoList::LifoList( ) : top(nullptr)
{
    // MallocMutex assumes zero initialization
    memset(&lock, 0, sizeof(MallocMutex));
}

void LifoList::push(Block *block)
{
    MallocMutex::scoped_lock scoped_cs(lock);
    block->next = top.load(std::memory_order_relaxed);
    top.store(block, std::memory_order_relaxed);
}

Block *LifoList::pop()
{
    Block* block = nullptr;
    if (top.load(std::memory_order_relaxed)) {
        MallocMutex::scoped_lock scoped_cs(lock);
        block = top.load(std::memory_order_relaxed);
        if (block) {
            top.store(block->next, std::memory_order_relaxed);
        }
    }
    return block;
}

Block *LifoList::grab()
{
    Block *block = nullptr;
    if (top.load(std::memory_order_relaxed)) {
        MallocMutex::scoped_lock scoped_cs(lock);
        block = top.load(std::memory_order_relaxed);
        top.store(nullptr, std::memory_order_relaxed);
    }
    return block;
}
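// Note the pattern shared by pop() and grab(): top is tested without the lock
// first, so the mutex is taken only when the list appears non-empty; pop()
// re-checks under the lock because another thread may have emptied the list
// in the meantime.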

/********* Thread and block related code      *************/

template<bool poolDestroy> void AllLargeBlocksList::releaseAll(Backend *backend) {
    LargeMemoryBlock *next, *lmb = loHead;
    loHead = nullptr;

    for (; lmb; lmb = next) {
        next = lmb->gNext;
        if (poolDestroy) {
            // during pool destruction there is no need to return the object to
            // the backend; only remove backrefs, as they are global
            removeBackRef(lmb->backRefIdx);
        } else {
            // clean g(Next|Prev) to prevent lmb from being removed
            // from AllLargeBlocksList inside returnLargeObject
            lmb->gNext = lmb->gPrev = nullptr;
            backend->returnLargeObject(lmb);
        }
    }
}

TLSData* MemoryPool::getTLS(bool create)
{
    TLSData* tls = extMemPool.tlsPointerKey.getThreadMallocTLS();
    if (create && !tls)
        tls = extMemPool.tlsPointerKey.createTLS(this, &extMemPool.backend);
    return tls;
}

/*
 * Return the bin for the given size.
 */
inline Bin* TLSData::getAllocationBin(size_t size)
{
    return bin + getIndex(size);
}

/* Return an empty uninitialized block in a non-blocking fashion. */
Block *MemoryPool::getEmptyBlock(size_t size)
{
    TLSData* tls = getTLS(/*create=*/false);
    // try to use the per-thread cache, if TLS is available
    FreeBlockPool::ResOfGet resOfGet = tls?
        tls->freeSlabBlocks.getBlock() : FreeBlockPool::ResOfGet(nullptr, false);
    Block *result = resOfGet.block;

    if (!result) { // not found in the local cache, ask the backend for slabs
        int num = resOfGet.lastAccMiss? Backend::numOfSlabAllocOnMiss : 1;
        BackRefIdx backRefIdx[Backend::numOfSlabAllocOnMiss];

        result = static_cast<Block*>(extMemPool.backend.getSlabBlock(num));
        if (!result) return nullptr;

        if (!extMemPool.userPool())
            for (int i=0; i<num; i++) {
                backRefIdx[i] = BackRefIdx::newBackRef(/*largeObj=*/false);
                if (backRefIdx[i].isInvalid()) {
                    // roll back the resource allocation
                    for (int j=0; j<i; j++)
                        removeBackRef(backRefIdx[j]);
                    Block *b = result;
                    for (int j=0; j<num; b=(Block*)((uintptr_t)b+slabSize), j++)
                        extMemPool.backend.putSlabBlock(b);
                    return nullptr;
                }
            }
        // resources were allocated, register the blocks
        Block *b = result;
        for (int i=0; i<num; b=(Block*)((uintptr_t)b+slabSize), i++) {
            // a slab block in a user's pool must have an invalid backRefIdx
            if (extMemPool.userPool()) {
                new (&b->backRefIdx) BackRefIdx();
            } else {
                setBackRef(backRefIdx[i], b);
                b->backRefIdx = backRefIdx[i];
            }
            b->tlsPtr.store(tls, std::memory_order_relaxed);
            b->poolPtr = this;
            // all but the first one go to the per-thread pool
            if (i > 0) {
                MALLOC_ASSERT(tls, ASSERT_TEXT);
                tls->freeSlabBlocks.returnBlock(b);
            }
        }
    }
    MALLOC_ASSERT(result, ASSERT_TEXT);
    result->initEmptyBlock(tls, size);
    STAT_increment(getThreadId(), getIndex(result->objectSize), allocBlockNew);
    return result;
}

void MemoryPool::returnEmptyBlock(Block *block, bool poolTheBlock)
{
    block->reset();
    if (poolTheBlock) {
        getTLS(/*create=*/false)->freeSlabBlocks.returnBlock(block);
    } else {
        // slab blocks in user's pools do not have a valid backRefIdx
        if (!extMemPool.userPool())
            removeBackRef(*(block->getBackRefIdx()));
        extMemPool.backend.putSlabBlock(block);
    }
}

106851c0b2f7Stbbdev bool ExtMemoryPool::init(intptr_t poolId, rawAllocType rawAlloc,
106951c0b2f7Stbbdev                          rawFreeType rawFree, size_t granularity,
107051c0b2f7Stbbdev                          bool keepAllMemory, bool fixedPool)
107151c0b2f7Stbbdev {
107251c0b2f7Stbbdev     this->poolId = poolId;
107351c0b2f7Stbbdev     this->rawAlloc = rawAlloc;
107451c0b2f7Stbbdev     this->rawFree = rawFree;
107551c0b2f7Stbbdev     this->granularity = granularity;
107651c0b2f7Stbbdev     this->keepAllMemory = keepAllMemory;
107751c0b2f7Stbbdev     this->fixedPool = fixedPool;
107851c0b2f7Stbbdev     this->delayRegsReleasing = false;
107951c0b2f7Stbbdev     if (!initTLS())
108051c0b2f7Stbbdev         return false;
108151c0b2f7Stbbdev     loc.init(this);
108251c0b2f7Stbbdev     backend.init(this);
1083*57f524caSIlya Isaev     MALLOC_ASSERT(isPoolValid(), nullptr);
108451c0b2f7Stbbdev     return true;
108551c0b2f7Stbbdev }
108651c0b2f7Stbbdev 
108751c0b2f7Stbbdev bool ExtMemoryPool::initTLS() { return tlsPointerKey.init(); }
108851c0b2f7Stbbdev 
108951c0b2f7Stbbdev bool MemoryPool::init(intptr_t poolId, const MemPoolPolicy *policy)
109051c0b2f7Stbbdev {
109151c0b2f7Stbbdev     if (!extMemPool.init(poolId, policy->pAlloc, policy->pFree,
109251c0b2f7Stbbdev                policy->granularity? policy->granularity : defaultGranularity,
109351c0b2f7Stbbdev                policy->keepAllMemory, policy->fixedPool))
109451c0b2f7Stbbdev         return false;
109551c0b2f7Stbbdev     {
109651c0b2f7Stbbdev         MallocMutex::scoped_lock lock(memPoolListLock);
109751c0b2f7Stbbdev         next = defaultMemPool->next;
109851c0b2f7Stbbdev         defaultMemPool->next = this;
109951c0b2f7Stbbdev         prev = defaultMemPool;
110051c0b2f7Stbbdev         if (next)
110151c0b2f7Stbbdev             next->prev = this;
110251c0b2f7Stbbdev     }
110351c0b2f7Stbbdev     return true;
110451c0b2f7Stbbdev }
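
// Pools form a global doubly-linked list anchored at defaultMemPool and guarded
// by memPoolListLock: init() above links a new pool right after the anchor, and
// destroy() below unlinks it again.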
110551c0b2f7Stbbdev 
110651c0b2f7Stbbdev bool MemoryPool::reset()
110751c0b2f7Stbbdev {
110851c0b2f7Stbbdev     MALLOC_ASSERT(extMemPool.userPool(), "No reset for the system pool.");
110951c0b2f7Stbbdev     // memory is not released during pool reset
111051c0b2f7Stbbdev     // TODO: mark unused regions to be released on the next reset()
111151c0b2f7Stbbdev     extMemPool.delayRegionsReleasing(true);
111251c0b2f7Stbbdev 
111351c0b2f7Stbbdev     bootStrapBlocks.reset();
111451c0b2f7Stbbdev     extMemPool.lmbList.releaseAll</*poolDestroy=*/false>(&extMemPool.backend);
111551c0b2f7Stbbdev     if (!extMemPool.reset())
111651c0b2f7Stbbdev         return false;
111751c0b2f7Stbbdev 
111851c0b2f7Stbbdev     if (!extMemPool.initTLS())
111951c0b2f7Stbbdev         return false;
112051c0b2f7Stbbdev     extMemPool.delayRegionsReleasing(false);
112151c0b2f7Stbbdev     return true;
112251c0b2f7Stbbdev }
112351c0b2f7Stbbdev 
112451c0b2f7Stbbdev bool MemoryPool::destroy()
112551c0b2f7Stbbdev {
112651c0b2f7Stbbdev #if __TBB_MALLOC_LOCACHE_STAT
112751c0b2f7Stbbdev     extMemPool.loc.reportStat(stdout);
112851c0b2f7Stbbdev #endif
112951c0b2f7Stbbdev #if __TBB_MALLOC_BACKEND_STAT
113051c0b2f7Stbbdev     extMemPool.backend.reportStat(stdout);
113151c0b2f7Stbbdev #endif
113251c0b2f7Stbbdev     {
113351c0b2f7Stbbdev         MallocMutex::scoped_lock lock(memPoolListLock);
113451c0b2f7Stbbdev         // remove itself from global pool list
113551c0b2f7Stbbdev         if (prev)
113651c0b2f7Stbbdev             prev->next = next;
113751c0b2f7Stbbdev         if (next)
113851c0b2f7Stbbdev             next->prev = prev;
113951c0b2f7Stbbdev     }
114051c0b2f7Stbbdev     // slab blocks in non-default pool do not have backreferences,
114151c0b2f7Stbbdev     // only large objects do
114251c0b2f7Stbbdev     if (extMemPool.userPool())
114351c0b2f7Stbbdev         extMemPool.lmbList.releaseAll</*poolDestroy=*/true>(&extMemPool.backend);
114451c0b2f7Stbbdev     else {
114551c0b2f7Stbbdev         // only one non-userPool() is supported now
1146*57f524caSIlya Isaev         MALLOC_ASSERT(this==defaultMemPool, nullptr);
114751c0b2f7Stbbdev         // Here and below in extMemPool.destroy(), do not restore the initial state
114851c0b2f7Stbbdev         // for a user pool, because it is just about to be released. For the system
114951c0b2f7Stbbdev         // pool we do restore it, so that it need not be zeroed on a subsequent reload.
115051c0b2f7Stbbdev         bootStrapBlocks.reset();
115151c0b2f7Stbbdev         extMemPool.orphanedBlocks.reset();
115251c0b2f7Stbbdev     }
115351c0b2f7Stbbdev     return extMemPool.destroy();
115451c0b2f7Stbbdev }
115551c0b2f7Stbbdev 
115651c0b2f7Stbbdev void MemoryPool::onThreadShutdown(TLSData *tlsData)
115751c0b2f7Stbbdev {
115851c0b2f7Stbbdev     if (tlsData) { // might be called for "empty" TLS
115951c0b2f7Stbbdev         tlsData->release();
116051c0b2f7Stbbdev         bootStrapBlocks.free(tlsData);
116151c0b2f7Stbbdev         clearTLS();
116251c0b2f7Stbbdev     }
116351c0b2f7Stbbdev }
116451c0b2f7Stbbdev 
116551c0b2f7Stbbdev #if MALLOC_DEBUG
116651c0b2f7Stbbdev void Bin::verifyTLSBin (size_t size) const
116751c0b2f7Stbbdev {
116851c0b2f7Stbbdev /* The debug version verifies the TLSBin as needed */
116951c0b2f7Stbbdev     uint32_t objSize = getObjectSize(size);
117051c0b2f7Stbbdev 
117151c0b2f7Stbbdev     if (activeBlk) {
117251c0b2f7Stbbdev         MALLOC_ASSERT( activeBlk->isOwnedByCurrentThread(), ASSERT_TEXT );
117351c0b2f7Stbbdev         MALLOC_ASSERT( activeBlk->objectSize == objSize, ASSERT_TEXT );
117451c0b2f7Stbbdev #if MALLOC_DEBUG>1
117551c0b2f7Stbbdev         for (Block* temp = activeBlk->next; temp; temp=temp->next) {
117651c0b2f7Stbbdev             MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT );
117751c0b2f7Stbbdev             MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT );
117851c0b2f7Stbbdev             MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
117951c0b2f7Stbbdev             MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
118051c0b2f7Stbbdev             if (temp->next) {
118151c0b2f7Stbbdev                 MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
118251c0b2f7Stbbdev             }
118351c0b2f7Stbbdev         }
118451c0b2f7Stbbdev         for (Block* temp = activeBlk->previous; temp; temp=temp->previous) {
118551c0b2f7Stbbdev             MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT );
118651c0b2f7Stbbdev             MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT );
118751c0b2f7Stbbdev             MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
118851c0b2f7Stbbdev             MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
118951c0b2f7Stbbdev             if (temp->previous) {
119051c0b2f7Stbbdev                 MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
119151c0b2f7Stbbdev             }
119251c0b2f7Stbbdev         }
119351c0b2f7Stbbdev #endif /* MALLOC_DEBUG>1 */
119451c0b2f7Stbbdev     }
119551c0b2f7Stbbdev }
119651c0b2f7Stbbdev #else /* MALLOC_DEBUG */
119751c0b2f7Stbbdev inline void Bin::verifyTLSBin (size_t) const { }
119851c0b2f7Stbbdev #endif /* MALLOC_DEBUG */
119951c0b2f7Stbbdev 
120051c0b2f7Stbbdev /*
120151c0b2f7Stbbdev  * Add a block to this tls bin list, just before the active block (or make it the active one if the bin is empty).
120251c0b2f7Stbbdev  */
120351c0b2f7Stbbdev void Bin::pushTLSBin(Block* block)
120451c0b2f7Stbbdev {
120551c0b2f7Stbbdev     /* objectSize is taken from the block rather than passed as a parameter
120651c0b2f7Stbbdev        because the function is applied to partially filled blocks as well */
120751c0b2f7Stbbdev     unsigned int size = block->objectSize;
120851c0b2f7Stbbdev 
120951c0b2f7Stbbdev     MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
121051c0b2f7Stbbdev     MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );
1211*57f524caSIlya Isaev     MALLOC_ASSERT( block->next == nullptr, ASSERT_TEXT );
1212*57f524caSIlya Isaev     MALLOC_ASSERT( block->previous == nullptr, ASSERT_TEXT );
121351c0b2f7Stbbdev 
121451c0b2f7Stbbdev     MALLOC_ASSERT( this, ASSERT_TEXT );
121551c0b2f7Stbbdev     verifyTLSBin(size);
121651c0b2f7Stbbdev 
121751c0b2f7Stbbdev     block->next = activeBlk;
121851c0b2f7Stbbdev     if( activeBlk ) {
121951c0b2f7Stbbdev         block->previous = activeBlk->previous;
122051c0b2f7Stbbdev         activeBlk->previous = block;
122151c0b2f7Stbbdev         if( block->previous )
122251c0b2f7Stbbdev             block->previous->next = block;
122351c0b2f7Stbbdev     } else {
122451c0b2f7Stbbdev         activeBlk = block;
122551c0b2f7Stbbdev     }
122651c0b2f7Stbbdev 
122751c0b2f7Stbbdev     verifyTLSBin(size);
122851c0b2f7Stbbdev }
122951c0b2f7Stbbdev 
123051c0b2f7Stbbdev /*
123151c0b2f7Stbbdev  * Take a block out of its tls bin (e.g. before removal).
123251c0b2f7Stbbdev  */
123351c0b2f7Stbbdev void Bin::outofTLSBin(Block* block)
123451c0b2f7Stbbdev {
123551c0b2f7Stbbdev     unsigned int size = block->objectSize;
123651c0b2f7Stbbdev 
123751c0b2f7Stbbdev     MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
123851c0b2f7Stbbdev     MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );
123951c0b2f7Stbbdev 
124051c0b2f7Stbbdev     MALLOC_ASSERT( this, ASSERT_TEXT );
124151c0b2f7Stbbdev     verifyTLSBin(size);
124251c0b2f7Stbbdev 
124351c0b2f7Stbbdev     if (block == activeBlk) {
124451c0b2f7Stbbdev         activeBlk = block->previous? block->previous : block->next;
124551c0b2f7Stbbdev     }
124651c0b2f7Stbbdev     /* Unlink the block */
124751c0b2f7Stbbdev     if (block->previous) {
124851c0b2f7Stbbdev         MALLOC_ASSERT( block->previous->next == block, ASSERT_TEXT );
124951c0b2f7Stbbdev         block->previous->next = block->next;
125051c0b2f7Stbbdev     }
125151c0b2f7Stbbdev     if (block->next) {
125251c0b2f7Stbbdev         MALLOC_ASSERT( block->next->previous == block, ASSERT_TEXT );
125351c0b2f7Stbbdev         block->next->previous = block->previous;
125451c0b2f7Stbbdev     }
1255*57f524caSIlya Isaev     block->next = nullptr;
1256*57f524caSIlya Isaev     block->previous = nullptr;
125751c0b2f7Stbbdev 
125851c0b2f7Stbbdev     verifyTLSBin(size);
125951c0b2f7Stbbdev }
126051c0b2f7Stbbdev 
126151c0b2f7Stbbdev Block* Bin::getPrivatizedFreeListBlock()
126251c0b2f7Stbbdev {
126351c0b2f7Stbbdev     Block* block;
126451c0b2f7Stbbdev     MALLOC_ASSERT( this, ASSERT_TEXT );
126551c0b2f7Stbbdev     // if this method is called, allocation from the active block must have been unsuccessful
126651c0b2f7Stbbdev     MALLOC_ASSERT( (!activeBlk && !mailbox.load(std::memory_order_relaxed)) || (activeBlk && activeBlk->isFull), ASSERT_TEXT );
126751c0b2f7Stbbdev 
126851c0b2f7Stbbdev     // TODO: the counter should be changed: STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
126951c0b2f7Stbbdev     if (!mailbox.load(std::memory_order_acquire)) // hotpath is empty mailbox
1270*57f524caSIlya Isaev         return nullptr;
127151c0b2f7Stbbdev     else { // mailbox is not empty, take lock and inspect it
127251c0b2f7Stbbdev         MallocMutex::scoped_lock scoped_cs(mailLock);
127351c0b2f7Stbbdev         block = mailbox.load(std::memory_order_relaxed);
127451c0b2f7Stbbdev         if( block ) {
127551c0b2f7Stbbdev             MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
1276478de5b1Stbbdev             MALLOC_ASSERT( !isNotForUse(block->nextPrivatizable.load(std::memory_order_relaxed)), ASSERT_TEXT );
1277478de5b1Stbbdev             mailbox.store(block->nextPrivatizable.load(std::memory_order_relaxed), std::memory_order_relaxed);
1278478de5b1Stbbdev             block->nextPrivatizable.store((Block*)this, std::memory_order_relaxed);
127951c0b2f7Stbbdev         }
128051c0b2f7Stbbdev     }
128151c0b2f7Stbbdev     if( block ) {
128251c0b2f7Stbbdev         MALLOC_ASSERT( isSolidPtr(block->publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
128351c0b2f7Stbbdev         block->privatizePublicFreeList();
128451c0b2f7Stbbdev         block->adjustPositionInBin(this);
128551c0b2f7Stbbdev     }
128651c0b2f7Stbbdev     return block;
128751c0b2f7Stbbdev }
128851c0b2f7Stbbdev 
128951c0b2f7Stbbdev void Bin::addPublicFreeListBlock(Block* block)
129051c0b2f7Stbbdev {
129151c0b2f7Stbbdev     MallocMutex::scoped_lock scoped_cs(mailLock);
1292478de5b1Stbbdev     block->nextPrivatizable.store(mailbox.load(std::memory_order_relaxed), std::memory_order_relaxed);
129351c0b2f7Stbbdev     mailbox.store(block, std::memory_order_relaxed);
129451c0b2f7Stbbdev }
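
// For an owned block, nextPrivatizable tracks its mailbox status: it points to
// the owning Bin while the block is not in the mailbox, to the next mailbox
// entry while it is, and holds the UNUSABLE marker once the block is orphaned.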
129551c0b2f7Stbbdev 
129651c0b2f7Stbbdev // Process publicly freed objects in all blocks and return empty blocks
129751c0b2f7Stbbdev // to the backend in order to reduce overall footprint.
129851c0b2f7Stbbdev bool Bin::cleanPublicFreeLists()
129951c0b2f7Stbbdev {
130051c0b2f7Stbbdev     Block* block;
130151c0b2f7Stbbdev     if (!mailbox.load(std::memory_order_acquire))
130251c0b2f7Stbbdev         return false;
130351c0b2f7Stbbdev     else {
130451c0b2f7Stbbdev         // Grab all the blocks in the mailbox
130551c0b2f7Stbbdev         MallocMutex::scoped_lock scoped_cs(mailLock);
130651c0b2f7Stbbdev         block = mailbox.load(std::memory_order_relaxed);
1307*57f524caSIlya Isaev         mailbox.store(nullptr, std::memory_order_relaxed);
130851c0b2f7Stbbdev     }
130951c0b2f7Stbbdev     bool released = false;
131051c0b2f7Stbbdev     while (block) {
131151c0b2f7Stbbdev         MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
1312478de5b1Stbbdev         Block* tmp = block->nextPrivatizable.load(std::memory_order_relaxed);
1313478de5b1Stbbdev         block->nextPrivatizable.store((Block*)this, std::memory_order_relaxed);
131451c0b2f7Stbbdev         block->privatizePublicFreeList();
131551c0b2f7Stbbdev         if (block->empty()) {
131651c0b2f7Stbbdev             processEmptyBlock(block, /*poolTheBlock=*/false);
131751c0b2f7Stbbdev             released = true;
131851c0b2f7Stbbdev         } else
131951c0b2f7Stbbdev             block->adjustPositionInBin(this);
132051c0b2f7Stbbdev         block = tmp;
132151c0b2f7Stbbdev     }
132251c0b2f7Stbbdev     return released;
132351c0b2f7Stbbdev }
132451c0b2f7Stbbdev 
132551c0b2f7Stbbdev bool Block::adjustFullness()
132651c0b2f7Stbbdev {
132751c0b2f7Stbbdev     if (bumpPtr) {
132851c0b2f7Stbbdev         /* If we are still using a bump ptr for this block it is empty enough to use. */
132951c0b2f7Stbbdev         STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
133051c0b2f7Stbbdev         isFull = false;
133151c0b2f7Stbbdev     } else {
133251c0b2f7Stbbdev         const float threshold = (slabSize - sizeof(Block)) * (1 - emptyEnoughRatio);
133351c0b2f7Stbbdev         /* allocatedCount shows how many objects in the block are in use; however it still counts
133451c0b2f7Stbbdev          * objects freed by other threads; so a prior call to privatizePublicFreeList() is recommended */
133551c0b2f7Stbbdev         isFull = allocatedCount*objectSize > threshold;
133651c0b2f7Stbbdev #if COLLECT_STATISTICS
133751c0b2f7Stbbdev         if (isFull)
133851c0b2f7Stbbdev             STAT_increment(getThreadId(), getIndex(objectSize), examineNotEmpty);
133951c0b2f7Stbbdev         else
134051c0b2f7Stbbdev             STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
134151c0b2f7Stbbdev #endif
134251c0b2f7Stbbdev     }
134351c0b2f7Stbbdev     return isFull;
134451c0b2f7Stbbdev }
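
// Illustration only (the actual constants live elsewhere): assuming a 16K slab,
// sizeof(Block) == 128 and emptyEnoughRatio == 1/4, the threshold is
// (16384-128)*0.75 == 12192 bytes, so a block of 64-byte objects stops counting
// as full once at most 190 of its objects are still allocated.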
134551c0b2f7Stbbdev 
134651c0b2f7Stbbdev // This method resides in class Block, and not in class Bin, in order to avoid
134751c0b2f7Stbbdev // calling getAllocationBin on a reasonably hot path in Block::freeOwnObject
1348*57f524caSIlya Isaev void Block::adjustPositionInBin(Bin* bin/*=nullptr*/)
134951c0b2f7Stbbdev {
135051c0b2f7Stbbdev     // If the block was full, but became empty enough to use,
135151c0b2f7Stbbdev     // move it to the front of the list
135251c0b2f7Stbbdev     if (isFull && !adjustFullness()) {
135351c0b2f7Stbbdev         if (!bin)
1354478de5b1Stbbdev             bin = tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize);
135551c0b2f7Stbbdev         bin->moveBlockToFront(this);
135651c0b2f7Stbbdev     }
135751c0b2f7Stbbdev }
135851c0b2f7Stbbdev 
135951c0b2f7Stbbdev /* Restore the bump pointer for an empty block that is planned to be used */
136051c0b2f7Stbbdev void Block::restoreBumpPtr()
136151c0b2f7Stbbdev {
136251c0b2f7Stbbdev     MALLOC_ASSERT( allocatedCount == 0, ASSERT_TEXT );
136351c0b2f7Stbbdev     MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
136451c0b2f7Stbbdev     STAT_increment(getThreadId(), getIndex(objectSize), freeRestoreBumpPtr);
136551c0b2f7Stbbdev     bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);
1366*57f524caSIlya Isaev     freeList = nullptr;
136751c0b2f7Stbbdev     isFull = false;
136851c0b2f7Stbbdev }
136951c0b2f7Stbbdev 
137051c0b2f7Stbbdev void Block::freeOwnObject(void *object)
137151c0b2f7Stbbdev {
1372478de5b1Stbbdev     tlsPtr.load(std::memory_order_relaxed)->markUsed();
137351c0b2f7Stbbdev     allocatedCount--;
137451c0b2f7Stbbdev     MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
137551c0b2f7Stbbdev #if COLLECT_STATISTICS
137651c0b2f7Stbbdev     // Note that getAllocationBin is not called on the hottest path with statistics off.
1377478de5b1Stbbdev     if (tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize)->getActiveBlock() != this)
137851c0b2f7Stbbdev         STAT_increment(getThreadId(), getIndex(objectSize), freeToInactiveBlock);
137951c0b2f7Stbbdev     else
138051c0b2f7Stbbdev         STAT_increment(getThreadId(), getIndex(objectSize), freeToActiveBlock);
138151c0b2f7Stbbdev #endif
138251c0b2f7Stbbdev     if (empty()) {
138351c0b2f7Stbbdev         // If the last object of a slab is freed, the slab cannot be marked full
138451c0b2f7Stbbdev         MALLOC_ASSERT(!isFull, ASSERT_TEXT);
1385478de5b1Stbbdev         tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize)->processEmptyBlock(this, /*poolTheBlock=*/true);
138651c0b2f7Stbbdev     } else { // hot path
138751c0b2f7Stbbdev         FreeObject *objectToFree = findObjectToFree(object);
138851c0b2f7Stbbdev         objectToFree->next = freeList;
138951c0b2f7Stbbdev         freeList = objectToFree;
139051c0b2f7Stbbdev         adjustPositionInBin();
139151c0b2f7Stbbdev     }
139251c0b2f7Stbbdev }
139351c0b2f7Stbbdev 
139451c0b2f7Stbbdev void Block::freePublicObject (FreeObject *objectToFree)
139551c0b2f7Stbbdev {
1396478de5b1Stbbdev     FreeObject* localPublicFreeList{};
139751c0b2f7Stbbdev 
139851c0b2f7Stbbdev     MALLOC_ITT_SYNC_RELEASING(&publicFreeList);
139951c0b2f7Stbbdev #if FREELIST_NONBLOCKING
140051c0b2f7Stbbdev     // TBB_REVAMP_TODO: make it non atomic in non-blocking scenario
1401478de5b1Stbbdev     localPublicFreeList = publicFreeList.load(std::memory_order_relaxed);
140251c0b2f7Stbbdev     do {
1403478de5b1Stbbdev         objectToFree->next = localPublicFreeList;
140451c0b2f7Stbbdev         // no backoff necessary because we are trying to make a change, not waiting for one
1405478de5b1Stbbdev     } while( !publicFreeList.compare_exchange_strong(localPublicFreeList, objectToFree) );
140651c0b2f7Stbbdev #else
140751c0b2f7Stbbdev     STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
140851c0b2f7Stbbdev     {
140951c0b2f7Stbbdev         MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
141051c0b2f7Stbbdev         localPublicFreeList = objectToFree->next = publicFreeList;
141151c0b2f7Stbbdev         publicFreeList = objectToFree;
141251c0b2f7Stbbdev     }
141351c0b2f7Stbbdev #endif
141451c0b2f7Stbbdev 
1415*57f524caSIlya Isaev     if( localPublicFreeList==nullptr ) {
141651c0b2f7Stbbdev         // if the block is abandoned, its nextPrivatizable pointer should be UNUSABLE
141751c0b2f7Stbbdev         // otherwise, it should point to the bin the block belongs to.
141851c0b2f7Stbbdev         // reading nextPrivatizable is thread-safe below, because:
1419*57f524caSIlya Isaev         // 1) the executing thread atomically got publicFreeList==nullptr and changed it to non-nullptr;
1420*57f524caSIlya Isaev         // 2) only the owning thread can change it back to nullptr;
142151c0b2f7Stbbdev         // 3) but that cannot be done until the block is put into the mailbox.
142251c0b2f7Stbbdev         // So the executing thread is now the only one that can change nextPrivatizable.
1423478de5b1Stbbdev         Block* next = nextPrivatizable.load(std::memory_order_acquire);
1424478de5b1Stbbdev         if( !isNotForUse(next) ) {
1425478de5b1Stbbdev             MALLOC_ASSERT( next!=nullptr, ASSERT_TEXT );
1426478de5b1Stbbdev             Bin* theBin = (Bin*) next;
142751c0b2f7Stbbdev #if MALLOC_DEBUG && TBB_REVAMP_TODO
142851c0b2f7Stbbdev             // FIXME: The thread that returns the block is not the block's owner.
142951c0b2f7Stbbdev             // The below assertion compares 'theBin' against the caller's local bin, thus, it always fails.
143051c0b2f7Stbbdev             // Need to find a way to get the correct remote bin for comparison.
143151c0b2f7Stbbdev             { // check that nextPrivatizable points to the bin the block belongs to
143251c0b2f7Stbbdev                 uint32_t index = getIndex( objectSize );
143351c0b2f7Stbbdev                 TLSData* tls = getThreadMallocTLS();
143451c0b2f7Stbbdev                 MALLOC_ASSERT( theBin==tls->bin+index, ASSERT_TEXT );
143551c0b2f7Stbbdev             }
143651c0b2f7Stbbdev #endif // MALLOC_DEBUG
143751c0b2f7Stbbdev             theBin->addPublicFreeListBlock(this);
143851c0b2f7Stbbdev         }
143951c0b2f7Stbbdev     }
144051c0b2f7Stbbdev     STAT_increment(getThreadId(), ThreadCommonCounters, freeToOtherThread);
1441478de5b1Stbbdev     STAT_increment(ownerTid.load(std::memory_order_relaxed), getIndex(objectSize), freeByOtherThread);
144251c0b2f7Stbbdev }
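
// In short: a foreign thread pushes the freed object onto publicFreeList with a
// CAS loop (a lock-free LIFO push), and only the push that finds the list empty
// (localPublicFreeList == nullptr) also registers the block in the owner's
// mailbox, so the owner is notified at most once per privatization cycle.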
144351c0b2f7Stbbdev 
144451c0b2f7Stbbdev // Make objects freed by other threads available for use again
144551c0b2f7Stbbdev void Block::privatizePublicFreeList( bool reset )
144651c0b2f7Stbbdev {
144751c0b2f7Stbbdev     FreeObject *localPublicFreeList;
144851c0b2f7Stbbdev     // If reset is false, publicFreeList should not be zeroed but set to UNUSABLE
144951c0b2f7Stbbdev     // to properly synchronize with other threads freeing objects to this slab.
145051c0b2f7Stbbdev     const intptr_t endMarker = reset ? 0 : UNUSABLE;
145151c0b2f7Stbbdev 
1452*57f524caSIlya Isaev     // Only the owner thread may reset the pointer to nullptr
145351c0b2f7Stbbdev     MALLOC_ASSERT( isOwnedByCurrentThread() || !reset, ASSERT_TEXT );
145451c0b2f7Stbbdev #if FREELIST_NONBLOCKING
145551c0b2f7Stbbdev     localPublicFreeList = publicFreeList.exchange((FreeObject*)endMarker);
145651c0b2f7Stbbdev #else
145751c0b2f7Stbbdev     STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
145851c0b2f7Stbbdev     {
145951c0b2f7Stbbdev         MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
146051c0b2f7Stbbdev         localPublicFreeList = publicFreeList;
146151c0b2f7Stbbdev         publicFreeList = endMarker;
146251c0b2f7Stbbdev     }
146351c0b2f7Stbbdev #endif
146451c0b2f7Stbbdev     MALLOC_ITT_SYNC_ACQUIRED(&publicFreeList);
146551c0b2f7Stbbdev     MALLOC_ASSERT( !(reset && isNotForUse(publicFreeList)), ASSERT_TEXT );
146651c0b2f7Stbbdev 
1467*57f524caSIlya Isaev     // publicFreeList must have been UNUSABLE or valid, but not nullptr
1468*57f524caSIlya Isaev     MALLOC_ASSERT( localPublicFreeList!=nullptr, ASSERT_TEXT );
146951c0b2f7Stbbdev     if( isSolidPtr(localPublicFreeList) ) {
147051c0b2f7Stbbdev         MALLOC_ASSERT( allocatedCount <= (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
147151c0b2f7Stbbdev         /* other threads did not change the counter freeing our blocks */
147251c0b2f7Stbbdev         allocatedCount--;
147351c0b2f7Stbbdev         FreeObject *temp = localPublicFreeList;
1474*57f524caSIlya Isaev         while( isSolidPtr(temp->next) ){ // the list will end with either nullptr or UNUSABLE
147551c0b2f7Stbbdev             temp = temp->next;
147651c0b2f7Stbbdev             allocatedCount--;
147751c0b2f7Stbbdev             MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
147851c0b2f7Stbbdev         }
147951c0b2f7Stbbdev         /* merge with local freeList */
148051c0b2f7Stbbdev         temp->next = freeList;
148151c0b2f7Stbbdev         freeList = localPublicFreeList;
148251c0b2f7Stbbdev         STAT_increment(getThreadId(), getIndex(objectSize), allocPrivatized);
148351c0b2f7Stbbdev     }
148451c0b2f7Stbbdev }
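
// publicFreeList thus cycles through three states: nullptr (owned, nothing
// freed remotely), a solid pointer to a list of remotely freed objects, and the
// UNUSABLE marker (block abandoned or being reset); isSolidPtr() distinguishes
// a real list from the two marker values.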
148551c0b2f7Stbbdev 
148651c0b2f7Stbbdev void Block::privatizeOrphaned(TLSData *tls, unsigned index)
148751c0b2f7Stbbdev {
148851c0b2f7Stbbdev     Bin* bin = tls->bin + index;
148951c0b2f7Stbbdev     STAT_increment(getThreadId(), index, allocBlockPublic);
1490*57f524caSIlya Isaev     next = nullptr;
1491*57f524caSIlya Isaev     previous = nullptr;
1492*57f524caSIlya Isaev     MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) != nullptr, ASSERT_TEXT );
149351c0b2f7Stbbdev     /* There is no race here since no other thread owns this block */
149451c0b2f7Stbbdev     markOwned(tls);
149551c0b2f7Stbbdev     // It is safe to change nextPrivatizable, as publicFreeList is not null
1496478de5b1Stbbdev     MALLOC_ASSERT( isNotForUse(nextPrivatizable.load(std::memory_order_relaxed)), ASSERT_TEXT );
1497478de5b1Stbbdev     nextPrivatizable.store((Block*)bin, std::memory_order_relaxed);
149851c0b2f7Stbbdev     // the next call is required to change publicFreeList to nullptr
149951c0b2f7Stbbdev     privatizePublicFreeList();
150051c0b2f7Stbbdev     if( empty() ) {
150151c0b2f7Stbbdev         restoreBumpPtr();
150251c0b2f7Stbbdev     } else {
150351c0b2f7Stbbdev         adjustFullness(); // check the block fullness and set isFull
150451c0b2f7Stbbdev     }
150551c0b2f7Stbbdev     MALLOC_ASSERT( !isNotForUse(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
150651c0b2f7Stbbdev }
150751c0b2f7Stbbdev 
150851c0b2f7Stbbdev 
150951c0b2f7Stbbdev bool Block::readyToShare()
151051c0b2f7Stbbdev {
1511*57f524caSIlya Isaev     FreeObject* oldVal = nullptr;
151251c0b2f7Stbbdev #if FREELIST_NONBLOCKING
151351c0b2f7Stbbdev     publicFreeList.compare_exchange_strong(oldVal, (FreeObject*)UNUSABLE);
151451c0b2f7Stbbdev #else
151551c0b2f7Stbbdev     STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
151651c0b2f7Stbbdev     {
151751c0b2f7Stbbdev         MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
1518*57f524caSIlya Isaev         if ( (oldVal=publicFreeList)==nullptr )
151951c0b2f7Stbbdev             (intptr_t&)(publicFreeList) = UNUSABLE;
152051c0b2f7Stbbdev     }
152151c0b2f7Stbbdev #endif
1522*57f524caSIlya Isaev     return oldVal==nullptr;
152351c0b2f7Stbbdev }
152451c0b2f7Stbbdev 
152551c0b2f7Stbbdev void Block::shareOrphaned(intptr_t binTag, unsigned index)
152651c0b2f7Stbbdev {
152751c0b2f7Stbbdev     MALLOC_ASSERT( binTag, ASSERT_TEXT );
152851c0b2f7Stbbdev     // unreferenced formal parameter warning
152951c0b2f7Stbbdev     tbb::detail::suppress_unused_warning(index);
153051c0b2f7Stbbdev     STAT_increment(getThreadId(), index, freeBlockPublic);
153151c0b2f7Stbbdev     markOrphaned();
1532478de5b1Stbbdev     if ((intptr_t)nextPrivatizable.load(std::memory_order_relaxed) == binTag) {
153351c0b2f7Stbbdev         // First check passed: the block is not in mailbox yet.
153451c0b2f7Stbbdev         // Need to set publicFreeList to non-zero, so that other threads
153551c0b2f7Stbbdev         // will not change nextPrivatizable, and it can then be safely overwritten.
153651c0b2f7Stbbdev         if ( !readyToShare() ) {
153751c0b2f7Stbbdev             // another thread freed an object; we need to wait until it finishes.
153851c0b2f7Stbbdev             // There is no need for exponential backoff, as the wait here is not for a lock;
153951c0b2f7Stbbdev             // but we need to yield, so that the thread we wait for has a chance to run.
154051c0b2f7Stbbdev             // TODO: add a pause to also be friendly to hyperthreads
154151c0b2f7Stbbdev             int count = 256;
1542478de5b1Stbbdev             while ((intptr_t)nextPrivatizable.load(std::memory_order_relaxed) == binTag) {
154351c0b2f7Stbbdev                 if (--count==0) {
154451c0b2f7Stbbdev                     do_yield();
154551c0b2f7Stbbdev                     count = 256;
154651c0b2f7Stbbdev                 }
154751c0b2f7Stbbdev             }
154851c0b2f7Stbbdev         }
154951c0b2f7Stbbdev     }
1550*57f524caSIlya Isaev     MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) !=nullptr, ASSERT_TEXT );
155151c0b2f7Stbbdev     // now it is safe to change our data
1552*57f524caSIlya Isaev     previous = nullptr;
155351c0b2f7Stbbdev     // it is the caller's responsibility to ensure that the list of blocks
155451c0b2f7Stbbdev     // formed by nextPrivatizable pointers is kept consistent if required;
155551c0b2f7Stbbdev     // if this is only called from thread shutdown code, it does not matter.
1556478de5b1Stbbdev     nextPrivatizable.store((Block*)UNUSABLE, std::memory_order_relaxed);
155751c0b2f7Stbbdev }
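
// readyToShare() and the spin above form a small handshake: the orphaning
// thread first makes publicFreeList non-null so that no new remote free can
// take the "first push" path, then waits until any in-flight first push has
// finished parking the block in the mailbox before nextPrivatizable is
// overwritten.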
155851c0b2f7Stbbdev 
155951c0b2f7Stbbdev void Block::cleanBlockHeader()
156051c0b2f7Stbbdev {
1561478de5b1Stbbdev     next = nullptr;
1562478de5b1Stbbdev     previous = nullptr;
1563478de5b1Stbbdev     freeList = nullptr;
156451c0b2f7Stbbdev     allocatedCount = 0;
156551c0b2f7Stbbdev     isFull = false;
1566478de5b1Stbbdev     tlsPtr.store(nullptr, std::memory_order_relaxed);
156751c0b2f7Stbbdev 
1568478de5b1Stbbdev     publicFreeList.store(nullptr, std::memory_order_relaxed);
156951c0b2f7Stbbdev }
157051c0b2f7Stbbdev 
157151c0b2f7Stbbdev void Block::initEmptyBlock(TLSData *tls, size_t size)
157251c0b2f7Stbbdev {
157351c0b2f7Stbbdev     // Having getIndex and getObjectSize called next to each other
157451c0b2f7Stbbdev     // allows better compiler optimization as they basically share the code.
157551c0b2f7Stbbdev     unsigned int index = getIndex(size);
157651c0b2f7Stbbdev     unsigned int objSz = getObjectSize(size);
157751c0b2f7Stbbdev 
157851c0b2f7Stbbdev     cleanBlockHeader();
157951c0b2f7Stbbdev     objectSize = objSz;
158051c0b2f7Stbbdev     markOwned(tls);
158151c0b2f7Stbbdev     // bump pointer should be prepared for the first allocation - thus move it down by objectSize
158251c0b2f7Stbbdev     bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);
158351c0b2f7Stbbdev 
158451c0b2f7Stbbdev     // each block should have the address where the head of the list of "privatizable" blocks is kept;
1585*57f524caSIlya Isaev     // the only exception is a bootstrap block, which is initialized while TLS is still nullptr
1586478de5b1Stbbdev     nextPrivatizable.store( tls? (Block*)(tls->bin + index) : nullptr, std::memory_order_relaxed);
158751c0b2f7Stbbdev     TRACEF(( "[ScalableMalloc trace] Empty block %p is initialized, owner is %ld, objectSize is %d, bumpPtr is %p\n",
1588478de5b1Stbbdev              this, tlsPtr.load(std::memory_order_relaxed) ? getThreadId() : -1, objectSize, bumpPtr ));
158951c0b2f7Stbbdev }
159051c0b2f7Stbbdev 
159151c0b2f7Stbbdev Block *OrphanedBlocks::get(TLSData *tls, unsigned int size)
159251c0b2f7Stbbdev {
159351c0b2f7Stbbdev     // TODO: try to use index from getAllocationBin
159451c0b2f7Stbbdev     unsigned int index = getIndex(size);
159551c0b2f7Stbbdev     Block *block = bins[index].pop();
159651c0b2f7Stbbdev     if (block) {
159751c0b2f7Stbbdev         MALLOC_ITT_SYNC_ACQUIRED(bins+index);
159851c0b2f7Stbbdev         block->privatizeOrphaned(tls, index);
159951c0b2f7Stbbdev     }
160051c0b2f7Stbbdev     return block;
160151c0b2f7Stbbdev }
160251c0b2f7Stbbdev 
160351c0b2f7Stbbdev void OrphanedBlocks::put(intptr_t binTag, Block *block)
160451c0b2f7Stbbdev {
160551c0b2f7Stbbdev     unsigned int index = getIndex(block->getSize());
160651c0b2f7Stbbdev     block->shareOrphaned(binTag, index);
160751c0b2f7Stbbdev     MALLOC_ITT_SYNC_RELEASING(bins+index);
160851c0b2f7Stbbdev     bins[index].push(block);
160951c0b2f7Stbbdev }
161051c0b2f7Stbbdev 
161151c0b2f7Stbbdev void OrphanedBlocks::reset()
161251c0b2f7Stbbdev {
161351c0b2f7Stbbdev     for (uint32_t i=0; i<numBlockBinLimit; i++)
161451c0b2f7Stbbdev         new (bins+i) LifoList();
161551c0b2f7Stbbdev }
161651c0b2f7Stbbdev 
161751c0b2f7Stbbdev bool OrphanedBlocks::cleanup(Backend* backend)
161851c0b2f7Stbbdev {
161951c0b2f7Stbbdev     bool released = false;
162051c0b2f7Stbbdev     for (uint32_t i=0; i<numBlockBinLimit; i++) {
162151c0b2f7Stbbdev         Block* block = bins[i].grab();
162251c0b2f7Stbbdev         MALLOC_ITT_SYNC_ACQUIRED(bins+i);
162351c0b2f7Stbbdev         while (block) {
162451c0b2f7Stbbdev             Block* next = block->next;
1625*57f524caSIlya Isaev             block->privatizePublicFreeList( /*reset=*/false ); // do not set publicFreeList to nullptr
162651c0b2f7Stbbdev             if (block->empty()) {
162751c0b2f7Stbbdev                 block->reset();
162851c0b2f7Stbbdev                 // slab blocks in user's pools do not have valid backRefIdx
162951c0b2f7Stbbdev                 if (!backend->inUserPool())
163051c0b2f7Stbbdev                     removeBackRef(*(block->getBackRefIdx()));
163151c0b2f7Stbbdev                 backend->putSlabBlock(block);
163251c0b2f7Stbbdev                 released = true;
163351c0b2f7Stbbdev             } else {
163451c0b2f7Stbbdev                 MALLOC_ITT_SYNC_RELEASING(bins+i);
163551c0b2f7Stbbdev                 bins[i].push(block);
163651c0b2f7Stbbdev             }
163751c0b2f7Stbbdev             block = next;
163851c0b2f7Stbbdev         }
163951c0b2f7Stbbdev     }
164051c0b2f7Stbbdev     return released;
164151c0b2f7Stbbdev }
164251c0b2f7Stbbdev 
164351c0b2f7Stbbdev FreeBlockPool::ResOfGet FreeBlockPool::getBlock()
164451c0b2f7Stbbdev {
1645*57f524caSIlya Isaev     Block *b = head.exchange(nullptr);
164651c0b2f7Stbbdev 
164751c0b2f7Stbbdev     if (b) {
164851c0b2f7Stbbdev         size--;
164951c0b2f7Stbbdev         Block *newHead = b->next;
165051c0b2f7Stbbdev         lastAccessMiss = false;
165151c0b2f7Stbbdev         head.store(newHead, std::memory_order_release);
165251c0b2f7Stbbdev     } else {
165351c0b2f7Stbbdev         lastAccessMiss = true;
165451c0b2f7Stbbdev     }
165551c0b2f7Stbbdev     return ResOfGet(b, lastAccessMiss);
165651c0b2f7Stbbdev }
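
// Exchanging head with nullptr temporarily steals the whole list: whoever
// observes a non-null head owns every block on it until a new head is
// published. This is what lets getBlock(), returnBlock(), and externalCleanup()
// run concurrently without a lock.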
165751c0b2f7Stbbdev 
165851c0b2f7Stbbdev void FreeBlockPool::returnBlock(Block *block)
165951c0b2f7Stbbdev {
166051c0b2f7Stbbdev     MALLOC_ASSERT( size <= POOL_HIGH_MARK, ASSERT_TEXT );
1661*57f524caSIlya Isaev     Block *localHead = head.exchange(nullptr);
166251c0b2f7Stbbdev 
166351c0b2f7Stbbdev     if (!localHead) {
166451c0b2f7Stbbdev         size = 0; // head was stolen by externalCleanup, correct size accordingly
166551c0b2f7Stbbdev     } else if (size == POOL_HIGH_MARK) {
166651c0b2f7Stbbdev         // release cold blocks and add hot one,
166751c0b2f7Stbbdev         // so keep POOL_LOW_MARK-1 blocks and add new block to head
166851c0b2f7Stbbdev         Block *headToFree = localHead, *helper;
166951c0b2f7Stbbdev         for (int i=0; i<POOL_LOW_MARK-2; i++)
167051c0b2f7Stbbdev             headToFree = headToFree->next;
167151c0b2f7Stbbdev         Block *last = headToFree;
167251c0b2f7Stbbdev         headToFree = headToFree->next;
1673*57f524caSIlya Isaev         last->next = nullptr;
167451c0b2f7Stbbdev         size = POOL_LOW_MARK-1;
167551c0b2f7Stbbdev         for (Block *currBl = headToFree; currBl; currBl = helper) {
167651c0b2f7Stbbdev             helper = currBl->next;
167751c0b2f7Stbbdev             // slab blocks in user's pools do not have valid backRefIdx
167851c0b2f7Stbbdev             if (!backend->inUserPool())
167951c0b2f7Stbbdev                 removeBackRef(currBl->backRefIdx);
168051c0b2f7Stbbdev             backend->putSlabBlock(currBl);
168151c0b2f7Stbbdev         }
168251c0b2f7Stbbdev     }
168351c0b2f7Stbbdev     size++;
168451c0b2f7Stbbdev     block->next = localHead;
168551c0b2f7Stbbdev     head.store(block, std::memory_order_release);
168651c0b2f7Stbbdev }
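
// Illustration (mark values assumed here): with POOL_HIGH_MARK == 32 and
// POOL_LOW_MARK == 8, returning a block to a pool that already holds 32 keeps
// the 7 hottest cached blocks plus the newly returned one and hands the colder
// tail back to the backend.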
168751c0b2f7Stbbdev 
168851c0b2f7Stbbdev bool FreeBlockPool::externalCleanup()
168951c0b2f7Stbbdev {
169051c0b2f7Stbbdev     Block *helper;
169151c0b2f7Stbbdev     bool released = false;
169251c0b2f7Stbbdev 
1693*57f524caSIlya Isaev     for (Block *currBl=head.exchange(nullptr); currBl; currBl=helper) {
169451c0b2f7Stbbdev         helper = currBl->next;
169551c0b2f7Stbbdev         // slab blocks in user's pools do not have valid backRefIdx
169651c0b2f7Stbbdev         if (!backend->inUserPool())
169751c0b2f7Stbbdev             removeBackRef(currBl->backRefIdx);
169851c0b2f7Stbbdev         backend->putSlabBlock(currBl);
169951c0b2f7Stbbdev         released = true;
170051c0b2f7Stbbdev     }
170151c0b2f7Stbbdev     return released;
170251c0b2f7Stbbdev }
170351c0b2f7Stbbdev 
170451c0b2f7Stbbdev /* Prepare the block for returning to FreeBlockPool */
170551c0b2f7Stbbdev void Block::reset()
170651c0b2f7Stbbdev {
170751c0b2f7Stbbdev     // it is caller's responsibility to ensure no data is lost before calling this
170851c0b2f7Stbbdev     MALLOC_ASSERT( allocatedCount==0, ASSERT_TEXT );
170951c0b2f7Stbbdev     MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
171051c0b2f7Stbbdev     if (!isStartupAllocObject())
171151c0b2f7Stbbdev         STAT_increment(getThreadId(), getIndex(objectSize), freeBlockBack);
171251c0b2f7Stbbdev 
171351c0b2f7Stbbdev     cleanBlockHeader();
171451c0b2f7Stbbdev 
1715478de5b1Stbbdev     nextPrivatizable.store(nullptr, std::memory_order_relaxed);
171651c0b2f7Stbbdev 
171751c0b2f7Stbbdev     objectSize = 0;
171851c0b2f7Stbbdev     // for an empty block, bump pointer should point right after the end of the block
171951c0b2f7Stbbdev     bumpPtr = (FreeObject *)((uintptr_t)this + slabSize);
172051c0b2f7Stbbdev }
172151c0b2f7Stbbdev 
172251c0b2f7Stbbdev inline void Bin::setActiveBlock (Block *block)
172351c0b2f7Stbbdev {
172451c0b2f7Stbbdev //    MALLOC_ASSERT( bin, ASSERT_TEXT );
172551c0b2f7Stbbdev     MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
172651c0b2f7Stbbdev     // it is the caller's responsibility to keep the bin consistent (i.e. to ensure this block is in the bin list)
172751c0b2f7Stbbdev     activeBlk = block;
172851c0b2f7Stbbdev }
172951c0b2f7Stbbdev 
173051c0b2f7Stbbdev inline Block* Bin::setPreviousBlockActive()
173151c0b2f7Stbbdev {
173251c0b2f7Stbbdev     MALLOC_ASSERT( activeBlk, ASSERT_TEXT );
173351c0b2f7Stbbdev     Block* temp = activeBlk->previous;
173451c0b2f7Stbbdev     if( temp ) {
173551c0b2f7Stbbdev         MALLOC_ASSERT( !(temp->isFull), ASSERT_TEXT );
173651c0b2f7Stbbdev         activeBlk = temp;
173751c0b2f7Stbbdev     }
173851c0b2f7Stbbdev     return temp;
173951c0b2f7Stbbdev }
174051c0b2f7Stbbdev 
174151c0b2f7Stbbdev inline bool Block::isOwnedByCurrentThread() const {
1742478de5b1Stbbdev     return tlsPtr.load(std::memory_order_relaxed) && ownerTid.isCurrentThreadId();
174351c0b2f7Stbbdev }
174451c0b2f7Stbbdev 
174551c0b2f7Stbbdev FreeObject *Block::findObjectToFree(const void *object) const
174651c0b2f7Stbbdev {
174751c0b2f7Stbbdev     FreeObject *objectToFree;
174851c0b2f7Stbbdev     // Due to aligned allocations, a pointer passed to scalable_free
174951c0b2f7Stbbdev     // might differ from the address of internally allocated object.
175051c0b2f7Stbbdev     // Small objects however should always be fine.
175151c0b2f7Stbbdev     if (objectSize <= maxSegregatedObjectSize)
175251c0b2f7Stbbdev         objectToFree = (FreeObject*)object;
175351c0b2f7Stbbdev     // "Fitting size" allocations are suspicious if aligned higher than naturally
175451c0b2f7Stbbdev     else {
175551c0b2f7Stbbdev         if ( ! isAligned(object,2*fittingAlignment) )
175651c0b2f7Stbbdev             // TODO: the above check is questionable - it gives false negatives in ~50% of cases,
175751c0b2f7Stbbdev             //       so it might even be slower on average than unconditional use of findAllocatedObject.
175851c0b2f7Stbbdev             // here it should be a "real" object
175951c0b2f7Stbbdev             objectToFree = (FreeObject*)object;
176051c0b2f7Stbbdev         else
176151c0b2f7Stbbdev             // here object can be an aligned address, so applying additional checks
176251c0b2f7Stbbdev             objectToFree = findAllocatedObject(object);
176351c0b2f7Stbbdev         MALLOC_ASSERT( isAligned(objectToFree,fittingAlignment), ASSERT_TEXT );
176451c0b2f7Stbbdev     }
176551c0b2f7Stbbdev     MALLOC_ASSERT( isProperlyPlaced(objectToFree), ASSERT_TEXT );
176651c0b2f7Stbbdev 
176751c0b2f7Stbbdev     return objectToFree;
176851c0b2f7Stbbdev }
176951c0b2f7Stbbdev 
177051c0b2f7Stbbdev void TLSData::release()
177151c0b2f7Stbbdev {
177251c0b2f7Stbbdev     memPool->extMemPool.allLocalCaches.unregisterThread(this);
177351c0b2f7Stbbdev     externalCleanup(/*cleanOnlyUnused=*/false, /*cleanBins=*/false);
177451c0b2f7Stbbdev 
177551c0b2f7Stbbdev     for (unsigned index = 0; index < numBlockBins; index++) {
177651c0b2f7Stbbdev         Block *activeBlk = bin[index].getActiveBlock();
177751c0b2f7Stbbdev         if (!activeBlk)
177851c0b2f7Stbbdev             continue;
177951c0b2f7Stbbdev         Block *threadlessBlock = activeBlk->previous;
1780478de5b1Stbbdev         bool syncOnMailbox = false;
178151c0b2f7Stbbdev         while (threadlessBlock) {
178251c0b2f7Stbbdev             Block *threadBlock = threadlessBlock->previous;
178351c0b2f7Stbbdev             if (threadlessBlock->empty()) {
178451c0b2f7Stbbdev                 /* the thread is being destroyed, so do not use its block pool */
178551c0b2f7Stbbdev                 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false);
178651c0b2f7Stbbdev             } else {
178751c0b2f7Stbbdev                 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock);
1788478de5b1Stbbdev                 syncOnMailbox = true;
178951c0b2f7Stbbdev             }
179051c0b2f7Stbbdev             threadlessBlock = threadBlock;
179151c0b2f7Stbbdev         }
179251c0b2f7Stbbdev         threadlessBlock = activeBlk;
179351c0b2f7Stbbdev         while (threadlessBlock) {
179451c0b2f7Stbbdev             Block *threadBlock = threadlessBlock->next;
179551c0b2f7Stbbdev             if (threadlessBlock->empty()) {
179651c0b2f7Stbbdev                 /* the thread is being destroyed, so do not use its block pool */
179751c0b2f7Stbbdev                 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false);
179851c0b2f7Stbbdev             } else {
179951c0b2f7Stbbdev                 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock);
1800478de5b1Stbbdev                 syncOnMailbox = true;
180151c0b2f7Stbbdev             }
180251c0b2f7Stbbdev             threadlessBlock = threadBlock;
180351c0b2f7Stbbdev         }
180451c0b2f7Stbbdev         bin[index].resetActiveBlock();
1805478de5b1Stbbdev 
1806478de5b1Stbbdev         if (syncOnMailbox) {
1807478de5b1Stbbdev             // Although we synchronized on nextPrivatizable inside a block, we still need to
1808478de5b1Stbbdev             // synchronize on the bin lifetime because the thread releasing an object into the public
1809478de5b1Stbbdev             // free list is touching the bin (mailbox and mailLock)
1810478de5b1Stbbdev             MallocMutex::scoped_lock scoped_cs(bin[index].mailLock);
1811478de5b1Stbbdev         }
181251c0b2f7Stbbdev     }
181351c0b2f7Stbbdev }
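
// On thread shutdown, every block the thread still owns is thus either returned
// to the backend (if empty) or published to orphanedBlocks for later adoption;
// the final mailLock acquisition ensures no remote thread is still touching the
// bin's mailbox when the TLS data is freed.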
181451c0b2f7Stbbdev 
181551c0b2f7Stbbdev 
181651c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION
181751c0b2f7Stbbdev // TODO: Use dedicated heap for this
181851c0b2f7Stbbdev 
181951c0b2f7Stbbdev /*
182051c0b2f7Stbbdev  * It's a special kind of allocation that can be used when malloc is
182151c0b2f7Stbbdev  * not available (either during startup or when malloc was already called and
182251c0b2f7Stbbdev  * we are, say, inside pthread_setspecific's call).
182351c0b2f7Stbbdev  * A block can contain objects of different sizes;
182451c0b2f7Stbbdev  * allocations are performed by moving the bump pointer and increasing the object counter;
182551c0b2f7Stbbdev  * releasing is done via the counter of objects allocated in the block,
182651c0b2f7Stbbdev  * or by moving the bump pointer if the object being released is on the boundary.
182751c0b2f7Stbbdev  * TODO: make the bump pointer grow in the same backward direction as all the others.
182851c0b2f7Stbbdev  */
182951c0b2f7Stbbdev 
183051c0b2f7Stbbdev class StartupBlock : public Block {
183151c0b2f7Stbbdev     size_t availableSize() const {
183251c0b2f7Stbbdev         return slabSize - ((uintptr_t)bumpPtr - (uintptr_t)this);
183351c0b2f7Stbbdev     }
183451c0b2f7Stbbdev     static StartupBlock *getBlock();
183551c0b2f7Stbbdev public:
183651c0b2f7Stbbdev     static FreeObject *allocate(size_t size);
183751c0b2f7Stbbdev     static size_t msize(void *ptr) { return *((size_t*)ptr - 1); }
183851c0b2f7Stbbdev     void free(void *ptr);
183951c0b2f7Stbbdev };
184051c0b2f7Stbbdev 
184151c0b2f7Stbbdev static MallocMutex startupMallocLock;
184251c0b2f7Stbbdev static StartupBlock *firstStartupBlock;
184351c0b2f7Stbbdev 
184451c0b2f7Stbbdev StartupBlock *StartupBlock::getBlock()
184551c0b2f7Stbbdev {
184651c0b2f7Stbbdev     BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/false);
1847*57f524caSIlya Isaev     if (backRefIdx.isInvalid()) return nullptr;
184851c0b2f7Stbbdev 
184951c0b2f7Stbbdev     StartupBlock *block = static_cast<StartupBlock*>(
185051c0b2f7Stbbdev         defaultMemPool->extMemPool.backend.getSlabBlock(1));
1851*57f524caSIlya Isaev     if (!block) return nullptr;
185251c0b2f7Stbbdev 
185351c0b2f7Stbbdev     block->cleanBlockHeader();
185451c0b2f7Stbbdev     setBackRef(backRefIdx, block);
185551c0b2f7Stbbdev     block->backRefIdx = backRefIdx;
185651c0b2f7Stbbdev     // use startupAllocObjSizeMark to mark objects from startup block marker
185751c0b2f7Stbbdev     block->objectSize = startupAllocObjSizeMark;
185851c0b2f7Stbbdev     block->bumpPtr = (FreeObject *)((uintptr_t)block + sizeof(StartupBlock));
185951c0b2f7Stbbdev     return block;
186051c0b2f7Stbbdev }
186151c0b2f7Stbbdev 
186251c0b2f7Stbbdev FreeObject *StartupBlock::allocate(size_t size)
186351c0b2f7Stbbdev {
186451c0b2f7Stbbdev     FreeObject *result;
1865*57f524caSIlya Isaev     StartupBlock *newBlock = nullptr;
186751c0b2f7Stbbdev 
186851c0b2f7Stbbdev     /* Objects must be aligned on their natural boundaries,
186951c0b2f7Stbbdev        and objects bigger than a word on the word boundary. */
187051c0b2f7Stbbdev     size = alignUp(size, sizeof(size_t));
187151c0b2f7Stbbdev     // We need size of an object to implement msize.
187251c0b2f7Stbbdev     size_t reqSize = size + sizeof(size_t);
187351c0b2f7Stbbdev     {
187451c0b2f7Stbbdev         MallocMutex::scoped_lock scoped_cs(startupMallocLock);
187551c0b2f7Stbbdev         // Re-check whether we need a new block (conditions might have changed)
187651c0b2f7Stbbdev         if (!firstStartupBlock || firstStartupBlock->availableSize() < reqSize) {
187751c0b2f7Stbbdev             if (!newBlock) {
187851c0b2f7Stbbdev                 newBlock = StartupBlock::getBlock();
1879*57f524caSIlya Isaev                 if (!newBlock) return nullptr;
188051c0b2f7Stbbdev             }
188151c0b2f7Stbbdev             newBlock->next = (Block*)firstStartupBlock;
188251c0b2f7Stbbdev             if (firstStartupBlock)
188351c0b2f7Stbbdev                 firstStartupBlock->previous = (Block*)newBlock;
188451c0b2f7Stbbdev             firstStartupBlock = newBlock;
1885478de5b1Stbbdev         }
188651c0b2f7Stbbdev         result = firstStartupBlock->bumpPtr;
188751c0b2f7Stbbdev         firstStartupBlock->allocatedCount++;
188851c0b2f7Stbbdev         firstStartupBlock->bumpPtr =
188951c0b2f7Stbbdev             (FreeObject *)((uintptr_t)firstStartupBlock->bumpPtr + reqSize);
189051c0b2f7Stbbdev     }
189151c0b2f7Stbbdev 
189251c0b2f7Stbbdev     // keep object size at the negative offset
189351c0b2f7Stbbdev     *((size_t*)result) = size;
189451c0b2f7Stbbdev     return (FreeObject*)((size_t*)result+1);
189551c0b2f7Stbbdev }
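
/* Resulting layout inside a startup block (illustrative):
     ... | size_t size | user data | size_t size | user data | ...
   msize(ptr) reads the size_t stored immediately below ptr, and free() uses it
   to detect whether the released object sits right at the bump pointer. */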
189651c0b2f7Stbbdev 
189751c0b2f7Stbbdev void StartupBlock::free(void *ptr)
189851c0b2f7Stbbdev {
1899*57f524caSIlya Isaev     Block* blockToRelease = nullptr;
190051c0b2f7Stbbdev     {
190151c0b2f7Stbbdev         MallocMutex::scoped_lock scoped_cs(startupMallocLock);
190251c0b2f7Stbbdev 
190351c0b2f7Stbbdev         MALLOC_ASSERT(firstStartupBlock, ASSERT_TEXT);
190451c0b2f7Stbbdev         MALLOC_ASSERT(startupAllocObjSizeMark==objectSize
190551c0b2f7Stbbdev                       && allocatedCount>0, ASSERT_TEXT);
190651c0b2f7Stbbdev         MALLOC_ASSERT((uintptr_t)ptr>=(uintptr_t)this+sizeof(StartupBlock)
190751c0b2f7Stbbdev                       && (uintptr_t)ptr+StartupBlock::msize(ptr)<=(uintptr_t)this+slabSize,
190851c0b2f7Stbbdev                       ASSERT_TEXT);
190951c0b2f7Stbbdev         if (0 == --allocatedCount) {
191051c0b2f7Stbbdev             if (this == firstStartupBlock)
191151c0b2f7Stbbdev                 firstStartupBlock = (StartupBlock*)firstStartupBlock->next;
191251c0b2f7Stbbdev             if (previous)
191351c0b2f7Stbbdev                 previous->next = next;
191451c0b2f7Stbbdev             if (next)
191551c0b2f7Stbbdev                 next->previous = previous;
191651c0b2f7Stbbdev             blockToRelease = this;
191751c0b2f7Stbbdev         } else if ((uintptr_t)ptr + StartupBlock::msize(ptr) == (uintptr_t)bumpPtr) {
191851c0b2f7Stbbdev             // last object in the block released
191951c0b2f7Stbbdev             FreeObject *newBump = (FreeObject*)((size_t*)ptr - 1);
192051c0b2f7Stbbdev             MALLOC_ASSERT((uintptr_t)newBump>(uintptr_t)this+sizeof(StartupBlock),
192151c0b2f7Stbbdev                           ASSERT_TEXT);
192251c0b2f7Stbbdev             bumpPtr = newBump;
192351c0b2f7Stbbdev         }
192451c0b2f7Stbbdev     }
192551c0b2f7Stbbdev     if (blockToRelease) {
1926*57f524caSIlya Isaev         blockToRelease->previous = blockToRelease->next = nullptr;
192751c0b2f7Stbbdev         defaultMemPool->returnEmptyBlock(blockToRelease, /*poolTheBlock=*/false);
192851c0b2f7Stbbdev     }
192951c0b2f7Stbbdev }
193051c0b2f7Stbbdev 
193151c0b2f7Stbbdev #endif /* MALLOC_CHECK_RECURSION */
193251c0b2f7Stbbdev 
193351c0b2f7Stbbdev /********* End thread-related code *************/
193451c0b2f7Stbbdev 
193551c0b2f7Stbbdev /********* Library initialization *************/
193651c0b2f7Stbbdev 
193751c0b2f7Stbbdev //! Value indicating the state of initialization.
193851c0b2f7Stbbdev /* 0 = initialization not started.
193951c0b2f7Stbbdev  * 1 = initialization started but not finished.
194051c0b2f7Stbbdev  * 2 = initialization finished.
194151c0b2f7Stbbdev  * In theory, we only need values 0 and 2. But value 1 is nonetheless
194251c0b2f7Stbbdev  * useful for detecting errors in the double-check pattern.
194351c0b2f7Stbbdev  */
194451c0b2f7Stbbdev static std::atomic<intptr_t> mallocInitialized{0};   // zero means initialization not started
194551c0b2f7Stbbdev static MallocMutex initMutex;
194651c0b2f7Stbbdev 
194751c0b2f7Stbbdev /** The leading "\0" is here so that applying "strings" to the binary
194851c0b2f7Stbbdev     delivers a clean result. */
194951c0b2f7Stbbdev static char VersionString[] = "\0" TBBMALLOC_VERSION_STRINGS;
195051c0b2f7Stbbdev 
1951112076d0SIlya Isaev #if USE_PTHREAD && __TBB_SOURCE_DIRECTLY_INCLUDED
195251c0b2f7Stbbdev 
195351c0b2f7Stbbdev /* Decrease the race interval between dynamic library unloading and the pthread key
195451c0b2f7Stbbdev    destructor. Protect only pthreads implementations with supported unloading. */
195551c0b2f7Stbbdev class ShutdownSync {
195651c0b2f7Stbbdev /* flag is the number of threads in pthread key dtor body
195751c0b2f7Stbbdev    (i.e., between threadDtorStart() and threadDtorDone())
195851c0b2f7Stbbdev    or the signal to skip dtor, if flag < 0 */
195951c0b2f7Stbbdev     std::atomic<intptr_t> flag;
196051c0b2f7Stbbdev     static const intptr_t skipDtor = INTPTR_MIN/2;
196151c0b2f7Stbbdev public:
196251c0b2f7Stbbdev     void init() { flag.store(0, std::memory_order_release); }
196351c0b2f7Stbbdev /* Assume that fewer than 2*abs(skipDtor) threads ever call threadDtorStart()
196451c0b2f7Stbbdev    simultaneously, so flag never becomes negative because of that. */
196551c0b2f7Stbbdev     bool threadDtorStart() {
196651c0b2f7Stbbdev         if (flag.load(std::memory_order_acquire) < 0)
196751c0b2f7Stbbdev             return false;
196851c0b2f7Stbbdev         if (++flag <= 0) { // note that the new value is returned
196951c0b2f7Stbbdev             flag.fetch_sub(1); // flag is spoiled by us, restore it
197051c0b2f7Stbbdev             return false;
197151c0b2f7Stbbdev         }
197251c0b2f7Stbbdev         return true;
197351c0b2f7Stbbdev     }
197451c0b2f7Stbbdev     void threadDtorDone() {
197551c0b2f7Stbbdev         flag.fetch_sub(1);
197651c0b2f7Stbbdev     }
197751c0b2f7Stbbdev     void processExit() {
197851c0b2f7Stbbdev         if (flag.fetch_add(skipDtor) != 0) {
197951c0b2f7Stbbdev             SpinWaitUntilEq(flag, skipDtor);
198051c0b2f7Stbbdev         }
198151c0b2f7Stbbdev     }
198251c0b2f7Stbbdev };
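
/* Intended usage (sketch):
     if (shutdownSync.threadDtorStart()) {   // in the pthread key destructor
         ... release per-thread data ...
         shutdownSync.threadDtorDone();
     }
     shutdownSync.processExit();             // at process shutdown
   processExit() adds skipDtor to the flag and waits out in-flight destructors,
   so any destructor that starts afterwards sees a negative flag and bails out. */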
198351c0b2f7Stbbdev 
198451c0b2f7Stbbdev #else
198551c0b2f7Stbbdev 
198651c0b2f7Stbbdev class ShutdownSync {
198751c0b2f7Stbbdev public:
198851c0b2f7Stbbdev     void init() { }
198951c0b2f7Stbbdev     bool threadDtorStart() { return true; }
199051c0b2f7Stbbdev     void threadDtorDone() { }
199151c0b2f7Stbbdev     void processExit() { }
199251c0b2f7Stbbdev };
199351c0b2f7Stbbdev 
1994112076d0SIlya Isaev #endif // USE_PTHREAD && __TBB_SOURCE_DIRECTLY_INCLUDED
199551c0b2f7Stbbdev 
199651c0b2f7Stbbdev static ShutdownSync shutdownSync;
199751c0b2f7Stbbdev 
199851c0b2f7Stbbdev inline bool isMallocInitialized() {
199951c0b2f7Stbbdev     // Load must have acquire fence; otherwise thread taking "initialized" path
200051c0b2f7Stbbdev     // might perform textually later loads *before* mallocInitialized becomes 2.
200151c0b2f7Stbbdev     return 2 == mallocInitialized.load(std::memory_order_acquire);
200251c0b2f7Stbbdev }
200351c0b2f7Stbbdev 
200451c0b2f7Stbbdev /* Caller is responsible for ensuring this routine is called exactly once. */
200551c0b2f7Stbbdev extern "C" void MallocInitializeITT() {
200651c0b2f7Stbbdev #if __TBB_USE_ITT_NOTIFY
200751c0b2f7Stbbdev     if (!usedBySrcIncluded)
200851c0b2f7Stbbdev         tbb::detail::r1::__TBB_load_ittnotify();
200951c0b2f7Stbbdev #endif
201051c0b2f7Stbbdev }
201151c0b2f7Stbbdev 
201251c0b2f7Stbbdev void MemoryPool::initDefaultPool() {
201351c0b2f7Stbbdev     hugePages.init();
201451c0b2f7Stbbdev }
201551c0b2f7Stbbdev 
201651c0b2f7Stbbdev /*
201751c0b2f7Stbbdev  * Allocator initialization routine;
201851c0b2f7Stbbdev  * it is called lazily on the very first scalable_malloc call.
201951c0b2f7Stbbdev  */
202051c0b2f7Stbbdev static bool initMemoryManager()
202151c0b2f7Stbbdev {
202251c0b2f7Stbbdev     TRACEF(( "[ScalableMalloc trace] sizeof(Block) is %d (expected 128); sizeof(uintptr_t) is %d\n",
202351c0b2f7Stbbdev              sizeof(Block), sizeof(uintptr_t) ));
202451c0b2f7Stbbdev     MALLOC_ASSERT( 2*blockHeaderAlignment == sizeof(Block), ASSERT_TEXT );
202551c0b2f7Stbbdev     MALLOC_ASSERT( sizeof(FreeObject) == sizeof(void*), ASSERT_TEXT );
202651c0b2f7Stbbdev     MALLOC_ASSERT( isAligned(defaultMemPool, sizeof(intptr_t)),
202751c0b2f7Stbbdev                    "Memory pool must be void*-aligned for atomic to work over aligned arguments.");
202851c0b2f7Stbbdev 
202951c0b2f7Stbbdev #if USE_WINTHREAD
203051c0b2f7Stbbdev     const size_t granularity = 64*1024; // granularity of VirtualAlloc
203151c0b2f7Stbbdev #else
203251c0b2f7Stbbdev     // POSIX.1-2001-compliant way to get page size
203351c0b2f7Stbbdev     const size_t granularity = sysconf(_SC_PAGESIZE);
203451c0b2f7Stbbdev #endif
203551c0b2f7Stbbdev     if (!defaultMemPool) {
203651c0b2f7Stbbdev         // Do not rely on static constructors; do the assignment explicitly, in case
203751c0b2f7Stbbdev         // the library's static section is not initialized yet at this call.
203851c0b2f7Stbbdev         defaultMemPool = (MemoryPool*)defaultMemPool_space;
203951c0b2f7Stbbdev     }
204051c0b2f7Stbbdev     bool initOk = defaultMemPool->
2041*57f524caSIlya Isaev         extMemPool.init(0, nullptr, nullptr, granularity,
204251c0b2f7Stbbdev                         /*keepAllMemory=*/false, /*fixedPool=*/false);
204351c0b2f7Stbbdev // TODO: extMemPool.init() to not allocate memory
20441ecde27fSIlya Mishin     if (!initOk || !initBackRefMain(&defaultMemPool->extMemPool.backend) || !ThreadId::init())
204551c0b2f7Stbbdev         return false;
204651c0b2f7Stbbdev     MemoryPool::initDefaultPool();
204751c0b2f7Stbbdev     // init() is required iff initMemoryManager() is called
204851c0b2f7Stbbdev     // after mallocProcessShutdownNotification()
204951c0b2f7Stbbdev     shutdownSync.init();
205051c0b2f7Stbbdev #if COLLECT_STATISTICS
205151c0b2f7Stbbdev     initStatisticsCollection();
205251c0b2f7Stbbdev #endif
205351c0b2f7Stbbdev     return true;
205451c0b2f7Stbbdev }
205551c0b2f7Stbbdev 
205651c0b2f7Stbbdev static bool GetBoolEnvironmentVariable(const char* name) {
205751c0b2f7Stbbdev     return tbb::detail::r1::GetBoolEnvironmentVariable(name);
205851c0b2f7Stbbdev }
205951c0b2f7Stbbdev 
206051c0b2f7Stbbdev //! Ensures that initMemoryManager() is called once and only once.
206151c0b2f7Stbbdev /** Does not return until initMemoryManager() has been completed by a thread.
206251c0b2f7Stbbdev     There is no need to call this routine if mallocInitialized==2. */
206351c0b2f7Stbbdev static bool doInitialization()
206451c0b2f7Stbbdev {
206551c0b2f7Stbbdev     MallocMutex::scoped_lock lock( initMutex );
206651c0b2f7Stbbdev     if (mallocInitialized.load(std::memory_order_relaxed)!=2) {
206751c0b2f7Stbbdev         MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==0, ASSERT_TEXT );
206851c0b2f7Stbbdev         mallocInitialized.store(1, std::memory_order_relaxed);
206951c0b2f7Stbbdev         RecursiveMallocCallProtector scoped;
207051c0b2f7Stbbdev         if (!initMemoryManager()) {
207151c0b2f7Stbbdev             mallocInitialized.store(0, std::memory_order_relaxed); // restore and out
207251c0b2f7Stbbdev             return false;
207351c0b2f7Stbbdev         }
207451c0b2f7Stbbdev #ifdef  MALLOC_EXTRA_INITIALIZATION
207551c0b2f7Stbbdev         MALLOC_EXTRA_INITIALIZATION;
207651c0b2f7Stbbdev #endif
207751c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION
207851c0b2f7Stbbdev         RecursiveMallocCallProtector::detectNaiveOverload();
207951c0b2f7Stbbdev #endif
208051c0b2f7Stbbdev         MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==1, ASSERT_TEXT );
208151c0b2f7Stbbdev         // Store must have release fence, otherwise mallocInitialized==2
208251c0b2f7Stbbdev         // might become remotely visible before side effects of
208351c0b2f7Stbbdev         // initMemoryManager() become remotely visible.
208451c0b2f7Stbbdev         mallocInitialized.store(2, std::memory_order_release);
208551c0b2f7Stbbdev         if( GetBoolEnvironmentVariable("TBB_VERSION") ) {
208651c0b2f7Stbbdev             fputs(VersionString+1,stderr);
208751c0b2f7Stbbdev             hugePages.printStatus();
208851c0b2f7Stbbdev         }
208951c0b2f7Stbbdev     }
209051c0b2f7Stbbdev     /* It can't be 0 or I would have initialized it */
209151c0b2f7Stbbdev     MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==2, ASSERT_TEXT );
209251c0b2f7Stbbdev     return true;
209351c0b2f7Stbbdev }
209451c0b2f7Stbbdev 
209551c0b2f7Stbbdev /********* End library initialization *************/
209651c0b2f7Stbbdev 
209751c0b2f7Stbbdev /********* The malloc show begins     *************/
209851c0b2f7Stbbdev 
209951c0b2f7Stbbdev 
210051c0b2f7Stbbdev FreeObject *Block::allocateFromFreeList()
210151c0b2f7Stbbdev {
210251c0b2f7Stbbdev     FreeObject *result;
210351c0b2f7Stbbdev 
2104*57f524caSIlya Isaev     if (!freeList) return nullptr;
210551c0b2f7Stbbdev 
210651c0b2f7Stbbdev     result = freeList;
210751c0b2f7Stbbdev     MALLOC_ASSERT( result, ASSERT_TEXT );
210851c0b2f7Stbbdev 
210951c0b2f7Stbbdev     freeList = result->next;
211051c0b2f7Stbbdev     MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
211151c0b2f7Stbbdev     allocatedCount++;
211251c0b2f7Stbbdev     STAT_increment(getThreadId(), getIndex(objectSize), allocFreeListUsed);
211351c0b2f7Stbbdev 
211451c0b2f7Stbbdev     return result;
211551c0b2f7Stbbdev }
211651c0b2f7Stbbdev 
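// The bump pointer moves downward in objectSize steps; once it would cross
// into the Block header at the low end of the slab, it is reset to nullptr
// and bump allocation for this block is exhausted.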
211751c0b2f7Stbbdev FreeObject *Block::allocateFromBumpPtr()
211851c0b2f7Stbbdev {
211951c0b2f7Stbbdev     FreeObject *result = bumpPtr;
212051c0b2f7Stbbdev     if (result) {
212151c0b2f7Stbbdev         bumpPtr = (FreeObject *) ((uintptr_t) bumpPtr - objectSize);
212251c0b2f7Stbbdev         if ( (uintptr_t)bumpPtr < (uintptr_t)this+sizeof(Block) ) {
2123*57f524caSIlya Isaev             bumpPtr = nullptr;
212451c0b2f7Stbbdev         }
212551c0b2f7Stbbdev         MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
212651c0b2f7Stbbdev         allocatedCount++;
212751c0b2f7Stbbdev         STAT_increment(getThreadId(), getIndex(objectSize), allocBumpPtrUsed);
212851c0b2f7Stbbdev     }
212951c0b2f7Stbbdev     return result;
213051c0b2f7Stbbdev }
213151c0b2f7Stbbdev 
213251c0b2f7Stbbdev inline FreeObject* Block::allocate()
213351c0b2f7Stbbdev {
213451c0b2f7Stbbdev     MALLOC_ASSERT( isOwnedByCurrentThread(), ASSERT_TEXT );
213551c0b2f7Stbbdev 
213651c0b2f7Stbbdev     /* For better cache locality, look in the free list first. */
213751c0b2f7Stbbdev     if ( FreeObject *result = allocateFromFreeList() ) {
213851c0b2f7Stbbdev         return result;
213951c0b2f7Stbbdev     }
214051c0b2f7Stbbdev     MALLOC_ASSERT( !freeList, ASSERT_TEXT );
214151c0b2f7Stbbdev 
214251c0b2f7Stbbdev     /* if free list is empty, try thread local bump pointer allocation. */
214351c0b2f7Stbbdev     if ( FreeObject *result = allocateFromBumpPtr() ) {
214451c0b2f7Stbbdev         return result;
214551c0b2f7Stbbdev     }
214651c0b2f7Stbbdev     MALLOC_ASSERT( !bumpPtr, ASSERT_TEXT );
214751c0b2f7Stbbdev 
214851c0b2f7Stbbdev     /* the block is considered full. */
214951c0b2f7Stbbdev     isFull = true;
2150*57f524caSIlya Isaev     return nullptr;
215151c0b2f7Stbbdev }
215251c0b2f7Stbbdev 
215351c0b2f7Stbbdev size_t Block::findObjectSize(void *object) const
215451c0b2f7Stbbdev {
215551c0b2f7Stbbdev     size_t blSize = getSize();
215651c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION
215751c0b2f7Stbbdev     // Currently, there are no aligned allocations from startup blocks,
215851c0b2f7Stbbdev     // so we can just return StartupBlock::msize().
215951c0b2f7Stbbdev     // TODO: This must be extended if we add aligned allocation from startup blocks.
216051c0b2f7Stbbdev     if (!blSize)
216151c0b2f7Stbbdev         return StartupBlock::msize(object);
216251c0b2f7Stbbdev #endif
216351c0b2f7Stbbdev     // the object may be aligned, so its real size can be less than the block's object size
216451c0b2f7Stbbdev     size_t size =
216551c0b2f7Stbbdev         blSize - ((uintptr_t)object - (uintptr_t)findObjectToFree(object));
216651c0b2f7Stbbdev     MALLOC_ASSERT(size>0 && size<minLargeObjectSize, ASSERT_TEXT);
216751c0b2f7Stbbdev     return size;
216851c0b2f7Stbbdev }
216951c0b2f7Stbbdev 
217051c0b2f7Stbbdev void Bin::moveBlockToFront(Block *block)
217151c0b2f7Stbbdev {
217251c0b2f7Stbbdev     /* move the block to the front of the bin */
217351c0b2f7Stbbdev     if (block == activeBlk) return;
217451c0b2f7Stbbdev     outofTLSBin(block);
217551c0b2f7Stbbdev     pushTLSBin(block);
217651c0b2f7Stbbdev }
217751c0b2f7Stbbdev 
217851c0b2f7Stbbdev void Bin::processEmptyBlock(Block *block, bool poolTheBlock)
217951c0b2f7Stbbdev {
218051c0b2f7Stbbdev     if (block != activeBlk) {
218151c0b2f7Stbbdev         /* We are not using this block; return it to the pool */
218251c0b2f7Stbbdev         outofTLSBin(block);
218351c0b2f7Stbbdev         block->getMemPool()->returnEmptyBlock(block, poolTheBlock);
218451c0b2f7Stbbdev     } else {
218551c0b2f7Stbbdev         /* all objects are free - let's restore the bump pointer */
218651c0b2f7Stbbdev         block->restoreBumpPtr();
218751c0b2f7Stbbdev     }
218851c0b2f7Stbbdev }
218951c0b2f7Stbbdev 
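// LocalLOCImpl caches large memory blocks per thread. Exclusive ownership of
// the list is taken by atomically exchanging head with nullptr; a concurrent
// caller that observes nullptr simply takes the slow path (get() misses,
// externalCleanup() reports nothing to clean).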
219051c0b2f7Stbbdev template<int LOW_MARK, int HIGH_MARK>
219151c0b2f7Stbbdev bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool)
219251c0b2f7Stbbdev {
219351c0b2f7Stbbdev     const size_t size = object->unalignedSize;
219451c0b2f7Stbbdev     // do not spoil the cache with too large an object, which can cause its total cleanup
219551c0b2f7Stbbdev     if (size > MAX_TOTAL_SIZE)
219651c0b2f7Stbbdev         return false;
2197*57f524caSIlya Isaev     LargeMemoryBlock *localHead = head.exchange(nullptr);
219851c0b2f7Stbbdev 
2199*57f524caSIlya Isaev     object->prev = nullptr;
220051c0b2f7Stbbdev     object->next = localHead;
220151c0b2f7Stbbdev     if (localHead)
220251c0b2f7Stbbdev         localHead->prev = object;
220351c0b2f7Stbbdev     else {
220451c0b2f7Stbbdev         // these might not have been reset when the local cache was stolen; correct them
220551c0b2f7Stbbdev         totalSize = 0;
220651c0b2f7Stbbdev         numOfBlocks = 0;
220751c0b2f7Stbbdev         tail = object;
220851c0b2f7Stbbdev     }
220951c0b2f7Stbbdev     localHead = object;
221051c0b2f7Stbbdev     totalSize += size;
221151c0b2f7Stbbdev     numOfBlocks++;
221251c0b2f7Stbbdev     // must meet constraints on both the size and the number of cached objects
221351c0b2f7Stbbdev     if (totalSize > MAX_TOTAL_SIZE || numOfBlocks >= HIGH_MARK) {
221451c0b2f7Stbbdev         // scan from the tail until the constraints are met
221551c0b2f7Stbbdev         while (totalSize > MAX_TOTAL_SIZE || numOfBlocks > LOW_MARK) {
221651c0b2f7Stbbdev             totalSize -= tail->unalignedSize;
221751c0b2f7Stbbdev             numOfBlocks--;
221851c0b2f7Stbbdev             tail = tail->prev;
221951c0b2f7Stbbdev         }
222051c0b2f7Stbbdev         LargeMemoryBlock *headToRelease = tail->next;
2221*57f524caSIlya Isaev         tail->next = nullptr;
222251c0b2f7Stbbdev 
222351c0b2f7Stbbdev         extMemPool->freeLargeObjectList(headToRelease);
222451c0b2f7Stbbdev     }
222551c0b2f7Stbbdev 
222651c0b2f7Stbbdev     head.store(localHead, std::memory_order_release);
222751c0b2f7Stbbdev     return true;
222851c0b2f7Stbbdev }
222951c0b2f7Stbbdev 
223051c0b2f7Stbbdev template<int LOW_MARK, int HIGH_MARK>
223151c0b2f7Stbbdev LargeMemoryBlock *LocalLOCImpl<LOW_MARK, HIGH_MARK>::get(size_t size)
223251c0b2f7Stbbdev {
2233*57f524caSIlya Isaev     LargeMemoryBlock *localHead, *res = nullptr;
223451c0b2f7Stbbdev 
223551c0b2f7Stbbdev     if (size > MAX_TOTAL_SIZE)
2236*57f524caSIlya Isaev         return nullptr;
223751c0b2f7Stbbdev 
223851c0b2f7Stbbdev     // TBB_REVAMP_TODO: review this line
2239*57f524caSIlya Isaev     if (!head.load(std::memory_order_acquire) || (localHead = head.exchange(nullptr)) == nullptr) {
224051c0b2f7Stbbdev         // do not restore totalSize, numOfBlocks and tail at this point,
224151c0b2f7Stbbdev         // as they are used only in put(), where they must be restored
2242*57f524caSIlya Isaev         return nullptr;
224351c0b2f7Stbbdev     }
224451c0b2f7Stbbdev 
224551c0b2f7Stbbdev     for (LargeMemoryBlock *curr = localHead; curr; curr=curr->next) {
224651c0b2f7Stbbdev         if (curr->unalignedSize == size) {
224751c0b2f7Stbbdev             res = curr;
224851c0b2f7Stbbdev             if (curr->next)
224951c0b2f7Stbbdev                 curr->next->prev = curr->prev;
225051c0b2f7Stbbdev             else
225151c0b2f7Stbbdev                 tail = curr->prev;
225251c0b2f7Stbbdev             if (curr != localHead)
225351c0b2f7Stbbdev                 curr->prev->next = curr->next;
225451c0b2f7Stbbdev             else
225551c0b2f7Stbbdev                 localHead = curr->next;
225651c0b2f7Stbbdev             totalSize -= size;
225751c0b2f7Stbbdev             numOfBlocks--;
225851c0b2f7Stbbdev             break;
225951c0b2f7Stbbdev         }
226051c0b2f7Stbbdev     }
226151c0b2f7Stbbdev 
226251c0b2f7Stbbdev     head.store(localHead, std::memory_order_release);
226351c0b2f7Stbbdev     return res;
226451c0b2f7Stbbdev }
226551c0b2f7Stbbdev 
226651c0b2f7Stbbdev template<int LOW_MARK, int HIGH_MARK>
226751c0b2f7Stbbdev bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::externalCleanup(ExtMemoryPool *extMemPool)
226851c0b2f7Stbbdev {
2269*57f524caSIlya Isaev     if (LargeMemoryBlock *localHead = head.exchange(nullptr)) {
227051c0b2f7Stbbdev         extMemPool->freeLargeObjectList(localHead);
227151c0b2f7Stbbdev         return true;
227251c0b2f7Stbbdev     }
227351c0b2f7Stbbdev     return false;
227451c0b2f7Stbbdev }
227551c0b2f7Stbbdev 
227651c0b2f7Stbbdev void *MemoryPool::getFromLLOCache(TLSData* tls, size_t size, size_t alignment)
227751c0b2f7Stbbdev {
2278*57f524caSIlya Isaev     LargeMemoryBlock *lmb = nullptr;
227951c0b2f7Stbbdev 
228051c0b2f7Stbbdev     size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr);
228151c0b2f7Stbbdev     size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+alignment);
228251c0b2f7Stbbdev     if (allocationSize < size) // allocationSize wrapped around in alignToBin
2283*57f524caSIlya Isaev         return nullptr;
228451c0b2f7Stbbdev     MALLOC_ASSERT(allocationSize >= alignment, "Overflow must be checked before.");
228551c0b2f7Stbbdev 
228651c0b2f7Stbbdev     if (tls) {
228751c0b2f7Stbbdev         tls->markUsed();
228851c0b2f7Stbbdev         lmb = tls->lloc.get(allocationSize);
228951c0b2f7Stbbdev     }
229051c0b2f7Stbbdev     if (!lmb)
229151c0b2f7Stbbdev         lmb = extMemPool.mallocLargeObject(this, allocationSize);
229251c0b2f7Stbbdev 
229351c0b2f7Stbbdev     if (lmb) {
229451c0b2f7Stbbdev         // when shuffling, we assume that the alignment offset guarantees
229551c0b2f7Stbbdev         // that different cache lines are in use
229651c0b2f7Stbbdev         MALLOC_ASSERT(alignment >= estimatedCacheLineSize, ASSERT_TEXT);
229751c0b2f7Stbbdev 
229851c0b2f7Stbbdev         void *alignedArea = (void*)alignUp((uintptr_t)lmb+headersSize, alignment);
229951c0b2f7Stbbdev         uintptr_t alignedRight =
230051c0b2f7Stbbdev             alignDown((uintptr_t)lmb+lmb->unalignedSize - size, alignment);
230151c0b2f7Stbbdev         // Is there some room to shuffle the object between cache lines?
230251c0b2f7Stbbdev         // Note that alignedRight and alignedArea are aligned at alignment.
230351c0b2f7Stbbdev         unsigned ptrDelta = alignedRight - (uintptr_t)alignedArea;
230451c0b2f7Stbbdev         if (ptrDelta && tls) { // !tls is cold path
230551c0b2f7Stbbdev             // for the hot path of alignment==estimatedCacheLineSize,
230651c0b2f7Stbbdev             // allow compilers to use shift for division
230751c0b2f7Stbbdev             // (since estimatedCacheLineSize is a power-of-2 constant)
230851c0b2f7Stbbdev             unsigned numOfPossibleOffsets = alignment == estimatedCacheLineSize?
230951c0b2f7Stbbdev                   ptrDelta / estimatedCacheLineSize :
231051c0b2f7Stbbdev                   ptrDelta / alignment;
231151c0b2f7Stbbdev             unsigned myCacheIdx = ++tls->currCacheIdx;
231251c0b2f7Stbbdev             unsigned offset = myCacheIdx % numOfPossibleOffsets;
231351c0b2f7Stbbdev 
231451c0b2f7Stbbdev             // Move the object to a cache line with an offset that differs from
231551c0b2f7Stbbdev             // the previous allocation's. This supposedly allows us to use cache
231651c0b2f7Stbbdev             // associativity more efficiently.
231751c0b2f7Stbbdev             alignedArea = (void*)((uintptr_t)alignedArea + offset*alignment);
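            // Illustration (hypothetical numbers): with alignment ==
            // estimatedCacheLineSize == 64 and ptrDelta == 256, there are
            // 4 possible offsets, so successive allocations cycle through
            // offsets {0, 64, 128, 192} bytes within the block.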
231851c0b2f7Stbbdev         }
231951c0b2f7Stbbdev         MALLOC_ASSERT((uintptr_t)lmb+lmb->unalignedSize >=
232051c0b2f7Stbbdev                       (uintptr_t)alignedArea+size, "Object doesn't fit the block.");
232151c0b2f7Stbbdev         LargeObjectHdr *header = (LargeObjectHdr*)alignedArea-1;
232251c0b2f7Stbbdev         header->memoryBlock = lmb;
232351c0b2f7Stbbdev         header->backRefIdx = lmb->backRefIdx;
232451c0b2f7Stbbdev         setBackRef(header->backRefIdx, header);
232551c0b2f7Stbbdev 
232651c0b2f7Stbbdev         lmb->objectSize = size;
232751c0b2f7Stbbdev 
232851c0b2f7Stbbdev         MALLOC_ASSERT( isLargeObject<unknownMem>(alignedArea), ASSERT_TEXT );
232951c0b2f7Stbbdev         MALLOC_ASSERT( isAligned(alignedArea, alignment), ASSERT_TEXT );
233051c0b2f7Stbbdev 
233151c0b2f7Stbbdev         return alignedArea;
233251c0b2f7Stbbdev     }
2333*57f524caSIlya Isaev     return nullptr;
233451c0b2f7Stbbdev }
233551c0b2f7Stbbdev 
233651c0b2f7Stbbdev void MemoryPool::putToLLOCache(TLSData *tls, void *object)
233751c0b2f7Stbbdev {
233851c0b2f7Stbbdev     LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
233951c0b2f7Stbbdev     // overwrite backRefIdx to simplify double free detection
234051c0b2f7Stbbdev     header->backRefIdx = BackRefIdx();
234151c0b2f7Stbbdev 
234251c0b2f7Stbbdev     if (tls) {
234351c0b2f7Stbbdev         tls->markUsed();
234451c0b2f7Stbbdev         if (tls->lloc.put(header->memoryBlock, &extMemPool))
234551c0b2f7Stbbdev             return;
234651c0b2f7Stbbdev     }
234751c0b2f7Stbbdev     extMemPool.freeLargeObject(header->memoryBlock);
234851c0b2f7Stbbdev }
234951c0b2f7Stbbdev 
235051c0b2f7Stbbdev /*
235151c0b2f7Stbbdev  * All aligned allocations fall into one of the following categories:
235251c0b2f7Stbbdev  * 1. if both the requested size and alignment are <= maxSegregatedObjectSize,
235351c0b2f7Stbbdev  *      we just align the size up and request that amount, because for every size
235451c0b2f7Stbbdev  *      aligned to some power of 2, the allocated object is at least that aligned;
235551c0b2f7Stbbdev  * 2. for size<minLargeObjectSize, check if the already guaranteed fittingAlignment is enough;
235651c0b2f7Stbbdev  * 3. if size+alignment<minLargeObjectSize, we take an object of fittingSizeN and align
235751c0b2f7Stbbdev  *      its address up; given such a pointer, scalable_free can find the real object.
235851c0b2f7Stbbdev  *      Wrapping of size+alignment is impossible because the maximal allowed
235951c0b2f7Stbbdev  *      alignment plus minLargeObjectSize can't lead to wrapping;
236051c0b2f7Stbbdev  * 4. otherwise, an aligned large object is allocated.
236151c0b2f7Stbbdev  */
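/*
 * Illustration (numbers are hypothetical; the actual thresholds are
 * build-time constants): with maxSegregatedObjectSize == 64,
 * fittingAlignment == 128, and minLargeObjectSize on the order of 8K:
 *   size=24,  alignment=16   -> category 1: allocate alignUp(24,16) == 32 bytes;
 *   size=200, alignment=64   -> category 2: the guaranteed alignment suffices;
 *   size=200, alignment=1024 -> category 3: allocate 200+1024 bytes, align the address up;
 *   size=1M,  alignment=4096 -> category 4: aligned large-object path.
 */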
236251c0b2f7Stbbdev static void *allocateAligned(MemoryPool *memPool, size_t size, size_t alignment)
236351c0b2f7Stbbdev {
236451c0b2f7Stbbdev     MALLOC_ASSERT( isPowerOfTwo(alignment), ASSERT_TEXT );
236551c0b2f7Stbbdev 
236651c0b2f7Stbbdev     if (!isMallocInitialized())
236751c0b2f7Stbbdev         if (!doInitialization())
2368*57f524caSIlya Isaev             return nullptr;
236951c0b2f7Stbbdev 
237051c0b2f7Stbbdev     void *result;
237151c0b2f7Stbbdev     if (size<=maxSegregatedObjectSize && alignment<=maxSegregatedObjectSize)
237251c0b2f7Stbbdev         result = internalPoolMalloc(memPool, alignUp(size? size: sizeof(size_t), alignment));
237351c0b2f7Stbbdev     else if (size<minLargeObjectSize) {
237451c0b2f7Stbbdev         if (alignment<=fittingAlignment)
237551c0b2f7Stbbdev             result = internalPoolMalloc(memPool, size);
237651c0b2f7Stbbdev         else if (size+alignment < minLargeObjectSize) {
237751c0b2f7Stbbdev             void *unaligned = internalPoolMalloc(memPool, size+alignment);
2378*57f524caSIlya Isaev             if (!unaligned) return nullptr;
237951c0b2f7Stbbdev             result = alignUp(unaligned, alignment);
238051c0b2f7Stbbdev         } else
238151c0b2f7Stbbdev             goto LargeObjAlloc;
238251c0b2f7Stbbdev     } else {
238351c0b2f7Stbbdev     LargeObjAlloc:
238451c0b2f7Stbbdev         TLSData *tls = memPool->getTLS(/*create=*/true);
238551c0b2f7Stbbdev         // take into account only alignments that are higher than the natural one
238651c0b2f7Stbbdev         result =
238751c0b2f7Stbbdev             memPool->getFromLLOCache(tls, size, largeObjectAlignment>alignment?
238851c0b2f7Stbbdev                                                largeObjectAlignment: alignment);
238951c0b2f7Stbbdev     }
239051c0b2f7Stbbdev 
239151c0b2f7Stbbdev     MALLOC_ASSERT( isAligned(result, alignment), ASSERT_TEXT );
239251c0b2f7Stbbdev     return result;
239351c0b2f7Stbbdev }
239451c0b2f7Stbbdev 
239551c0b2f7Stbbdev static void *reallocAligned(MemoryPool *memPool, void *ptr,
239651c0b2f7Stbbdev                             size_t newSize, size_t alignment = 0)
239751c0b2f7Stbbdev {
239851c0b2f7Stbbdev     void *result;
239951c0b2f7Stbbdev     size_t copySize;
240051c0b2f7Stbbdev 
240151c0b2f7Stbbdev     if (isLargeObject<ourMem>(ptr)) {
240251c0b2f7Stbbdev         LargeMemoryBlock* lmb = ((LargeObjectHdr *)ptr - 1)->memoryBlock;
240351c0b2f7Stbbdev         copySize = lmb->unalignedSize-((uintptr_t)ptr-(uintptr_t)lmb);
240451c0b2f7Stbbdev 
240551c0b2f7Stbbdev         // Apply different strategies if size decreases
240651c0b2f7Stbbdev         if (newSize <= copySize && (0 == alignment || isAligned(ptr, alignment))) {
240751c0b2f7Stbbdev 
240851c0b2f7Stbbdev             // For huge objects (that do not fit in the backend cache), keep the same space
240951c0b2f7Stbbdev             // unless the new size is less than half the current one
241051c0b2f7Stbbdev             bool isMemoryBlockHuge = copySize > memPool->extMemPool.backend.getMaxBinnedSize();
241151c0b2f7Stbbdev             size_t threshold = isMemoryBlockHuge ? copySize / 2 : 0;
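            // e.g. (illustrative): for a huge 100MB block the threshold is 50MB,
            // so shrinking to 60MB just updates objectSize in place, while
            // shrinking to 40MB falls through to a real reallocation below.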
241251c0b2f7Stbbdev             if (newSize > threshold) {
241351c0b2f7Stbbdev                 lmb->objectSize = newSize;
241451c0b2f7Stbbdev                 return ptr;
241551c0b2f7Stbbdev             }
241651c0b2f7Stbbdev             // TODO: For large objects suitable for the backend cache,
241751c0b2f7Stbbdev             // split out the excessive part and put it to the backend.
241851c0b2f7Stbbdev         }
241951c0b2f7Stbbdev         // Reallocate for real
242051c0b2f7Stbbdev         copySize = lmb->objectSize;
242151c0b2f7Stbbdev #if BACKEND_HAS_MREMAP
242251c0b2f7Stbbdev         if (void *r = memPool->extMemPool.remap(ptr, copySize, newSize,
242351c0b2f7Stbbdev                           alignment < largeObjectAlignment ? largeObjectAlignment : alignment))
242451c0b2f7Stbbdev             return r;
242551c0b2f7Stbbdev #endif
242651c0b2f7Stbbdev         result = alignment ? allocateAligned(memPool, newSize, alignment) :
242751c0b2f7Stbbdev             internalPoolMalloc(memPool, newSize);
242851c0b2f7Stbbdev 
242951c0b2f7Stbbdev     } else {
243051c0b2f7Stbbdev         Block* block = (Block *)alignDown(ptr, slabSize);
243151c0b2f7Stbbdev         copySize = block->findObjectSize(ptr);
243251c0b2f7Stbbdev 
243351c0b2f7Stbbdev         // TODO: Move object to another bin if size decreases and the current bin is "empty enough".
243451c0b2f7Stbbdev         // Currently, if the size decreases, the old pointer is returned
243551c0b2f7Stbbdev         if (newSize <= copySize && (0==alignment || isAligned(ptr, alignment))) {
243651c0b2f7Stbbdev             return ptr;
243751c0b2f7Stbbdev         } else {
243851c0b2f7Stbbdev             result = alignment ? allocateAligned(memPool, newSize, alignment) :
243951c0b2f7Stbbdev                 internalPoolMalloc(memPool, newSize);
244051c0b2f7Stbbdev         }
244151c0b2f7Stbbdev     }
244251c0b2f7Stbbdev     if (result) {
244351c0b2f7Stbbdev         memcpy(result, ptr, copySize < newSize ? copySize : newSize);
244451c0b2f7Stbbdev         internalPoolFree(memPool, ptr, 0);
244551c0b2f7Stbbdev     }
244651c0b2f7Stbbdev     return result;
244751c0b2f7Stbbdev }
244851c0b2f7Stbbdev 
2449478de5b1Stbbdev #if MALLOC_DEBUG
245051c0b2f7Stbbdev /* A predicate checks if an object is properly placed inside its block */
245151c0b2f7Stbbdev inline bool Block::isProperlyPlaced(const void *object) const
245251c0b2f7Stbbdev {
245351c0b2f7Stbbdev     return 0 == ((uintptr_t)this + slabSize - (uintptr_t)object) % objectSize;
245451c0b2f7Stbbdev }
2455478de5b1Stbbdev #endif
245651c0b2f7Stbbdev 
245751c0b2f7Stbbdev /* Finds the real object inside the block */
245851c0b2f7Stbbdev FreeObject *Block::findAllocatedObject(const void *address) const
245951c0b2f7Stbbdev {
246051c0b2f7Stbbdev     // calculate offset from the end of the block space
246151c0b2f7Stbbdev     uint16_t offset = (uintptr_t)this + slabSize - (uintptr_t)address;
246251c0b2f7Stbbdev     MALLOC_ASSERT( offset<=slabSize-sizeof(Block), ASSERT_TEXT );
246351c0b2f7Stbbdev     // find offset difference from a multiple of allocation size
246451c0b2f7Stbbdev     offset %= objectSize;
246551c0b2f7Stbbdev     // and move the address down to where the real object starts.
246651c0b2f7Stbbdev     return (FreeObject*)((uintptr_t)address - (offset? objectSize-offset: 0));
246751c0b2f7Stbbdev }
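/*
 * Illustration (hypothetical numbers): with objectSize == 64, an address
 * 200 bytes below the end of the block gives offset == 200 and
 * offset % objectSize == 8, so the real object starts at
 * address - (64 - 8) == address - 56, which lies 256 == 4*64 bytes below
 * the block end, i.e. exactly on an objectSize boundary.
 */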
246851c0b2f7Stbbdev 
246951c0b2f7Stbbdev /*
247051c0b2f7Stbbdev  * A bad dereference caused by a foreign pointer is possible only here, not earlier in the call chain.
247151c0b2f7Stbbdev  * A separate function isolates the SEH code, as it hurts compiler optimization.
247251c0b2f7Stbbdev  */
247351c0b2f7Stbbdev static inline BackRefIdx safer_dereference (const BackRefIdx *ptr)
247451c0b2f7Stbbdev {
247551c0b2f7Stbbdev     BackRefIdx id;
247651c0b2f7Stbbdev #if _MSC_VER
247751c0b2f7Stbbdev     __try {
247851c0b2f7Stbbdev #endif
2479478de5b1Stbbdev         id = dereference(ptr);
248051c0b2f7Stbbdev #if _MSC_VER
248151c0b2f7Stbbdev     } __except( GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION?
248251c0b2f7Stbbdev                 EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ) {
248351c0b2f7Stbbdev         id = BackRefIdx();
248451c0b2f7Stbbdev     }
248551c0b2f7Stbbdev #endif
248651c0b2f7Stbbdev     return id;
248751c0b2f7Stbbdev }
248851c0b2f7Stbbdev 
248951c0b2f7Stbbdev template<MemoryOrigin memOrigin>
249051c0b2f7Stbbdev bool isLargeObject(void *object)
249151c0b2f7Stbbdev {
249251c0b2f7Stbbdev     if (!isAligned(object, largeObjectAlignment))
249351c0b2f7Stbbdev         return false;
249451c0b2f7Stbbdev     LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
249551c0b2f7Stbbdev     BackRefIdx idx = (memOrigin == unknownMem) ?
2496478de5b1Stbbdev         safer_dereference(&header->backRefIdx) : dereference(&header->backRefIdx);
249751c0b2f7Stbbdev 
249851c0b2f7Stbbdev     return idx.isLargeObject()
2499*57f524caSIlya Isaev         // in a valid LargeObjectHdr, memoryBlock is not nullptr
250051c0b2f7Stbbdev         && header->memoryBlock
250151c0b2f7Stbbdev         // in a valid LargeObjectHdr, memoryBlock points somewhere before the header
250251c0b2f7Stbbdev         // TODO: more strict check
250351c0b2f7Stbbdev         && (uintptr_t)header->memoryBlock < (uintptr_t)header
250451c0b2f7Stbbdev         && getBackRef(idx) == header;
250551c0b2f7Stbbdev }
250651c0b2f7Stbbdev 
250751c0b2f7Stbbdev static inline bool isSmallObject (void *ptr)
250851c0b2f7Stbbdev {
250951c0b2f7Stbbdev     Block* expectedBlock = (Block*)alignDown(ptr, slabSize);
251051c0b2f7Stbbdev     const BackRefIdx* idx = expectedBlock->getBackRefIdx();
251151c0b2f7Stbbdev 
251251c0b2f7Stbbdev     bool isSmall = expectedBlock == getBackRef(safer_dereference(idx));
251351c0b2f7Stbbdev     if (isSmall)
251451c0b2f7Stbbdev         expectedBlock->checkFreePrecond(ptr);
251551c0b2f7Stbbdev     return isSmall;
251651c0b2f7Stbbdev }
251751c0b2f7Stbbdev 
251851c0b2f7Stbbdev /**** Check if an object was allocated by scalable_malloc ****/
251951c0b2f7Stbbdev static inline bool isRecognized (void* ptr)
252051c0b2f7Stbbdev {
252151c0b2f7Stbbdev     return defaultMemPool->extMemPool.backend.ptrCanBeValid(ptr) &&
252251c0b2f7Stbbdev         (isLargeObject<unknownMem>(ptr) || isSmallObject(ptr));
252351c0b2f7Stbbdev }
252451c0b2f7Stbbdev 
252551c0b2f7Stbbdev static inline void freeSmallObject(void *object)
252651c0b2f7Stbbdev {
252751c0b2f7Stbbdev     /* mask low bits to get the block */
252851c0b2f7Stbbdev     Block *block = (Block *)alignDown(object, slabSize);
252951c0b2f7Stbbdev     block->checkFreePrecond(object);
253051c0b2f7Stbbdev 
253151c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION
253251c0b2f7Stbbdev     if (block->isStartupAllocObject()) {
253351c0b2f7Stbbdev         ((StartupBlock *)block)->free(object);
253451c0b2f7Stbbdev         return;
253551c0b2f7Stbbdev     }
253651c0b2f7Stbbdev #endif
253751c0b2f7Stbbdev     if (block->isOwnedByCurrentThread()) {
253851c0b2f7Stbbdev         block->freeOwnObject(object);
253951c0b2f7Stbbdev     } else { /* Slower path: add to the shared list; allocatedCount is updated by the owner thread in malloc. */
254051c0b2f7Stbbdev         FreeObject *objectToFree = block->findObjectToFree(object);
254151c0b2f7Stbbdev         block->freePublicObject(objectToFree);
254251c0b2f7Stbbdev     }
254351c0b2f7Stbbdev }
254451c0b2f7Stbbdev 
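/*
 * Small-object allocation cascade (summary of the steps below): walk the
 * bin's blocks starting from the active one (free list, then bump pointer),
 * then privatize publicly freed objects, then adopt an orphaned block
 * discarded by another thread, and finally request a fresh empty block.
 * Large requests bypass all of this and go straight to the large-object cache.
 */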
254551c0b2f7Stbbdev static void *internalPoolMalloc(MemoryPool* memPool, size_t size)
254651c0b2f7Stbbdev {
254751c0b2f7Stbbdev     Bin* bin;
254851c0b2f7Stbbdev     Block * mallocBlock;
254951c0b2f7Stbbdev 
2550*57f524caSIlya Isaev     if (!memPool) return nullptr;
255151c0b2f7Stbbdev 
255251c0b2f7Stbbdev     if (!size) size = sizeof(size_t);
255351c0b2f7Stbbdev 
255451c0b2f7Stbbdev     TLSData *tls = memPool->getTLS(/*create=*/true);
255551c0b2f7Stbbdev 
255651c0b2f7Stbbdev     /* Allocate a large object */
255751c0b2f7Stbbdev     if (size >= minLargeObjectSize)
255851c0b2f7Stbbdev         return memPool->getFromLLOCache(tls, size, largeObjectAlignment);
255951c0b2f7Stbbdev 
2560*57f524caSIlya Isaev     if (!tls) return nullptr;
256151c0b2f7Stbbdev 
256251c0b2f7Stbbdev     tls->markUsed();
256351c0b2f7Stbbdev     /*
256451c0b2f7Stbbdev      * Get the element of the thread-local array corresponding to the given size;
256551c0b2f7Stbbdev      * it keeps a pointer to the active block for allocations of this size
256651c0b2f7Stbbdev      */
256751c0b2f7Stbbdev     bin = tls->getAllocationBin(size);
2568*57f524caSIlya Isaev     if ( !bin ) return nullptr;
256951c0b2f7Stbbdev 
257051c0b2f7Stbbdev     /* Get a block to try to allocate in. */
257151c0b2f7Stbbdev     for( mallocBlock = bin->getActiveBlock(); mallocBlock;
257251c0b2f7Stbbdev          mallocBlock = bin->setPreviousBlockActive() ) // the previous block should be empty enough
257351c0b2f7Stbbdev     {
257451c0b2f7Stbbdev         if( FreeObject *result = mallocBlock->allocate() )
257551c0b2f7Stbbdev             return result;
257651c0b2f7Stbbdev     }
257751c0b2f7Stbbdev 
257851c0b2f7Stbbdev     /*
257951c0b2f7Stbbdev      * else privatize publicly freed objects in some block and allocate from it
258051c0b2f7Stbbdev      */
258151c0b2f7Stbbdev     mallocBlock = bin->getPrivatizedFreeListBlock();
258251c0b2f7Stbbdev     if (mallocBlock) {
258351c0b2f7Stbbdev         MALLOC_ASSERT( mallocBlock->freeListNonNull(), ASSERT_TEXT );
258451c0b2f7Stbbdev         if ( FreeObject *result = mallocBlock->allocateFromFreeList() )
258551c0b2f7Stbbdev             return result;
258651c0b2f7Stbbdev         /* Else something strange happened, need to retry from the beginning; */
258751c0b2f7Stbbdev         TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in public free list; reentering.\n" ));
258851c0b2f7Stbbdev         return internalPoolMalloc(memPool, size);
258951c0b2f7Stbbdev     }
259051c0b2f7Stbbdev 
259151c0b2f7Stbbdev     /*
259251c0b2f7Stbbdev      * No suitable own blocks; try to get a partial block that some other thread has discarded.
259351c0b2f7Stbbdev      */
259451c0b2f7Stbbdev     mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size);
259551c0b2f7Stbbdev     while (mallocBlock) {
259651c0b2f7Stbbdev         bin->pushTLSBin(mallocBlock);
259751c0b2f7Stbbdev         bin->setActiveBlock(mallocBlock); // TODO: move under the below condition?
259851c0b2f7Stbbdev         if( FreeObject *result = mallocBlock->allocate() )
259951c0b2f7Stbbdev             return result;
260051c0b2f7Stbbdev         mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size);
260151c0b2f7Stbbdev     }
260251c0b2f7Stbbdev 
260351c0b2f7Stbbdev     /*
260451c0b2f7Stbbdev      * else try to get a new empty block
260551c0b2f7Stbbdev      */
260651c0b2f7Stbbdev     mallocBlock = memPool->getEmptyBlock(size);
260751c0b2f7Stbbdev     if (mallocBlock) {
260851c0b2f7Stbbdev         bin->pushTLSBin(mallocBlock);
260951c0b2f7Stbbdev         bin->setActiveBlock(mallocBlock);
261051c0b2f7Stbbdev         if( FreeObject *result = mallocBlock->allocate() )
261151c0b2f7Stbbdev             return result;
261251c0b2f7Stbbdev         /* Else something strange happened, need to retry from the beginning; */
261351c0b2f7Stbbdev         TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in empty block; reentering.\n" ));
261451c0b2f7Stbbdev         return internalPoolMalloc(memPool, size);
261551c0b2f7Stbbdev     }
261651c0b2f7Stbbdev     /*
2617*57f524caSIlya Isaev      * else nothing works so return nullptr
261851c0b2f7Stbbdev      */
2619*57f524caSIlya Isaev     TRACEF(( "[ScalableMalloc trace] No memory found, returning nullptr.\n" ));
2620*57f524caSIlya Isaev     return nullptr;
262151c0b2f7Stbbdev }
262251c0b2f7Stbbdev 
262351c0b2f7Stbbdev // When size==0 (i.e. unknown), detect here whether the object is large.
262451c0b2f7Stbbdev // When the size is known and < minLargeObjectSize, we still need to check
262551c0b2f7Stbbdev // if the actual object is large, because large objects might be used
262651c0b2f7Stbbdev // for aligned small allocations.
262751c0b2f7Stbbdev static bool internalPoolFree(MemoryPool *memPool, void *object, size_t size)
262851c0b2f7Stbbdev {
262951c0b2f7Stbbdev     if (!memPool || !object) return false;
263051c0b2f7Stbbdev 
263151c0b2f7Stbbdev     // The library is initialized on the first allocation call, so a release while
263251c0b2f7Stbbdev     // not initialized means a foreign object is being released.
263351c0b2f7Stbbdev     MALLOC_ASSERT(isMallocInitialized(), ASSERT_TEXT);
263451c0b2f7Stbbdev     MALLOC_ASSERT(memPool->extMemPool.userPool() || isRecognized(object),
263551c0b2f7Stbbdev                   "Invalid pointer during object releasing is detected.");
263651c0b2f7Stbbdev 
263751c0b2f7Stbbdev     if (size >= minLargeObjectSize || isLargeObject<ourMem>(object))
263851c0b2f7Stbbdev         memPool->putToLLOCache(memPool->getTLS(/*create=*/false), object);
263951c0b2f7Stbbdev     else
264051c0b2f7Stbbdev         freeSmallObject(object);
264151c0b2f7Stbbdev     return true;
264251c0b2f7Stbbdev }
264351c0b2f7Stbbdev 
264451c0b2f7Stbbdev static void *internalMalloc(size_t size)
264551c0b2f7Stbbdev {
264651c0b2f7Stbbdev     if (!size) size = sizeof(size_t);
264751c0b2f7Stbbdev 
264851c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION
264951c0b2f7Stbbdev     if (RecursiveMallocCallProtector::sameThreadActive())
265051c0b2f7Stbbdev         return size<minLargeObjectSize? StartupBlock::allocate(size) :
265151c0b2f7Stbbdev             // nested allocation, so skip tls
2652*57f524caSIlya Isaev             (FreeObject*)defaultMemPool->getFromLLOCache(nullptr, size, slabSize);
265351c0b2f7Stbbdev #endif
265451c0b2f7Stbbdev 
265551c0b2f7Stbbdev     if (!isMallocInitialized())
265651c0b2f7Stbbdev         if (!doInitialization())
2657*57f524caSIlya Isaev             return nullptr;
265851c0b2f7Stbbdev     return internalPoolMalloc(defaultMemPool, size);
265951c0b2f7Stbbdev }
266051c0b2f7Stbbdev 
266151c0b2f7Stbbdev static void internalFree(void *object)
266251c0b2f7Stbbdev {
266351c0b2f7Stbbdev     internalPoolFree(defaultMemPool, object, 0);
266451c0b2f7Stbbdev }
266551c0b2f7Stbbdev 
266651c0b2f7Stbbdev static size_t internalMsize(void* ptr)
266751c0b2f7Stbbdev {
266851c0b2f7Stbbdev     MALLOC_ASSERT(ptr, "Invalid pointer passed to internalMsize");
266951c0b2f7Stbbdev     if (isLargeObject<ourMem>(ptr)) {
267051c0b2f7Stbbdev         // TODO: return the maximum memory size, that can be written to this object
267151c0b2f7Stbbdev         LargeMemoryBlock* lmb = ((LargeObjectHdr*)ptr - 1)->memoryBlock;
267251c0b2f7Stbbdev         return lmb->objectSize;
267351c0b2f7Stbbdev     } else {
267451c0b2f7Stbbdev         Block *block = (Block*)alignDown(ptr, slabSize);
267551c0b2f7Stbbdev         return block->findObjectSize(ptr);
267651c0b2f7Stbbdev     }
267751c0b2f7Stbbdev }
267851c0b2f7Stbbdev 
267951c0b2f7Stbbdev } // namespace internal
268051c0b2f7Stbbdev 
268151c0b2f7Stbbdev using namespace rml::internal;
268251c0b2f7Stbbdev 
268351c0b2f7Stbbdev // legacy entry point saved for compatibility with binaries compiled
268451c0b2f7Stbbdev // with pre-6003 versions of TBB
26858827ea7dSLong Nguyen TBBMALLOC_EXPORT rml::MemoryPool *pool_create(intptr_t pool_id, const MemPoolPolicy *policy)
268651c0b2f7Stbbdev {
268751c0b2f7Stbbdev     rml::MemoryPool *pool;
268851c0b2f7Stbbdev     MemPoolPolicy pol(policy->pAlloc, policy->pFree, policy->granularity);
268951c0b2f7Stbbdev 
269051c0b2f7Stbbdev     pool_create_v1(pool_id, &pol, &pool);
269151c0b2f7Stbbdev     return pool;
269251c0b2f7Stbbdev }
269351c0b2f7Stbbdev 
269451c0b2f7Stbbdev rml::MemPoolError pool_create_v1(intptr_t pool_id, const MemPoolPolicy *policy,
269551c0b2f7Stbbdev                                  rml::MemoryPool **pool)
269651c0b2f7Stbbdev {
269751c0b2f7Stbbdev     if ( !policy->pAlloc || policy->version<MemPoolPolicy::TBBMALLOC_POOL_VERSION
269851c0b2f7Stbbdev          // empty pFree allowed only for fixed pools
269951c0b2f7Stbbdev          || !(policy->fixedPool || policy->pFree)) {
2700*57f524caSIlya Isaev         *pool = nullptr;
270151c0b2f7Stbbdev         return INVALID_POLICY;
270251c0b2f7Stbbdev     }
270351c0b2f7Stbbdev     if ( policy->version>MemPoolPolicy::TBBMALLOC_POOL_VERSION // future versions are not supported
270451c0b2f7Stbbdev          // new flags can be added in place of reserved, but default
270551c0b2f7Stbbdev          // behaviour must be supported by this version
270651c0b2f7Stbbdev          || policy->reserved ) {
2707*57f524caSIlya Isaev         *pool = nullptr;
270851c0b2f7Stbbdev         return UNSUPPORTED_POLICY;
270951c0b2f7Stbbdev     }
271051c0b2f7Stbbdev     if (!isMallocInitialized())
271151c0b2f7Stbbdev         if (!doInitialization()) {
2712*57f524caSIlya Isaev             *pool = nullptr;
271351c0b2f7Stbbdev             return NO_MEMORY;
271451c0b2f7Stbbdev         }
271551c0b2f7Stbbdev     rml::internal::MemoryPool *memPool =
271651c0b2f7Stbbdev         (rml::internal::MemoryPool*)internalMalloc((sizeof(rml::internal::MemoryPool)));
271751c0b2f7Stbbdev     if (!memPool) {
2718*57f524caSIlya Isaev         *pool = nullptr;
271951c0b2f7Stbbdev         return NO_MEMORY;
272051c0b2f7Stbbdev     }
272151c0b2f7Stbbdev     memset(memPool, 0, sizeof(rml::internal::MemoryPool));
272251c0b2f7Stbbdev     if (!memPool->init(pool_id, policy)) {
272351c0b2f7Stbbdev         internalFree(memPool);
2724*57f524caSIlya Isaev         *pool = nullptr;
272551c0b2f7Stbbdev         return NO_MEMORY;
272651c0b2f7Stbbdev     }
272751c0b2f7Stbbdev 
272851c0b2f7Stbbdev     *pool = (rml::MemoryPool*)memPool;
272951c0b2f7Stbbdev     return POOL_OK;
273051c0b2f7Stbbdev }
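// Usage sketch (illustrative; rawAlloc and rawFree are hypothetical user
// callbacks matching the allocation/deallocation fields of MemPoolPolicy):
//     MemPoolPolicy policy(rawAlloc, rawFree, /*granularity*/ 0);
//     rml::MemoryPool *pool;
//     if (pool_create_v1(0, &policy, &pool) == rml::POOL_OK) {
//         void *p = pool_malloc(pool, 100);
//         pool_free(pool, p);
//         pool_destroy(pool);
//     }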
273151c0b2f7Stbbdev 
273251c0b2f7Stbbdev bool pool_destroy(rml::MemoryPool* memPool)
273351c0b2f7Stbbdev {
273451c0b2f7Stbbdev     if (!memPool) return false;
273551c0b2f7Stbbdev     bool ret = ((rml::internal::MemoryPool*)memPool)->destroy();
273651c0b2f7Stbbdev     internalFree(memPool);
273751c0b2f7Stbbdev 
273851c0b2f7Stbbdev     return ret;
273951c0b2f7Stbbdev }
274051c0b2f7Stbbdev 
274151c0b2f7Stbbdev bool pool_reset(rml::MemoryPool* memPool)
274251c0b2f7Stbbdev {
274351c0b2f7Stbbdev     if (!memPool) return false;
274451c0b2f7Stbbdev 
274551c0b2f7Stbbdev     return ((rml::internal::MemoryPool*)memPool)->reset();
274651c0b2f7Stbbdev }
274751c0b2f7Stbbdev 
274851c0b2f7Stbbdev void *pool_malloc(rml::MemoryPool* mPool, size_t size)
274951c0b2f7Stbbdev {
275051c0b2f7Stbbdev     return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size);
275151c0b2f7Stbbdev }
275251c0b2f7Stbbdev 
275351c0b2f7Stbbdev void *pool_realloc(rml::MemoryPool* mPool, void *object, size_t size)
275451c0b2f7Stbbdev {
275551c0b2f7Stbbdev     if (!object)
275651c0b2f7Stbbdev         return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size);
275751c0b2f7Stbbdev     if (!size) {
275851c0b2f7Stbbdev         internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0);
2759*57f524caSIlya Isaev         return nullptr;
276051c0b2f7Stbbdev     }
276151c0b2f7Stbbdev     return reallocAligned((rml::internal::MemoryPool*)mPool, object, size, 0);
276251c0b2f7Stbbdev }
276351c0b2f7Stbbdev 
276451c0b2f7Stbbdev void *pool_aligned_malloc(rml::MemoryPool* mPool, size_t size, size_t alignment)
276551c0b2f7Stbbdev {
276651c0b2f7Stbbdev     if (!isPowerOfTwo(alignment) || 0==size)
2767*57f524caSIlya Isaev         return nullptr;
276851c0b2f7Stbbdev 
276951c0b2f7Stbbdev     return allocateAligned((rml::internal::MemoryPool*)mPool, size, alignment);
277051c0b2f7Stbbdev }
277151c0b2f7Stbbdev 
277251c0b2f7Stbbdev void *pool_aligned_realloc(rml::MemoryPool* memPool, void *ptr, size_t size, size_t alignment)
277351c0b2f7Stbbdev {
277451c0b2f7Stbbdev     if (!isPowerOfTwo(alignment))
2775*57f524caSIlya Isaev         return nullptr;
277651c0b2f7Stbbdev     rml::internal::MemoryPool *mPool = (rml::internal::MemoryPool*)memPool;
277751c0b2f7Stbbdev     void *tmp;
277851c0b2f7Stbbdev 
277951c0b2f7Stbbdev     if (!ptr)
278051c0b2f7Stbbdev         tmp = allocateAligned(mPool, size, alignment);
278151c0b2f7Stbbdev     else if (!size) {
278251c0b2f7Stbbdev         internalPoolFree(mPool, ptr, 0);
2783*57f524caSIlya Isaev         return nullptr;
278451c0b2f7Stbbdev     } else
278551c0b2f7Stbbdev         tmp = reallocAligned(mPool, ptr, size, alignment);
278651c0b2f7Stbbdev 
278751c0b2f7Stbbdev     return tmp;
278851c0b2f7Stbbdev }
278951c0b2f7Stbbdev 
279051c0b2f7Stbbdev bool pool_free(rml::MemoryPool *mPool, void *object)
279151c0b2f7Stbbdev {
279251c0b2f7Stbbdev     return internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0);
279351c0b2f7Stbbdev }
279451c0b2f7Stbbdev 
279551c0b2f7Stbbdev rml::MemoryPool *pool_identify(void *object)
279651c0b2f7Stbbdev {
279751c0b2f7Stbbdev     rml::internal::MemoryPool *pool;
279851c0b2f7Stbbdev     if (isLargeObject<ourMem>(object)) {
279951c0b2f7Stbbdev         LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
280051c0b2f7Stbbdev         pool = header->memoryBlock->pool;
280151c0b2f7Stbbdev     } else {
280251c0b2f7Stbbdev         Block *block = (Block*)alignDown(object, slabSize);
280351c0b2f7Stbbdev         pool = block->getMemPool();
280451c0b2f7Stbbdev     }
280551c0b2f7Stbbdev     // do not return defaultMemPool, as it can't be used in pool_free() etc
280651c0b2f7Stbbdev     __TBB_ASSERT_RELEASE(pool!=defaultMemPool,
280751c0b2f7Stbbdev         "rml::pool_identify() can't be used for scalable_malloc() etc results.");
280851c0b2f7Stbbdev     return (rml::MemoryPool*)pool;
280951c0b2f7Stbbdev }
281051c0b2f7Stbbdev 
281151c0b2f7Stbbdev size_t pool_msize(rml::MemoryPool *mPool, void* object)
281251c0b2f7Stbbdev {
281351c0b2f7Stbbdev     if (object) {
281451c0b2f7Stbbdev         // No assert for object recognition, because objects allocated from a non-default
281551c0b2f7Stbbdev         // memory pool do not participate in range checking and small objects do not have
281651c0b2f7Stbbdev         // valid backreferences. Instead, check that the object belongs to the specified memory pool.
281751c0b2f7Stbbdev         MALLOC_ASSERT_EX(mPool == pool_identify(object), "Object does not belong to the specified pool");
281851c0b2f7Stbbdev         return internalMsize(object);
281951c0b2f7Stbbdev     }
282051c0b2f7Stbbdev     errno = EINVAL;
282151c0b2f7Stbbdev     // Unlike _msize, return 0 in case of parameter error.
282251c0b2f7Stbbdev     // Returning size_t(-1) looks more like a road to trouble.
282351c0b2f7Stbbdev     return 0;
282451c0b2f7Stbbdev }
282551c0b2f7Stbbdev 
282651c0b2f7Stbbdev } // namespace rml
282751c0b2f7Stbbdev 
282851c0b2f7Stbbdev using namespace rml::internal;
282951c0b2f7Stbbdev 
283051c0b2f7Stbbdev #if MALLOC_TRACE
283151c0b2f7Stbbdev static unsigned int threadGoingDownCount = 0;
283251c0b2f7Stbbdev #endif
283351c0b2f7Stbbdev 
283451c0b2f7Stbbdev /*
283551c0b2f7Stbbdev  * When a thread is shutting down this routine should be called to remove all the thread ids
2836*57f524caSIlya Isaev  * from the malloc blocks and replace them with a nullptr thread id.
283751c0b2f7Stbbdev  *
283851c0b2f7Stbbdev  * For pthreads, the function is set as a callback in pthread_key_create for TLS bin.
283951c0b2f7Stbbdev  * It will be automatically called at thread exit with the key value as the argument,
2840*57f524caSIlya Isaev  * unless that value is nullptr.
284151c0b2f7Stbbdev  * For Windows, it is called from DllMain( DLL_THREAD_DETACH ).
284251c0b2f7Stbbdev  *
284351c0b2f7Stbbdev  * However, neither of the above is called for the main process thread, so the routine
284451c0b2f7Stbbdev  * also needs to be called during the process shutdown.
284551c0b2f7Stbbdev  *
284651c0b2f7Stbbdev */
284751c0b2f7Stbbdev // TODO: Consider making this function part of class MemoryPool.
284851c0b2f7Stbbdev void doThreadShutdownNotification(TLSData* tls, bool main_thread)
284951c0b2f7Stbbdev {
285051c0b2f7Stbbdev     TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return start %d\n",
285151c0b2f7Stbbdev              getThreadId(),  threadGoingDownCount++ ));
285251c0b2f7Stbbdev 
285351c0b2f7Stbbdev #if USE_PTHREAD
285451c0b2f7Stbbdev     if (tls) {
285551c0b2f7Stbbdev         if (!shutdownSync.threadDtorStart()) return;
285651c0b2f7Stbbdev         tls->getMemPool()->onThreadShutdown(tls);
285751c0b2f7Stbbdev         shutdownSync.threadDtorDone();
285851c0b2f7Stbbdev     } else
285951c0b2f7Stbbdev #endif
286051c0b2f7Stbbdev     {
286151c0b2f7Stbbdev         suppress_unused_warning(tls); // not used on Windows
286251c0b2f7Stbbdev         // The default pool is safe to use at this point:
286351c0b2f7Stbbdev         //   on Linux, only the main thread can go here before destroying defaultMemPool;
286451c0b2f7Stbbdev         //   on Windows, shutdown is synchronized via loader lock and isMallocInitialized().
286551c0b2f7Stbbdev         // See also __TBB_mallocProcessShutdownNotification()
286651c0b2f7Stbbdev         defaultMemPool->onThreadShutdown(defaultMemPool->getTLS(/*create=*/false));
286751c0b2f7Stbbdev         // Take lock to walk through other pools; but waiting might be dangerous at this point
286851c0b2f7Stbbdev         // (e.g. on Windows the main thread might deadlock)
286951c0b2f7Stbbdev         bool locked;
287051c0b2f7Stbbdev         MallocMutex::scoped_lock lock(MemoryPool::memPoolListLock, /*wait=*/!main_thread, &locked);
287151c0b2f7Stbbdev         if (locked) { // the list is safe to process
287251c0b2f7Stbbdev             for (MemoryPool *memPool = defaultMemPool->next; memPool; memPool = memPool->next)
287351c0b2f7Stbbdev                 memPool->onThreadShutdown(memPool->getTLS(/*create=*/false));
287451c0b2f7Stbbdev         }
287551c0b2f7Stbbdev     }
287651c0b2f7Stbbdev 
287751c0b2f7Stbbdev     TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return end\n", getThreadId() ));
287851c0b2f7Stbbdev }
287951c0b2f7Stbbdev 
288051c0b2f7Stbbdev #if USE_PTHREAD
288151c0b2f7Stbbdev void mallocThreadShutdownNotification(void* arg)
288251c0b2f7Stbbdev {
288351c0b2f7Stbbdev     // The routine is called for each pool (as TLS dtor) on each thread, except for the main thread
288451c0b2f7Stbbdev     if (!isMallocInitialized()) return;
288551c0b2f7Stbbdev     doThreadShutdownNotification((TLSData*)arg, false);
288651c0b2f7Stbbdev }
288751c0b2f7Stbbdev #else
288851c0b2f7Stbbdev extern "C" void __TBB_mallocThreadShutdownNotification()
288951c0b2f7Stbbdev {
289051c0b2f7Stbbdev     // The routine is called once per thread on Windows
289151c0b2f7Stbbdev     if (!isMallocInitialized()) return;
2892*57f524caSIlya Isaev     doThreadShutdownNotification(nullptr, false);
289351c0b2f7Stbbdev }
289451c0b2f7Stbbdev #endif
289551c0b2f7Stbbdev 
289651c0b2f7Stbbdev extern "C" void __TBB_mallocProcessShutdownNotification(bool windows_process_dying)
289751c0b2f7Stbbdev {
289851c0b2f7Stbbdev     if (!isMallocInitialized()) return;
289951c0b2f7Stbbdev 
290051c0b2f7Stbbdev     // Don't clean allocator internals if the entire process is exiting
290151c0b2f7Stbbdev     if (!windows_process_dying) {
2902*57f524caSIlya Isaev         doThreadShutdownNotification(nullptr, /*main_thread=*/true);
290351c0b2f7Stbbdev     }
290451c0b2f7Stbbdev #if  __TBB_MALLOC_LOCACHE_STAT
290551c0b2f7Stbbdev     printf("cache hit ratio %f, size hit %f\n",
290651c0b2f7Stbbdev            1.*cacheHits/mallocCalls, 1.*memHitKB/memAllocKB);
290751c0b2f7Stbbdev     defaultMemPool->extMemPool.loc.reportStat(stdout);
290851c0b2f7Stbbdev #endif
290951c0b2f7Stbbdev 
291051c0b2f7Stbbdev     shutdownSync.processExit();
291151c0b2f7Stbbdev #if __TBB_SOURCE_DIRECTLY_INCLUDED
291251c0b2f7Stbbdev /* Pthread keys must be deleted as soon as possible so that the key dtor is not
291351c0b2f7Stbbdev    called on thread termination when the tbbmalloc code may already be unloaded.
291451c0b2f7Stbbdev */
291551c0b2f7Stbbdev     defaultMemPool->destroy();
29161ecde27fSIlya Mishin     destroyBackRefMain(&defaultMemPool->extMemPool.backend);
291751c0b2f7Stbbdev     ThreadId::destroy();      // Delete key for thread id
291851c0b2f7Stbbdev     hugePages.reset();
291951c0b2f7Stbbdev     // a complete re-initialization of the allocator is possible after this point
292051c0b2f7Stbbdev     mallocInitialized.store(0, std::memory_order_release);
292151c0b2f7Stbbdev #endif // __TBB_SOURCE_DIRECTLY_INCLUDED
292251c0b2f7Stbbdev 
292351c0b2f7Stbbdev #if COLLECT_STATISTICS
292451c0b2f7Stbbdev     unsigned nThreads = ThreadId::getMaxThreadId();
292551c0b2f7Stbbdev     for( int i=1; i<=nThreads && i<MAX_THREADS; ++i )
292651c0b2f7Stbbdev         STAT_print(i);
292751c0b2f7Stbbdev #endif
292851c0b2f7Stbbdev     if (!usedBySrcIncluded)
292951c0b2f7Stbbdev         MALLOC_ITT_FINI_ITTLIB();
293051c0b2f7Stbbdev }
293151c0b2f7Stbbdev 
293251c0b2f7Stbbdev extern "C" void * scalable_malloc(size_t size)
293351c0b2f7Stbbdev {
293451c0b2f7Stbbdev     void *ptr = internalMalloc(size);
293551c0b2f7Stbbdev     if (!ptr) errno = ENOMEM;
293651c0b2f7Stbbdev     return ptr;
293751c0b2f7Stbbdev }
293851c0b2f7Stbbdev 
293951c0b2f7Stbbdev extern "C" void scalable_free(void *object)
294051c0b2f7Stbbdev {
294151c0b2f7Stbbdev     internalFree(object);
294251c0b2f7Stbbdev }
294351c0b2f7Stbbdev 
294451c0b2f7Stbbdev #if MALLOC_ZONE_OVERLOAD_ENABLED
294551c0b2f7Stbbdev extern "C" void __TBB_malloc_free_definite_size(void *object, size_t size)
294651c0b2f7Stbbdev {
294751c0b2f7Stbbdev     internalPoolFree(defaultMemPool, object, size);
294851c0b2f7Stbbdev }
294951c0b2f7Stbbdev #endif
295051c0b2f7Stbbdev 
295151c0b2f7Stbbdev /*
295251c0b2f7Stbbdev  * A variant that provides additional memory safety by checking whether the given address
295351c0b2f7Stbbdev  * was obtained with this allocator and, if not, redirecting to the provided alternative call.
295451c0b2f7Stbbdev  */
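// e.g., a malloc-replacement layer might route every free() through this call:
//     __TBB_malloc_safer_free(ptr, &free);  // foreign pointers fall back to libc free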
29558827ea7dSLong Nguyen extern "C" TBBMALLOC_EXPORT void __TBB_malloc_safer_free(void *object, void (*original_free)(void*))
295651c0b2f7Stbbdev {
295751c0b2f7Stbbdev     if (!object)
295851c0b2f7Stbbdev         return;
295951c0b2f7Stbbdev 
296051c0b2f7Stbbdev     // tbbmalloc can have allocated the object only if tbbmalloc has been initialized
296151c0b2f7Stbbdev     if (mallocInitialized.load(std::memory_order_acquire) && defaultMemPool->extMemPool.backend.ptrCanBeValid(object)) {
296251c0b2f7Stbbdev         if (isLargeObject<unknownMem>(object)) {
296351c0b2f7Stbbdev             // must check for a large object first, because the small-object check touches 4 pages to the left,
296451c0b2f7Stbbdev             // and those can be inaccessible
296551c0b2f7Stbbdev             TLSData *tls = defaultMemPool->getTLS(/*create=*/false);
296651c0b2f7Stbbdev 
296751c0b2f7Stbbdev             defaultMemPool->putToLLOCache(tls, object);
296851c0b2f7Stbbdev             return;
296951c0b2f7Stbbdev         } else if (isSmallObject(object)) {
297051c0b2f7Stbbdev             freeSmallObject(object);
297151c0b2f7Stbbdev             return;
297251c0b2f7Stbbdev         }
297351c0b2f7Stbbdev     }
297451c0b2f7Stbbdev     if (original_free)
297551c0b2f7Stbbdev         original_free(object);
297651c0b2f7Stbbdev }
297751c0b2f7Stbbdev 
297851c0b2f7Stbbdev /********* End the free code        *************/
297951c0b2f7Stbbdev 
298051c0b2f7Stbbdev /********* Code for scalable_realloc       ***********/
298151c0b2f7Stbbdev 
298251c0b2f7Stbbdev /*
298351c0b2f7Stbbdev  * From K&R
298451c0b2f7Stbbdev  * "realloc changes the size of the object pointed to by p to size. The contents will
298551c0b2f7Stbbdev  * be unchanged up to the minimum of the old and the new sizes. If the new size is larger,
298651c0b2f7Stbbdev  * the new space is uninitialized. realloc returns a pointer to the new space, or
2987*57f524caSIlya Isaev  * nullptr if the request cannot be satisfied, in which case *p is unchanged."
298851c0b2f7Stbbdev  *
298951c0b2f7Stbbdev  */
299051c0b2f7Stbbdev extern "C" void* scalable_realloc(void* ptr, size_t size)
299151c0b2f7Stbbdev {
299251c0b2f7Stbbdev     void *tmp;
299351c0b2f7Stbbdev 
299451c0b2f7Stbbdev     if (!ptr)
299551c0b2f7Stbbdev         tmp = internalMalloc(size);
299651c0b2f7Stbbdev     else if (!size) {
299751c0b2f7Stbbdev         internalFree(ptr);
2998*57f524caSIlya Isaev         return nullptr;
299951c0b2f7Stbbdev     } else
300051c0b2f7Stbbdev         tmp = reallocAligned(defaultMemPool, ptr, size, 0);
300151c0b2f7Stbbdev 
300251c0b2f7Stbbdev     if (!tmp) errno = ENOMEM;
300351c0b2f7Stbbdev     return tmp;
300451c0b2f7Stbbdev }
300551c0b2f7Stbbdev 
300651c0b2f7Stbbdev /*
300751c0b2f7Stbbdev  * A variant that provides additional memory safety by checking whether the given address
300851c0b2f7Stbbdev  * was obtained with this allocator and, if not, redirecting to the provided alternative call.
300951c0b2f7Stbbdev  */
30108827ea7dSLong Nguyen extern "C" TBBMALLOC_EXPORT void* __TBB_malloc_safer_realloc(void* ptr, size_t sz, void* original_realloc)
301151c0b2f7Stbbdev {
301251c0b2f7Stbbdev     void *tmp = nullptr; // initialized defensively; also silences warnings about uninitialized use
301351c0b2f7Stbbdev 
301451c0b2f7Stbbdev     if (!ptr) {
301551c0b2f7Stbbdev         tmp = internalMalloc(sz);
301651c0b2f7Stbbdev     } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) {
301751c0b2f7Stbbdev         if (!sz) {
301851c0b2f7Stbbdev             internalFree(ptr);
3019*57f524caSIlya Isaev             return nullptr;
302051c0b2f7Stbbdev         } else {
302151c0b2f7Stbbdev             tmp = reallocAligned(defaultMemPool, ptr, sz, 0);
302251c0b2f7Stbbdev         }
302351c0b2f7Stbbdev     }
302451c0b2f7Stbbdev #if USE_WINTHREAD
302551c0b2f7Stbbdev     else if (original_realloc && sz) {
302651c0b2f7Stbbdev         orig_ptrs *original_ptrs = static_cast<orig_ptrs*>(original_realloc);
302751c0b2f7Stbbdev         if ( original_ptrs->msize ){
302851c0b2f7Stbbdev             size_t oldSize = original_ptrs->msize(ptr);
302951c0b2f7Stbbdev             tmp = internalMalloc(sz);
303051c0b2f7Stbbdev             if (tmp) {
303151c0b2f7Stbbdev                 memcpy(tmp, ptr, sz<oldSize? sz : oldSize);
303251c0b2f7Stbbdev                 if ( original_ptrs->free ){
303351c0b2f7Stbbdev                     original_ptrs->free( ptr );
303451c0b2f7Stbbdev                 }
303551c0b2f7Stbbdev             }
303651c0b2f7Stbbdev         } else
3037*57f524caSIlya Isaev             tmp = nullptr;
303851c0b2f7Stbbdev     }
303951c0b2f7Stbbdev #else
304051c0b2f7Stbbdev     else if (original_realloc) {
304151c0b2f7Stbbdev         typedef void* (*realloc_ptr_t)(void*,size_t);
304251c0b2f7Stbbdev         realloc_ptr_t original_realloc_ptr;
304351c0b2f7Stbbdev         (void *&)original_realloc_ptr = original_realloc;
304451c0b2f7Stbbdev         tmp = original_realloc_ptr(ptr,sz);
304551c0b2f7Stbbdev     }
304651c0b2f7Stbbdev #endif
3047*57f524caSIlya Isaev     else tmp = nullptr;
304851c0b2f7Stbbdev 
304951c0b2f7Stbbdev     if (!tmp) errno = ENOMEM;
305051c0b2f7Stbbdev     return tmp;
305151c0b2f7Stbbdev }
305251c0b2f7Stbbdev 
305351c0b2f7Stbbdev /********* End code for scalable_realloc   ***********/
305451c0b2f7Stbbdev 
305551c0b2f7Stbbdev /********* Code for scalable_calloc   ***********/
305651c0b2f7Stbbdev 
305751c0b2f7Stbbdev /*
305851c0b2f7Stbbdev  * From K&R
305951c0b2f7Stbbdev  * calloc returns a pointer to space for an array of nobj objects,
3060*57f524caSIlya Isaev  * each of size size, or nullptr if the request cannot be satisfied.
306151c0b2f7Stbbdev  * The space is initialized to zero bytes.
306251c0b2f7Stbbdev  *
306351c0b2f7Stbbdev  */
306451c0b2f7Stbbdev 
306551c0b2f7Stbbdev extern "C" void * scalable_calloc(size_t nobj, size_t size)
306651c0b2f7Stbbdev {
306751c0b2f7Stbbdev     // the square root of the maximal size_t value
306851c0b2f7Stbbdev     const size_t mult_not_overflow = size_t(1) << (sizeof(size_t)*CHAR_BIT/2);
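    // e.g., on a 64-bit platform mult_not_overflow == 2^32: a product of two
    // factors each below 2^32 cannot overflow 64 bits, so the exact division
    // check below only runs when at least one factor is suspiciously large.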
306951c0b2f7Stbbdev     const size_t arraySize = nobj * size;
307051c0b2f7Stbbdev 
307151c0b2f7Stbbdev     // check for overflow during multiplication:
307251c0b2f7Stbbdev     if (nobj>=mult_not_overflow || size>=mult_not_overflow) // 1) heuristic check
307351c0b2f7Stbbdev         if (nobj && arraySize / nobj != size) {             // 2) exact check
307451c0b2f7Stbbdev             errno = ENOMEM;
3075*57f524caSIlya Isaev             return nullptr;
307651c0b2f7Stbbdev         }
307751c0b2f7Stbbdev     void* result = internalMalloc(arraySize);
307851c0b2f7Stbbdev     if (result)
307951c0b2f7Stbbdev         memset(result, 0, arraySize);
308051c0b2f7Stbbdev     else
308151c0b2f7Stbbdev         errno = ENOMEM;
308251c0b2f7Stbbdev     return result;
308351c0b2f7Stbbdev }
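// Illustrative sketch (not part of the original source): the two-stage overflow
// test above skips the costly division whenever both factors are below
// 2^(bits/2), since such a product cannot exceed SIZE_MAX.
#if 0
#include <cassert>
#include <cerrno>
#include <cstdint>
void calloc_overflow_semantics() {
    void* p = scalable_calloc(1000, sizeof(double)); // small factors: no division performed
    assert(p);
    scalable_free(p);
    errno = 0;
    void* q = scalable_calloc(SIZE_MAX / 2, 4);      // product overflows: exact check rejects it
    assert(q == nullptr && errno == ENOMEM);
}
#endif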
308451c0b2f7Stbbdev 
308551c0b2f7Stbbdev /********* End code for scalable_calloc   ***********/
308651c0b2f7Stbbdev 
308751c0b2f7Stbbdev /********* Code for aligned allocation API **********/
308851c0b2f7Stbbdev 
308951c0b2f7Stbbdev extern "C" int scalable_posix_memalign(void **memptr, size_t alignment, size_t size)
309051c0b2f7Stbbdev {
309151c0b2f7Stbbdev     if ( !isPowerOfTwoAtLeast(alignment, sizeof(void*)) )
309251c0b2f7Stbbdev         return EINVAL;
309351c0b2f7Stbbdev     void *result = allocateAligned(defaultMemPool, size, alignment);
309451c0b2f7Stbbdev     if (!result)
309551c0b2f7Stbbdev         return ENOMEM;
309651c0b2f7Stbbdev     *memptr = result;
309751c0b2f7Stbbdev     return 0;
309851c0b2f7Stbbdev }
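// Illustrative sketch (not part of the original source): like posix_memalign,
// the function above reports errors through its return value, not errno, and
// leaves *memptr untouched on failure.
#if 0
#include <cassert>
#include <cerrno>
#include <cstdint>
void posix_memalign_semantics() {
    void* p = nullptr;
    int rc = scalable_posix_memalign(&p, 64, 1024); // power of two, >= sizeof(void*)
    assert(rc == 0 && (uintptr_t)p % 64 == 0);
    scalable_free(p);
    rc = scalable_posix_memalign(&p, 3, 1024);      // invalid alignment
    assert(rc == EINVAL);
}
#endif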
309951c0b2f7Stbbdev 
310051c0b2f7Stbbdev extern "C" void * scalable_aligned_malloc(size_t size, size_t alignment)
310151c0b2f7Stbbdev {
310251c0b2f7Stbbdev     if (!isPowerOfTwo(alignment) || 0==size) {
310351c0b2f7Stbbdev         errno = EINVAL;
3104*57f524caSIlya Isaev         return nullptr;
310551c0b2f7Stbbdev     }
310651c0b2f7Stbbdev     void *tmp = allocateAligned(defaultMemPool, size, alignment);
310751c0b2f7Stbbdev     if (!tmp) errno = ENOMEM;
310851c0b2f7Stbbdev     return tmp;
310951c0b2f7Stbbdev }
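// Illustrative sketch (not part of the original source): in contrast to
// scalable_posix_memalign, this variant reports errors through errno and
// rejects a zero size.
#if 0
#include <cassert>
#include <cerrno>
#include <cstdint>
void aligned_malloc_semantics() {
    void* p = scalable_aligned_malloc(256, 128);
    assert(p && (uintptr_t)p % 128 == 0);
    scalable_aligned_free(p);
    errno = 0;
    assert(scalable_aligned_malloc(0, 128) == nullptr && errno == EINVAL); // size 0 rejected
}
#endif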
311051c0b2f7Stbbdev 
311151c0b2f7Stbbdev extern "C" void * scalable_aligned_realloc(void *ptr, size_t size, size_t alignment)
311251c0b2f7Stbbdev {
311351c0b2f7Stbbdev     if (!isPowerOfTwo(alignment)) {
311451c0b2f7Stbbdev         errno = EINVAL;
3115*57f524caSIlya Isaev         return nullptr;
311651c0b2f7Stbbdev     }
311751c0b2f7Stbbdev     void *tmp;
311851c0b2f7Stbbdev 
311951c0b2f7Stbbdev     if (!ptr)
312051c0b2f7Stbbdev         tmp = allocateAligned(defaultMemPool, size, alignment);
312151c0b2f7Stbbdev     else if (!size) {
312251c0b2f7Stbbdev         scalable_free(ptr);
3123*57f524caSIlya Isaev         return nullptr;
312451c0b2f7Stbbdev     } else
312551c0b2f7Stbbdev         tmp = reallocAligned(defaultMemPool, ptr, size, alignment);
312651c0b2f7Stbbdev 
312751c0b2f7Stbbdev     if (!tmp) errno = ENOMEM;
312851c0b2f7Stbbdev     return tmp;
312951c0b2f7Stbbdev }
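// Illustrative sketch (not part of the original source): the nullptr and
// zero-size corner cases above mirror realloc's conventions.
#if 0
#include <cassert>
void aligned_realloc_semantics() {
    void* p = scalable_aligned_realloc(nullptr, 64, 32);    // nullptr in: plain aligned allocation
    assert(p);
    p = scalable_aligned_realloc(p, 256, 32);               // regrown, still 32-byte aligned
    assert(p);
    assert(scalable_aligned_realloc(p, 0, 32) == nullptr);  // size 0: frees p, returns nullptr
}
#endif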
313051c0b2f7Stbbdev 
31318827ea7dSLong Nguyen extern "C" TBBMALLOC_EXPORT void * __TBB_malloc_safer_aligned_realloc(void *ptr, size_t size, size_t alignment, void* orig_function)
313251c0b2f7Stbbdev {
313351c0b2f7Stbbdev     /* corner cases are handled here, not in reallocAligned, so that reallocAligned need not deal with errno */
313451c0b2f7Stbbdev     if (!isPowerOfTwo(alignment)) {
313551c0b2f7Stbbdev         errno = EINVAL;
3136*57f524caSIlya Isaev         return nullptr;
313751c0b2f7Stbbdev     }
3138*57f524caSIlya Isaev     void *tmp = nullptr;
313951c0b2f7Stbbdev 
314051c0b2f7Stbbdev     if (!ptr) {
314151c0b2f7Stbbdev         tmp = allocateAligned(defaultMemPool, size, alignment);
314251c0b2f7Stbbdev     } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) {
314351c0b2f7Stbbdev         if (!size) {
314451c0b2f7Stbbdev             internalFree(ptr);
3145*57f524caSIlya Isaev             return nullptr;
314651c0b2f7Stbbdev         } else {
314751c0b2f7Stbbdev             tmp = reallocAligned(defaultMemPool, ptr, size, alignment);
314851c0b2f7Stbbdev         }
314951c0b2f7Stbbdev     }
315051c0b2f7Stbbdev #if USE_WINTHREAD
315151c0b2f7Stbbdev     else {
315251c0b2f7Stbbdev         orig_aligned_ptrs *original_ptrs = static_cast<orig_aligned_ptrs*>(orig_function);
315351c0b2f7Stbbdev         if (size) {
315451c0b2f7Stbbdev             // Without aligned_msize we cannot determine how much data to copy,
315551c0b2f7Stbbdev             // so the old pointer is kept untouched.
315651c0b2f7Stbbdev             if ( original_ptrs->aligned_msize ){
315751c0b2f7Stbbdev                 // query with minimal alignment and zero offset to get a plausible oldSize
315851c0b2f7Stbbdev                 size_t oldSize = original_ptrs->aligned_msize(ptr, sizeof(void*), 0);
315951c0b2f7Stbbdev                 tmp = allocateAligned(defaultMemPool, size, alignment);
316051c0b2f7Stbbdev                 if (tmp) {
316151c0b2f7Stbbdev                     memcpy(tmp, ptr, size<oldSize? size : oldSize);
316251c0b2f7Stbbdev                     if ( original_ptrs->aligned_free ){
316351c0b2f7Stbbdev                         original_ptrs->aligned_free( ptr );
316451c0b2f7Stbbdev                     }
316551c0b2f7Stbbdev                 }
316651c0b2f7Stbbdev             }
316751c0b2f7Stbbdev         } else {
316851c0b2f7Stbbdev             if ( original_ptrs->aligned_free ){
316951c0b2f7Stbbdev                 original_ptrs->aligned_free( ptr );
317051c0b2f7Stbbdev             }
3171*57f524caSIlya Isaev             return nullptr;
317251c0b2f7Stbbdev         }
317351c0b2f7Stbbdev     }
317451c0b2f7Stbbdev #else
317551c0b2f7Stbbdev     // The original realloc cannot align its result, and there is no way to
317651c0b2f7Stbbdev     // determine the size of the object being reallocated, so we give up.
317751c0b2f7Stbbdev     suppress_unused_warning(orig_function);
317851c0b2f7Stbbdev #endif
317951c0b2f7Stbbdev     if (!tmp) errno = ENOMEM;
318051c0b2f7Stbbdev     return tmp;
318151c0b2f7Stbbdev }
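// Illustrative sketch (not part of the original source): the Windows branch
// above migrates a foreign block in three steps. The helper below restates
// that pattern in isolation; its name and parameters are hypothetical.
#if 0
#include <cstring>
void* migrate_foreign_block(void* ptr, size_t newSize, size_t alignment,
                            size_t (*origMsize)(void*), void (*origFree)(void*)) {
    size_t oldSize = origMsize(ptr);                 // 1) size per the original allocator
    void* fresh = scalable_aligned_malloc(newSize, alignment);
    if (fresh) {
        memcpy(fresh, ptr, newSize < oldSize ? newSize : oldSize); // 2) copy the payload
        origFree(ptr);                               // 3) return the old block to its owner
    }
    return fresh; // on failure the old block stays valid, as in the code above
}
#endif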
318251c0b2f7Stbbdev 
318351c0b2f7Stbbdev extern "C" void scalable_aligned_free(void *ptr)
318451c0b2f7Stbbdev {
318551c0b2f7Stbbdev     internalFree(ptr);
318651c0b2f7Stbbdev }
318751c0b2f7Stbbdev 
318851c0b2f7Stbbdev /********* end code for aligned allocation API **********/
318951c0b2f7Stbbdev 
319051c0b2f7Stbbdev /********* Code for scalable_msize       ***********/
319151c0b2f7Stbbdev 
319251c0b2f7Stbbdev /*
319351c0b2f7Stbbdev  * Returns the size of a memory block allocated in the heap.
319451c0b2f7Stbbdev  */
319551c0b2f7Stbbdev extern "C" size_t scalable_msize(void* ptr)
319651c0b2f7Stbbdev {
319751c0b2f7Stbbdev     if (ptr) {
319851c0b2f7Stbbdev         MALLOC_ASSERT(isRecognized(ptr), "Invalid pointer in scalable_msize detected.");
319951c0b2f7Stbbdev         return internalMsize(ptr);
320051c0b2f7Stbbdev     }
320151c0b2f7Stbbdev     errno = EINVAL;
320251c0b2f7Stbbdev     // Unlike _msize, return 0 in case of a parameter error;
320351c0b2f7Stbbdev     // returning size_t(-1) would be more likely to cause trouble.
320451c0b2f7Stbbdev     return 0;
320551c0b2f7Stbbdev }
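// Illustrative sketch (not part of the original source): msize reports the
// usable size of the block, which may exceed the size originally requested.
#if 0
#include <cassert>
void msize_semantics() {
    void* p = scalable_malloc(100);
    assert(scalable_msize(p) >= 100);     // usable size, at least what was asked for
    scalable_free(p);
    assert(scalable_msize(nullptr) == 0); // parameter error: errno = EINVAL, 0 returned
}
#endif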
320651c0b2f7Stbbdev 
320751c0b2f7Stbbdev /*
320851c0b2f7Stbbdev  * A variant that provides additional memory safety, by checking whether the given address
320951c0b2f7Stbbdev  * was obtained with this allocator, and if not redirecting to the provided alternative call.
321051c0b2f7Stbbdev  */
32118827ea7dSLong Nguyen extern "C" TBBMALLOC_EXPORT size_t __TBB_malloc_safer_msize(void *object, size_t (*original_msize)(void*))
321251c0b2f7Stbbdev {
321351c0b2f7Stbbdev     if (object) {
321451c0b2f7Stbbdev         // Check if the memory was allocated by scalable_malloc
321551c0b2f7Stbbdev         if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object))
321651c0b2f7Stbbdev             return internalMsize(object);
321751c0b2f7Stbbdev         else if (original_msize)
321851c0b2f7Stbbdev             return original_msize(object);
321951c0b2f7Stbbdev     }
3220*57f524caSIlya Isaev     // object is nullptr, or it is foreign and no original_msize was supplied
322151c0b2f7Stbbdev #if USE_WINTHREAD
322251c0b2f7Stbbdev     errno = EINVAL; // errno expected to be set only on this platform
322351c0b2f7Stbbdev #endif
322451c0b2f7Stbbdev     return 0;
322551c0b2f7Stbbdev }
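// Illustrative sketch (not part of the original source): a caller such as the
// Windows proxy layer can pass the CRT's _msize as the fallback, giving this
// dispatch:
#if 0
size_t object_size = __TBB_malloc_safer_msize(object, &_msize);
// recognized pointer       -> tbbmalloc's internal size
// foreign pointer          -> _msize(object)
// nullptr or no fallback   -> 0 (with errno = EINVAL on Windows)
#endif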
322651c0b2f7Stbbdev 
322751c0b2f7Stbbdev /*
322851c0b2f7Stbbdev  * The same as above but for _aligned_msize case
322951c0b2f7Stbbdev  */
32308827ea7dSLong Nguyen extern "C" TBBMALLOC_EXPORT size_t __TBB_malloc_safer_aligned_msize(void *object, size_t alignment, size_t offset, size_t (*orig_aligned_msize)(void*,size_t,size_t))
323151c0b2f7Stbbdev {
323251c0b2f7Stbbdev     if (object) {
323351c0b2f7Stbbdev         // Check if the memory was allocated by scalable_malloc
323451c0b2f7Stbbdev         if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object))
323551c0b2f7Stbbdev             return internalMsize(object);
323651c0b2f7Stbbdev         else if (orig_aligned_msize)
323751c0b2f7Stbbdev             return orig_aligned_msize(object,alignment,offset);
323851c0b2f7Stbbdev     }
3239*57f524caSIlya Isaev     // object is nullptr or unknown
324051c0b2f7Stbbdev     errno = EINVAL;
324151c0b2f7Stbbdev     return 0;
324251c0b2f7Stbbdev }
324351c0b2f7Stbbdev 
324451c0b2f7Stbbdev /********* End code for scalable_msize   ***********/
324551c0b2f7Stbbdev 
324651c0b2f7Stbbdev extern "C" int scalable_allocation_mode(int param, intptr_t value)
324751c0b2f7Stbbdev {
324851c0b2f7Stbbdev     if (param == TBBMALLOC_SET_SOFT_HEAP_LIMIT) {
324951c0b2f7Stbbdev         defaultMemPool->extMemPool.backend.setRecommendedMaxSize((size_t)value);
325051c0b2f7Stbbdev         return TBBMALLOC_OK;
325151c0b2f7Stbbdev     } else if (param == USE_HUGE_PAGES) {
3252734f0bc0SPablo Romero #if __unix__
325351c0b2f7Stbbdev         switch (value) {
325451c0b2f7Stbbdev         case 0:
325551c0b2f7Stbbdev         case 1:
325651c0b2f7Stbbdev             hugePages.setMode(value);
325751c0b2f7Stbbdev             return TBBMALLOC_OK;
325851c0b2f7Stbbdev         default:
325951c0b2f7Stbbdev             return TBBMALLOC_INVALID_PARAM;
326051c0b2f7Stbbdev         }
326151c0b2f7Stbbdev #else
326251c0b2f7Stbbdev         return TBBMALLOC_NO_EFFECT;
326351c0b2f7Stbbdev #endif
326451c0b2f7Stbbdev #if __TBB_SOURCE_DIRECTLY_INCLUDED
326551c0b2f7Stbbdev     } else if (param == TBBMALLOC_INTERNAL_SOURCE_INCLUDED) {
326651c0b2f7Stbbdev         switch (value) {
326751c0b2f7Stbbdev         case 0: // used by dynamic library
326851c0b2f7Stbbdev         case 1: // used by static library or directly included sources
326951c0b2f7Stbbdev             usedBySrcIncluded = value;
327051c0b2f7Stbbdev             return TBBMALLOC_OK;
327151c0b2f7Stbbdev         default:
327251c0b2f7Stbbdev             return TBBMALLOC_INVALID_PARAM;
327351c0b2f7Stbbdev         }
327451c0b2f7Stbbdev #endif
327551c0b2f7Stbbdev     } else if (param == TBBMALLOC_SET_HUGE_SIZE_THRESHOLD) {
327651c0b2f7Stbbdev         defaultMemPool->extMemPool.loc.setHugeSizeThreshold((size_t)value);
327751c0b2f7Stbbdev         return TBBMALLOC_OK;
327851c0b2f7Stbbdev     }
327951c0b2f7Stbbdev     return TBBMALLOC_INVALID_PARAM;
328051c0b2f7Stbbdev }
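// Illustrative sketch (not part of the original source): typical uses of the
// mode knobs handled above.
#if 0
void allocation_mode_usage() {
    // Ask tbbmalloc to keep no more than ~512 MB cached before returning
    // memory to the OS (a soft limit, not a hard cap).
    scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 512 * 1024 * 1024);
    // Request huge pages; honored only where __unix__ is defined,
    // TBBMALLOC_NO_EFFECT elsewhere.
    scalable_allocation_mode(USE_HUGE_PAGES, 1);
}
#endif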
328151c0b2f7Stbbdev 
328251c0b2f7Stbbdev extern "C" int scalable_allocation_command(int cmd, void *param)
328351c0b2f7Stbbdev {
328451c0b2f7Stbbdev     if (param)
328551c0b2f7Stbbdev         return TBBMALLOC_INVALID_PARAM;
328651c0b2f7Stbbdev 
328751c0b2f7Stbbdev     bool released = false;
328851c0b2f7Stbbdev     switch(cmd) {
328951c0b2f7Stbbdev     case TBBMALLOC_CLEAN_THREAD_BUFFERS:
329051c0b2f7Stbbdev         if (TLSData *tls = defaultMemPool->getTLS(/*create=*/false))
329151c0b2f7Stbbdev             released = tls->externalCleanup(/*cleanOnlyUnused*/false, /*cleanBins=*/true);
329251c0b2f7Stbbdev         break;
329351c0b2f7Stbbdev     case TBBMALLOC_CLEAN_ALL_BUFFERS:
329451c0b2f7Stbbdev         released = defaultMemPool->extMemPool.hardCachesCleanup();
329551c0b2f7Stbbdev         break;
329651c0b2f7Stbbdev     default:
329751c0b2f7Stbbdev         return TBBMALLOC_INVALID_PARAM;
329851c0b2f7Stbbdev     }
329951c0b2f7Stbbdev     return released ? TBBMALLOC_OK : TBBMALLOC_NO_EFFECT;
330051c0b2f7Stbbdev }
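// Illustrative sketch (not part of the original source): releasing cached
// memory between processing phases. param must be nullptr.
#if 0
void allocation_command_usage() {
    // Flush only the calling thread's per-thread buffers.
    scalable_allocation_command(TBBMALLOC_CLEAN_THREAD_BUFFERS, nullptr);
    // Flush all internal caches; TBBMALLOC_OK if anything was released,
    // TBBMALLOC_NO_EFFECT otherwise.
    int rc = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr);
    (void)rc;
}
#endif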
3301