/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include <errno.h>
#include <new>        /* for placement new */
#include <string.h>   /* for memset */

#include "oneapi/tbb/version.h"
#include "../tbb/environment.h"
#include "../tbb/itt_notify.h" // for __TBB_load_ittnotify()

#if USE_PTHREAD
    #define TlsSetValue_func pthread_setspecific
    #define TlsGetValue_func pthread_getspecific
    #define GetMyTID() pthread_self()
    #include <sched.h>
    extern "C" { static void mallocThreadShutdownNotification(void*); }
    #if __sun || __SUNPRO_CC
    #define __asm__ asm
    #endif
    #include <unistd.h> // sysconf(_SC_PAGESIZE)
#elif USE_WINTHREAD
    #define GetMyTID() GetCurrentThreadId()
#if __TBB_WIN8UI_SUPPORT
    #include<thread>
    #define TlsSetValue_func FlsSetValue
    #define TlsGetValue_func FlsGetValue
    #define TlsAlloc() FlsAlloc(nullptr)
    #define TLS_ALLOC_FAILURE FLS_OUT_OF_INDEXES
    #define TlsFree FlsFree
#else
    #define TlsSetValue_func TlsSetValue
    #define TlsGetValue_func TlsGetValue
    #define TLS_ALLOC_FAILURE TLS_OUT_OF_INDEXES
#endif
#else
    #error Must define USE_PTHREAD or USE_WINTHREAD
#endif

#define FREELIST_NONBLOCKING 1

namespace rml {
class MemoryPool;
namespace internal {

class Block;
class MemoryPool;

#if MALLOC_CHECK_RECURSION

inline bool isMallocInitialized();

#endif // MALLOC_CHECK_RECURSION

/** Support for handling the special UNUSABLE pointer state **/
const intptr_t UNUSABLE = 0x1;
inline bool isSolidPtr( void* ptr ) {
    return (UNUSABLE|(intptr_t)ptr)!=UNUSABLE;
}
inline bool isNotForUse( void* ptr ) {
    return (intptr_t)ptr==UNUSABLE;
}
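/* Illustrative sketch of the UNUSABLE convention (the pointer values are examples only):
 *     isNotForUse((void*)UNUSABLE) == true   // the marker itself
 *     isNotForUse(nullptr)         == false
 *     isSolidPtr(nullptr)          == false  // neither nullptr nor UNUSABLE counts as solid
 *     isSolidPtr((void*)UNUSABLE)  == false
 *     isSolidPtr(ordinaryHeapPtr)  == true   // any real, non-marker object pointer
 */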

/*
 * Block::objectSize value used to mark blocks allocated by startupAlloc
 */
const uint16_t startupAllocObjSizeMark = ~(uint16_t)0;

/*
 * The following constant is used to define the size of struct Block, the block header.
 * The intent is to have the size of a Block be a multiple of the cache line size; this allows us to
 * get good alignment at the cost of some overhead equal to the amount of padding included in the Block.
 */
const int blockHeaderAlignment = estimatedCacheLineSize;

/********* The data structures and global objects **************/

/*
 * The malloc routines themselves need to be able to occasionally malloc some space,
 * in order to set up the structures used by the thread-local structures. This
 * class performs that function.
 */
class BootStrapBlocks {
    MallocMutex bootStrapLock;
    Block      *bootStrapBlock;
    Block      *bootStrapBlockUsed;
    FreeObject *bootStrapObjectList;
public:
    void *allocate(MemoryPool *memPool, size_t size);
    void free(void* ptr);
    void reset();
};

#if USE_INTERNAL_TID
class ThreadId {
    static tls_key_t Tid_key;
    static std::atomic<intptr_t> ThreadCount;

    unsigned int id;

    static unsigned int tlsNumber() {
        unsigned int result = reinterpret_cast<intptr_t>(TlsGetValue_func(Tid_key));
        if( !result ) {
            RecursiveMallocCallProtector scoped;
            // Thread-local value is zero -> first call from this thread,
            // need to initialize with next ID value (IDs start from 1)
            result = ++ThreadCount; // returned new value!
            TlsSetValue_func( Tid_key, reinterpret_cast<void*>(result) );
        }
        return result;
    }
public:
    static bool init() {
#if USE_WINTHREAD
        Tid_key = TlsAlloc();
        if (Tid_key == TLS_ALLOC_FAILURE)
            return false;
#else
        int status = pthread_key_create( &Tid_key, nullptr );
        if ( status ) {
            fprintf (stderr, "The memory manager cannot create tls key during initialization\n");
            return false;
        }
#endif /* USE_WINTHREAD */
        return true;
    }
#if __TBB_SOURCE_DIRECTLY_INCLUDED
    static void destroy() {
        if( Tid_key ) {
#if USE_WINTHREAD
            BOOL status = !(TlsFree( Tid_key )); // fail is zero
#else
            int status = pthread_key_delete( Tid_key );
#endif /* USE_WINTHREAD */
            if ( status )
                fprintf (stderr, "The memory manager cannot delete tls key\n");
            Tid_key = 0;
        }
    }
#endif

    ThreadId() : id(ThreadId::tlsNumber()) {}
    bool isCurrentThreadId() const { return id == ThreadId::tlsNumber(); }

#if COLLECT_STATISTICS || MALLOC_TRACE
    friend unsigned int getThreadId() { return ThreadId::tlsNumber(); }
#endif
#if COLLECT_STATISTICS
    static unsigned getMaxThreadId() { return ThreadCount.load(std::memory_order_relaxed); }

    friend int STAT_increment(ThreadId tid, int bin, int ctr);
#endif
};

tls_key_t ThreadId::Tid_key;
std::atomic<intptr_t> ThreadId::ThreadCount;

#if COLLECT_STATISTICS
int STAT_increment(ThreadId tid, int bin, int ctr)
{
    return ::STAT_increment(tid.id, bin, ctr);
}
#endif

#else // USE_INTERNAL_TID

class ThreadId {
#if USE_PTHREAD
    std::atomic<pthread_t> tid;
#else
    std::atomic<DWORD>     tid;
#endif
public:
    ThreadId() : tid(GetMyTID()) {}
    ThreadId(ThreadId &other) = delete;
    ~ThreadId() = default;

#if USE_PTHREAD
    bool isCurrentThreadId() const { return pthread_equal(pthread_self(), tid.load(std::memory_order_relaxed)); }
#else
    bool isCurrentThreadId() const { return GetCurrentThreadId() == tid.load(std::memory_order_relaxed); }
#endif
    ThreadId& operator=(const ThreadId& other) {
        tid.store(other.tid.load(std::memory_order_relaxed), std::memory_order_relaxed);
        return *this;
    }
    static bool init() { return true; }
#if __TBB_SOURCE_DIRECTLY_INCLUDED
    static void destroy() {}
#endif
};

#endif // USE_INTERNAL_TID

/*********** Code to provide thread ID and a thread-local void pointer **********/

bool TLSKey::init()
{
#if USE_WINTHREAD
    TLS_pointer_key = TlsAlloc();
    if (TLS_pointer_key == TLS_ALLOC_FAILURE)
        return false;
#else
    int status = pthread_key_create( &TLS_pointer_key, mallocThreadShutdownNotification );
    if ( status )
        return false;
#endif /* USE_WINTHREAD */
    return true;
}

bool TLSKey::destroy()
{
#if USE_WINTHREAD
    BOOL status1 = !(TlsFree(TLS_pointer_key)); // fail is zero
#else
    int status1 = pthread_key_delete(TLS_pointer_key);
#endif /* USE_WINTHREAD */
    MALLOC_ASSERT(!status1, "The memory manager cannot delete tls key.");
    return status1==0;
}

inline TLSData* TLSKey::getThreadMallocTLS() const
{
    return (TLSData *)TlsGetValue_func( TLS_pointer_key );
}

inline void TLSKey::setThreadMallocTLS( TLSData * newvalue ) {
    RecursiveMallocCallProtector scoped;
    TlsSetValue_func( TLS_pointer_key, newvalue );
}

/* The 'next' field in the block header has to maintain some invariants:
 *   it needs to be on a 16K boundary and the first field in the block.
 *   Any value stored there needs to have the lower 14 bits set to 0
 *   so that various asserts work. This means that if you want to smash this memory
 *   for debugging purposes you will need to obey this invariant.
 * The total size of the header needs to be a power of 2 to simplify
 * the alignment requirements. For now it is a 128 byte structure.
 * To avoid false sharing, the fields changed only locally are separated
 * from the fields changed by foreign threads.
 * Changing the size of the block header would require changing
 * some bin allocation sizes, in particular the "fitting" sizes (see below).
 */
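/* For example (illustrative addresses only): slab blocks come from the backend 16K-aligned,
 * so one Block header might sit at 0x7f2a10004000 and the next slab at 0x7f2a10008000.
 * Both values have their lower 14 bits equal to zero, which is exactly what the
 * asserts on the 'next' field rely on. */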
class Bin;
class StartupBlock;

class MemoryPool {
    // if no explicit grainsize is given, expect to see malloc in the user's pAlloc
    // and set a reasonably low granularity
    static const size_t defaultGranularity = estimatedCacheLineSize;

    MemoryPool() = delete; // deny
public:
    static MallocMutex memPoolListLock;

    // list of all active pools is used to release
    // all TLS data on thread termination or library unload
    MemoryPool    *next,
                  *prev;
    ExtMemoryPool  extMemPool;
    BootStrapBlocks bootStrapBlocks;

    static void initDefaultPool();

    bool init(intptr_t poolId, const MemPoolPolicy* memPoolPolicy);
    bool reset();
    bool destroy();
    void onThreadShutdown(TLSData *tlsData);

    inline TLSData *getTLS(bool create);
    void clearTLS() { extMemPool.tlsPointerKey.setThreadMallocTLS(nullptr); }

    Block *getEmptyBlock(size_t size);
    void returnEmptyBlock(Block *block, bool poolTheBlock);

    // get/put large object to/from local large object cache
    void *getFromLLOCache(TLSData *tls, size_t size, size_t alignment);
    void putToLLOCache(TLSData *tls, void *object);
};

static intptr_t defaultMemPool_space[sizeof(MemoryPool)/sizeof(intptr_t) +
                                     (sizeof(MemoryPool)%sizeof(intptr_t)? 1 : 0)];
static MemoryPool *defaultMemPool = (MemoryPool*)defaultMemPool_space;
const size_t MemoryPool::defaultGranularity;
// zero-initialized
MallocMutex  MemoryPool::memPoolListLock;
// TODO: move huge page status to default pool, because that's its state
HugePagesStatus hugePages;
static bool usedBySrcIncluded = false;

// Padding helpers
template<size_t padd>
struct PaddingImpl {
    size_t       __padding[padd];
};

template<>
struct PaddingImpl<0> {};

template<int N>
struct Padding : PaddingImpl<N/sizeof(size_t)> {};
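// Illustrative use of the padding helpers (the sizes below are hypothetical):
// if a header occupies 40 bytes and blockHeaderAlignment is 64, then
// Padding<64 - 40> == Padding<24> derives from PaddingImpl<3> and contributes three
// size_t words (24 bytes). If the header already fills the cache line exactly,
// Padding<0> picks the empty specialization and, via the empty base class
// optimization, adds no bytes at all.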

// Slab block is 16KB-aligned. To prevent false sharing, separate locally-accessed
// fields and fields commonly accessed by non-owner threads.
class GlobalBlockFields : public BlockI {
protected:
    std::atomic<FreeObject*> publicFreeList;
    std::atomic<Block*> nextPrivatizable;
    MemoryPool  *poolPtr;
};

class LocalBlockFields : public GlobalBlockFields, Padding<blockHeaderAlignment - sizeof(GlobalBlockFields)> {
protected:
    Block       *next;
    Block       *previous;        /* Use double linked list to speed up removal */
    FreeObject  *bumpPtr;         /* Bump pointer moves from the end to the beginning of a block */
    FreeObject  *freeList;
    /* Pointer to local data for the owner thread. Used to quickly find the TLS data
       when releasing an object from a block that the current thread owned.
       nullptr for orphaned blocks. */
    std::atomic<TLSData*> tlsPtr;
    ThreadId     ownerTid;        /* the ID of the thread that owns or last owned the block */
    BackRefIdx   backRefIdx;
    uint16_t     allocatedCount;  /* Number of objects allocated (obviously by the owning thread) */
    uint16_t     objectSize;
    bool         isFull;

    friend class FreeBlockPool;
    friend class StartupBlock;
    friend class LifoList;
    friend void *BootStrapBlocks::allocate(MemoryPool *, size_t);
    friend bool OrphanedBlocks::cleanup(Backend*);
    friend Block *MemoryPool::getEmptyBlock(size_t);
};

// Use inheritance to guarantee that user data starts on the next cache line.
// Can't use a member for it, because when LocalBlockFields already ends on a cache line,
// there must be no additional memory consumption with any compiler.
class Block : public LocalBlockFields,
              Padding<2*blockHeaderAlignment - sizeof(LocalBlockFields)> {
public:
    bool empty() const {
        if (allocatedCount > 0) return false;
        MALLOC_ASSERT(!isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT);
        return true;
    }
    inline FreeObject* allocate();
    inline FreeObject *allocateFromFreeList();

    inline bool adjustFullness();
    void adjustPositionInBin(Bin* bin = nullptr);
#if MALLOC_DEBUG
    bool freeListNonNull() { return freeList; }
#endif
    void freePublicObject(FreeObject *objectToFree);
    inline void freeOwnObject(void *object);
    void reset();
    void privatizePublicFreeList( bool reset = true );
    void restoreBumpPtr();
    void privatizeOrphaned(TLSData *tls, unsigned index);
    bool readyToShare();
    void shareOrphaned(intptr_t binTag, unsigned index);
    unsigned int getSize() const {
        MALLOC_ASSERT(isStartupAllocObject() || objectSize<minLargeObjectSize,
                      "Invalid object size");
        return isStartupAllocObject()? 0 : objectSize;
    }
    const BackRefIdx *getBackRefIdx() const { return &backRefIdx; }
    inline bool isOwnedByCurrentThread() const;
    bool isStartupAllocObject() const { return objectSize == startupAllocObjSizeMark; }
    inline FreeObject *findObjectToFree(const void *object) const;
    void checkFreePrecond(const void *object) const {
#if MALLOC_DEBUG
        const char *msg = "Possible double free or heap corruption.";
        // small objects are always at least sizeof(size_t) Byte aligned;
        // try to check this before the dereference below, as for invalid objects
        // the memory may be unreadable
        MALLOC_ASSERT(isAligned(object, sizeof(size_t)), "Try to free invalid small object");
#if !__TBB_USE_THREAD_SANITIZER
        // releasing to free slab
        MALLOC_ASSERT(allocatedCount>0, msg);
#endif
        // must not point to slab's header
        MALLOC_ASSERT((uintptr_t)object - (uintptr_t)this >= sizeof(Block), msg);
        if (startupAllocObjSizeMark == objectSize) // startup block
            MALLOC_ASSERT(object<=bumpPtr, msg);
        else {
            // non-startup objects are 8 Byte aligned
            MALLOC_ASSERT(isAligned(object, 8), "Try to free invalid small object");
            FreeObject *toFree = findObjectToFree(object);
#if !__TBB_USE_THREAD_SANITIZER
            MALLOC_ASSERT(allocatedCount <= (slabSize-sizeof(Block))/objectSize
                          && (!bumpPtr || object>bumpPtr), msg);
            // check against head of freeList, as this is mostly
            // expected after double free
            MALLOC_ASSERT(toFree != freeList, msg);
#endif
            // check against head of publicFreeList, to detect double free
            // involving a foreign thread
            MALLOC_ASSERT(toFree != publicFreeList.load(std::memory_order_relaxed), msg);
        }
#else
        suppress_unused_warning(object);
#endif
    }
    void initEmptyBlock(TLSData *tls, size_t size);
    size_t findObjectSize(void *object) const;
    MemoryPool *getMemPool() const { return poolPtr; } // do not use on the hot path!

protected:
    void cleanBlockHeader();

private:
    static const float emptyEnoughRatio; /* Threshold on free space needed to "reactivate" a block */

    inline FreeObject *allocateFromBumpPtr();
    inline FreeObject *findAllocatedObject(const void *address) const;
#if MALLOC_DEBUG
    inline bool isProperlyPlaced(const void *object) const;
#endif
    inline void markOwned(TLSData *tls) {
        MALLOC_ASSERT(!tlsPtr.load(std::memory_order_relaxed), ASSERT_TEXT);
        ownerTid = ThreadId(); /* save the ID of the current thread */
        tlsPtr.store(tls, std::memory_order_relaxed);
    }
    inline void markOrphaned() {
        MALLOC_ASSERT(tlsPtr.load(std::memory_order_relaxed), ASSERT_TEXT);
        tlsPtr.store(nullptr, std::memory_order_relaxed);
    }

    friend class Bin;
    friend class TLSData;
    friend bool MemoryPool::destroy();
};

const float Block::emptyEnoughRatio = 1.0 / 4.0;

static_assert(sizeof(Block) <= 2*estimatedCacheLineSize,
    "The class Block does not fit into 2 cache lines on this platform. "
    "Defining USE_INTERNAL_TID may help to fix it.");

class Bin {
private:
public:
    Block      *activeBlk;
    std::atomic<Block*> mailbox;
    MallocMutex mailLock;

public:
    inline Block* getActiveBlock() const { return activeBlk; }
    void resetActiveBlock() { activeBlk = nullptr; }
    inline void setActiveBlock(Block *block);
    inline Block* setPreviousBlockActive();
    Block* getPrivatizedFreeListBlock();
    void moveBlockToFront(Block *block);
    bool cleanPublicFreeLists();
    void processEmptyBlock(Block *block, bool poolTheBlock);
    void addPublicFreeListBlock(Block* block);

    void outofTLSBin(Block* block);
    void verifyTLSBin(size_t size) const;
    void pushTLSBin(Block* block);

#if MALLOC_DEBUG
    void verifyInitState() const {
        MALLOC_ASSERT( !activeBlk, ASSERT_TEXT );
        MALLOC_ASSERT( !mailbox.load(std::memory_order_relaxed), ASSERT_TEXT );
    }
#endif

    friend void Block::freePublicObject (FreeObject *objectToFree);
};

/********* End of the data structures **************/

/*
 * There are bins for all 8 byte aligned objects less than this segregated size; 8 bins in total
 */
const uint32_t minSmallObjectIndex = 0;
const uint32_t numSmallObjectBins = 8;
const uint32_t maxSmallObjectSize = 64;

/*
 * There are 4 bins between each couple of powers of 2 [64-128-256-...]
 * from maxSmallObjectSize till this size; 16 bins in total
 */
const uint32_t minSegregatedObjectIndex = minSmallObjectIndex+numSmallObjectBins;
const uint32_t numSegregatedObjectBins = 16;
const uint32_t maxSegregatedObjectSize = 1024;

/*
 * And there are 5 bins with allocation sizes that are multiples of estimatedCacheLineSize
 * and selected to fit 9, 6, 4, 3, and 2 allocations in a block.
 */
const uint32_t minFittingIndex = minSegregatedObjectIndex+numSegregatedObjectBins;
const uint32_t numFittingBins = 5;

const uint32_t fittingAlignment = estimatedCacheLineSize;

#define SET_FITTING_SIZE(N) ( (slabSize-sizeof(Block))/N ) & ~(fittingAlignment-1)
// For blockSize=16*1024, sizeof(Block)=2*estimatedCacheLineSize and fittingAlignment=estimatedCacheLineSize,
// the comments show the fitting sizes and the amounts left unused for estimatedCacheLineSize=64/128:
const uint32_t fittingSize1 = SET_FITTING_SIZE(9); // 1792/1792 128/000
const uint32_t fittingSize2 = SET_FITTING_SIZE(6); // 2688/2688 128/000
const uint32_t fittingSize3 = SET_FITTING_SIZE(4); // 4032/3968 128/256
const uint32_t fittingSize4 = SET_FITTING_SIZE(3); // 5376/5376 128/000
const uint32_t fittingSize5 = SET_FITTING_SIZE(2); // 8128/8064 000/000
#undef SET_FITTING_SIZE
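// Worked example for one entry, assuming slabSize == 16*1024 and estimatedCacheLineSize == 64
// (so sizeof(Block) == 128 and fittingAlignment == 64):
//     SET_FITTING_SIZE(9) = ((16384 - 128)/9) & ~63 = 1806 & ~63 = 1792,
// i.e. nine 1792-byte objects fit into a slab with 16256 - 9*1792 = 128 bytes left over,
// which matches the "1792 ... 128" annotation above.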

/*
 * The total number of thread-specific Block-based bins
 */
const uint32_t numBlockBins = minFittingIndex+numFittingBins;

/*
 * Objects of this size and larger are considered large objects.
 */
const uint32_t minLargeObjectSize = fittingSize5 + 1;

/*
 * Per-thread pool of slab blocks. The idea behind it is to not share with other
 * threads memory that is likely in the local cache(s) of our CPU.
 */
class FreeBlockPool {
private:
    std::atomic<Block*> head;
    int         size;
    Backend    *backend;
public:
    static const int POOL_HIGH_MARK = 32;
    static const int POOL_LOW_MARK  = 8;

    class ResOfGet {
        ResOfGet() = delete;
    public:
        Block* block;
        bool   lastAccMiss;
        ResOfGet(Block *b, bool lastMiss) : block(b), lastAccMiss(lastMiss) {}
    };

    // allocated in zero-initialized memory
    FreeBlockPool(Backend *bknd) : backend(bknd) {}
    ResOfGet getBlock();
    void returnBlock(Block *block);
    bool externalCleanup(); // can be called by another thread
};

template<int LOW_MARK, int HIGH_MARK>
class LocalLOCImpl {
private:
    static const size_t MAX_TOTAL_SIZE = 4*1024*1024;
    // TODO: can single-linked list be faster here?
    LargeMemoryBlock *tail; // needed when releasing on overflow
    std::atomic<LargeMemoryBlock*> head;
    size_t    totalSize;
    int       numOfBlocks;
public:
    bool put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool);
    LargeMemoryBlock *get(size_t size);
    bool externalCleanup(ExtMemoryPool *extMemPool);
#if __TBB_MALLOC_WHITEBOX_TEST
    LocalLOCImpl() : tail(nullptr), head(nullptr), totalSize(0), numOfBlocks(0) {}
    static size_t getMaxSize() { return MAX_TOTAL_SIZE; }
    static const int LOC_HIGH_MARK = HIGH_MARK;
#else
    // no ctor, object must be created in zero-initialized memory
#endif
};

typedef LocalLOCImpl<8,32> LocalLOC; // set production code parameters

class TLSData : public TLSRemote {
    MemoryPool   *memPool;
public:
    Bin           bin[numBlockBinLimit];
    FreeBlockPool freeSlabBlocks;
    LocalLOC      lloc;
    unsigned      currCacheIdx;
private:
    std::atomic<bool> unused;
public:
    TLSData(MemoryPool *mPool, Backend *bknd) : memPool(mPool), freeSlabBlocks(bknd), currCacheIdx(0) {}
    MemoryPool *getMemPool() const { return memPool; }
    Bin* getAllocationBin(size_t size);
    void release();
    bool externalCleanup(bool cleanOnlyUnused, bool cleanBins) {
        if (!unused.load(std::memory_order_relaxed) && cleanOnlyUnused) return false;
        // Heavy operation in terms of synchronization complexity,
        // should be called only for the current thread
        bool released = cleanBins ? cleanupBlockBins() : false;
        // both cleanups are to be called, and the order is not important
        bool lloc_cleaned = lloc.externalCleanup(&memPool->extMemPool);
        bool free_slab_blocks_cleaned = freeSlabBlocks.externalCleanup();
        return released || lloc_cleaned || free_slab_blocks_cleaned;
    }
    bool cleanupBlockBins();
    void markUsed() { unused.store(false, std::memory_order_relaxed); } // called by owner when TLS touched
    void markUnused() { unused.store(true, std::memory_order_relaxed); } // can be called by a non-owner thread
};

TLSData *TLSKey::createTLS(MemoryPool *memPool, Backend *backend)
{
    MALLOC_ASSERT( sizeof(TLSData) >= sizeof(Bin) * numBlockBins + sizeof(FreeBlockPool), ASSERT_TEXT );
    TLSData* tls = (TLSData*) memPool->bootStrapBlocks.allocate(memPool, sizeof(TLSData));
    if ( !tls )
        return nullptr;
    new(tls) TLSData(memPool, backend);
    /* the block contains zeroes after bootStrapMalloc, so bins are initialized */
#if MALLOC_DEBUG
    for (uint32_t i = 0; i < numBlockBinLimit; i++)
        tls->bin[i].verifyInitState();
#endif
    setThreadMallocTLS(tls);
    memPool->extMemPool.allLocalCaches.registerThread(tls);
    return tls;
}

bool TLSData::cleanupBlockBins()
{
    bool released = false;
    for (uint32_t i = 0; i < numBlockBinLimit; i++) {
        released |= bin[i].cleanPublicFreeLists();
        // After cleaning public free lists, only the active block might be empty.
        // Do not use processEmptyBlock because it will just restore bumpPtr.
        Block *block = bin[i].getActiveBlock();
        if (block && block->empty()) {
            bin[i].outofTLSBin(block);
            memPool->returnEmptyBlock(block, /*poolTheBlock=*/false);
            released = true;
        }
    }
    return released;
}

bool ExtMemoryPool::releaseAllLocalCaches()
{
    // Iterate all registered TLS data and clean LLOC and Slab pools
    bool released = allLocalCaches.cleanup(/*cleanOnlyUnused=*/false);

    // Bins privatization is done only for the current thread
    if (TLSData *tlsData = tlsPointerKey.getThreadMallocTLS())
        released |= tlsData->cleanupBlockBins();

    return released;
}

void AllLocalCaches::registerThread(TLSRemote *tls)
{
    tls->prev = nullptr;
    MallocMutex::scoped_lock lock(listLock);
    MALLOC_ASSERT(head!=tls, ASSERT_TEXT);
    tls->next = head;
    if (head)
        head->prev = tls;
    head = tls;
    MALLOC_ASSERT(head->next!=head, ASSERT_TEXT);
}

void AllLocalCaches::unregisterThread(TLSRemote *tls)
{
    MallocMutex::scoped_lock lock(listLock);
    MALLOC_ASSERT(head, "Can't unregister thread: no threads are registered.");
    if (head == tls)
        head = tls->next;
    if (tls->next)
        tls->next->prev = tls->prev;
    if (tls->prev)
        tls->prev->next = tls->next;
    MALLOC_ASSERT(!tls->next || tls->next->next!=tls->next, ASSERT_TEXT);
}

bool AllLocalCaches::cleanup(bool cleanOnlyUnused)
{
    bool released = false;
    {
        MallocMutex::scoped_lock lock(listLock);
        for (TLSRemote *curr=head; curr; curr=curr->next)
            released |= static_cast<TLSData*>(curr)->externalCleanup(cleanOnlyUnused, /*cleanBins=*/false);
    }
    return released;
}

void AllLocalCaches::markUnused()
{
    bool locked = false;
    MallocMutex::scoped_lock lock(listLock, /*block=*/false, &locked);
    if (!locked) // do not wait for marking if someone is doing something with the list
        return;

    for (TLSRemote *curr=head; curr; curr=curr->next)
        static_cast<TLSData*>(curr)->markUnused();
}

#if MALLOC_CHECK_RECURSION
MallocMutex RecursiveMallocCallProtector::rmc_mutex;
std::atomic<pthread_t> RecursiveMallocCallProtector::owner_thread;
std::atomic<void*> RecursiveMallocCallProtector::autoObjPtr;
bool        RecursiveMallocCallProtector::mallocRecursionDetected;
#if __FreeBSD__
bool        RecursiveMallocCallProtector::canUsePthread;
#endif

#endif

/*********** End code to provide thread ID and a TLS pointer **********/

// Parameter for isLargeObject, keeps our expectations on memory origin.
// Assertions must use unknownMem to reliably report object invalidity.
enum MemoryOrigin {
    ourMem,    // allocated by TBB allocator
    unknownMem // can be allocated by system allocator or TBB allocator
};

template<MemoryOrigin>
#if __TBB_USE_THREAD_SANITIZER
// We have a real race when accessing the large object header for
// non large objects (e.g. small or foreign objects).
// Therefore, we need to hide this access from the thread sanitizer
__attribute__((no_sanitize("thread")))
#endif
bool isLargeObject(void *object);
static void *internalMalloc(size_t size);
static void internalFree(void *object);
static void *internalPoolMalloc(MemoryPool* mPool, size_t size);
static bool internalPoolFree(MemoryPool *mPool, void *object, size_t size);

#if !MALLOC_DEBUG
#if __INTEL_COMPILER || _MSC_VER
#define NOINLINE(decl) __declspec(noinline) decl
#define ALWAYSINLINE(decl) __forceinline decl
#elif __GNUC__
#define NOINLINE(decl) decl __attribute__ ((noinline))
#define ALWAYSINLINE(decl) decl __attribute__ ((always_inline))
#else
#define NOINLINE(decl) decl
#define ALWAYSINLINE(decl) decl
#endif

static NOINLINE( bool doInitialization() );
ALWAYSINLINE( bool isMallocInitialized() );

#undef ALWAYSINLINE
#undef NOINLINE
#endif /* !MALLOC_DEBUG */


/********* Now some rough utility code to deal with indexing the size bins. **************/

/*
 * Given a number, return the position of its highest non-zero bit. It is intended to work with 32-bit values only.
 * Moreover, on some platforms, for the sake of simplicity and performance, it is narrowed to only serve for 64 to 1023.
 * This is enough for the current algorithm of distributing sizes among bins.
 * __TBB_Log2 is not used here to minimize dependencies on TBB-specific sources.
 */
#if _WIN64 && _MSC_VER>=1400 && !__INTEL_COMPILER
extern "C" unsigned char _BitScanReverse( unsigned long* i, unsigned long w );
#pragma intrinsic(_BitScanReverse)
#endif
static inline unsigned int highestBitPos(unsigned int n)
{
    MALLOC_ASSERT( n>=64 && n<1024, ASSERT_TEXT ); // only needed for bsr array lookup, but always true
    unsigned int pos;
#if __ARCH_x86_32||__ARCH_x86_64

# if __unix__||__APPLE__||__MINGW32__
    __asm__ ("bsr %1,%0" : "=r"(pos) : "r"(n));
# elif (_WIN32 && (!_WIN64 || __INTEL_COMPILER))
    __asm
    {
        bsr eax, n
        mov pos, eax
    }
# elif _WIN64 && _MSC_VER>=1400
    _BitScanReverse((unsigned long*)&pos, (unsigned long)n);
# else
#   error highestBitPos() not implemented for this platform
# endif
#elif __arm__
    __asm__ __volatile__
    (
       "clz %0, %1\n"
       "rsb %0, %0, %2\n"
       :"=r" (pos) :"r" (n), "I" (31)
    );
#else
    static unsigned int bsr[16] = {0/*N/A*/,6,7,7,8,8,8,8,9,9,9,9,9,9,9,9};
    pos = bsr[ n>>6 ];
#endif /* __ARCH_* */
    return pos;
}
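/* A few sample values, consistent with the fallback bsr[] table above:
 *     highestBitPos(64)   == 6
 *     highestBitPos(128)  == 7
 *     highestBitPos(1023) == 9
 * Inputs outside [64, 1024) violate the assertion and are not supported. */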

unsigned int getSmallObjectIndex(unsigned int size)
{
    unsigned int result = (size-1)>>3;
    constexpr bool is_64bit = (8 == sizeof(void*));
    if (is_64bit) {
        // For 64-bit malloc, 16 byte alignment is needed except for bin 0.
        if (result) result |= 1; // 0,1,3,5,7; bins 2,4,6 are not aligned to 16 bytes
    }
    return result;
}
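// Illustrative mapping on a 64-bit build (request size in bytes -> small-object bin):
//     getSmallObjectIndex(8)  == 0
//     getSmallObjectIndex(16) == 1
//     getSmallObjectIndex(24) == 3   // bin 2 is skipped to keep 16-byte alignment
//     getSmallObjectIndex(64) == 7
// On a 32-bit build the "|= 1" step is skipped, so 24 bytes would map to bin 2.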

/*
 * Depending on indexRequest, for a given size return either the index into the bin
 * for objects of this size, or the actual size of objects in this bin.
 */
template<bool indexRequest>
static unsigned int getIndexOrObjectSize (unsigned int size)
{
    if (size <= maxSmallObjectSize) { // selection from 8/16/24/32/40/48/56/64
        unsigned int index = getSmallObjectIndex( size );
        /* Bin 0 is for 8 bytes, bin 1 is for 16, and so forth */
        return indexRequest ? index : (index+1)<<3;
    }
    else if (size <= maxSegregatedObjectSize ) { // 80/96/112/128 / 160/192/224/256 / 320/384/448/512 / 640/768/896/1024
        unsigned int order = highestBitPos(size-1); // which group of bin sizes?
        MALLOC_ASSERT( 6<=order && order<=9, ASSERT_TEXT );
        if (indexRequest)
            return minSegregatedObjectIndex - (4*6) - 4 + (4*order) + ((size-1)>>(order-2));
        else {
            unsigned int alignment = 128 >> (9-order); // alignment in the group
            MALLOC_ASSERT( alignment==16 || alignment==32 || alignment==64 || alignment==128, ASSERT_TEXT );
            return alignUp(size,alignment);
        }
    }
    else {
        if( size <= fittingSize3 ) {
            if( size <= fittingSize2 ) {
                if( size <= fittingSize1 )
                    return indexRequest ? minFittingIndex : fittingSize1;
                else
                    return indexRequest ? minFittingIndex+1 : fittingSize2;
            } else
                return indexRequest ? minFittingIndex+2 : fittingSize3;
        } else {
            if( size <= fittingSize5 ) {
                if( size <= fittingSize4 )
                    return indexRequest ? minFittingIndex+3 : fittingSize4;
                else
                    return indexRequest ? minFittingIndex+4 : fittingSize5;
            } else {
                MALLOC_ASSERT( 0,ASSERT_TEXT ); // this should not happen
                return ~0U;
            }
        }
    }
}

static unsigned int getIndex (unsigned int size)
{
    return getIndexOrObjectSize</*indexRequest=*/true>(size);
}

static unsigned int getObjectSize (unsigned int size)
{
    return getIndexOrObjectSize</*indexRequest=*/false>(size);
}
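// Example of the two query flavors, assuming a 64-bit build with
// estimatedCacheLineSize == 64 (so fittingSize1 == 1792):
//     getIndex(24)   == 3     getObjectSize(24)   == 32    // small bin, padded for 16-byte alignment
//     getIndex(100)  == 10    getObjectSize(100)  == 112   // segregated bin, 16-byte steps in this group
//     getIndex(1500) == 24    getObjectSize(1500) == 1792  // first "fitting" bin (minFittingIndex)
// Sizes of minLargeObjectSize and above take the large-object path and would hit
// the MALLOC_ASSERT(0) branch if they ever reached these functions.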
87251c0b2f7Stbbdev
87351c0b2f7Stbbdev
allocate(MemoryPool * memPool,size_t size)87451c0b2f7Stbbdev void *BootStrapBlocks::allocate(MemoryPool *memPool, size_t size)
87551c0b2f7Stbbdev {
87651c0b2f7Stbbdev FreeObject *result;
87751c0b2f7Stbbdev
87851c0b2f7Stbbdev MALLOC_ASSERT( size == sizeof(TLSData), ASSERT_TEXT );
87951c0b2f7Stbbdev
88051c0b2f7Stbbdev { // Lock with acquire
88151c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(bootStrapLock);
88251c0b2f7Stbbdev
88351c0b2f7Stbbdev if( bootStrapObjectList) {
88451c0b2f7Stbbdev result = bootStrapObjectList;
88551c0b2f7Stbbdev bootStrapObjectList = bootStrapObjectList->next;
88651c0b2f7Stbbdev } else {
88751c0b2f7Stbbdev if (!bootStrapBlock) {
88851c0b2f7Stbbdev bootStrapBlock = memPool->getEmptyBlock(size);
88957f524caSIlya Isaev if (!bootStrapBlock) return nullptr;
89051c0b2f7Stbbdev }
89151c0b2f7Stbbdev result = bootStrapBlock->bumpPtr;
89251c0b2f7Stbbdev bootStrapBlock->bumpPtr = (FreeObject *)((uintptr_t)bootStrapBlock->bumpPtr - bootStrapBlock->objectSize);
89351c0b2f7Stbbdev if ((uintptr_t)bootStrapBlock->bumpPtr < (uintptr_t)bootStrapBlock+sizeof(Block)) {
89457f524caSIlya Isaev bootStrapBlock->bumpPtr = nullptr;
89551c0b2f7Stbbdev bootStrapBlock->next = bootStrapBlockUsed;
89651c0b2f7Stbbdev bootStrapBlockUsed = bootStrapBlock;
89757f524caSIlya Isaev bootStrapBlock = nullptr;
89851c0b2f7Stbbdev }
89951c0b2f7Stbbdev }
90051c0b2f7Stbbdev } // Unlock with release
90151c0b2f7Stbbdev memset (result, 0, size);
90251c0b2f7Stbbdev return (void*)result;
90351c0b2f7Stbbdev }
90451c0b2f7Stbbdev
free(void * ptr)90551c0b2f7Stbbdev void BootStrapBlocks::free(void* ptr)
90651c0b2f7Stbbdev {
90751c0b2f7Stbbdev MALLOC_ASSERT( ptr, ASSERT_TEXT );
90851c0b2f7Stbbdev { // Lock with acquire
90951c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(bootStrapLock);
91051c0b2f7Stbbdev ((FreeObject*)ptr)->next = bootStrapObjectList;
91151c0b2f7Stbbdev bootStrapObjectList = (FreeObject*)ptr;
91251c0b2f7Stbbdev } // Unlock with release
91351c0b2f7Stbbdev }
91451c0b2f7Stbbdev
reset()91551c0b2f7Stbbdev void BootStrapBlocks::reset()
91651c0b2f7Stbbdev {
91757f524caSIlya Isaev bootStrapBlock = bootStrapBlockUsed = nullptr;
91857f524caSIlya Isaev bootStrapObjectList = nullptr;
91951c0b2f7Stbbdev }
92051c0b2f7Stbbdev
92151c0b2f7Stbbdev #if !(FREELIST_NONBLOCKING)
92251c0b2f7Stbbdev static MallocMutex publicFreeListLock; // lock for changes of publicFreeList
92351c0b2f7Stbbdev #endif
92451c0b2f7Stbbdev
92551c0b2f7Stbbdev /********* End rough utility code **************/
92651c0b2f7Stbbdev
92751c0b2f7Stbbdev /* LifoList assumes zero initialization so a vector of it can be created
92851c0b2f7Stbbdev * by just allocating some space with no call to constructor.
92951c0b2f7Stbbdev * On Linux, it seems to be necessary to avoid linking with C++ libraries.
93051c0b2f7Stbbdev *
93151c0b2f7Stbbdev * By usage convention there is no race on the initialization. */
LifoList()932478de5b1Stbbdev LifoList::LifoList( ) : top(nullptr)
93351c0b2f7Stbbdev {
93451c0b2f7Stbbdev // MallocMutex assumes zero initialization
9352110128eSsarathnandu memset(static_cast<void*>(&lock), 0, sizeof(MallocMutex));
93651c0b2f7Stbbdev }
93751c0b2f7Stbbdev
push(Block * block)93851c0b2f7Stbbdev void LifoList::push(Block *block)
93951c0b2f7Stbbdev {
94051c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(lock);
941478de5b1Stbbdev block->next = top.load(std::memory_order_relaxed);
942478de5b1Stbbdev top.store(block, std::memory_order_relaxed);
94351c0b2f7Stbbdev }
94451c0b2f7Stbbdev
pop()94551c0b2f7Stbbdev Block *LifoList::pop()
94651c0b2f7Stbbdev {
947478de5b1Stbbdev Block* block = nullptr;
948478de5b1Stbbdev if (top.load(std::memory_order_relaxed)) {
94951c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(lock);
950478de5b1Stbbdev block = top.load(std::memory_order_relaxed);
951478de5b1Stbbdev if (block) {
952478de5b1Stbbdev top.store(block->next, std::memory_order_relaxed);
95351c0b2f7Stbbdev }
95451c0b2f7Stbbdev }
95551c0b2f7Stbbdev return block;
95651c0b2f7Stbbdev }
95751c0b2f7Stbbdev
grab()95851c0b2f7Stbbdev Block *LifoList::grab()
95951c0b2f7Stbbdev {
960478de5b1Stbbdev Block *block = nullptr;
961478de5b1Stbbdev if (top.load(std::memory_order_relaxed)) {
96251c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(lock);
963478de5b1Stbbdev block = top.load(std::memory_order_relaxed);
964478de5b1Stbbdev top.store(nullptr, std::memory_order_relaxed);
96551c0b2f7Stbbdev }
96651c0b2f7Stbbdev return block;
96751c0b2f7Stbbdev }
96851c0b2f7Stbbdev
96951c0b2f7Stbbdev /********* Thread and block related code *************/
97051c0b2f7Stbbdev
releaseAll(Backend * backend)97151c0b2f7Stbbdev template<bool poolDestroy> void AllLargeBlocksList::releaseAll(Backend *backend) {
97251c0b2f7Stbbdev LargeMemoryBlock *next, *lmb = loHead;
97357f524caSIlya Isaev loHead = nullptr;
97451c0b2f7Stbbdev
97551c0b2f7Stbbdev for (; lmb; lmb = next) {
97651c0b2f7Stbbdev next = lmb->gNext;
97751c0b2f7Stbbdev if (poolDestroy) {
97851c0b2f7Stbbdev // as it's pool destruction, no need to return object to backend,
97951c0b2f7Stbbdev // only remove backrefs, as they are global
98051c0b2f7Stbbdev removeBackRef(lmb->backRefIdx);
98151c0b2f7Stbbdev } else {
98251c0b2f7Stbbdev // clean g(Next|Prev) to prevent removing lmb
98351c0b2f7Stbbdev // from AllLargeBlocksList inside returnLargeObject
98457f524caSIlya Isaev lmb->gNext = lmb->gPrev = nullptr;
98551c0b2f7Stbbdev backend->returnLargeObject(lmb);
98651c0b2f7Stbbdev }
98751c0b2f7Stbbdev }
98851c0b2f7Stbbdev }
98951c0b2f7Stbbdev
getTLS(bool create)99051c0b2f7Stbbdev TLSData* MemoryPool::getTLS(bool create)
99151c0b2f7Stbbdev {
99251c0b2f7Stbbdev TLSData* tls = extMemPool.tlsPointerKey.getThreadMallocTLS();
99351c0b2f7Stbbdev if (create && !tls)
99451c0b2f7Stbbdev tls = extMemPool.tlsPointerKey.createTLS(this, &extMemPool.backend);
99551c0b2f7Stbbdev return tls;
99651c0b2f7Stbbdev }
99751c0b2f7Stbbdev
99851c0b2f7Stbbdev /*
99951c0b2f7Stbbdev * Return the bin for the given size.
100051c0b2f7Stbbdev */
getAllocationBin(size_t size)100151c0b2f7Stbbdev inline Bin* TLSData::getAllocationBin(size_t size)
100251c0b2f7Stbbdev {
100351c0b2f7Stbbdev return bin + getIndex(size);
100451c0b2f7Stbbdev }
100551c0b2f7Stbbdev
100651c0b2f7Stbbdev /* Return an empty uninitialized block in a non-blocking fashion. */
getEmptyBlock(size_t size)100751c0b2f7Stbbdev Block *MemoryPool::getEmptyBlock(size_t size)
100851c0b2f7Stbbdev {
100951c0b2f7Stbbdev TLSData* tls = getTLS(/*create=*/false);
101051c0b2f7Stbbdev // try to use per-thread cache, if TLS available
101151c0b2f7Stbbdev FreeBlockPool::ResOfGet resOfGet = tls?
101257f524caSIlya Isaev tls->freeSlabBlocks.getBlock() : FreeBlockPool::ResOfGet(nullptr, false);
101351c0b2f7Stbbdev Block *result = resOfGet.block;
101451c0b2f7Stbbdev
101551c0b2f7Stbbdev if (!result) { // not found in local cache, asks backend for slabs
101651c0b2f7Stbbdev int num = resOfGet.lastAccMiss? Backend::numOfSlabAllocOnMiss : 1;
101751c0b2f7Stbbdev BackRefIdx backRefIdx[Backend::numOfSlabAllocOnMiss];
101851c0b2f7Stbbdev
101951c0b2f7Stbbdev result = static_cast<Block*>(extMemPool.backend.getSlabBlock(num));
102057f524caSIlya Isaev if (!result) return nullptr;
102151c0b2f7Stbbdev
102251c0b2f7Stbbdev if (!extMemPool.userPool())
102351c0b2f7Stbbdev for (int i=0; i<num; i++) {
102451c0b2f7Stbbdev backRefIdx[i] = BackRefIdx::newBackRef(/*largeObj=*/false);
102551c0b2f7Stbbdev if (backRefIdx[i].isInvalid()) {
102651c0b2f7Stbbdev // roll back resource allocation
102751c0b2f7Stbbdev for (int j=0; j<i; j++)
102851c0b2f7Stbbdev removeBackRef(backRefIdx[j]);
102951c0b2f7Stbbdev Block *b = result;
103051c0b2f7Stbbdev for (int j=0; j<num; b=(Block*)((uintptr_t)b+slabSize), j++)
103151c0b2f7Stbbdev extMemPool.backend.putSlabBlock(b);
103257f524caSIlya Isaev return nullptr;
103351c0b2f7Stbbdev }
103451c0b2f7Stbbdev }
103551c0b2f7Stbbdev // resources were allocated, register blocks
103651c0b2f7Stbbdev Block *b = result;
103751c0b2f7Stbbdev for (int i=0; i<num; b=(Block*)((uintptr_t)b+slabSize), i++) {
103851c0b2f7Stbbdev // slab block in user's pool must have invalid backRefIdx
103951c0b2f7Stbbdev if (extMemPool.userPool()) {
104051c0b2f7Stbbdev new (&b->backRefIdx) BackRefIdx();
104151c0b2f7Stbbdev } else {
104251c0b2f7Stbbdev setBackRef(backRefIdx[i], b);
104351c0b2f7Stbbdev b->backRefIdx = backRefIdx[i];
104451c0b2f7Stbbdev }
1045478de5b1Stbbdev b->tlsPtr.store(tls, std::memory_order_relaxed);
104651c0b2f7Stbbdev b->poolPtr = this;
104751c0b2f7Stbbdev // all but first one go to per-thread pool
104851c0b2f7Stbbdev if (i > 0) {
104951c0b2f7Stbbdev MALLOC_ASSERT(tls, ASSERT_TEXT);
105051c0b2f7Stbbdev tls->freeSlabBlocks.returnBlock(b);
105151c0b2f7Stbbdev }
105251c0b2f7Stbbdev }
105351c0b2f7Stbbdev }
105451c0b2f7Stbbdev MALLOC_ASSERT(result, ASSERT_TEXT);
105551c0b2f7Stbbdev result->initEmptyBlock(tls, size);
105651c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(result->objectSize), allocBlockNew);
105751c0b2f7Stbbdev return result;
105851c0b2f7Stbbdev }
105951c0b2f7Stbbdev
returnEmptyBlock(Block * block,bool poolTheBlock)106051c0b2f7Stbbdev void MemoryPool::returnEmptyBlock(Block *block, bool poolTheBlock)
106151c0b2f7Stbbdev {
106251c0b2f7Stbbdev block->reset();
106351c0b2f7Stbbdev if (poolTheBlock) {
106451c0b2f7Stbbdev getTLS(/*create=*/false)->freeSlabBlocks.returnBlock(block);
106551c0b2f7Stbbdev } else {
106651c0b2f7Stbbdev // slab blocks in user's pools do not have valid backRefIdx
106751c0b2f7Stbbdev if (!extMemPool.userPool())
106851c0b2f7Stbbdev removeBackRef(*(block->getBackRefIdx()));
106951c0b2f7Stbbdev extMemPool.backend.putSlabBlock(block);
107051c0b2f7Stbbdev }
107151c0b2f7Stbbdev }
107251c0b2f7Stbbdev
init(intptr_t poolId,rawAllocType rawAlloc,rawFreeType rawFree,size_t granularity,bool keepAllMemory,bool fixedPool)107351c0b2f7Stbbdev bool ExtMemoryPool::init(intptr_t poolId, rawAllocType rawAlloc,
107451c0b2f7Stbbdev rawFreeType rawFree, size_t granularity,
107551c0b2f7Stbbdev bool keepAllMemory, bool fixedPool)
107651c0b2f7Stbbdev {
107751c0b2f7Stbbdev this->poolId = poolId;
107851c0b2f7Stbbdev this->rawAlloc = rawAlloc;
107951c0b2f7Stbbdev this->rawFree = rawFree;
108051c0b2f7Stbbdev this->granularity = granularity;
108151c0b2f7Stbbdev this->keepAllMemory = keepAllMemory;
108251c0b2f7Stbbdev this->fixedPool = fixedPool;
108351c0b2f7Stbbdev this->delayRegsReleasing = false;
108451c0b2f7Stbbdev if (!initTLS())
108551c0b2f7Stbbdev return false;
108651c0b2f7Stbbdev loc.init(this);
108751c0b2f7Stbbdev backend.init(this);
108857f524caSIlya Isaev MALLOC_ASSERT(isPoolValid(), nullptr);
108951c0b2f7Stbbdev return true;
109051c0b2f7Stbbdev }
109151c0b2f7Stbbdev
initTLS()109251c0b2f7Stbbdev bool ExtMemoryPool::initTLS() { return tlsPointerKey.init(); }
109351c0b2f7Stbbdev
init(intptr_t poolId,const MemPoolPolicy * policy)109451c0b2f7Stbbdev bool MemoryPool::init(intptr_t poolId, const MemPoolPolicy *policy)
109551c0b2f7Stbbdev {
109651c0b2f7Stbbdev if (!extMemPool.init(poolId, policy->pAlloc, policy->pFree,
109751c0b2f7Stbbdev policy->granularity? policy->granularity : defaultGranularity,
109851c0b2f7Stbbdev policy->keepAllMemory, policy->fixedPool))
109951c0b2f7Stbbdev return false;
110051c0b2f7Stbbdev {
110151c0b2f7Stbbdev MallocMutex::scoped_lock lock(memPoolListLock);
110251c0b2f7Stbbdev next = defaultMemPool->next;
110351c0b2f7Stbbdev defaultMemPool->next = this;
110451c0b2f7Stbbdev prev = defaultMemPool;
110551c0b2f7Stbbdev if (next)
110651c0b2f7Stbbdev next->prev = this;
110751c0b2f7Stbbdev }
110851c0b2f7Stbbdev return true;
110951c0b2f7Stbbdev }
111051c0b2f7Stbbdev
reset()111151c0b2f7Stbbdev bool MemoryPool::reset()
111251c0b2f7Stbbdev {
111351c0b2f7Stbbdev MALLOC_ASSERT(extMemPool.userPool(), "No reset for the system pool.");
111451c0b2f7Stbbdev // memory is not released during pool reset
111551c0b2f7Stbbdev // TODO: mark regions to release unused on next reset()
111651c0b2f7Stbbdev extMemPool.delayRegionsReleasing(true);
111751c0b2f7Stbbdev
111851c0b2f7Stbbdev bootStrapBlocks.reset();
111951c0b2f7Stbbdev extMemPool.lmbList.releaseAll</*poolDestroy=*/false>(&extMemPool.backend);
112051c0b2f7Stbbdev if (!extMemPool.reset())
112151c0b2f7Stbbdev return false;
112251c0b2f7Stbbdev
112351c0b2f7Stbbdev if (!extMemPool.initTLS())
112451c0b2f7Stbbdev return false;
112551c0b2f7Stbbdev extMemPool.delayRegionsReleasing(false);
112651c0b2f7Stbbdev return true;
112751c0b2f7Stbbdev }
112851c0b2f7Stbbdev
destroy()112951c0b2f7Stbbdev bool MemoryPool::destroy()
113051c0b2f7Stbbdev {
113151c0b2f7Stbbdev #if __TBB_MALLOC_LOCACHE_STAT
113251c0b2f7Stbbdev extMemPool.loc.reportStat(stdout);
113351c0b2f7Stbbdev #endif
113451c0b2f7Stbbdev #if __TBB_MALLOC_BACKEND_STAT
113551c0b2f7Stbbdev extMemPool.backend.reportStat(stdout);
113651c0b2f7Stbbdev #endif
113751c0b2f7Stbbdev {
113851c0b2f7Stbbdev MallocMutex::scoped_lock lock(memPoolListLock);
113951c0b2f7Stbbdev // remove itself from global pool list
114051c0b2f7Stbbdev if (prev)
114151c0b2f7Stbbdev prev->next = next;
114251c0b2f7Stbbdev if (next)
114351c0b2f7Stbbdev next->prev = prev;
114451c0b2f7Stbbdev }
114551c0b2f7Stbbdev // slab blocks in non-default pool do not have backreferences,
114651c0b2f7Stbbdev // only large objects do
114751c0b2f7Stbbdev if (extMemPool.userPool())
114851c0b2f7Stbbdev extMemPool.lmbList.releaseAll</*poolDestroy=*/true>(&extMemPool.backend);
114951c0b2f7Stbbdev else {
115051c0b2f7Stbbdev // only one non-userPool() is supported now
115157f524caSIlya Isaev MALLOC_ASSERT(this==defaultMemPool, nullptr);
115251c0b2f7Stbbdev // Here and below in extMemPool.destroy(), do not restore the initial state
115351c0b2f7Stbbdev // for a user pool, because it is just about to be released. But the system
115451c0b2f7Stbbdev // pool is restored, because we do not want it zeroed on a subsequent reload.
115551c0b2f7Stbbdev bootStrapBlocks.reset();
115651c0b2f7Stbbdev extMemPool.orphanedBlocks.reset();
115751c0b2f7Stbbdev }
115851c0b2f7Stbbdev return extMemPool.destroy();
115951c0b2f7Stbbdev }
116051c0b2f7Stbbdev
onThreadShutdown(TLSData * tlsData)116151c0b2f7Stbbdev void MemoryPool::onThreadShutdown(TLSData *tlsData)
116251c0b2f7Stbbdev {
116351c0b2f7Stbbdev if (tlsData) { // might be called for "empty" TLS
116451c0b2f7Stbbdev tlsData->release();
116551c0b2f7Stbbdev bootStrapBlocks.free(tlsData);
116651c0b2f7Stbbdev clearTLS();
116751c0b2f7Stbbdev }
116851c0b2f7Stbbdev }
116951c0b2f7Stbbdev
117051c0b2f7Stbbdev #if MALLOC_DEBUG
verifyTLSBin(size_t size) const117151c0b2f7Stbbdev void Bin::verifyTLSBin (size_t size) const
117251c0b2f7Stbbdev {
117351c0b2f7Stbbdev /* The debug version verifies the TLSBin as needed */
117451c0b2f7Stbbdev uint32_t objSize = getObjectSize(size);
117551c0b2f7Stbbdev
117651c0b2f7Stbbdev if (activeBlk) {
117751c0b2f7Stbbdev MALLOC_ASSERT( activeBlk->isOwnedByCurrentThread(), ASSERT_TEXT );
117851c0b2f7Stbbdev MALLOC_ASSERT( activeBlk->objectSize == objSize, ASSERT_TEXT );
117951c0b2f7Stbbdev #if MALLOC_DEBUG>1
118051c0b2f7Stbbdev for (Block* temp = activeBlk->next; temp; temp=temp->next) {
118151c0b2f7Stbbdev MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT );
118251c0b2f7Stbbdev MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT );
118351c0b2f7Stbbdev MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
118451c0b2f7Stbbdev MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
118551c0b2f7Stbbdev if (temp->next) {
118651c0b2f7Stbbdev MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
118751c0b2f7Stbbdev }
118851c0b2f7Stbbdev }
118951c0b2f7Stbbdev for (Block* temp = activeBlk->previous; temp; temp=temp->previous) {
119051c0b2f7Stbbdev MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT );
119151c0b2f7Stbbdev MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT );
119251c0b2f7Stbbdev MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
119351c0b2f7Stbbdev MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
119451c0b2f7Stbbdev if (temp->previous) {
119551c0b2f7Stbbdev MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
119651c0b2f7Stbbdev }
119751c0b2f7Stbbdev }
119851c0b2f7Stbbdev #endif /* MALLOC_DEBUG>1 */
119951c0b2f7Stbbdev }
120051c0b2f7Stbbdev }
120151c0b2f7Stbbdev #else /* MALLOC_DEBUG */
verifyTLSBin(size_t) const120251c0b2f7Stbbdev inline void Bin::verifyTLSBin (size_t) const { }
120351c0b2f7Stbbdev #endif /* MALLOC_DEBUG */
120451c0b2f7Stbbdev
120551c0b2f7Stbbdev /*
120651c0b2f7Stbbdev * Add a block to the start of this tls bin list.
120751c0b2f7Stbbdev */
pushTLSBin(Block * block)120851c0b2f7Stbbdev void Bin::pushTLSBin(Block* block)
120951c0b2f7Stbbdev {
121051c0b2f7Stbbdev /* The objectSize is taken from the block rather than passed as a parameter,
121151c0b2f7Stbbdev because the function is applied to partially filled blocks as well */
121251c0b2f7Stbbdev unsigned int size = block->objectSize;
121351c0b2f7Stbbdev
121451c0b2f7Stbbdev MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
121551c0b2f7Stbbdev MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );
121657f524caSIlya Isaev MALLOC_ASSERT( block->next == nullptr, ASSERT_TEXT );
121757f524caSIlya Isaev MALLOC_ASSERT( block->previous == nullptr, ASSERT_TEXT );
121851c0b2f7Stbbdev
121951c0b2f7Stbbdev MALLOC_ASSERT( this, ASSERT_TEXT );
122051c0b2f7Stbbdev verifyTLSBin(size);
122151c0b2f7Stbbdev
122251c0b2f7Stbbdev block->next = activeBlk;
122351c0b2f7Stbbdev if( activeBlk ) {
122451c0b2f7Stbbdev block->previous = activeBlk->previous;
122551c0b2f7Stbbdev activeBlk->previous = block;
122651c0b2f7Stbbdev if( block->previous )
122751c0b2f7Stbbdev block->previous->next = block;
122851c0b2f7Stbbdev } else {
122951c0b2f7Stbbdev activeBlk = block;
123051c0b2f7Stbbdev }
123151c0b2f7Stbbdev
123251c0b2f7Stbbdev verifyTLSBin(size);
123351c0b2f7Stbbdev }
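// Resulting list shape (illustrative): the block is linked immediately before the
// current activeBlk, so for an initially empty bin it simply becomes activeBlk,
// and otherwise:
//     ... <-> block <-> activeBlk <-> ...
// with activeBlk itself left unchanged.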
123451c0b2f7Stbbdev
123551c0b2f7Stbbdev /*
123651c0b2f7Stbbdev * Take a block out of its tls bin (e.g. before removal).
123751c0b2f7Stbbdev */
outofTLSBin(Block * block)123851c0b2f7Stbbdev void Bin::outofTLSBin(Block* block)
123951c0b2f7Stbbdev {
124051c0b2f7Stbbdev unsigned int size = block->objectSize;
124151c0b2f7Stbbdev
124251c0b2f7Stbbdev MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
124351c0b2f7Stbbdev MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );
124451c0b2f7Stbbdev
124551c0b2f7Stbbdev MALLOC_ASSERT( this, ASSERT_TEXT );
124651c0b2f7Stbbdev verifyTLSBin(size);
124751c0b2f7Stbbdev
124851c0b2f7Stbbdev if (block == activeBlk) {
124951c0b2f7Stbbdev activeBlk = block->previous? block->previous : block->next;
125051c0b2f7Stbbdev }
125151c0b2f7Stbbdev /* Unlink the block */
125251c0b2f7Stbbdev if (block->previous) {
125351c0b2f7Stbbdev MALLOC_ASSERT( block->previous->next == block, ASSERT_TEXT );
125451c0b2f7Stbbdev block->previous->next = block->next;
125551c0b2f7Stbbdev }
125651c0b2f7Stbbdev if (block->next) {
125751c0b2f7Stbbdev MALLOC_ASSERT( block->next->previous == block, ASSERT_TEXT );
125851c0b2f7Stbbdev block->next->previous = block->previous;
125951c0b2f7Stbbdev }
126057f524caSIlya Isaev block->next = nullptr;
126157f524caSIlya Isaev block->previous = nullptr;
126251c0b2f7Stbbdev
126351c0b2f7Stbbdev verifyTLSBin(size);
126451c0b2f7Stbbdev }
126551c0b2f7Stbbdev
getPrivatizedFreeListBlock()126651c0b2f7Stbbdev Block* Bin::getPrivatizedFreeListBlock()
126751c0b2f7Stbbdev {
126851c0b2f7Stbbdev Block* block;
126951c0b2f7Stbbdev MALLOC_ASSERT( this, ASSERT_TEXT );
127051c0b2f7Stbbdev // if this method is called, active block usage must be unsuccessful
12712110128eSsarathnandu MALLOC_ASSERT( (!activeBlk && !mailbox.load(std::memory_order_relaxed)) || (activeBlk && activeBlk->isFull), ASSERT_TEXT );
127251c0b2f7Stbbdev
127351c0b2f7Stbbdev // the counter should be changed STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
127451c0b2f7Stbbdev if (!mailbox.load(std::memory_order_acquire)) // hotpath is empty mailbox
127557f524caSIlya Isaev return nullptr;
127651c0b2f7Stbbdev else { // mailbox is not empty, take lock and inspect it
127751c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(mailLock);
127851c0b2f7Stbbdev block = mailbox.load(std::memory_order_relaxed);
127951c0b2f7Stbbdev if( block ) {
128051c0b2f7Stbbdev MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
1281478de5b1Stbbdev MALLOC_ASSERT( !isNotForUse(block->nextPrivatizable.load(std::memory_order_relaxed)), ASSERT_TEXT );
1282478de5b1Stbbdev mailbox.store(block->nextPrivatizable.load(std::memory_order_relaxed), std::memory_order_relaxed);
1283478de5b1Stbbdev block->nextPrivatizable.store((Block*)this, std::memory_order_relaxed);
128451c0b2f7Stbbdev }
128551c0b2f7Stbbdev }
128651c0b2f7Stbbdev if( block ) {
128751c0b2f7Stbbdev MALLOC_ASSERT( isSolidPtr(block->publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
128851c0b2f7Stbbdev block->privatizePublicFreeList();
128951c0b2f7Stbbdev block->adjustPositionInBin(this);
129051c0b2f7Stbbdev }
129151c0b2f7Stbbdev return block;
129251c0b2f7Stbbdev }
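// Mailbox handshake in a nutshell (illustrative): a remote thread that frees into a
// block whose publicFreeList was empty links that block into the owner's mailbox via
// addPublicFreeListBlock(); the owning thread later drains it here, e.g.:
//     if (Block *b = bin->getPrivatizedFreeListBlock())
//         ;   // b's public free list has been merged into its private freeList
// where 'bin' is assumed to be the owner thread's Bin for the block's size class.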
129351c0b2f7Stbbdev
addPublicFreeListBlock(Block * block)129451c0b2f7Stbbdev void Bin::addPublicFreeListBlock(Block* block)
129551c0b2f7Stbbdev {
129651c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(mailLock);
1297478de5b1Stbbdev block->nextPrivatizable.store(mailbox.load(std::memory_order_relaxed), std::memory_order_relaxed);
129851c0b2f7Stbbdev mailbox.store(block, std::memory_order_relaxed);
129951c0b2f7Stbbdev }
130051c0b2f7Stbbdev
130151c0b2f7Stbbdev // Process publicly freed objects in all blocks and return empty blocks
130251c0b2f7Stbbdev // to the backend in order to reduce overall footprint.
cleanPublicFreeLists()130351c0b2f7Stbbdev bool Bin::cleanPublicFreeLists()
130451c0b2f7Stbbdev {
130551c0b2f7Stbbdev Block* block;
130651c0b2f7Stbbdev if (!mailbox.load(std::memory_order_acquire))
130751c0b2f7Stbbdev return false;
130851c0b2f7Stbbdev else {
130951c0b2f7Stbbdev // Grab all the blocks in the mailbox
131051c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(mailLock);
131151c0b2f7Stbbdev block = mailbox.load(std::memory_order_relaxed);
131257f524caSIlya Isaev mailbox.store(nullptr, std::memory_order_relaxed);
131351c0b2f7Stbbdev }
131451c0b2f7Stbbdev bool released = false;
131551c0b2f7Stbbdev while (block) {
131651c0b2f7Stbbdev MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
1317478de5b1Stbbdev Block* tmp = block->nextPrivatizable.load(std::memory_order_relaxed);
1318478de5b1Stbbdev block->nextPrivatizable.store((Block*)this, std::memory_order_relaxed);
131951c0b2f7Stbbdev block->privatizePublicFreeList();
132051c0b2f7Stbbdev if (block->empty()) {
132151c0b2f7Stbbdev processEmptyBlock(block, /*poolTheBlock=*/false);
132251c0b2f7Stbbdev released = true;
132351c0b2f7Stbbdev } else
132451c0b2f7Stbbdev block->adjustPositionInBin(this);
132551c0b2f7Stbbdev block = tmp;
132651c0b2f7Stbbdev }
132751c0b2f7Stbbdev return released;
132851c0b2f7Stbbdev }
132951c0b2f7Stbbdev
adjustFullness()133051c0b2f7Stbbdev bool Block::adjustFullness()
133151c0b2f7Stbbdev {
133251c0b2f7Stbbdev if (bumpPtr) {
133351c0b2f7Stbbdev /* If we are still using a bump ptr for this block it is empty enough to use. */
133451c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
133551c0b2f7Stbbdev isFull = false;
133651c0b2f7Stbbdev } else {
133751c0b2f7Stbbdev const float threshold = (slabSize - sizeof(Block)) * (1 - emptyEnoughRatio);
133851c0b2f7Stbbdev /* allocatedCount shows how many objects in the block are in use; however it still counts
133951c0b2f7Stbbdev * objects freed by other threads; so a prior call to privatizePublicFreeList() is recommended */
134051c0b2f7Stbbdev isFull = allocatedCount*objectSize > threshold;
134151c0b2f7Stbbdev #if COLLECT_STATISTICS
134251c0b2f7Stbbdev if (isFull)
134351c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), examineNotEmpty);
134451c0b2f7Stbbdev else
134551c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
134651c0b2f7Stbbdev #endif
134751c0b2f7Stbbdev }
134851c0b2f7Stbbdev return isFull;
134951c0b2f7Stbbdev }
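// Worked example (numbers are illustrative; the real constants are defined in
// tbbmalloc_internal.h): with a 16 KB slab, a 128-byte Block header (the expected
// size checked in initMemoryManager() below) and emptyEnoughRatio == 1/4,
// threshold = (16384 - 128) * 0.75 = 12192 bytes, so a block of 64-byte objects
// is still considered full while more than 190 of them (12192 / 64) remain allocated.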
135051c0b2f7Stbbdev
135151c0b2f7Stbbdev // This method resides in class Block, and not in class Bin, in order to avoid
135251c0b2f7Stbbdev // calling getAllocationBin on a reasonably hot path in Block::freeOwnObject
adjustPositionInBin(Bin * bin)135357f524caSIlya Isaev void Block::adjustPositionInBin(Bin* bin/*=nullptr*/)
135451c0b2f7Stbbdev {
135551c0b2f7Stbbdev // If the block was full, but became empty enough to use,
135651c0b2f7Stbbdev // move it to the front of the list
135751c0b2f7Stbbdev if (isFull && !adjustFullness()) {
135851c0b2f7Stbbdev if (!bin)
1359478de5b1Stbbdev bin = tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize);
136051c0b2f7Stbbdev bin->moveBlockToFront(this);
136151c0b2f7Stbbdev }
136251c0b2f7Stbbdev }
136351c0b2f7Stbbdev
136451c0b2f7Stbbdev /* Restore the bump pointer for an empty block that is planned to be used */
restoreBumpPtr()136551c0b2f7Stbbdev void Block::restoreBumpPtr()
136651c0b2f7Stbbdev {
136751c0b2f7Stbbdev MALLOC_ASSERT( allocatedCount == 0, ASSERT_TEXT );
136851c0b2f7Stbbdev MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
136951c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), freeRestoreBumpPtr);
137051c0b2f7Stbbdev bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);
137157f524caSIlya Isaev freeList = nullptr;
137251c0b2f7Stbbdev isFull = false;
137351c0b2f7Stbbdev }
137451c0b2f7Stbbdev
freeOwnObject(void * object)137551c0b2f7Stbbdev void Block::freeOwnObject(void *object)
137651c0b2f7Stbbdev {
1377478de5b1Stbbdev tlsPtr.load(std::memory_order_relaxed)->markUsed();
137851c0b2f7Stbbdev allocatedCount--;
137951c0b2f7Stbbdev MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
138051c0b2f7Stbbdev #if COLLECT_STATISTICS
138151c0b2f7Stbbdev // Note that getAllocationBin is not called on the hottest path with statistics off.
1382478de5b1Stbbdev if (tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize)->getActiveBlock() != this)
138351c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), freeToInactiveBlock);
138451c0b2f7Stbbdev else
138551c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), freeToActiveBlock);
138651c0b2f7Stbbdev #endif
138751c0b2f7Stbbdev if (empty()) {
138851c0b2f7Stbbdev // If the last object of a slab is freed, the slab cannot be marked full
138951c0b2f7Stbbdev MALLOC_ASSERT(!isFull, ASSERT_TEXT);
1390478de5b1Stbbdev tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize)->processEmptyBlock(this, /*poolTheBlock=*/true);
139151c0b2f7Stbbdev } else { // hot path
139251c0b2f7Stbbdev FreeObject *objectToFree = findObjectToFree(object);
139351c0b2f7Stbbdev objectToFree->next = freeList;
139451c0b2f7Stbbdev freeList = objectToFree;
139551c0b2f7Stbbdev adjustPositionInBin();
139651c0b2f7Stbbdev }
139751c0b2f7Stbbdev }
139851c0b2f7Stbbdev
freePublicObject(FreeObject * objectToFree)139951c0b2f7Stbbdev void Block::freePublicObject (FreeObject *objectToFree)
140051c0b2f7Stbbdev {
1401478de5b1Stbbdev FreeObject* localPublicFreeList{};
140251c0b2f7Stbbdev
140351c0b2f7Stbbdev MALLOC_ITT_SYNC_RELEASING(&publicFreeList);
140451c0b2f7Stbbdev #if FREELIST_NONBLOCKING
140551c0b2f7Stbbdev // TBB_REVAMP_TODO: make it non atomic in non-blocking scenario
1406478de5b1Stbbdev localPublicFreeList = publicFreeList.load(std::memory_order_relaxed);
140751c0b2f7Stbbdev do {
1408478de5b1Stbbdev objectToFree->next = localPublicFreeList;
140951c0b2f7Stbbdev // no backoff necessary because we are trying to make a change, not waiting for a change
1410478de5b1Stbbdev } while( !publicFreeList.compare_exchange_strong(localPublicFreeList, objectToFree) );
141151c0b2f7Stbbdev #else
141251c0b2f7Stbbdev STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
141351c0b2f7Stbbdev {
141451c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
141551c0b2f7Stbbdev localPublicFreeList = objectToFree->next = publicFreeList;
141651c0b2f7Stbbdev publicFreeList = objectToFree;
141751c0b2f7Stbbdev }
141851c0b2f7Stbbdev #endif
141951c0b2f7Stbbdev
142057f524caSIlya Isaev if( localPublicFreeList==nullptr ) {
142151c0b2f7Stbbdev // if the block is abandoned, its nextPrivatizable pointer should be UNUSABLE
142251c0b2f7Stbbdev // otherwise, it should point to the bin the block belongs to.
142351c0b2f7Stbbdev // reading nextPrivatizable is thread-safe below, because:
142457f524caSIlya Isaev // 1) the executing thread atomically got publicFreeList==nullptr and changed it to non-nullptr;
142557f524caSIlya Isaev // 2) only owning thread can change it back to nullptr,
142651c0b2f7Stbbdev // 3) but it can not be done until the block is put to the mailbox
142751c0b2f7Stbbdev // So the executing thread is now the only one that can change nextPrivatizable
1428478de5b1Stbbdev Block* next = nextPrivatizable.load(std::memory_order_acquire);
1429478de5b1Stbbdev if( !isNotForUse(next) ) {
1430478de5b1Stbbdev MALLOC_ASSERT( next!=nullptr, ASSERT_TEXT );
1431478de5b1Stbbdev Bin* theBin = (Bin*) next;
143251c0b2f7Stbbdev #if MALLOC_DEBUG && TBB_REVAMP_TODO
143351c0b2f7Stbbdev // FIXME: The thread that returns the block is not the block's owner.
143451c0b2f7Stbbdev // The below assertion compares 'theBin' against the caller's local bin, thus, it always fails.
143551c0b2f7Stbbdev // Need to find a way to get the correct remote bin for comparison.
143651c0b2f7Stbbdev { // check that nextPrivatizable points to the bin the block belongs to
143751c0b2f7Stbbdev uint32_t index = getIndex( objectSize );
143851c0b2f7Stbbdev TLSData* tls = getThreadMallocTLS();
143951c0b2f7Stbbdev MALLOC_ASSERT( theBin==tls->bin+index, ASSERT_TEXT );
144051c0b2f7Stbbdev }
144151c0b2f7Stbbdev #endif // MALLOC_DEBUG
144251c0b2f7Stbbdev theBin->addPublicFreeListBlock(this);
144351c0b2f7Stbbdev }
144451c0b2f7Stbbdev }
144551c0b2f7Stbbdev STAT_increment(getThreadId(), ThreadCommonCounters, freeToOtherThread);
1446478de5b1Stbbdev STAT_increment(ownerTid.load(std::memory_order_relaxed), getIndex(objectSize), freeByOtherThread);
144751c0b2f7Stbbdev }
144851c0b2f7Stbbdev
144951c0b2f7Stbbdev // Make objects freed by other threads available for use again
privatizePublicFreeList(bool reset)145051c0b2f7Stbbdev void Block::privatizePublicFreeList( bool reset )
145151c0b2f7Stbbdev {
145251c0b2f7Stbbdev FreeObject *localPublicFreeList;
145351c0b2f7Stbbdev // If reset is false, publicFreeList should not be zeroed but set to UNUSABLE
145451c0b2f7Stbbdev // to properly synchronize with other threads freeing objects to this slab.
145551c0b2f7Stbbdev const intptr_t endMarker = reset ? 0 : UNUSABLE;
145651c0b2f7Stbbdev
145757f524caSIlya Isaev // Only the owner thread may reset the pointer to nullptr
145851c0b2f7Stbbdev MALLOC_ASSERT( isOwnedByCurrentThread() || !reset, ASSERT_TEXT );
145951c0b2f7Stbbdev #if FREELIST_NONBLOCKING
146051c0b2f7Stbbdev localPublicFreeList = publicFreeList.exchange((FreeObject*)endMarker);
146151c0b2f7Stbbdev #else
146251c0b2f7Stbbdev STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
146351c0b2f7Stbbdev {
146451c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
146551c0b2f7Stbbdev localPublicFreeList = publicFreeList;
146651c0b2f7Stbbdev publicFreeList = endMarker;
146751c0b2f7Stbbdev }
146851c0b2f7Stbbdev #endif
146951c0b2f7Stbbdev MALLOC_ITT_SYNC_ACQUIRED(&publicFreeList);
147051c0b2f7Stbbdev MALLOC_ASSERT( !(reset && isNotForUse(publicFreeList)), ASSERT_TEXT );
147151c0b2f7Stbbdev
147257f524caSIlya Isaev // publicFreeList must have been UNUSABLE or valid, but not nullptr
147357f524caSIlya Isaev MALLOC_ASSERT( localPublicFreeList!=nullptr, ASSERT_TEXT );
147451c0b2f7Stbbdev if( isSolidPtr(localPublicFreeList) ) {
147551c0b2f7Stbbdev MALLOC_ASSERT( allocatedCount <= (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
147651c0b2f7Stbbdev /* other threads did not change the counter freeing our blocks */
147751c0b2f7Stbbdev allocatedCount--;
147851c0b2f7Stbbdev FreeObject *temp = localPublicFreeList;
147957f524caSIlya Isaev while( isSolidPtr(temp->next) ){ // the list will end with either nullptr or UNUSABLE
148051c0b2f7Stbbdev temp = temp->next;
148151c0b2f7Stbbdev allocatedCount--;
148251c0b2f7Stbbdev MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
148351c0b2f7Stbbdev }
148451c0b2f7Stbbdev /* merge with local freeList */
148551c0b2f7Stbbdev temp->next = freeList;
148651c0b2f7Stbbdev freeList = localPublicFreeList;
148751c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), allocPrivatized);
148851c0b2f7Stbbdev }
148951c0b2f7Stbbdev }
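// Example of the resulting state (illustrative): if two other threads each freed one
// object into publicFreeList, after privatization the private freeList becomes
//     <last-freed object> -> <first-freed object> -> <previous private freeList> -> ... -> nullptr
// and allocatedCount has been decreased by exactly 2.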
149051c0b2f7Stbbdev
privatizeOrphaned(TLSData * tls,unsigned index)149151c0b2f7Stbbdev void Block::privatizeOrphaned(TLSData *tls, unsigned index)
149251c0b2f7Stbbdev {
149351c0b2f7Stbbdev Bin* bin = tls->bin + index;
149451c0b2f7Stbbdev STAT_increment(getThreadId(), index, allocBlockPublic);
149557f524caSIlya Isaev next = nullptr;
149657f524caSIlya Isaev previous = nullptr;
149757f524caSIlya Isaev MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) != nullptr, ASSERT_TEXT );
149851c0b2f7Stbbdev /* There is not a race here since no other thread owns this block */
149951c0b2f7Stbbdev markOwned(tls);
150051c0b2f7Stbbdev // It is safe to change nextPrivatizable, as publicFreeList is not null
1501478de5b1Stbbdev MALLOC_ASSERT( isNotForUse(nextPrivatizable.load(std::memory_order_relaxed)), ASSERT_TEXT );
1502478de5b1Stbbdev nextPrivatizable.store((Block*)bin, std::memory_order_relaxed);
150351c0b2f7Stbbdev // the next call is required to change publicFreeList to 0
150451c0b2f7Stbbdev privatizePublicFreeList();
150551c0b2f7Stbbdev if( empty() ) {
150651c0b2f7Stbbdev restoreBumpPtr();
150751c0b2f7Stbbdev } else {
150851c0b2f7Stbbdev adjustFullness(); // check the block fullness and set isFull
150951c0b2f7Stbbdev }
151051c0b2f7Stbbdev MALLOC_ASSERT( !isNotForUse(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
151151c0b2f7Stbbdev }
151251c0b2f7Stbbdev
151351c0b2f7Stbbdev
readyToShare()151451c0b2f7Stbbdev bool Block::readyToShare()
151551c0b2f7Stbbdev {
151657f524caSIlya Isaev FreeObject* oldVal = nullptr;
151751c0b2f7Stbbdev #if FREELIST_NONBLOCKING
151851c0b2f7Stbbdev publicFreeList.compare_exchange_strong(oldVal, (FreeObject*)UNUSABLE);
151951c0b2f7Stbbdev #else
152051c0b2f7Stbbdev STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
152151c0b2f7Stbbdev {
152251c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
152357f524caSIlya Isaev if ( (oldVal=publicFreeList)==nullptr )
152451c0b2f7Stbbdev (intptr_t&)(publicFreeList) = UNUSABLE;
152551c0b2f7Stbbdev }
152651c0b2f7Stbbdev #endif
152757f524caSIlya Isaev return oldVal==nullptr;
152851c0b2f7Stbbdev }
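// readyToShare() tries to move publicFreeList from nullptr to the UNUSABLE marker so
// that a concurrent remote free takes the "publicFreeList was already non-null" path
// in freePublicObject() and does not touch nextPrivatizable while the block is being
// orphaned. Outcome summary (illustrative):
//     publicFreeList == nullptr    -> becomes UNUSABLE, returns true
//     publicFreeList == UNUSABLE   -> unchanged,        returns false
//     publicFreeList == valid list -> unchanged,        returns false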
152951c0b2f7Stbbdev
shareOrphaned(intptr_t binTag,unsigned index)153051c0b2f7Stbbdev void Block::shareOrphaned(intptr_t binTag, unsigned index)
153151c0b2f7Stbbdev {
153251c0b2f7Stbbdev MALLOC_ASSERT( binTag, ASSERT_TEXT );
153351c0b2f7Stbbdev // unreferenced formal parameter warning
153451c0b2f7Stbbdev tbb::detail::suppress_unused_warning(index);
153551c0b2f7Stbbdev STAT_increment(getThreadId(), index, freeBlockPublic);
153651c0b2f7Stbbdev markOrphaned();
1537478de5b1Stbbdev if ((intptr_t)nextPrivatizable.load(std::memory_order_relaxed) == binTag) {
153851c0b2f7Stbbdev // First check passed: the block is not in mailbox yet.
153951c0b2f7Stbbdev // Need to set publicFreeList to non-zero, so other threads
154051c0b2f7Stbbdev // will not change nextPrivatizable and it can be zeroed.
154151c0b2f7Stbbdev if ( !readyToShare() ) {
154251c0b2f7Stbbdev // another thread freed an object; we need to wait until it finishes.
154351c0b2f7Stbbdev // There is no need for exponential backoff, as the wait here is not for a lock;
154451c0b2f7Stbbdev // but we need to yield, so the thread we wait for has a chance to run.
154551c0b2f7Stbbdev // TODO: add a pause to also be friendly to hyperthreads
154651c0b2f7Stbbdev int count = 256;
1547478de5b1Stbbdev while ((intptr_t)nextPrivatizable.load(std::memory_order_relaxed) == binTag) {
154851c0b2f7Stbbdev if (--count==0) {
154951c0b2f7Stbbdev do_yield();
155051c0b2f7Stbbdev count = 256;
155151c0b2f7Stbbdev }
155251c0b2f7Stbbdev }
155351c0b2f7Stbbdev }
155451c0b2f7Stbbdev }
155557f524caSIlya Isaev MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) !=nullptr, ASSERT_TEXT );
155651c0b2f7Stbbdev // now it is safe to change our data
155757f524caSIlya Isaev previous = nullptr;
155851c0b2f7Stbbdev // it is the caller's responsibility to ensure that the list of blocks
155951c0b2f7Stbbdev // formed by nextPrivatizable pointers is kept consistent if required;
156051c0b2f7Stbbdev // if this is only called from thread shutdown code, it does not matter.
1561478de5b1Stbbdev nextPrivatizable.store((Block*)UNUSABLE, std::memory_order_relaxed);
156251c0b2f7Stbbdev }
156351c0b2f7Stbbdev
cleanBlockHeader()156451c0b2f7Stbbdev void Block::cleanBlockHeader()
156551c0b2f7Stbbdev {
1566478de5b1Stbbdev next = nullptr;
1567478de5b1Stbbdev previous = nullptr;
1568478de5b1Stbbdev freeList = nullptr;
156951c0b2f7Stbbdev allocatedCount = 0;
157051c0b2f7Stbbdev isFull = false;
1571478de5b1Stbbdev tlsPtr.store(nullptr, std::memory_order_relaxed);
157251c0b2f7Stbbdev
1573478de5b1Stbbdev publicFreeList.store(nullptr, std::memory_order_relaxed);
157451c0b2f7Stbbdev }
157551c0b2f7Stbbdev
initEmptyBlock(TLSData * tls,size_t size)157651c0b2f7Stbbdev void Block::initEmptyBlock(TLSData *tls, size_t size)
157751c0b2f7Stbbdev {
157851c0b2f7Stbbdev // Having getIndex and getObjectSize called next to each other
157951c0b2f7Stbbdev // allows better compiler optimization as they basically share the code.
158051c0b2f7Stbbdev unsigned int index = getIndex(size);
158151c0b2f7Stbbdev unsigned int objSz = getObjectSize(size);
158251c0b2f7Stbbdev
158351c0b2f7Stbbdev cleanBlockHeader();
158451c0b2f7Stbbdev objectSize = objSz;
158551c0b2f7Stbbdev markOwned(tls);
158651c0b2f7Stbbdev // bump pointer should be prepared for the first allocation - thus move it down by objectSize
158751c0b2f7Stbbdev bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);
158851c0b2f7Stbbdev
158951c0b2f7Stbbdev // each block should have the address where the head of the list of "privatizable" blocks is kept
159057f524caSIlya Isaev // the only exception is a bootstrap block, which is initialized while TLS is still nullptr
1591478de5b1Stbbdev nextPrivatizable.store( tls? (Block*)(tls->bin + index) : nullptr, std::memory_order_relaxed);
159251c0b2f7Stbbdev TRACEF(( "[ScalableMalloc trace] Empty block %p is initialized, owner is %ld, objectSize is %d, bumpPtr is %p\n",
1593478de5b1Stbbdev this, tlsPtr.load(std::memory_order_relaxed) ? getThreadId() : -1, objectSize, bumpPtr ));
159451c0b2f7Stbbdev }
159551c0b2f7Stbbdev
get(TLSData * tls,unsigned int size)159651c0b2f7Stbbdev Block *OrphanedBlocks::get(TLSData *tls, unsigned int size)
159751c0b2f7Stbbdev {
159851c0b2f7Stbbdev // TODO: try to use index from getAllocationBin
159951c0b2f7Stbbdev unsigned int index = getIndex(size);
160051c0b2f7Stbbdev Block *block = bins[index].pop();
160151c0b2f7Stbbdev if (block) {
160251c0b2f7Stbbdev MALLOC_ITT_SYNC_ACQUIRED(bins+index);
160351c0b2f7Stbbdev block->privatizeOrphaned(tls, index);
160451c0b2f7Stbbdev }
160551c0b2f7Stbbdev return block;
160651c0b2f7Stbbdev }
160751c0b2f7Stbbdev
put(intptr_t binTag,Block * block)160851c0b2f7Stbbdev void OrphanedBlocks::put(intptr_t binTag, Block *block)
160951c0b2f7Stbbdev {
161051c0b2f7Stbbdev unsigned int index = getIndex(block->getSize());
161151c0b2f7Stbbdev block->shareOrphaned(binTag, index);
161251c0b2f7Stbbdev MALLOC_ITT_SYNC_RELEASING(bins+index);
161351c0b2f7Stbbdev bins[index].push(block);
161451c0b2f7Stbbdev }
161551c0b2f7Stbbdev
reset()161651c0b2f7Stbbdev void OrphanedBlocks::reset()
161751c0b2f7Stbbdev {
161851c0b2f7Stbbdev for (uint32_t i=0; i<numBlockBinLimit; i++)
161951c0b2f7Stbbdev new (bins+i) LifoList();
162051c0b2f7Stbbdev }
162151c0b2f7Stbbdev
cleanup(Backend * backend)162251c0b2f7Stbbdev bool OrphanedBlocks::cleanup(Backend* backend)
162351c0b2f7Stbbdev {
162451c0b2f7Stbbdev bool released = false;
162551c0b2f7Stbbdev for (uint32_t i=0; i<numBlockBinLimit; i++) {
162651c0b2f7Stbbdev Block* block = bins[i].grab();
162751c0b2f7Stbbdev MALLOC_ITT_SYNC_ACQUIRED(bins+i);
162851c0b2f7Stbbdev while (block) {
162951c0b2f7Stbbdev Block* next = block->next;
163057f524caSIlya Isaev block->privatizePublicFreeList( /*reset=*/false ); // do not set publicFreeList to nullptr
163151c0b2f7Stbbdev if (block->empty()) {
163251c0b2f7Stbbdev block->reset();
163351c0b2f7Stbbdev // slab blocks in user's pools do not have valid backRefIdx
163451c0b2f7Stbbdev if (!backend->inUserPool())
163551c0b2f7Stbbdev removeBackRef(*(block->getBackRefIdx()));
163651c0b2f7Stbbdev backend->putSlabBlock(block);
163751c0b2f7Stbbdev released = true;
163851c0b2f7Stbbdev } else {
163951c0b2f7Stbbdev MALLOC_ITT_SYNC_RELEASING(bins+i);
164051c0b2f7Stbbdev bins[i].push(block);
164151c0b2f7Stbbdev }
164251c0b2f7Stbbdev block = next;
164351c0b2f7Stbbdev }
164451c0b2f7Stbbdev }
164551c0b2f7Stbbdev return released;
164651c0b2f7Stbbdev }
164751c0b2f7Stbbdev
getBlock()164851c0b2f7Stbbdev FreeBlockPool::ResOfGet FreeBlockPool::getBlock()
164951c0b2f7Stbbdev {
165057f524caSIlya Isaev Block *b = head.exchange(nullptr);
165183e48019SLukasz Dorau bool lastAccessMiss;
165251c0b2f7Stbbdev
165351c0b2f7Stbbdev if (b) {
165451c0b2f7Stbbdev size--;
165551c0b2f7Stbbdev Block *newHead = b->next;
165651c0b2f7Stbbdev lastAccessMiss = false;
165751c0b2f7Stbbdev head.store(newHead, std::memory_order_release);
165851c0b2f7Stbbdev } else {
165951c0b2f7Stbbdev lastAccessMiss = true;
166051c0b2f7Stbbdev }
166151c0b2f7Stbbdev return ResOfGet(b, lastAccessMiss);
166251c0b2f7Stbbdev }
166351c0b2f7Stbbdev
returnBlock(Block * block)166451c0b2f7Stbbdev void FreeBlockPool::returnBlock(Block *block)
166551c0b2f7Stbbdev {
166651c0b2f7Stbbdev MALLOC_ASSERT( size <= POOL_HIGH_MARK, ASSERT_TEXT );
166757f524caSIlya Isaev Block *localHead = head.exchange(nullptr);
166851c0b2f7Stbbdev
166951c0b2f7Stbbdev if (!localHead) {
167051c0b2f7Stbbdev size = 0; // head was stolen by externalClean, correct size accordingly
167151c0b2f7Stbbdev } else if (size == POOL_HIGH_MARK) {
167251c0b2f7Stbbdev // release cold blocks and add hot one,
167351c0b2f7Stbbdev // so keep POOL_LOW_MARK-1 blocks and add new block to head
167451c0b2f7Stbbdev Block *headToFree = localHead, *helper;
167551c0b2f7Stbbdev for (int i=0; i<POOL_LOW_MARK-2; i++)
167651c0b2f7Stbbdev headToFree = headToFree->next;
167751c0b2f7Stbbdev Block *last = headToFree;
167851c0b2f7Stbbdev headToFree = headToFree->next;
167957f524caSIlya Isaev last->next = nullptr;
168051c0b2f7Stbbdev size = POOL_LOW_MARK-1;
168151c0b2f7Stbbdev for (Block *currBl = headToFree; currBl; currBl = helper) {
168251c0b2f7Stbbdev helper = currBl->next;
168351c0b2f7Stbbdev // slab blocks in user's pools do not have valid backRefIdx
168451c0b2f7Stbbdev if (!backend->inUserPool())
168551c0b2f7Stbbdev removeBackRef(currBl->backRefIdx);
168651c0b2f7Stbbdev backend->putSlabBlock(currBl);
168751c0b2f7Stbbdev }
168851c0b2f7Stbbdev }
168951c0b2f7Stbbdev size++;
169051c0b2f7Stbbdev block->next = localHead;
169151c0b2f7Stbbdev head.store(block, std::memory_order_release);
169251c0b2f7Stbbdev }
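// Trimming example (illustrative; POOL_HIGH_MARK == 32 and POOL_LOW_MARK == 8 are
// assumed values here, the real ones are defined elsewhere): when a block is returned
// to a pool already holding 32 blocks, the 7 hottest cached blocks (POOL_LOW_MARK-1)
// are kept, the remaining 25 go back to the backend, and the newly returned block
// becomes the new head, leaving size == 8.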
169351c0b2f7Stbbdev
externalCleanup()169451c0b2f7Stbbdev bool FreeBlockPool::externalCleanup()
169551c0b2f7Stbbdev {
169651c0b2f7Stbbdev Block *helper;
169751c0b2f7Stbbdev bool released = false;
169851c0b2f7Stbbdev
169957f524caSIlya Isaev for (Block *currBl=head.exchange(nullptr); currBl; currBl=helper) {
170051c0b2f7Stbbdev helper = currBl->next;
170151c0b2f7Stbbdev // slab blocks in user's pools do not have valid backRefIdx
170251c0b2f7Stbbdev if (!backend->inUserPool())
170351c0b2f7Stbbdev removeBackRef(currBl->backRefIdx);
170451c0b2f7Stbbdev backend->putSlabBlock(currBl);
170551c0b2f7Stbbdev released = true;
170651c0b2f7Stbbdev }
170751c0b2f7Stbbdev return released;
170851c0b2f7Stbbdev }
170951c0b2f7Stbbdev
171051c0b2f7Stbbdev /* Prepare the block for returning to FreeBlockPool */
reset()171151c0b2f7Stbbdev void Block::reset()
171251c0b2f7Stbbdev {
171351c0b2f7Stbbdev // it is the caller's responsibility to ensure no data is lost before calling this
171451c0b2f7Stbbdev MALLOC_ASSERT( allocatedCount==0, ASSERT_TEXT );
171551c0b2f7Stbbdev MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
171651c0b2f7Stbbdev if (!isStartupAllocObject())
171751c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), freeBlockBack);
171851c0b2f7Stbbdev
171951c0b2f7Stbbdev cleanBlockHeader();
172051c0b2f7Stbbdev
1721478de5b1Stbbdev nextPrivatizable.store(nullptr, std::memory_order_relaxed);
172251c0b2f7Stbbdev
172351c0b2f7Stbbdev objectSize = 0;
172451c0b2f7Stbbdev // for an empty block, bump pointer should point right after the end of the block
172551c0b2f7Stbbdev bumpPtr = (FreeObject *)((uintptr_t)this + slabSize);
172651c0b2f7Stbbdev }
172751c0b2f7Stbbdev
setActiveBlock(Block * block)172851c0b2f7Stbbdev inline void Bin::setActiveBlock (Block *block)
172951c0b2f7Stbbdev {
173051c0b2f7Stbbdev // MALLOC_ASSERT( bin, ASSERT_TEXT );
173151c0b2f7Stbbdev MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
173251c0b2f7Stbbdev // it is the caller's responsibility to keep the bin consistent (i.e. ensure this block is in the bin list)
173351c0b2f7Stbbdev activeBlk = block;
173451c0b2f7Stbbdev }
173551c0b2f7Stbbdev
setPreviousBlockActive()173651c0b2f7Stbbdev inline Block* Bin::setPreviousBlockActive()
173751c0b2f7Stbbdev {
173851c0b2f7Stbbdev MALLOC_ASSERT( activeBlk, ASSERT_TEXT );
173951c0b2f7Stbbdev Block* temp = activeBlk->previous;
174051c0b2f7Stbbdev if( temp ) {
174151c0b2f7Stbbdev MALLOC_ASSERT( !(temp->isFull), ASSERT_TEXT );
174251c0b2f7Stbbdev activeBlk = temp;
174351c0b2f7Stbbdev }
174451c0b2f7Stbbdev return temp;
174551c0b2f7Stbbdev }
174651c0b2f7Stbbdev
isOwnedByCurrentThread() const174751c0b2f7Stbbdev inline bool Block::isOwnedByCurrentThread() const {
1748478de5b1Stbbdev return tlsPtr.load(std::memory_order_relaxed) && ownerTid.isCurrentThreadId();
174951c0b2f7Stbbdev }
175051c0b2f7Stbbdev
findObjectToFree(const void * object) const175151c0b2f7Stbbdev FreeObject *Block::findObjectToFree(const void *object) const
175251c0b2f7Stbbdev {
175351c0b2f7Stbbdev FreeObject *objectToFree;
175451c0b2f7Stbbdev // Due to aligned allocations, a pointer passed to scalable_free
175551c0b2f7Stbbdev // might differ from the address of internally allocated object.
175651c0b2f7Stbbdev // Small objects however should always be fine.
175751c0b2f7Stbbdev if (objectSize <= maxSegregatedObjectSize)
175851c0b2f7Stbbdev objectToFree = (FreeObject*)object;
175951c0b2f7Stbbdev // "Fitting size" allocations are suspicious if aligned higher than naturally
176051c0b2f7Stbbdev else {
176151c0b2f7Stbbdev if ( ! isAligned(object,2*fittingAlignment) )
176251c0b2f7Stbbdev // TODO: the above check is questionable - it gives false negatives in ~50% of cases,
176351c0b2f7Stbbdev // so it might even be slower on average than unconditional use of findAllocatedObject.
176451c0b2f7Stbbdev // here it should be a "real" object
176551c0b2f7Stbbdev objectToFree = (FreeObject*)object;
176651c0b2f7Stbbdev else
176751c0b2f7Stbbdev // here object can be an aligned address, so applying additional checks
176851c0b2f7Stbbdev objectToFree = findAllocatedObject(object);
176951c0b2f7Stbbdev MALLOC_ASSERT( isAligned(objectToFree,fittingAlignment), ASSERT_TEXT );
177051c0b2f7Stbbdev }
177151c0b2f7Stbbdev MALLOC_ASSERT( isProperlyPlaced(objectToFree), ASSERT_TEXT );
177251c0b2f7Stbbdev
177351c0b2f7Stbbdev return objectToFree;
177451c0b2f7Stbbdev }
177551c0b2f7Stbbdev
release()177651c0b2f7Stbbdev void TLSData::release()
177751c0b2f7Stbbdev {
177851c0b2f7Stbbdev memPool->extMemPool.allLocalCaches.unregisterThread(this);
177951c0b2f7Stbbdev externalCleanup(/*cleanOnlyUnused=*/false, /*cleanBins=*/false);
178051c0b2f7Stbbdev
178151c0b2f7Stbbdev for (unsigned index = 0; index < numBlockBins; index++) {
178251c0b2f7Stbbdev Block *activeBlk = bin[index].getActiveBlock();
178351c0b2f7Stbbdev if (!activeBlk)
178451c0b2f7Stbbdev continue;
178551c0b2f7Stbbdev Block *threadlessBlock = activeBlk->previous;
1786478de5b1Stbbdev bool syncOnMailbox = false;
178751c0b2f7Stbbdev while (threadlessBlock) {
178851c0b2f7Stbbdev Block *threadBlock = threadlessBlock->previous;
178951c0b2f7Stbbdev if (threadlessBlock->empty()) {
179051c0b2f7Stbbdev /* the thread is being destroyed, so do not use its block pool */
179151c0b2f7Stbbdev memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false);
179251c0b2f7Stbbdev } else {
179351c0b2f7Stbbdev memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock);
1794478de5b1Stbbdev syncOnMailbox = true;
179551c0b2f7Stbbdev }
179651c0b2f7Stbbdev threadlessBlock = threadBlock;
179751c0b2f7Stbbdev }
179851c0b2f7Stbbdev threadlessBlock = activeBlk;
179951c0b2f7Stbbdev while (threadlessBlock) {
180051c0b2f7Stbbdev Block *threadBlock = threadlessBlock->next;
180151c0b2f7Stbbdev if (threadlessBlock->empty()) {
180251c0b2f7Stbbdev /* the thread is being destroyed, so do not use its block pool */
180351c0b2f7Stbbdev memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false);
180451c0b2f7Stbbdev } else {
180551c0b2f7Stbbdev memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock);
1806478de5b1Stbbdev syncOnMailbox = true;
180751c0b2f7Stbbdev }
180851c0b2f7Stbbdev threadlessBlock = threadBlock;
180951c0b2f7Stbbdev }
181051c0b2f7Stbbdev bin[index].resetActiveBlock();
1811478de5b1Stbbdev
1812478de5b1Stbbdev if (syncOnMailbox) {
1813478de5b1Stbbdev // Although, we synchronized on nextPrivatizable inside a block, we still need to
1814478de5b1Stbbdev // synchronize on the bin lifetime because the thread releasing an object into the public
1815478de5b1Stbbdev // free list is touching the bin (mailbox and mailLock)
1816478de5b1Stbbdev MallocMutex::scoped_lock scoped_cs(bin[index].mailLock);
1817478de5b1Stbbdev }
181851c0b2f7Stbbdev }
181951c0b2f7Stbbdev }
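// Summary of the shutdown walk above (illustrative): for every bin owned by the dying
// thread, empty slabs go straight back to the backend while non-empty slabs are parked
// for adoption by other threads:
//     empty     -> memPool->returnEmptyBlock(b, /*poolTheBlock=*/false);
//     non-empty -> memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), b);
// Another thread can later pick them up via OrphanedBlocks::get().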
182051c0b2f7Stbbdev
182151c0b2f7Stbbdev
182251c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION
182351c0b2f7Stbbdev // TODO: Use dedicated heap for this
182451c0b2f7Stbbdev
182551c0b2f7Stbbdev /*
182651c0b2f7Stbbdev * It's a special kind of allocation that can be used when malloc is
182751c0b2f7Stbbdev * not available (either during startup or when malloc was already called and
182851c0b2f7Stbbdev * we are, say, inside pthread_setspecific's call).
182951c0b2f7Stbbdev * Block can contain objects of different sizes,
183051c0b2f7Stbbdev * allocations are performed by moving the bump pointer and increasing the object counter;
183151c0b2f7Stbbdev * releasing is done via the counter of objects allocated in the block,
183251c0b2f7Stbbdev * or by moving the bump pointer if the released object is on the boundary.
183351c0b2f7Stbbdev * TODO: make the bump pointer grow backward, in the same direction as all the others.
183451c0b2f7Stbbdev */
183551c0b2f7Stbbdev
183651c0b2f7Stbbdev class StartupBlock : public Block {
availableSize() const183751c0b2f7Stbbdev size_t availableSize() const {
183851c0b2f7Stbbdev return slabSize - ((uintptr_t)bumpPtr - (uintptr_t)this);
183951c0b2f7Stbbdev }
184051c0b2f7Stbbdev static StartupBlock *getBlock();
184151c0b2f7Stbbdev public:
184251c0b2f7Stbbdev static FreeObject *allocate(size_t size);
msize(void * ptr)184351c0b2f7Stbbdev static size_t msize(void *ptr) { return *((size_t*)ptr - 1); }
184451c0b2f7Stbbdev void free(void *ptr);
184551c0b2f7Stbbdev };
184651c0b2f7Stbbdev
184751c0b2f7Stbbdev static MallocMutex startupMallocLock;
184851c0b2f7Stbbdev static StartupBlock *firstStartupBlock;
184951c0b2f7Stbbdev
getBlock()185051c0b2f7Stbbdev StartupBlock *StartupBlock::getBlock()
185151c0b2f7Stbbdev {
185251c0b2f7Stbbdev BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/false);
185357f524caSIlya Isaev if (backRefIdx.isInvalid()) return nullptr;
185451c0b2f7Stbbdev
185551c0b2f7Stbbdev StartupBlock *block = static_cast<StartupBlock*>(
185651c0b2f7Stbbdev defaultMemPool->extMemPool.backend.getSlabBlock(1));
185757f524caSIlya Isaev if (!block) return nullptr;
185851c0b2f7Stbbdev
185951c0b2f7Stbbdev block->cleanBlockHeader();
186051c0b2f7Stbbdev setBackRef(backRefIdx, block);
186151c0b2f7Stbbdev block->backRefIdx = backRefIdx;
186251c0b2f7Stbbdev // use startupAllocObjSizeMark to mark objects coming from a startup block
186351c0b2f7Stbbdev block->objectSize = startupAllocObjSizeMark;
186451c0b2f7Stbbdev block->bumpPtr = (FreeObject *)((uintptr_t)block + sizeof(StartupBlock));
186551c0b2f7Stbbdev return block;
186651c0b2f7Stbbdev }
186751c0b2f7Stbbdev
allocate(size_t size)186851c0b2f7Stbbdev FreeObject *StartupBlock::allocate(size_t size)
186951c0b2f7Stbbdev {
187051c0b2f7Stbbdev FreeObject *result;
187157f524caSIlya Isaev StartupBlock *newBlock = nullptr;
187251c0b2f7Stbbdev
187351c0b2f7Stbbdev /* Objects must be aligned on their natural bounds,
187451c0b2f7Stbbdev and objects bigger than a word on a word boundary. */
187551c0b2f7Stbbdev size = alignUp(size, sizeof(size_t));
187651c0b2f7Stbbdev // We need size of an object to implement msize.
187751c0b2f7Stbbdev size_t reqSize = size + sizeof(size_t);
187851c0b2f7Stbbdev {
187951c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(startupMallocLock);
188051c0b2f7Stbbdev // Re-check whether we need a new block (conditions might have changed)
188151c0b2f7Stbbdev if (!firstStartupBlock || firstStartupBlock->availableSize() < reqSize) {
188251c0b2f7Stbbdev if (!newBlock) {
188351c0b2f7Stbbdev newBlock = StartupBlock::getBlock();
188457f524caSIlya Isaev if (!newBlock) return nullptr;
188551c0b2f7Stbbdev }
188651c0b2f7Stbbdev newBlock->next = (Block*)firstStartupBlock;
188751c0b2f7Stbbdev if (firstStartupBlock)
188851c0b2f7Stbbdev firstStartupBlock->previous = (Block*)newBlock;
188951c0b2f7Stbbdev firstStartupBlock = newBlock;
1890478de5b1Stbbdev }
189151c0b2f7Stbbdev result = firstStartupBlock->bumpPtr;
189251c0b2f7Stbbdev firstStartupBlock->allocatedCount++;
189351c0b2f7Stbbdev firstStartupBlock->bumpPtr =
189451c0b2f7Stbbdev (FreeObject *)((uintptr_t)firstStartupBlock->bumpPtr + reqSize);
189551c0b2f7Stbbdev }
189651c0b2f7Stbbdev
189751c0b2f7Stbbdev // keep object size at the negative offset
189851c0b2f7Stbbdev *((size_t*)result) = size;
189951c0b2f7Stbbdev return (FreeObject*)((size_t*)result+1);
190051c0b2f7Stbbdev }
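// Memory layout produced by allocate() (illustrative):
//     [ size_t size ][ object of 'size' bytes ]
//     ^              ^-- pointer returned to the caller
//     '-- written so StartupBlock::msize(ptr) can read *((size_t*)ptr - 1)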
190151c0b2f7Stbbdev
free(void * ptr)190251c0b2f7Stbbdev void StartupBlock::free(void *ptr)
190351c0b2f7Stbbdev {
190457f524caSIlya Isaev Block* blockToRelease = nullptr;
190551c0b2f7Stbbdev {
190651c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(startupMallocLock);
190751c0b2f7Stbbdev
190851c0b2f7Stbbdev MALLOC_ASSERT(firstStartupBlock, ASSERT_TEXT);
190951c0b2f7Stbbdev MALLOC_ASSERT(startupAllocObjSizeMark==objectSize
191051c0b2f7Stbbdev && allocatedCount>0, ASSERT_TEXT);
191151c0b2f7Stbbdev MALLOC_ASSERT((uintptr_t)ptr>=(uintptr_t)this+sizeof(StartupBlock)
191251c0b2f7Stbbdev && (uintptr_t)ptr+StartupBlock::msize(ptr)<=(uintptr_t)this+slabSize,
191351c0b2f7Stbbdev ASSERT_TEXT);
191451c0b2f7Stbbdev if (0 == --allocatedCount) {
191551c0b2f7Stbbdev if (this == firstStartupBlock)
191651c0b2f7Stbbdev firstStartupBlock = (StartupBlock*)firstStartupBlock->next;
191751c0b2f7Stbbdev if (previous)
191851c0b2f7Stbbdev previous->next = next;
191951c0b2f7Stbbdev if (next)
192051c0b2f7Stbbdev next->previous = previous;
192151c0b2f7Stbbdev blockToRelease = this;
192251c0b2f7Stbbdev } else if ((uintptr_t)ptr + StartupBlock::msize(ptr) == (uintptr_t)bumpPtr) {
192351c0b2f7Stbbdev // last object in the block released
192451c0b2f7Stbbdev FreeObject *newBump = (FreeObject*)((size_t*)ptr - 1);
192551c0b2f7Stbbdev MALLOC_ASSERT((uintptr_t)newBump>(uintptr_t)this+sizeof(StartupBlock),
192651c0b2f7Stbbdev ASSERT_TEXT);
192751c0b2f7Stbbdev bumpPtr = newBump;
192851c0b2f7Stbbdev }
192951c0b2f7Stbbdev }
193051c0b2f7Stbbdev if (blockToRelease) {
193157f524caSIlya Isaev blockToRelease->previous = blockToRelease->next = nullptr;
193251c0b2f7Stbbdev defaultMemPool->returnEmptyBlock(blockToRelease, /*poolTheBlock=*/false);
193351c0b2f7Stbbdev }
193451c0b2f7Stbbdev }
193551c0b2f7Stbbdev
193651c0b2f7Stbbdev #endif /* MALLOC_CHECK_RECURSION */
193751c0b2f7Stbbdev
193851c0b2f7Stbbdev /********* End thread related code *************/
193951c0b2f7Stbbdev
194051c0b2f7Stbbdev /********* Library initialization *************/
194151c0b2f7Stbbdev
194251c0b2f7Stbbdev //! Value indicating the state of initialization.
194351c0b2f7Stbbdev /* 0 = initialization not started.
194451c0b2f7Stbbdev * 1 = initialization started but not finished.
194551c0b2f7Stbbdev * 2 = initialization finished.
194651c0b2f7Stbbdev * In theory, we only need values 0 and 2. But value 1 is nonetheless
194751c0b2f7Stbbdev * useful for detecting errors in the double-check pattern.
194851c0b2f7Stbbdev */
194951c0b2f7Stbbdev static std::atomic<intptr_t> mallocInitialized{0}; // implicitly initialized to 0
195051c0b2f7Stbbdev static MallocMutex initMutex;
195151c0b2f7Stbbdev
195251c0b2f7Stbbdev /** The leading "\0" is here so that applying "strings" to the binary
195351c0b2f7Stbbdev delivers a clean result. */
195451c0b2f7Stbbdev static char VersionString[] = "\0" TBBMALLOC_VERSION_STRINGS;
195551c0b2f7Stbbdev
1956112076d0SIlya Isaev #if USE_PTHREAD && __TBB_SOURCE_DIRECTLY_INCLUDED
195751c0b2f7Stbbdev
195851c0b2f7Stbbdev /* Decrease race interval between dynamic library unloading and pthread key
195951c0b2f7Stbbdev destructor. Protect only Pthreads with supported unloading. */
196051c0b2f7Stbbdev class ShutdownSync {
196151c0b2f7Stbbdev /* flag is the number of threads in pthread key dtor body
196251c0b2f7Stbbdev (i.e., between threadDtorStart() and threadDtorDone())
196351c0b2f7Stbbdev or the signal to skip dtor, if flag < 0 */
196451c0b2f7Stbbdev std::atomic<intptr_t> flag;
196551c0b2f7Stbbdev static const intptr_t skipDtor = INTPTR_MIN/2;
196651c0b2f7Stbbdev public:
init()196751c0b2f7Stbbdev void init() { flag.store(0, std::memory_order_release); }
196851c0b2f7Stbbdev /* Suppose that 2*abs(skipDtor) or more threads never call threadDtorStart()
196951c0b2f7Stbbdev simultaneously, so flag never becomes negative because of that. */
threadDtorStart()197051c0b2f7Stbbdev bool threadDtorStart() {
197151c0b2f7Stbbdev if (flag.load(std::memory_order_acquire) < 0)
197251c0b2f7Stbbdev return false;
197351c0b2f7Stbbdev if (++flag <= 0) { // note that new value returned
197451c0b2f7Stbbdev flag.fetch_sub(1); // flag is spoiled by us, restore it
197551c0b2f7Stbbdev return false;
197651c0b2f7Stbbdev }
197751c0b2f7Stbbdev return true;
197851c0b2f7Stbbdev }
threadDtorDone()197951c0b2f7Stbbdev void threadDtorDone() {
198051c0b2f7Stbbdev flag.fetch_sub(1);
198151c0b2f7Stbbdev }
processExit()198251c0b2f7Stbbdev void processExit() {
198351c0b2f7Stbbdev if (flag.fetch_add(skipDtor) != 0) {
198451c0b2f7Stbbdev SpinWaitUntilEq(flag, skipDtor);
198551c0b2f7Stbbdev }
198651c0b2f7Stbbdev }
198751c0b2f7Stbbdev };
198851c0b2f7Stbbdev
198951c0b2f7Stbbdev #else
199051c0b2f7Stbbdev
199151c0b2f7Stbbdev class ShutdownSync {
199251c0b2f7Stbbdev public:
init()199351c0b2f7Stbbdev void init() { }
threadDtorStart()199451c0b2f7Stbbdev bool threadDtorStart() { return true; }
threadDtorDone()199551c0b2f7Stbbdev void threadDtorDone() { }
processExit()199651c0b2f7Stbbdev void processExit() { }
199751c0b2f7Stbbdev };
199851c0b2f7Stbbdev
1999112076d0SIlya Isaev #endif // USE_PTHREAD && __TBB_SOURCE_DIRECTLY_INCLUDED
200051c0b2f7Stbbdev
200151c0b2f7Stbbdev static ShutdownSync shutdownSync;
200251c0b2f7Stbbdev
isMallocInitialized()200351c0b2f7Stbbdev inline bool isMallocInitialized() {
200451c0b2f7Stbbdev // Load must have acquire fence; otherwise thread taking "initialized" path
200551c0b2f7Stbbdev // might perform textually later loads *before* mallocInitialized becomes 2.
200651c0b2f7Stbbdev return 2 == mallocInitialized.load(std::memory_order_acquire);
200751c0b2f7Stbbdev }
200851c0b2f7Stbbdev
200951c0b2f7Stbbdev /* Caller is responsible for ensuring this routine is called exactly once. */
MallocInitializeITT()201051c0b2f7Stbbdev extern "C" void MallocInitializeITT() {
201151c0b2f7Stbbdev #if __TBB_USE_ITT_NOTIFY
201251c0b2f7Stbbdev if (!usedBySrcIncluded)
201351c0b2f7Stbbdev tbb::detail::r1::__TBB_load_ittnotify();
201451c0b2f7Stbbdev #endif
201551c0b2f7Stbbdev }
201651c0b2f7Stbbdev
initDefaultPool()201751c0b2f7Stbbdev void MemoryPool::initDefaultPool() {
201851c0b2f7Stbbdev hugePages.init();
201951c0b2f7Stbbdev }
202051c0b2f7Stbbdev
202151c0b2f7Stbbdev /*
202251c0b2f7Stbbdev * Allocator initialization routine;
202351c0b2f7Stbbdev * it is called lazily on the very first scalable_malloc call.
202451c0b2f7Stbbdev */
initMemoryManager()202551c0b2f7Stbbdev static bool initMemoryManager()
202651c0b2f7Stbbdev {
202751c0b2f7Stbbdev TRACEF(( "[ScalableMalloc trace] sizeof(Block) is %d (expected 128); sizeof(uintptr_t) is %d\n",
202851c0b2f7Stbbdev sizeof(Block), sizeof(uintptr_t) ));
202951c0b2f7Stbbdev MALLOC_ASSERT( 2*blockHeaderAlignment == sizeof(Block), ASSERT_TEXT );
203051c0b2f7Stbbdev MALLOC_ASSERT( sizeof(FreeObject) == sizeof(void*), ASSERT_TEXT );
203151c0b2f7Stbbdev MALLOC_ASSERT( isAligned(defaultMemPool, sizeof(intptr_t)),
203251c0b2f7Stbbdev "Memory pool must be void*-aligned for atomic to work over aligned arguments.");
203351c0b2f7Stbbdev
203451c0b2f7Stbbdev #if USE_WINTHREAD
203551c0b2f7Stbbdev const size_t granularity = 64*1024; // granularity of VirtualAlloc
203651c0b2f7Stbbdev #else
203751c0b2f7Stbbdev // POSIX.1-2001-compliant way to get page size
203851c0b2f7Stbbdev const size_t granularity = sysconf(_SC_PAGESIZE);
203951c0b2f7Stbbdev #endif
204051c0b2f7Stbbdev if (!defaultMemPool) {
204151c0b2f7Stbbdev // Do not rely on static constructors and do the assignment in case
204251c0b2f7Stbbdev // of library static section not initialized at this call yet.
204351c0b2f7Stbbdev defaultMemPool = (MemoryPool*)defaultMemPool_space;
204451c0b2f7Stbbdev }
204551c0b2f7Stbbdev bool initOk = defaultMemPool->
204657f524caSIlya Isaev extMemPool.init(0, nullptr, nullptr, granularity,
204751c0b2f7Stbbdev /*keepAllMemory=*/false, /*fixedPool=*/false);
204851c0b2f7Stbbdev // TODO: extMemPool.init() to not allocate memory
20491ecde27fSIlya Mishin if (!initOk || !initBackRefMain(&defaultMemPool->extMemPool.backend) || !ThreadId::init())
205051c0b2f7Stbbdev return false;
205151c0b2f7Stbbdev MemoryPool::initDefaultPool();
205251c0b2f7Stbbdev // init() is required iff initMemoryManager() is called
205351c0b2f7Stbbdev // after mallocProcessShutdownNotification()
205451c0b2f7Stbbdev shutdownSync.init();
205551c0b2f7Stbbdev #if COLLECT_STATISTICS
205651c0b2f7Stbbdev initStatisticsCollection();
205751c0b2f7Stbbdev #endif
205851c0b2f7Stbbdev return true;
205951c0b2f7Stbbdev }
206051c0b2f7Stbbdev
GetBoolEnvironmentVariable(const char * name)206151c0b2f7Stbbdev static bool GetBoolEnvironmentVariable(const char* name) {
206251c0b2f7Stbbdev return tbb::detail::r1::GetBoolEnvironmentVariable(name);
206351c0b2f7Stbbdev }
206451c0b2f7Stbbdev
206551c0b2f7Stbbdev //! Ensures that initMemoryManager() is called once and only once.
206651c0b2f7Stbbdev /** Does not return until initMemoryManager() has been completed by a thread.
206751c0b2f7Stbbdev There is no need to call this routine if mallocInitialized==2 . */
doInitialization()206851c0b2f7Stbbdev static bool doInitialization()
206951c0b2f7Stbbdev {
207051c0b2f7Stbbdev MallocMutex::scoped_lock lock( initMutex );
207151c0b2f7Stbbdev if (mallocInitialized.load(std::memory_order_relaxed)!=2) {
207251c0b2f7Stbbdev MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==0, ASSERT_TEXT );
207351c0b2f7Stbbdev mallocInitialized.store(1, std::memory_order_relaxed);
207451c0b2f7Stbbdev RecursiveMallocCallProtector scoped;
207551c0b2f7Stbbdev if (!initMemoryManager()) {
207651c0b2f7Stbbdev mallocInitialized.store(0, std::memory_order_relaxed); // restore and bail out
207751c0b2f7Stbbdev return false;
207851c0b2f7Stbbdev }
207951c0b2f7Stbbdev #ifdef MALLOC_EXTRA_INITIALIZATION
208051c0b2f7Stbbdev MALLOC_EXTRA_INITIALIZATION;
208151c0b2f7Stbbdev #endif
208251c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION
208351c0b2f7Stbbdev RecursiveMallocCallProtector::detectNaiveOverload();
208451c0b2f7Stbbdev #endif
208551c0b2f7Stbbdev MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==1, ASSERT_TEXT );
208651c0b2f7Stbbdev // Store must have release fence, otherwise mallocInitialized==2
208751c0b2f7Stbbdev // might become remotely visible before side effects of
208851c0b2f7Stbbdev // initMemoryManager() become remotely visible.
208951c0b2f7Stbbdev mallocInitialized.store(2, std::memory_order_release);
209051c0b2f7Stbbdev if( GetBoolEnvironmentVariable("TBB_VERSION") ) {
209151c0b2f7Stbbdev fputs(VersionString+1,stderr);
209251c0b2f7Stbbdev hugePages.printStatus();
209351c0b2f7Stbbdev }
209451c0b2f7Stbbdev }
209551c0b2f7Stbbdev /* It can't be 0 or I would have initialized it */
209651c0b2f7Stbbdev MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==2, ASSERT_TEXT );
209751c0b2f7Stbbdev return true;
209851c0b2f7Stbbdev }
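// Typical call pattern (illustrative; the real call sites are the frontend allocation
// entry points, which are assumed here rather than shown):
//     if (!isMallocInitialized())
//         if (!doInitialization())
//             return nullptr;   // the allocator could not be brought up
// The acquire load in isMallocInitialized() pairs with the release store of 2 above.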
209951c0b2f7Stbbdev
210051c0b2f7Stbbdev /********* End library initialization *************/
210151c0b2f7Stbbdev
210251c0b2f7Stbbdev /********* The malloc show begins *************/
210351c0b2f7Stbbdev
210451c0b2f7Stbbdev
allocateFromFreeList()210551c0b2f7Stbbdev FreeObject *Block::allocateFromFreeList()
210651c0b2f7Stbbdev {
210751c0b2f7Stbbdev FreeObject *result;
210851c0b2f7Stbbdev
210957f524caSIlya Isaev if (!freeList) return nullptr;
211051c0b2f7Stbbdev
211151c0b2f7Stbbdev result = freeList;
211251c0b2f7Stbbdev MALLOC_ASSERT( result, ASSERT_TEXT );
211351c0b2f7Stbbdev
211451c0b2f7Stbbdev freeList = result->next;
211551c0b2f7Stbbdev MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
211651c0b2f7Stbbdev allocatedCount++;
211751c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), allocFreeListUsed);
211851c0b2f7Stbbdev
211951c0b2f7Stbbdev return result;
212051c0b2f7Stbbdev }
212151c0b2f7Stbbdev
allocateFromBumpPtr()212251c0b2f7Stbbdev FreeObject *Block::allocateFromBumpPtr()
212351c0b2f7Stbbdev {
212451c0b2f7Stbbdev FreeObject *result = bumpPtr;
212551c0b2f7Stbbdev if (result) {
212651c0b2f7Stbbdev bumpPtr = (FreeObject *) ((uintptr_t) bumpPtr - objectSize);
212751c0b2f7Stbbdev if ( (uintptr_t)bumpPtr < (uintptr_t)this+sizeof(Block) ) {
212857f524caSIlya Isaev bumpPtr = nullptr;
212951c0b2f7Stbbdev }
213051c0b2f7Stbbdev MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
213151c0b2f7Stbbdev allocatedCount++;
213251c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), allocBumpPtrUsed);
213351c0b2f7Stbbdev }
213451c0b2f7Stbbdev return result;
213551c0b2f7Stbbdev }
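// Bump-pointer walk (illustrative, assuming a 16 KB slab and 1 KB objects): bumpPtr
// starts at this + 16K - 1K (set in initEmptyBlock) and moves down by objectSize on
// each allocation; once it would dip below this + sizeof(Block) it is set to nullptr
// and the block serves further requests from its free list only.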
213651c0b2f7Stbbdev
allocate()213751c0b2f7Stbbdev inline FreeObject* Block::allocate()
213851c0b2f7Stbbdev {
213951c0b2f7Stbbdev MALLOC_ASSERT( isOwnedByCurrentThread(), ASSERT_TEXT );
214051c0b2f7Stbbdev
214151c0b2f7Stbbdev /* for better cache locality, look in the free list first. */
214251c0b2f7Stbbdev if ( FreeObject *result = allocateFromFreeList() ) {
214351c0b2f7Stbbdev return result;
214451c0b2f7Stbbdev }
214551c0b2f7Stbbdev MALLOC_ASSERT( !freeList, ASSERT_TEXT );
214651c0b2f7Stbbdev
214751c0b2f7Stbbdev /* if free list is empty, try thread local bump pointer allocation. */
214851c0b2f7Stbbdev if ( FreeObject *result = allocateFromBumpPtr() ) {
214951c0b2f7Stbbdev return result;
215051c0b2f7Stbbdev }
215151c0b2f7Stbbdev MALLOC_ASSERT( !bumpPtr, ASSERT_TEXT );
215251c0b2f7Stbbdev
215351c0b2f7Stbbdev /* the block is considered full. */
215451c0b2f7Stbbdev isFull = true;
215557f524caSIlya Isaev return nullptr;
215651c0b2f7Stbbdev }
215751c0b2f7Stbbdev
findObjectSize(void * object) const215851c0b2f7Stbbdev size_t Block::findObjectSize(void *object) const
215951c0b2f7Stbbdev {
216051c0b2f7Stbbdev size_t blSize = getSize();
216151c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION
216251c0b2f7Stbbdev // Currently, there are no aligned allocations from startup blocks,
216351c0b2f7Stbbdev // so we can return just StartupBlock::msize().
216451c0b2f7Stbbdev // TODO: This must be extended if we add aligned allocation from startup blocks.
216551c0b2f7Stbbdev if (!blSize)
216651c0b2f7Stbbdev return StartupBlock::msize(object);
216751c0b2f7Stbbdev #endif
216851c0b2f7Stbbdev // object can be aligned, so real size can be less than block's
216951c0b2f7Stbbdev size_t size =
217051c0b2f7Stbbdev blSize - ((uintptr_t)object - (uintptr_t)findObjectToFree(object));
217151c0b2f7Stbbdev MALLOC_ASSERT(size>0 && size<minLargeObjectSize, ASSERT_TEXT);
217251c0b2f7Stbbdev return size;
217351c0b2f7Stbbdev }
217451c0b2f7Stbbdev
moveBlockToFront(Block * block)217551c0b2f7Stbbdev void Bin::moveBlockToFront(Block *block)
217651c0b2f7Stbbdev {
217751c0b2f7Stbbdev /* move the block to the front of the bin */
217851c0b2f7Stbbdev if (block == activeBlk) return;
217951c0b2f7Stbbdev outofTLSBin(block);
218051c0b2f7Stbbdev pushTLSBin(block);
218151c0b2f7Stbbdev }
218251c0b2f7Stbbdev
processEmptyBlock(Block * block,bool poolTheBlock)218351c0b2f7Stbbdev void Bin::processEmptyBlock(Block *block, bool poolTheBlock)
218451c0b2f7Stbbdev {
218551c0b2f7Stbbdev if (block != activeBlk) {
218651c0b2f7Stbbdev /* We are not using this block; return it to the pool */
218751c0b2f7Stbbdev outofTLSBin(block);
218851c0b2f7Stbbdev block->getMemPool()->returnEmptyBlock(block, poolTheBlock);
218951c0b2f7Stbbdev } else {
219051c0b2f7Stbbdev /* all objects are free - let's restore the bump pointer */
219151c0b2f7Stbbdev block->restoreBumpPtr();
219251c0b2f7Stbbdev }
219351c0b2f7Stbbdev }
219451c0b2f7Stbbdev
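/*
 * Descriptive note: the per-thread large-object cache (LocalLOCImpl) is a doubly-linked
 * list with an atomic head, kept in most-recently-freed order. put() prepends the freed
 * block and, once the cached total exceeds MAX_TOTAL_SIZE or the block count reaches
 * HIGH_MARK, trims from the tail until at most LOW_MARK blocks remain and the size
 * constraint holds again, returning the trimmed tail to the external memory pool.
 */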
219551c0b2f7Stbbdev template<int LOW_MARK, int HIGH_MARK>
put(LargeMemoryBlock * object,ExtMemoryPool * extMemPool)219651c0b2f7Stbbdev bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool)
219751c0b2f7Stbbdev {
219851c0b2f7Stbbdev const size_t size = object->unalignedSize;
219951c0b2f7Stbbdev // do not spoil the cache with an overly large object, which could trigger a total cache cleanup
220051c0b2f7Stbbdev if (size > MAX_TOTAL_SIZE)
220151c0b2f7Stbbdev return false;
220257f524caSIlya Isaev LargeMemoryBlock *localHead = head.exchange(nullptr);
220351c0b2f7Stbbdev
220457f524caSIlya Isaev object->prev = nullptr;
220551c0b2f7Stbbdev object->next = localHead;
220651c0b2f7Stbbdev if (localHead)
220751c0b2f7Stbbdev localHead->prev = object;
220851c0b2f7Stbbdev else {
220951c0b2f7Stbbdev // these might not have been reset during local cache stealing, so correct them here
221051c0b2f7Stbbdev totalSize = 0;
221151c0b2f7Stbbdev numOfBlocks = 0;
221251c0b2f7Stbbdev tail = object;
221351c0b2f7Stbbdev }
221451c0b2f7Stbbdev localHead = object;
221551c0b2f7Stbbdev totalSize += size;
221651c0b2f7Stbbdev numOfBlocks++;
221751c0b2f7Stbbdev // must meet both the size and the number-of-cached-objects constraints
221851c0b2f7Stbbdev if (totalSize > MAX_TOTAL_SIZE || numOfBlocks >= HIGH_MARK) {
221951c0b2f7Stbbdev // scan from the tail until both constraints are met
222051c0b2f7Stbbdev while (totalSize > MAX_TOTAL_SIZE || numOfBlocks > LOW_MARK) {
222151c0b2f7Stbbdev totalSize -= tail->unalignedSize;
222251c0b2f7Stbbdev numOfBlocks--;
222351c0b2f7Stbbdev tail = tail->prev;
222451c0b2f7Stbbdev }
222551c0b2f7Stbbdev LargeMemoryBlock *headToRelease = tail->next;
222657f524caSIlya Isaev tail->next = nullptr;
222751c0b2f7Stbbdev
222851c0b2f7Stbbdev extMemPool->freeLargeObjectList(headToRelease);
222951c0b2f7Stbbdev }
223051c0b2f7Stbbdev
223151c0b2f7Stbbdev head.store(localHead, std::memory_order_release);
223251c0b2f7Stbbdev return true;
223351c0b2f7Stbbdev }
223451c0b2f7Stbbdev
223551c0b2f7Stbbdev template<int LOW_MARK, int HIGH_MARK>
get(size_t size)223651c0b2f7Stbbdev LargeMemoryBlock *LocalLOCImpl<LOW_MARK, HIGH_MARK>::get(size_t size)
223751c0b2f7Stbbdev {
223857f524caSIlya Isaev LargeMemoryBlock *localHead, *res = nullptr;
223951c0b2f7Stbbdev
224051c0b2f7Stbbdev if (size > MAX_TOTAL_SIZE)
224157f524caSIlya Isaev return nullptr;
224251c0b2f7Stbbdev
224351c0b2f7Stbbdev // TBB_REVAMP_TODO: review this line
224457f524caSIlya Isaev if (!head.load(std::memory_order_acquire) || (localHead = head.exchange(nullptr)) == nullptr) {
224551c0b2f7Stbbdev // do not restore totalSize, numOfBlocks and tail at this point,
224651c0b2f7Stbbdev // as they are used only in put(), where they must be restored
224757f524caSIlya Isaev return nullptr;
224851c0b2f7Stbbdev }
224951c0b2f7Stbbdev
225051c0b2f7Stbbdev for (LargeMemoryBlock *curr = localHead; curr; curr=curr->next) {
225151c0b2f7Stbbdev if (curr->unalignedSize == size) {
225251c0b2f7Stbbdev res = curr;
225351c0b2f7Stbbdev if (curr->next)
225451c0b2f7Stbbdev curr->next->prev = curr->prev;
225551c0b2f7Stbbdev else
225651c0b2f7Stbbdev tail = curr->prev;
225751c0b2f7Stbbdev if (curr != localHead)
225851c0b2f7Stbbdev curr->prev->next = curr->next;
225951c0b2f7Stbbdev else
226051c0b2f7Stbbdev localHead = curr->next;
226151c0b2f7Stbbdev totalSize -= size;
226251c0b2f7Stbbdev numOfBlocks--;
226351c0b2f7Stbbdev break;
226451c0b2f7Stbbdev }
226551c0b2f7Stbbdev }
226651c0b2f7Stbbdev
226751c0b2f7Stbbdev head.store(localHead, std::memory_order_release);
226851c0b2f7Stbbdev return res;
226951c0b2f7Stbbdev }
227051c0b2f7Stbbdev
227151c0b2f7Stbbdev template<int LOW_MARK, int HIGH_MARK>
externalCleanup(ExtMemoryPool * extMemPool)227251c0b2f7Stbbdev bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::externalCleanup(ExtMemoryPool *extMemPool)
227351c0b2f7Stbbdev {
227457f524caSIlya Isaev if (LargeMemoryBlock *localHead = head.exchange(nullptr)) {
227551c0b2f7Stbbdev extMemPool->freeLargeObjectList(localHead);
227651c0b2f7Stbbdev return true;
227751c0b2f7Stbbdev }
227851c0b2f7Stbbdev return false;
227951c0b2f7Stbbdev }
228051c0b2f7Stbbdev
getFromLLOCache(TLSData * tls,size_t size,size_t alignment)228151c0b2f7Stbbdev void *MemoryPool::getFromLLOCache(TLSData* tls, size_t size, size_t alignment)
228251c0b2f7Stbbdev {
228357f524caSIlya Isaev LargeMemoryBlock *lmb = nullptr;
228451c0b2f7Stbbdev
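// Layout of a large-object allocation, from low to high addresses (descriptive note):
//   [ LargeMemoryBlock | padding used for alignment/cache-line shuffling | LargeObjectHdr | user object ]
// The LargeObjectHdr always sits immediately before the aligned user area and points
// back to the owning LargeMemoryBlock.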
228551c0b2f7Stbbdev size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr);
228651c0b2f7Stbbdev size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+alignment);
228751c0b2f7Stbbdev if (allocationSize < size) // allocationSize overflowed (wrapped around) in alignToBin
228857f524caSIlya Isaev return nullptr;
228951c0b2f7Stbbdev MALLOC_ASSERT(allocationSize >= alignment, "Overflow must be checked before.");
229051c0b2f7Stbbdev
229151c0b2f7Stbbdev if (tls) {
229251c0b2f7Stbbdev tls->markUsed();
229351c0b2f7Stbbdev lmb = tls->lloc.get(allocationSize);
229451c0b2f7Stbbdev }
229551c0b2f7Stbbdev if (!lmb)
229651c0b2f7Stbbdev lmb = extMemPool.mallocLargeObject(this, allocationSize);
229751c0b2f7Stbbdev
229851c0b2f7Stbbdev if (lmb) {
229951c0b2f7Stbbdev // when shuffling, we assume that the alignment offset guarantees
230051c0b2f7Stbbdev // that different cache lines are used
230151c0b2f7Stbbdev MALLOC_ASSERT(alignment >= estimatedCacheLineSize, ASSERT_TEXT);
230251c0b2f7Stbbdev
230351c0b2f7Stbbdev void *alignedArea = (void*)alignUp((uintptr_t)lmb+headersSize, alignment);
230451c0b2f7Stbbdev uintptr_t alignedRight =
230551c0b2f7Stbbdev alignDown((uintptr_t)lmb+lmb->unalignedSize - size, alignment);
230651c0b2f7Stbbdev // Is there room to shuffle the object between cache lines?
230751c0b2f7Stbbdev // Note that alignedRight and alignedArea are aligned at alignment.
230851c0b2f7Stbbdev unsigned ptrDelta = alignedRight - (uintptr_t)alignedArea;
230951c0b2f7Stbbdev if (ptrDelta && tls) { // !tls is cold path
231051c0b2f7Stbbdev // for the hot path of alignment==estimatedCacheLineSize,
231151c0b2f7Stbbdev // allow compilers to use shift for division
231251c0b2f7Stbbdev // (since estimatedCacheLineSize is a power-of-2 constant)
231351c0b2f7Stbbdev unsigned numOfPossibleOffsets = alignment == estimatedCacheLineSize?
231451c0b2f7Stbbdev ptrDelta / estimatedCacheLineSize :
231551c0b2f7Stbbdev ptrDelta / alignment;
231651c0b2f7Stbbdev unsigned myCacheIdx = ++tls->currCacheIdx;
231751c0b2f7Stbbdev unsigned offset = myCacheIdx % numOfPossibleOffsets;
231851c0b2f7Stbbdev
231951c0b2f7Stbbdev // Move object to a cache line with an offset that is different from
232051c0b2f7Stbbdev // previous allocation. This supposedly allows us to use cache
232151c0b2f7Stbbdev // associativity more efficiently.
232251c0b2f7Stbbdev alignedArea = (void*)((uintptr_t)alignedArea + offset*alignment);
232351c0b2f7Stbbdev }
232451c0b2f7Stbbdev MALLOC_ASSERT((uintptr_t)lmb+lmb->unalignedSize >=
232551c0b2f7Stbbdev (uintptr_t)alignedArea+size, "Object doesn't fit the block.");
232651c0b2f7Stbbdev LargeObjectHdr *header = (LargeObjectHdr*)alignedArea-1;
232751c0b2f7Stbbdev header->memoryBlock = lmb;
232851c0b2f7Stbbdev header->backRefIdx = lmb->backRefIdx;
232951c0b2f7Stbbdev setBackRef(header->backRefIdx, header);
233051c0b2f7Stbbdev
233151c0b2f7Stbbdev lmb->objectSize = size;
233251c0b2f7Stbbdev
233351c0b2f7Stbbdev MALLOC_ASSERT( isLargeObject<unknownMem>(alignedArea), ASSERT_TEXT );
233451c0b2f7Stbbdev MALLOC_ASSERT( isAligned(alignedArea, alignment), ASSERT_TEXT );
233551c0b2f7Stbbdev
233651c0b2f7Stbbdev return alignedArea;
233751c0b2f7Stbbdev }
233857f524caSIlya Isaev return nullptr;
233951c0b2f7Stbbdev }
234051c0b2f7Stbbdev
putToLLOCache(TLSData * tls,void * object)234151c0b2f7Stbbdev void MemoryPool::putToLLOCache(TLSData *tls, void *object)
234251c0b2f7Stbbdev {
234351c0b2f7Stbbdev LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
234451c0b2f7Stbbdev // overwrite backRefIdx to simplify double free detection
234551c0b2f7Stbbdev header->backRefIdx = BackRefIdx();
234651c0b2f7Stbbdev
234751c0b2f7Stbbdev if (tls) {
234851c0b2f7Stbbdev tls->markUsed();
234951c0b2f7Stbbdev if (tls->lloc.put(header->memoryBlock, &extMemPool))
235051c0b2f7Stbbdev return;
235151c0b2f7Stbbdev }
235251c0b2f7Stbbdev extMemPool.freeLargeObject(header->memoryBlock);
235351c0b2f7Stbbdev }
235451c0b2f7Stbbdev
235551c0b2f7Stbbdev /*
235651c0b2f7Stbbdev * All aligned allocations fall into one of the following categories:
235751c0b2f7Stbbdev * 1. if both request size and alignment are <= maxSegregatedObjectSize,
235851c0b2f7Stbbdev * we just align the size up, and request this amount, because for every size
235951c0b2f7Stbbdev * aligned to some power of 2, the allocated object is at least that aligned.
236051c0b2f7Stbbdev * 2. for size<minLargeObjectSize, check whether the already guaranteed fittingAlignment is enough.
236151c0b2f7Stbbdev * 3. if size+alignment<minLargeObjectSize, we take an object of fittingSizeN and align
236251c0b2f7Stbbdev * its address up; given such a pointer, scalable_free can find the real object.
236351c0b2f7Stbbdev * Wrapping of size+alignment is impossible because the maximal allowed
236451c0b2f7Stbbdev * alignment plus minLargeObjectSize can't lead to wrapping.
236551c0b2f7Stbbdev * 4. otherwise, an aligned large object is allocated.
236651c0b2f7Stbbdev */
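/* A couple of illustrative cases, mirroring the branches below (the actual thresholds
   are whatever maxSegregatedObjectSize and minLargeObjectSize are configured to
   elsewhere in this allocator):
     - size and alignment both <= maxSegregatedObjectSize (category 1):
         internalPoolMalloc(memPool, alignUp(size, alignment));
     - size < minLargeObjectSize, alignment > fittingAlignment,
       size+alignment < minLargeObjectSize (category 3):
         alignUp(internalPoolMalloc(memPool, size+alignment), alignment);
   Requests already satisfied by fittingAlignment (category 2) are plain
   internalPoolMalloc(memPool, size) calls; everything else ends up in
   getFromLLOCache() as an aligned large object (category 4). */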
allocateAligned(MemoryPool * memPool,size_t size,size_t alignment)236751c0b2f7Stbbdev static void *allocateAligned(MemoryPool *memPool, size_t size, size_t alignment)
236851c0b2f7Stbbdev {
236951c0b2f7Stbbdev MALLOC_ASSERT( isPowerOfTwo(alignment), ASSERT_TEXT );
237051c0b2f7Stbbdev
237151c0b2f7Stbbdev if (!isMallocInitialized())
237251c0b2f7Stbbdev if (!doInitialization())
237357f524caSIlya Isaev return nullptr;
237451c0b2f7Stbbdev
237551c0b2f7Stbbdev void *result;
237651c0b2f7Stbbdev if (size<=maxSegregatedObjectSize && alignment<=maxSegregatedObjectSize)
237751c0b2f7Stbbdev result = internalPoolMalloc(memPool, alignUp(size? size: sizeof(size_t), alignment));
237851c0b2f7Stbbdev else if (size<minLargeObjectSize) {
237951c0b2f7Stbbdev if (alignment<=fittingAlignment)
238051c0b2f7Stbbdev result = internalPoolMalloc(memPool, size);
238151c0b2f7Stbbdev else if (size+alignment < minLargeObjectSize) {
238251c0b2f7Stbbdev void *unaligned = internalPoolMalloc(memPool, size+alignment);
238357f524caSIlya Isaev if (!unaligned) return nullptr;
238451c0b2f7Stbbdev result = alignUp(unaligned, alignment);
238551c0b2f7Stbbdev } else
238651c0b2f7Stbbdev goto LargeObjAlloc;
238751c0b2f7Stbbdev } else {
238851c0b2f7Stbbdev LargeObjAlloc:
238951c0b2f7Stbbdev TLSData *tls = memPool->getTLS(/*create=*/true);
239051c0b2f7Stbbdev // take into account only alignments that are higher than the natural one
239151c0b2f7Stbbdev result =
239251c0b2f7Stbbdev memPool->getFromLLOCache(tls, size, largeObjectAlignment>alignment?
239351c0b2f7Stbbdev largeObjectAlignment: alignment);
239451c0b2f7Stbbdev }
239551c0b2f7Stbbdev
239651c0b2f7Stbbdev MALLOC_ASSERT( isAligned(result, alignment), ASSERT_TEXT );
239751c0b2f7Stbbdev return result;
239851c0b2f7Stbbdev }
239951c0b2f7Stbbdev
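/*
 * reallocAligned strategy, as implemented below: shrinking requests are served in place
 * when the existing space and alignment already fit (huge objects that do not fit the
 * backend cache only shrink in place while the new size stays above half of the current
 * space); with BACKEND_HAS_MREMAP a remap of the large object is attempted next;
 * otherwise a new object is allocated, the payload is copied, and the old object is freed.
 */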
reallocAligned(MemoryPool * memPool,void * ptr,size_t newSize,size_t alignment=0)240051c0b2f7Stbbdev static void *reallocAligned(MemoryPool *memPool, void *ptr,
240151c0b2f7Stbbdev size_t newSize, size_t alignment = 0)
240251c0b2f7Stbbdev {
240351c0b2f7Stbbdev void *result;
240451c0b2f7Stbbdev size_t copySize;
240551c0b2f7Stbbdev
240651c0b2f7Stbbdev if (isLargeObject<ourMem>(ptr)) {
240751c0b2f7Stbbdev LargeMemoryBlock* lmb = ((LargeObjectHdr *)ptr - 1)->memoryBlock;
240851c0b2f7Stbbdev copySize = lmb->unalignedSize-((uintptr_t)ptr-(uintptr_t)lmb);
240951c0b2f7Stbbdev
241051c0b2f7Stbbdev // Apply different strategies if size decreases
241151c0b2f7Stbbdev if (newSize <= copySize && (0 == alignment || isAligned(ptr, alignment))) {
241251c0b2f7Stbbdev
241351c0b2f7Stbbdev // For huge objects (that do not fit in the backend cache), keep the same space unless
241451c0b2f7Stbbdev // the new size is at most half of the current one
241551c0b2f7Stbbdev bool isMemoryBlockHuge = copySize > memPool->extMemPool.backend.getMaxBinnedSize();
241651c0b2f7Stbbdev size_t threshold = isMemoryBlockHuge ? copySize / 2 : 0;
241751c0b2f7Stbbdev if (newSize > threshold) {
241851c0b2f7Stbbdev lmb->objectSize = newSize;
241951c0b2f7Stbbdev return ptr;
242051c0b2f7Stbbdev }
242151c0b2f7Stbbdev // TODO: For large objects suitable for the backend cache,
242251c0b2f7Stbbdev // split out the excessive part and put it to the backend.
242351c0b2f7Stbbdev }
242451c0b2f7Stbbdev // Reallocate for real
242551c0b2f7Stbbdev copySize = lmb->objectSize;
242651c0b2f7Stbbdev #if BACKEND_HAS_MREMAP
242751c0b2f7Stbbdev if (void *r = memPool->extMemPool.remap(ptr, copySize, newSize,
242851c0b2f7Stbbdev alignment < largeObjectAlignment ? largeObjectAlignment : alignment))
242951c0b2f7Stbbdev return r;
243051c0b2f7Stbbdev #endif
243151c0b2f7Stbbdev result = alignment ? allocateAligned(memPool, newSize, alignment) :
243251c0b2f7Stbbdev internalPoolMalloc(memPool, newSize);
243351c0b2f7Stbbdev
243451c0b2f7Stbbdev } else {
243551c0b2f7Stbbdev Block* block = (Block *)alignDown(ptr, slabSize);
243651c0b2f7Stbbdev copySize = block->findObjectSize(ptr);
243751c0b2f7Stbbdev
243851c0b2f7Stbbdev // TODO: Move object to another bin if size decreases and the current bin is "empty enough".
243951c0b2f7Stbbdev // Currently, if the size decreases, the old pointer is returned
244051c0b2f7Stbbdev if (newSize <= copySize && (0==alignment || isAligned(ptr, alignment))) {
244151c0b2f7Stbbdev return ptr;
244251c0b2f7Stbbdev } else {
244351c0b2f7Stbbdev result = alignment ? allocateAligned(memPool, newSize, alignment) :
244451c0b2f7Stbbdev internalPoolMalloc(memPool, newSize);
244551c0b2f7Stbbdev }
244651c0b2f7Stbbdev }
244751c0b2f7Stbbdev if (result) {
244851c0b2f7Stbbdev memcpy(result, ptr, copySize < newSize ? copySize : newSize);
244951c0b2f7Stbbdev internalPoolFree(memPool, ptr, 0);
245051c0b2f7Stbbdev }
245151c0b2f7Stbbdev return result;
245251c0b2f7Stbbdev }
245351c0b2f7Stbbdev
2454478de5b1Stbbdev #if MALLOC_DEBUG
245551c0b2f7Stbbdev /* A predicate checks if an object is properly placed inside its block */
isProperlyPlaced(const void * object) const245651c0b2f7Stbbdev inline bool Block::isProperlyPlaced(const void *object) const
245751c0b2f7Stbbdev {
245851c0b2f7Stbbdev return 0 == ((uintptr_t)this + slabSize - (uintptr_t)object) % objectSize;
245951c0b2f7Stbbdev }
2460478de5b1Stbbdev #endif
246151c0b2f7Stbbdev
246251c0b2f7Stbbdev /* Finds the real object inside the block */
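/* Worked example with hypothetical numbers: if objectSize == 64 and the given address
   is 100 bytes below the slab end, then offset == 100 and offset % 64 == 36, so the
   object start is address - (64 - 36) == address - 28, i.e. exactly 128 bytes
   (a multiple of objectSize) below the slab end. */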
findAllocatedObject(const void * address) const246351c0b2f7Stbbdev FreeObject *Block::findAllocatedObject(const void *address) const
246451c0b2f7Stbbdev {
246551c0b2f7Stbbdev // calculate offset from the end of the block space
246651c0b2f7Stbbdev uint16_t offset = (uintptr_t)this + slabSize - (uintptr_t)address;
246751c0b2f7Stbbdev MALLOC_ASSERT( offset<=slabSize-sizeof(Block), ASSERT_TEXT );
246851c0b2f7Stbbdev // find offset difference from a multiple of allocation size
246951c0b2f7Stbbdev offset %= objectSize;
247051c0b2f7Stbbdev // and move the address down to where the real object starts.
247151c0b2f7Stbbdev return (FreeObject*)((uintptr_t)address - (offset? objectSize-offset: 0));
247251c0b2f7Stbbdev }
247351c0b2f7Stbbdev
247451c0b2f7Stbbdev /*
247551c0b2f7Stbbdev * A bad dereference caused by a foreign pointer is possible only here, not earlier in the call chain.
247651c0b2f7Stbbdev * A separate function isolates the SEH code, as it hinders compiler optimization.
247751c0b2f7Stbbdev */
safer_dereference(const BackRefIdx * ptr)247851c0b2f7Stbbdev static inline BackRefIdx safer_dereference (const BackRefIdx *ptr)
247951c0b2f7Stbbdev {
248051c0b2f7Stbbdev BackRefIdx id;
248151c0b2f7Stbbdev #if _MSC_VER
248251c0b2f7Stbbdev __try {
248351c0b2f7Stbbdev #endif
2484478de5b1Stbbdev id = dereference(ptr);
248551c0b2f7Stbbdev #if _MSC_VER
248651c0b2f7Stbbdev } __except( GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION?
248751c0b2f7Stbbdev EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ) {
248851c0b2f7Stbbdev id = BackRefIdx();
248951c0b2f7Stbbdev }
249051c0b2f7Stbbdev #endif
249151c0b2f7Stbbdev return id;
249251c0b2f7Stbbdev }
249351c0b2f7Stbbdev
249451c0b2f7Stbbdev template<MemoryOrigin memOrigin>
isLargeObject(void * object)249551c0b2f7Stbbdev bool isLargeObject(void *object)
249651c0b2f7Stbbdev {
249751c0b2f7Stbbdev if (!isAligned(object, largeObjectAlignment))
249851c0b2f7Stbbdev return false;
249951c0b2f7Stbbdev LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
250051c0b2f7Stbbdev BackRefIdx idx = (memOrigin == unknownMem) ?
2501478de5b1Stbbdev safer_dereference(&header->backRefIdx) : dereference(&header->backRefIdx);
250251c0b2f7Stbbdev
250351c0b2f7Stbbdev return idx.isLargeObject()
250457f524caSIlya Isaev // in a valid LargeObjectHdr, memoryBlock is not nullptr
250551c0b2f7Stbbdev && header->memoryBlock
250651c0b2f7Stbbdev // in a valid LargeObjectHdr, memoryBlock points somewhere before the header
250751c0b2f7Stbbdev // TODO: more strict check
250851c0b2f7Stbbdev && (uintptr_t)header->memoryBlock < (uintptr_t)header
250951c0b2f7Stbbdev && getBackRef(idx) == header;
251051c0b2f7Stbbdev }
251151c0b2f7Stbbdev
isSmallObject(void * ptr)251251c0b2f7Stbbdev static inline bool isSmallObject (void *ptr)
251351c0b2f7Stbbdev {
251451c0b2f7Stbbdev Block* expectedBlock = (Block*)alignDown(ptr, slabSize);
251551c0b2f7Stbbdev const BackRefIdx* idx = expectedBlock->getBackRefIdx();
251651c0b2f7Stbbdev
251751c0b2f7Stbbdev bool isSmall = expectedBlock == getBackRef(safer_dereference(idx));
251851c0b2f7Stbbdev if (isSmall)
251951c0b2f7Stbbdev expectedBlock->checkFreePrecond(ptr);
252051c0b2f7Stbbdev return isSmall;
252151c0b2f7Stbbdev }
252251c0b2f7Stbbdev
252351c0b2f7Stbbdev /**** Check if an object was allocated by scalable_malloc ****/
isRecognized(void * ptr)252451c0b2f7Stbbdev static inline bool isRecognized (void* ptr)
252551c0b2f7Stbbdev {
252651c0b2f7Stbbdev return defaultMemPool->extMemPool.backend.ptrCanBeValid(ptr) &&
252751c0b2f7Stbbdev (isLargeObject<unknownMem>(ptr) || isSmallObject(ptr));
252851c0b2f7Stbbdev }
252951c0b2f7Stbbdev
freeSmallObject(void * object)253051c0b2f7Stbbdev static inline void freeSmallObject(void *object)
253151c0b2f7Stbbdev {
253251c0b2f7Stbbdev /* mask low bits to get the block */
253351c0b2f7Stbbdev Block *block = (Block *)alignDown(object, slabSize);
253451c0b2f7Stbbdev block->checkFreePrecond(object);
253551c0b2f7Stbbdev
253651c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION
253751c0b2f7Stbbdev if (block->isStartupAllocObject()) {
253851c0b2f7Stbbdev ((StartupBlock *)block)->free(object);
253951c0b2f7Stbbdev return;
254051c0b2f7Stbbdev }
254151c0b2f7Stbbdev #endif
254251c0b2f7Stbbdev if (block->isOwnedByCurrentThread()) {
254351c0b2f7Stbbdev block->freeOwnObject(object);
254451c0b2f7Stbbdev } else { /* Slower path: add to the shared list; allocatedCount is updated by the owner thread in malloc. */
254551c0b2f7Stbbdev FreeObject *objectToFree = block->findObjectToFree(object);
254651c0b2f7Stbbdev block->freePublicObject(objectToFree);
254751c0b2f7Stbbdev }
254851c0b2f7Stbbdev }
254951c0b2f7Stbbdev
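/*
 * Small-object allocation below tries the following sources in order:
 *   1) the active block of the thread-local bin (free list first, then bump pointer),
 *   2) a block of that bin whose publicly freed objects can be privatized,
 *   3) a partial block discarded by another thread (orphaned blocks),
 *   4) a fresh empty block from the memory pool.
 * Requests of minLargeObjectSize and above bypass the bins and go to the
 * large-object cache instead.
 */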
internalPoolMalloc(MemoryPool * memPool,size_t size)255051c0b2f7Stbbdev static void *internalPoolMalloc(MemoryPool* memPool, size_t size)
255151c0b2f7Stbbdev {
255251c0b2f7Stbbdev Bin* bin;
255351c0b2f7Stbbdev Block * mallocBlock;
255451c0b2f7Stbbdev
255557f524caSIlya Isaev if (!memPool) return nullptr;
255651c0b2f7Stbbdev
255751c0b2f7Stbbdev if (!size) size = sizeof(size_t);
255851c0b2f7Stbbdev
255951c0b2f7Stbbdev TLSData *tls = memPool->getTLS(/*create=*/true);
256051c0b2f7Stbbdev
256151c0b2f7Stbbdev /* Allocate a large object */
256251c0b2f7Stbbdev if (size >= minLargeObjectSize)
256351c0b2f7Stbbdev return memPool->getFromLLOCache(tls, size, largeObjectAlignment);
256451c0b2f7Stbbdev
256557f524caSIlya Isaev if (!tls) return nullptr;
256651c0b2f7Stbbdev
256751c0b2f7Stbbdev tls->markUsed();
256851c0b2f7Stbbdev /*
256951c0b2f7Stbbdev * Get an element in thread-local array corresponding to the given size;
257051c0b2f7Stbbdev * It keeps ptr to the active block for allocations of this size
257151c0b2f7Stbbdev */
257251c0b2f7Stbbdev bin = tls->getAllocationBin(size);
257357f524caSIlya Isaev if ( !bin ) return nullptr;
257451c0b2f7Stbbdev
257551c0b2f7Stbbdev /* Get a block to try to allocate in. */
257651c0b2f7Stbbdev for( mallocBlock = bin->getActiveBlock(); mallocBlock;
257751c0b2f7Stbbdev mallocBlock = bin->setPreviousBlockActive() ) // the previous block should be empty enough
257851c0b2f7Stbbdev {
257951c0b2f7Stbbdev if( FreeObject *result = mallocBlock->allocate() )
258051c0b2f7Stbbdev return result;
258151c0b2f7Stbbdev }
258251c0b2f7Stbbdev
258351c0b2f7Stbbdev /*
258451c0b2f7Stbbdev * else privatize publicly freed objects in some block and allocate from it
258551c0b2f7Stbbdev */
258651c0b2f7Stbbdev mallocBlock = bin->getPrivatizedFreeListBlock();
258751c0b2f7Stbbdev if (mallocBlock) {
258851c0b2f7Stbbdev MALLOC_ASSERT( mallocBlock->freeListNonNull(), ASSERT_TEXT );
258951c0b2f7Stbbdev if ( FreeObject *result = mallocBlock->allocateFromFreeList() )
259051c0b2f7Stbbdev return result;
259151c0b2f7Stbbdev /* Else something strange happened, need to retry from the beginning; */
259251c0b2f7Stbbdev TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in public free list; reentering.\n" ));
259351c0b2f7Stbbdev return internalPoolMalloc(memPool, size);
259451c0b2f7Stbbdev }
259551c0b2f7Stbbdev
259651c0b2f7Stbbdev /*
259751c0b2f7Stbbdev * no suitable own blocks, try to get a partial block that some other thread has discarded.
259851c0b2f7Stbbdev */
259951c0b2f7Stbbdev mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size);
260051c0b2f7Stbbdev while (mallocBlock) {
260151c0b2f7Stbbdev bin->pushTLSBin(mallocBlock);
260251c0b2f7Stbbdev bin->setActiveBlock(mallocBlock); // TODO: move under the below condition?
260351c0b2f7Stbbdev if( FreeObject *result = mallocBlock->allocate() )
260451c0b2f7Stbbdev return result;
260551c0b2f7Stbbdev mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size);
260651c0b2f7Stbbdev }
260751c0b2f7Stbbdev
260851c0b2f7Stbbdev /*
260951c0b2f7Stbbdev * else try to get a new empty block
261051c0b2f7Stbbdev */
261151c0b2f7Stbbdev mallocBlock = memPool->getEmptyBlock(size);
261251c0b2f7Stbbdev if (mallocBlock) {
261351c0b2f7Stbbdev bin->pushTLSBin(mallocBlock);
261451c0b2f7Stbbdev bin->setActiveBlock(mallocBlock);
261551c0b2f7Stbbdev if( FreeObject *result = mallocBlock->allocate() )
261651c0b2f7Stbbdev return result;
261751c0b2f7Stbbdev /* Else something strange happened, need to retry from the beginning; */
261851c0b2f7Stbbdev TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in empty block; reentering.\n" ));
261951c0b2f7Stbbdev return internalPoolMalloc(memPool, size);
262051c0b2f7Stbbdev }
262151c0b2f7Stbbdev /*
262257f524caSIlya Isaev * else nothing works so return nullptr
262351c0b2f7Stbbdev */
262457f524caSIlya Isaev TRACEF(( "[ScalableMalloc trace] No memory found, returning nullptr.\n" ));
262557f524caSIlya Isaev return nullptr;
262651c0b2f7Stbbdev }
262751c0b2f7Stbbdev
262851c0b2f7Stbbdev // When size==0 (i.e. unknown), detect here whether the object is large.
262951c0b2f7Stbbdev // When the size is known and < minLargeObjectSize, we still need to check
263051c0b2f7Stbbdev // if the actual object is large, because large objects might be used
263151c0b2f7Stbbdev // for aligned small allocations.
internalPoolFree(MemoryPool * memPool,void * object,size_t size)263251c0b2f7Stbbdev static bool internalPoolFree(MemoryPool *memPool, void *object, size_t size)
263351c0b2f7Stbbdev {
263451c0b2f7Stbbdev if (!memPool || !object) return false;
263551c0b2f7Stbbdev
263651c0b2f7Stbbdev // The library is initialized at an allocation call, so releasing while it is
263751c0b2f7Stbbdev // not initialized means a foreign object is being released.
263851c0b2f7Stbbdev MALLOC_ASSERT(isMallocInitialized(), ASSERT_TEXT);
263951c0b2f7Stbbdev MALLOC_ASSERT(memPool->extMemPool.userPool() || isRecognized(object),
264051c0b2f7Stbbdev "Invalid pointer during object releasing is detected.");
264151c0b2f7Stbbdev
264251c0b2f7Stbbdev if (size >= minLargeObjectSize || isLargeObject<ourMem>(object))
264351c0b2f7Stbbdev memPool->putToLLOCache(memPool->getTLS(/*create=*/false), object);
264451c0b2f7Stbbdev else
264551c0b2f7Stbbdev freeSmallObject(object);
264651c0b2f7Stbbdev return true;
264751c0b2f7Stbbdev }
264851c0b2f7Stbbdev
internalMalloc(size_t size)264951c0b2f7Stbbdev static void *internalMalloc(size_t size)
265051c0b2f7Stbbdev {
265151c0b2f7Stbbdev if (!size) size = sizeof(size_t);
265251c0b2f7Stbbdev
265351c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION
265451c0b2f7Stbbdev if (RecursiveMallocCallProtector::sameThreadActive())
265551c0b2f7Stbbdev return size<minLargeObjectSize? StartupBlock::allocate(size) :
265651c0b2f7Stbbdev // nested allocation, so skip tls
265757f524caSIlya Isaev (FreeObject*)defaultMemPool->getFromLLOCache(nullptr, size, slabSize);
265851c0b2f7Stbbdev #endif
265951c0b2f7Stbbdev
266051c0b2f7Stbbdev if (!isMallocInitialized())
266151c0b2f7Stbbdev if (!doInitialization())
266257f524caSIlya Isaev return nullptr;
266351c0b2f7Stbbdev return internalPoolMalloc(defaultMemPool, size);
266451c0b2f7Stbbdev }
266551c0b2f7Stbbdev
internalFree(void * object)266651c0b2f7Stbbdev static void internalFree(void *object)
266751c0b2f7Stbbdev {
266851c0b2f7Stbbdev internalPoolFree(defaultMemPool, object, 0);
266951c0b2f7Stbbdev }
267051c0b2f7Stbbdev
internalMsize(void * ptr)267151c0b2f7Stbbdev static size_t internalMsize(void* ptr)
267251c0b2f7Stbbdev {
267351c0b2f7Stbbdev MALLOC_ASSERT(ptr, "Invalid pointer passed to internalMsize");
267451c0b2f7Stbbdev if (isLargeObject<ourMem>(ptr)) {
267551c0b2f7Stbbdev // TODO: return the maximum memory size, that can be written to this object
267651c0b2f7Stbbdev LargeMemoryBlock* lmb = ((LargeObjectHdr*)ptr - 1)->memoryBlock;
267751c0b2f7Stbbdev return lmb->objectSize;
267851c0b2f7Stbbdev } else {
267951c0b2f7Stbbdev Block *block = (Block*)alignDown(ptr, slabSize);
268051c0b2f7Stbbdev return block->findObjectSize(ptr);
268151c0b2f7Stbbdev }
268251c0b2f7Stbbdev }
268351c0b2f7Stbbdev
268451c0b2f7Stbbdev } // namespace internal
268551c0b2f7Stbbdev
268651c0b2f7Stbbdev using namespace rml::internal;
268751c0b2f7Stbbdev
268851c0b2f7Stbbdev // legacy entry point saved for compatibility with binaries compiled
268951c0b2f7Stbbdev // with pre-6003 versions of TBB
pool_create(intptr_t pool_id,const MemPoolPolicy * policy)26908827ea7dSLong Nguyen TBBMALLOC_EXPORT rml::MemoryPool *pool_create(intptr_t pool_id, const MemPoolPolicy *policy)
269151c0b2f7Stbbdev {
269251c0b2f7Stbbdev rml::MemoryPool *pool;
269351c0b2f7Stbbdev MemPoolPolicy pol(policy->pAlloc, policy->pFree, policy->granularity);
269451c0b2f7Stbbdev
269551c0b2f7Stbbdev pool_create_v1(pool_id, &pol, &pool);
269651c0b2f7Stbbdev return pool;
269751c0b2f7Stbbdev }
269851c0b2f7Stbbdev
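/* Typical usage sketch (illustrative only; rawAlloc and rawFree stand for
   user-provided callbacks matching rml::MemPoolPolicy):
     MemPoolPolicy policy(rawAlloc, rawFree, 0);
     rml::MemoryPool *pool;
     if (pool_create_v1(0, &policy, &pool) == POOL_OK) {
         void *p = pool_malloc(pool, 64);
         pool_free(pool, p);
         pool_destroy(pool);
     }
*/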
pool_create_v1(intptr_t pool_id,const MemPoolPolicy * policy,rml::MemoryPool ** pool)269951c0b2f7Stbbdev rml::MemPoolError pool_create_v1(intptr_t pool_id, const MemPoolPolicy *policy,
270051c0b2f7Stbbdev rml::MemoryPool **pool)
270151c0b2f7Stbbdev {
270251c0b2f7Stbbdev if ( !policy->pAlloc || policy->version<MemPoolPolicy::TBBMALLOC_POOL_VERSION
270351c0b2f7Stbbdev // empty pFree allowed only for fixed pools
270451c0b2f7Stbbdev || !(policy->fixedPool || policy->pFree)) {
270557f524caSIlya Isaev *pool = nullptr;
270651c0b2f7Stbbdev return INVALID_POLICY;
270751c0b2f7Stbbdev }
270851c0b2f7Stbbdev if ( policy->version>MemPoolPolicy::TBBMALLOC_POOL_VERSION // future versions are not supported
270951c0b2f7Stbbdev // new flags can be added in place of reserved, but default
271051c0b2f7Stbbdev // behaviour must be supported by this version
271151c0b2f7Stbbdev || policy->reserved ) {
271257f524caSIlya Isaev *pool = nullptr;
271351c0b2f7Stbbdev return UNSUPPORTED_POLICY;
271451c0b2f7Stbbdev }
271551c0b2f7Stbbdev if (!isMallocInitialized())
271651c0b2f7Stbbdev if (!doInitialization()) {
271757f524caSIlya Isaev *pool = nullptr;
271851c0b2f7Stbbdev return NO_MEMORY;
271951c0b2f7Stbbdev }
272051c0b2f7Stbbdev rml::internal::MemoryPool *memPool =
272151c0b2f7Stbbdev (rml::internal::MemoryPool*)internalMalloc((sizeof(rml::internal::MemoryPool)));
272251c0b2f7Stbbdev if (!memPool) {
272357f524caSIlya Isaev *pool = nullptr;
272451c0b2f7Stbbdev return NO_MEMORY;
272551c0b2f7Stbbdev }
27262110128eSsarathnandu memset(static_cast<void*>(memPool), 0, sizeof(rml::internal::MemoryPool));
272751c0b2f7Stbbdev if (!memPool->init(pool_id, policy)) {
272851c0b2f7Stbbdev internalFree(memPool);
272957f524caSIlya Isaev *pool = nullptr;
273051c0b2f7Stbbdev return NO_MEMORY;
273151c0b2f7Stbbdev }
273251c0b2f7Stbbdev
273351c0b2f7Stbbdev *pool = (rml::MemoryPool*)memPool;
273451c0b2f7Stbbdev return POOL_OK;
273551c0b2f7Stbbdev }
273651c0b2f7Stbbdev
pool_destroy(rml::MemoryPool * memPool)273751c0b2f7Stbbdev bool pool_destroy(rml::MemoryPool* memPool)
273851c0b2f7Stbbdev {
273951c0b2f7Stbbdev if (!memPool) return false;
274051c0b2f7Stbbdev bool ret = ((rml::internal::MemoryPool*)memPool)->destroy();
274151c0b2f7Stbbdev internalFree(memPool);
274251c0b2f7Stbbdev
274351c0b2f7Stbbdev return ret;
274451c0b2f7Stbbdev }
274551c0b2f7Stbbdev
pool_reset(rml::MemoryPool * memPool)274651c0b2f7Stbbdev bool pool_reset(rml::MemoryPool* memPool)
274751c0b2f7Stbbdev {
274851c0b2f7Stbbdev if (!memPool) return false;
274951c0b2f7Stbbdev
275051c0b2f7Stbbdev return ((rml::internal::MemoryPool*)memPool)->reset();
275151c0b2f7Stbbdev }
275251c0b2f7Stbbdev
pool_malloc(rml::MemoryPool * mPool,size_t size)275351c0b2f7Stbbdev void *pool_malloc(rml::MemoryPool* mPool, size_t size)
275451c0b2f7Stbbdev {
275551c0b2f7Stbbdev return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size);
275651c0b2f7Stbbdev }
275751c0b2f7Stbbdev
pool_realloc(rml::MemoryPool * mPool,void * object,size_t size)275851c0b2f7Stbbdev void *pool_realloc(rml::MemoryPool* mPool, void *object, size_t size)
275951c0b2f7Stbbdev {
276051c0b2f7Stbbdev if (!object)
276151c0b2f7Stbbdev return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size);
276251c0b2f7Stbbdev if (!size) {
276351c0b2f7Stbbdev internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0);
276457f524caSIlya Isaev return nullptr;
276551c0b2f7Stbbdev }
276651c0b2f7Stbbdev return reallocAligned((rml::internal::MemoryPool*)mPool, object, size, 0);
276751c0b2f7Stbbdev }
276851c0b2f7Stbbdev
pool_aligned_malloc(rml::MemoryPool * mPool,size_t size,size_t alignment)276951c0b2f7Stbbdev void *pool_aligned_malloc(rml::MemoryPool* mPool, size_t size, size_t alignment)
277051c0b2f7Stbbdev {
277151c0b2f7Stbbdev if (!isPowerOfTwo(alignment) || 0==size)
277257f524caSIlya Isaev return nullptr;
277351c0b2f7Stbbdev
277451c0b2f7Stbbdev return allocateAligned((rml::internal::MemoryPool*)mPool, size, alignment);
277551c0b2f7Stbbdev }
277651c0b2f7Stbbdev
pool_aligned_realloc(rml::MemoryPool * memPool,void * ptr,size_t size,size_t alignment)277751c0b2f7Stbbdev void *pool_aligned_realloc(rml::MemoryPool* memPool, void *ptr, size_t size, size_t alignment)
277851c0b2f7Stbbdev {
277951c0b2f7Stbbdev if (!isPowerOfTwo(alignment))
278057f524caSIlya Isaev return nullptr;
278151c0b2f7Stbbdev rml::internal::MemoryPool *mPool = (rml::internal::MemoryPool*)memPool;
278251c0b2f7Stbbdev void *tmp;
278351c0b2f7Stbbdev
278451c0b2f7Stbbdev if (!ptr)
278551c0b2f7Stbbdev tmp = allocateAligned(mPool, size, alignment);
278651c0b2f7Stbbdev else if (!size) {
278751c0b2f7Stbbdev internalPoolFree(mPool, ptr, 0);
278857f524caSIlya Isaev return nullptr;
278951c0b2f7Stbbdev } else
279051c0b2f7Stbbdev tmp = reallocAligned(mPool, ptr, size, alignment);
279151c0b2f7Stbbdev
279251c0b2f7Stbbdev return tmp;
279351c0b2f7Stbbdev }
279451c0b2f7Stbbdev
pool_free(rml::MemoryPool * mPool,void * object)279551c0b2f7Stbbdev bool pool_free(rml::MemoryPool *mPool, void *object)
279651c0b2f7Stbbdev {
279751c0b2f7Stbbdev return internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0);
279851c0b2f7Stbbdev }
279951c0b2f7Stbbdev
pool_identify(void * object)280051c0b2f7Stbbdev rml::MemoryPool *pool_identify(void *object)
280151c0b2f7Stbbdev {
280251c0b2f7Stbbdev rml::internal::MemoryPool *pool;
280351c0b2f7Stbbdev if (isLargeObject<ourMem>(object)) {
280451c0b2f7Stbbdev LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
280551c0b2f7Stbbdev pool = header->memoryBlock->pool;
280651c0b2f7Stbbdev } else {
280751c0b2f7Stbbdev Block *block = (Block*)alignDown(object, slabSize);
280851c0b2f7Stbbdev pool = block->getMemPool();
280951c0b2f7Stbbdev }
281051c0b2f7Stbbdev // do not return defaultMemPool, as it can't be used in pool_free() etc
281151c0b2f7Stbbdev __TBB_ASSERT_RELEASE(pool!=defaultMemPool,
281251c0b2f7Stbbdev "rml::pool_identify() can't be used for scalable_malloc() etc results.");
281351c0b2f7Stbbdev return (rml::MemoryPool*)pool;
281451c0b2f7Stbbdev }
281551c0b2f7Stbbdev
pool_msize(rml::MemoryPool * mPool,void * object)281651c0b2f7Stbbdev size_t pool_msize(rml::MemoryPool *mPool, void* object)
281751c0b2f7Stbbdev {
281851c0b2f7Stbbdev if (object) {
281951c0b2f7Stbbdev // No assert for object recognition, because objects allocated from a non-default
282051c0b2f7Stbbdev // memory pool do not participate in range checking and do not have valid backreferences for
282151c0b2f7Stbbdev // small objects. Instead, check that the object belongs to the specified memory pool.
282251c0b2f7Stbbdev MALLOC_ASSERT_EX(mPool == pool_identify(object), "Object does not belong to the specified pool");
282351c0b2f7Stbbdev return internalMsize(object);
282451c0b2f7Stbbdev }
282551c0b2f7Stbbdev errno = EINVAL;
282651c0b2f7Stbbdev // Unlike _msize, return 0 in case of parameter error.
282751c0b2f7Stbbdev // Returning size_t(-1) would be more likely to cause trouble.
282851c0b2f7Stbbdev return 0;
282951c0b2f7Stbbdev }
283051c0b2f7Stbbdev
283151c0b2f7Stbbdev } // namespace rml
283251c0b2f7Stbbdev
283351c0b2f7Stbbdev using namespace rml::internal;
283451c0b2f7Stbbdev
283551c0b2f7Stbbdev #if MALLOC_TRACE
283651c0b2f7Stbbdev static unsigned int threadGoingDownCount = 0;
283751c0b2f7Stbbdev #endif
283851c0b2f7Stbbdev
283951c0b2f7Stbbdev /*
284051c0b2f7Stbbdev * When a thread is shutting down, this routine should be called to remove all the thread ids
284157f524caSIlya Isaev * from the malloc blocks and replace them with a nullptr thread id.
284251c0b2f7Stbbdev *
284351c0b2f7Stbbdev * For pthreads, the function is set as a callback in pthread_key_create for TLS bin.
284451c0b2f7Stbbdev * It will be automatically called at thread exit with the key value as the argument,
284557f524caSIlya Isaev * unless that value is nullptr.
284651c0b2f7Stbbdev * For Windows, it is called from DllMain( DLL_THREAD_DETACH ).
284751c0b2f7Stbbdev *
284851c0b2f7Stbbdev * However, neither of the above is called for the main process thread, so the routine
284951c0b2f7Stbbdev * also needs to be called during the process shutdown.
285051c0b2f7Stbbdev *
285151c0b2f7Stbbdev */
285251c0b2f7Stbbdev // TODO: Consider making this function part of class MemoryPool.
doThreadShutdownNotification(TLSData * tls,bool main_thread)285351c0b2f7Stbbdev void doThreadShutdownNotification(TLSData* tls, bool main_thread)
285451c0b2f7Stbbdev {
285551c0b2f7Stbbdev TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return start %d\n",
285651c0b2f7Stbbdev getThreadId(), threadGoingDownCount++ ));
285751c0b2f7Stbbdev
285851c0b2f7Stbbdev #if USE_PTHREAD
285951c0b2f7Stbbdev if (tls) {
286051c0b2f7Stbbdev if (!shutdownSync.threadDtorStart()) return;
286151c0b2f7Stbbdev tls->getMemPool()->onThreadShutdown(tls);
286251c0b2f7Stbbdev shutdownSync.threadDtorDone();
286351c0b2f7Stbbdev } else
286451c0b2f7Stbbdev #endif
286551c0b2f7Stbbdev {
286651c0b2f7Stbbdev suppress_unused_warning(tls); // not used on Windows
286751c0b2f7Stbbdev // The default pool is safe to use at this point:
286851c0b2f7Stbbdev // on Linux, only the main thread can go here before destroying defaultMemPool;
286951c0b2f7Stbbdev // on Windows, shutdown is synchronized via loader lock and isMallocInitialized().
287051c0b2f7Stbbdev // See also __TBB_mallocProcessShutdownNotification()
287151c0b2f7Stbbdev defaultMemPool->onThreadShutdown(defaultMemPool->getTLS(/*create=*/false));
287251c0b2f7Stbbdev // Take lock to walk through other pools; but waiting might be dangerous at this point
287351c0b2f7Stbbdev // (e.g. on Windows the main thread might deadlock)
2874fc184738SKonstantin Boyarinov bool locked = false;
287551c0b2f7Stbbdev MallocMutex::scoped_lock lock(MemoryPool::memPoolListLock, /*wait=*/!main_thread, &locked);
287651c0b2f7Stbbdev if (locked) { // the list is safe to process
287751c0b2f7Stbbdev for (MemoryPool *memPool = defaultMemPool->next; memPool; memPool = memPool->next)
287851c0b2f7Stbbdev memPool->onThreadShutdown(memPool->getTLS(/*create=*/false));
287951c0b2f7Stbbdev }
288051c0b2f7Stbbdev }
288151c0b2f7Stbbdev
288251c0b2f7Stbbdev TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return end\n", getThreadId() ));
288351c0b2f7Stbbdev }
288451c0b2f7Stbbdev
288551c0b2f7Stbbdev #if USE_PTHREAD
mallocThreadShutdownNotification(void * arg)288651c0b2f7Stbbdev void mallocThreadShutdownNotification(void* arg)
288751c0b2f7Stbbdev {
288851c0b2f7Stbbdev // The routine is called for each pool (as TLS dtor) on each thread, except for the main thread
288951c0b2f7Stbbdev if (!isMallocInitialized()) return;
289051c0b2f7Stbbdev doThreadShutdownNotification((TLSData*)arg, false);
289151c0b2f7Stbbdev }
289251c0b2f7Stbbdev #else
__TBB_mallocThreadShutdownNotification()289351c0b2f7Stbbdev extern "C" void __TBB_mallocThreadShutdownNotification()
289451c0b2f7Stbbdev {
289551c0b2f7Stbbdev // The routine is called once per thread on Windows
289651c0b2f7Stbbdev if (!isMallocInitialized()) return;
289757f524caSIlya Isaev doThreadShutdownNotification(nullptr, false);
289851c0b2f7Stbbdev }
289951c0b2f7Stbbdev #endif
290051c0b2f7Stbbdev
__TBB_mallocProcessShutdownNotification(bool windows_process_dying)290151c0b2f7Stbbdev extern "C" void __TBB_mallocProcessShutdownNotification(bool windows_process_dying)
290251c0b2f7Stbbdev {
290351c0b2f7Stbbdev if (!isMallocInitialized()) return;
290451c0b2f7Stbbdev
290551c0b2f7Stbbdev // Don't clean allocator internals if the entire process is exiting
290651c0b2f7Stbbdev if (!windows_process_dying) {
290757f524caSIlya Isaev doThreadShutdownNotification(nullptr, /*main_thread=*/true);
290851c0b2f7Stbbdev }
290951c0b2f7Stbbdev #if __TBB_MALLOC_LOCACHE_STAT
291051c0b2f7Stbbdev printf("cache hit ratio %f, size hit %f\n",
291151c0b2f7Stbbdev 1.*cacheHits/mallocCalls, 1.*memHitKB/memAllocKB);
291251c0b2f7Stbbdev defaultMemPool->extMemPool.loc.reportStat(stdout);
291351c0b2f7Stbbdev #endif
291451c0b2f7Stbbdev
291551c0b2f7Stbbdev shutdownSync.processExit();
291651c0b2f7Stbbdev #if __TBB_SOURCE_DIRECTLY_INCLUDED
291751c0b2f7Stbbdev /* Pthread keys must be deleted as soon as possible so that the key dtor is not called
291851c0b2f7Stbbdev on thread termination when the tbbmalloc code may already be unloaded.
291951c0b2f7Stbbdev */
292051c0b2f7Stbbdev defaultMemPool->destroy();
29211ecde27fSIlya Mishin destroyBackRefMain(&defaultMemPool->extMemPool.backend);
292251c0b2f7Stbbdev ThreadId::destroy(); // Delete key for thread id
292351c0b2f7Stbbdev hugePages.reset();
292451c0b2f7Stbbdev // new total malloc initialization is possible after this point
292551c0b2f7Stbbdev mallocInitialized.store(0, std::memory_order_release);
292651c0b2f7Stbbdev #endif // __TBB_SOURCE_DIRECTLY_INCLUDED
292751c0b2f7Stbbdev
292851c0b2f7Stbbdev #if COLLECT_STATISTICS
292951c0b2f7Stbbdev unsigned nThreads = ThreadId::getMaxThreadId();
293051c0b2f7Stbbdev for( int i=1; i<=nThreads && i<MAX_THREADS; ++i )
293151c0b2f7Stbbdev STAT_print(i);
293251c0b2f7Stbbdev #endif
29330e6d4699SAnuya Welling if (!usedBySrcIncluded) {
293451c0b2f7Stbbdev MALLOC_ITT_FINI_ITTLIB();
29350e6d4699SAnuya Welling MALLOC_ITT_RELEASE_RESOURCES();
29360e6d4699SAnuya Welling }
293751c0b2f7Stbbdev }
293851c0b2f7Stbbdev
scalable_malloc(size_t size)293951c0b2f7Stbbdev extern "C" void * scalable_malloc(size_t size)
294051c0b2f7Stbbdev {
294151c0b2f7Stbbdev void *ptr = internalMalloc(size);
294251c0b2f7Stbbdev if (!ptr) errno = ENOMEM;
294351c0b2f7Stbbdev return ptr;
294451c0b2f7Stbbdev }
294551c0b2f7Stbbdev
scalable_free(void * object)294651c0b2f7Stbbdev extern "C" void scalable_free(void *object)
294751c0b2f7Stbbdev {
294851c0b2f7Stbbdev internalFree(object);
294951c0b2f7Stbbdev }
295051c0b2f7Stbbdev
295151c0b2f7Stbbdev #if MALLOC_ZONE_OVERLOAD_ENABLED
__TBB_malloc_free_definite_size(void * object,size_t size)295251c0b2f7Stbbdev extern "C" void __TBB_malloc_free_definite_size(void *object, size_t size)
295351c0b2f7Stbbdev {
295451c0b2f7Stbbdev internalPoolFree(defaultMemPool, object, size);
295551c0b2f7Stbbdev }
295651c0b2f7Stbbdev #endif
295751c0b2f7Stbbdev
295851c0b2f7Stbbdev /*
295951c0b2f7Stbbdev * A variant that provides additional memory safety, by checking whether the given address
296051c0b2f7Stbbdev * was obtained with this allocator, and if not redirecting to the provided alternative call.
296151c0b2f7Stbbdev */
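// For instance, a malloc-replacement layer could forward unknown pointers to the
// original deallocator like this (sketch): __TBB_malloc_safer_free(ptr, &free);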
__TBB_malloc_safer_free(void * object,void (* original_free)(void *))29628827ea7dSLong Nguyen extern "C" TBBMALLOC_EXPORT void __TBB_malloc_safer_free(void *object, void (*original_free)(void*))
296351c0b2f7Stbbdev {
296451c0b2f7Stbbdev if (!object)
296551c0b2f7Stbbdev return;
296651c0b2f7Stbbdev
296751c0b2f7Stbbdev // tbbmalloc can allocate an object only after tbbmalloc has been initialized
296851c0b2f7Stbbdev if (mallocInitialized.load(std::memory_order_acquire) && defaultMemPool->extMemPool.backend.ptrCanBeValid(object)) {
296951c0b2f7Stbbdev if (isLargeObject<unknownMem>(object)) {
297051c0b2f7Stbbdev // must check for a large object first, because the small-object check touches 4 pages to the left,
297151c0b2f7Stbbdev // which can be inaccessible
297251c0b2f7Stbbdev TLSData *tls = defaultMemPool->getTLS(/*create=*/false);
297351c0b2f7Stbbdev
297451c0b2f7Stbbdev defaultMemPool->putToLLOCache(tls, object);
297551c0b2f7Stbbdev return;
297651c0b2f7Stbbdev } else if (isSmallObject(object)) {
297751c0b2f7Stbbdev freeSmallObject(object);
297851c0b2f7Stbbdev return;
297951c0b2f7Stbbdev }
298051c0b2f7Stbbdev }
298151c0b2f7Stbbdev if (original_free)
298251c0b2f7Stbbdev original_free(object);
298351c0b2f7Stbbdev }
298451c0b2f7Stbbdev
298551c0b2f7Stbbdev /********* End the free code *************/
298651c0b2f7Stbbdev
298751c0b2f7Stbbdev /********* Code for scalable_realloc ***********/
298851c0b2f7Stbbdev
298951c0b2f7Stbbdev /*
299051c0b2f7Stbbdev * From K&R
299151c0b2f7Stbbdev * "realloc changes the size of the object pointed to by p to size. The contents will
299251c0b2f7Stbbdev * be unchanged up to the minimum of the old and the new sizes. If the new size is larger,
299351c0b2f7Stbbdev * the new space is uninitialized. realloc returns a pointer to the new space, or
299457f524caSIlya Isaev * nullptr if the request cannot be satisfied, in which case *p is unchanged."
299551c0b2f7Stbbdev *
299651c0b2f7Stbbdev */
scalable_realloc(void * ptr,size_t size)299751c0b2f7Stbbdev extern "C" void* scalable_realloc(void* ptr, size_t size)
299851c0b2f7Stbbdev {
299951c0b2f7Stbbdev void *tmp;
300051c0b2f7Stbbdev
300151c0b2f7Stbbdev if (!ptr)
300251c0b2f7Stbbdev tmp = internalMalloc(size);
300351c0b2f7Stbbdev else if (!size) {
300451c0b2f7Stbbdev internalFree(ptr);
300557f524caSIlya Isaev return nullptr;
300651c0b2f7Stbbdev } else
300751c0b2f7Stbbdev tmp = reallocAligned(defaultMemPool, ptr, size, 0);
300851c0b2f7Stbbdev
300951c0b2f7Stbbdev if (!tmp) errno = ENOMEM;
301051c0b2f7Stbbdev return tmp;
301151c0b2f7Stbbdev }
301251c0b2f7Stbbdev
301351c0b2f7Stbbdev /*
301451c0b2f7Stbbdev * A variant that provides additional memory safety, by checking whether the given address
301551c0b2f7Stbbdev * was obtained with this allocator, and if not redirecting to the provided alternative call.
301651c0b2f7Stbbdev */
__TBB_malloc_safer_realloc(void * ptr,size_t sz,void * original_realloc)30178827ea7dSLong Nguyen extern "C" TBBMALLOC_EXPORT void* __TBB_malloc_safer_realloc(void* ptr, size_t sz, void* original_realloc)
301851c0b2f7Stbbdev {
301951c0b2f7Stbbdev void *tmp; // TODO: fix warnings about uninitialized use of tmp
302051c0b2f7Stbbdev
302151c0b2f7Stbbdev if (!ptr) {
302251c0b2f7Stbbdev tmp = internalMalloc(sz);
302351c0b2f7Stbbdev } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) {
302451c0b2f7Stbbdev if (!sz) {
302551c0b2f7Stbbdev internalFree(ptr);
302657f524caSIlya Isaev return nullptr;
302751c0b2f7Stbbdev } else {
302851c0b2f7Stbbdev tmp = reallocAligned(defaultMemPool, ptr, sz, 0);
302951c0b2f7Stbbdev }
303051c0b2f7Stbbdev }
303151c0b2f7Stbbdev #if USE_WINTHREAD
303251c0b2f7Stbbdev else if (original_realloc && sz) {
303351c0b2f7Stbbdev orig_ptrs *original_ptrs = static_cast<orig_ptrs*>(original_realloc);
303451c0b2f7Stbbdev if ( original_ptrs->msize ){
303551c0b2f7Stbbdev size_t oldSize = original_ptrs->msize(ptr);
303651c0b2f7Stbbdev tmp = internalMalloc(sz);
303751c0b2f7Stbbdev if (tmp) {
303851c0b2f7Stbbdev memcpy(tmp, ptr, sz<oldSize? sz : oldSize);
303951c0b2f7Stbbdev if ( original_ptrs->free ){
304051c0b2f7Stbbdev original_ptrs->free( ptr );
304151c0b2f7Stbbdev }
304251c0b2f7Stbbdev }
304351c0b2f7Stbbdev } else
304457f524caSIlya Isaev tmp = nullptr;
304551c0b2f7Stbbdev }
304651c0b2f7Stbbdev #else
304751c0b2f7Stbbdev else if (original_realloc) {
304851c0b2f7Stbbdev typedef void* (*realloc_ptr_t)(void*,size_t);
304951c0b2f7Stbbdev realloc_ptr_t original_realloc_ptr;
305051c0b2f7Stbbdev (void *&)original_realloc_ptr = original_realloc;
305151c0b2f7Stbbdev tmp = original_realloc_ptr(ptr,sz);
305251c0b2f7Stbbdev }
305351c0b2f7Stbbdev #endif
305457f524caSIlya Isaev else tmp = nullptr;
305551c0b2f7Stbbdev
305651c0b2f7Stbbdev if (!tmp) errno = ENOMEM;
305751c0b2f7Stbbdev return tmp;
305851c0b2f7Stbbdev }
305951c0b2f7Stbbdev
306051c0b2f7Stbbdev /********* End code for scalable_realloc ***********/
306151c0b2f7Stbbdev
306251c0b2f7Stbbdev /********* Code for scalable_calloc ***********/
306351c0b2f7Stbbdev
306451c0b2f7Stbbdev /*
306551c0b2f7Stbbdev * From K&R
306651c0b2f7Stbbdev * calloc returns a pointer to space for an array of nobj objects,
306757f524caSIlya Isaev * each of size size, or nullptr if the request cannot be satisfied.
306851c0b2f7Stbbdev * The space is initialized to zero bytes.
306951c0b2f7Stbbdev *
307051c0b2f7Stbbdev */
307151c0b2f7Stbbdev
scalable_calloc(size_t nobj,size_t size)307251c0b2f7Stbbdev extern "C" void * scalable_calloc(size_t nobj, size_t size)
307351c0b2f7Stbbdev {
307451c0b2f7Stbbdev // this is the square root of the maximal size_t value
307551c0b2f7Stbbdev const size_t mult_not_overflow = size_t(1) << (sizeof(size_t)*CHAR_BIT/2);
307651c0b2f7Stbbdev const size_t arraySize = nobj * size;
307751c0b2f7Stbbdev
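// If both factors are below 2^(CHAR_BIT*sizeof(size_t)/2), their product cannot exceed
// the size_t range, so the exact division check below is only needed when the cheap
// heuristic fires.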
307851c0b2f7Stbbdev // check for overflow during multiplication:
307951c0b2f7Stbbdev if (nobj>=mult_not_overflow || size>=mult_not_overflow) // 1) heuristic check
308051c0b2f7Stbbdev if (nobj && arraySize / nobj != size) { // 2) exact check
308151c0b2f7Stbbdev errno = ENOMEM;
308257f524caSIlya Isaev return nullptr;
308351c0b2f7Stbbdev }
308451c0b2f7Stbbdev void* result = internalMalloc(arraySize);
308551c0b2f7Stbbdev if (result)
308651c0b2f7Stbbdev memset(result, 0, arraySize);
308751c0b2f7Stbbdev else
308851c0b2f7Stbbdev errno = ENOMEM;
308951c0b2f7Stbbdev return result;
309051c0b2f7Stbbdev }
309151c0b2f7Stbbdev
309251c0b2f7Stbbdev /********* End code for scalable_calloc ***********/
309351c0b2f7Stbbdev
309451c0b2f7Stbbdev /********* Code for aligned allocation API **********/
309551c0b2f7Stbbdev
scalable_posix_memalign(void ** memptr,size_t alignment,size_t size)309651c0b2f7Stbbdev extern "C" int scalable_posix_memalign(void **memptr, size_t alignment, size_t size)
309751c0b2f7Stbbdev {
309851c0b2f7Stbbdev if ( !isPowerOfTwoAtLeast(alignment, sizeof(void*)) )
309951c0b2f7Stbbdev return EINVAL;
310051c0b2f7Stbbdev void *result = allocateAligned(defaultMemPool, size, alignment);
310151c0b2f7Stbbdev if (!result)
310251c0b2f7Stbbdev return ENOMEM;
310351c0b2f7Stbbdev *memptr = result;
310451c0b2f7Stbbdev return 0;
310551c0b2f7Stbbdev }
310651c0b2f7Stbbdev
scalable_aligned_malloc(size_t size,size_t alignment)310751c0b2f7Stbbdev extern "C" void * scalable_aligned_malloc(size_t size, size_t alignment)
310851c0b2f7Stbbdev {
310951c0b2f7Stbbdev if (!isPowerOfTwo(alignment) || 0==size) {
311051c0b2f7Stbbdev errno = EINVAL;
311157f524caSIlya Isaev return nullptr;
311251c0b2f7Stbbdev }
311351c0b2f7Stbbdev void *tmp = allocateAligned(defaultMemPool, size, alignment);
311451c0b2f7Stbbdev if (!tmp) errno = ENOMEM;
311551c0b2f7Stbbdev return tmp;
311651c0b2f7Stbbdev }
311751c0b2f7Stbbdev
scalable_aligned_realloc(void * ptr,size_t size,size_t alignment)311851c0b2f7Stbbdev extern "C" void * scalable_aligned_realloc(void *ptr, size_t size, size_t alignment)
311951c0b2f7Stbbdev {
312051c0b2f7Stbbdev if (!isPowerOfTwo(alignment)) {
312151c0b2f7Stbbdev errno = EINVAL;
312257f524caSIlya Isaev return nullptr;
312351c0b2f7Stbbdev }
312451c0b2f7Stbbdev void *tmp;
312551c0b2f7Stbbdev
312651c0b2f7Stbbdev if (!ptr)
312751c0b2f7Stbbdev tmp = allocateAligned(defaultMemPool, size, alignment);
312851c0b2f7Stbbdev else if (!size) {
312951c0b2f7Stbbdev scalable_free(ptr);
313057f524caSIlya Isaev return nullptr;
313151c0b2f7Stbbdev } else
313251c0b2f7Stbbdev tmp = reallocAligned(defaultMemPool, ptr, size, alignment);
313351c0b2f7Stbbdev
313451c0b2f7Stbbdev if (!tmp) errno = ENOMEM;
313551c0b2f7Stbbdev return tmp;
313651c0b2f7Stbbdev }
313751c0b2f7Stbbdev
__TBB_malloc_safer_aligned_realloc(void * ptr,size_t size,size_t alignment,void * orig_function)31388827ea7dSLong Nguyen extern "C" TBBMALLOC_EXPORT void * __TBB_malloc_safer_aligned_realloc(void *ptr, size_t size, size_t alignment, void* orig_function)
313951c0b2f7Stbbdev {
314051c0b2f7Stbbdev /* corner cases are handled here, rather than in reallocAligned, to avoid dealing with errno there */
314151c0b2f7Stbbdev if (!isPowerOfTwo(alignment)) {
314251c0b2f7Stbbdev errno = EINVAL;
314357f524caSIlya Isaev return nullptr;
314451c0b2f7Stbbdev }
314557f524caSIlya Isaev void *tmp = nullptr;
314651c0b2f7Stbbdev
314751c0b2f7Stbbdev if (!ptr) {
314851c0b2f7Stbbdev tmp = allocateAligned(defaultMemPool, size, alignment);
314951c0b2f7Stbbdev } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) {
315051c0b2f7Stbbdev if (!size) {
315151c0b2f7Stbbdev internalFree(ptr);
315257f524caSIlya Isaev return nullptr;
315351c0b2f7Stbbdev } else {
315451c0b2f7Stbbdev tmp = reallocAligned(defaultMemPool, ptr, size, alignment);
315551c0b2f7Stbbdev }
315651c0b2f7Stbbdev }
315751c0b2f7Stbbdev #if USE_WINTHREAD
315851c0b2f7Stbbdev else {
315951c0b2f7Stbbdev orig_aligned_ptrs *original_ptrs = static_cast<orig_aligned_ptrs*>(orig_function);
316051c0b2f7Stbbdev if (size) {
316151c0b2f7Stbbdev // Without orig_msize, we can't do anything with this.
316251c0b2f7Stbbdev // Just keeping old pointer.
316351c0b2f7Stbbdev if ( original_ptrs->aligned_msize ){
316451c0b2f7Stbbdev // pass a minimal alignment and zero offset to get a possibly correct oldSize
316551c0b2f7Stbbdev size_t oldSize = original_ptrs->aligned_msize(ptr, sizeof(void*), 0);
316651c0b2f7Stbbdev tmp = allocateAligned(defaultMemPool, size, alignment);
316751c0b2f7Stbbdev if (tmp) {
316851c0b2f7Stbbdev memcpy(tmp, ptr, size<oldSize? size : oldSize);
316951c0b2f7Stbbdev if ( original_ptrs->aligned_free ){
317051c0b2f7Stbbdev original_ptrs->aligned_free( ptr );
317151c0b2f7Stbbdev }
317251c0b2f7Stbbdev }
317351c0b2f7Stbbdev }
317451c0b2f7Stbbdev } else {
317551c0b2f7Stbbdev if ( original_ptrs->aligned_free ){
317651c0b2f7Stbbdev original_ptrs->aligned_free( ptr );
317751c0b2f7Stbbdev }
317857f524caSIlya Isaev return nullptr;
317951c0b2f7Stbbdev }
318051c0b2f7Stbbdev }
318151c0b2f7Stbbdev #else
318251c0b2f7Stbbdev // As original_realloc can't align the result, and there is no way to find the
318351c0b2f7Stbbdev // size of the object being reallocated, we give up.
318451c0b2f7Stbbdev suppress_unused_warning(orig_function);
318551c0b2f7Stbbdev #endif
318651c0b2f7Stbbdev if (!tmp) errno = ENOMEM;
318751c0b2f7Stbbdev return tmp;
318851c0b2f7Stbbdev }
318951c0b2f7Stbbdev
scalable_aligned_free(void * ptr)319051c0b2f7Stbbdev extern "C" void scalable_aligned_free(void *ptr)
319151c0b2f7Stbbdev {
319251c0b2f7Stbbdev internalFree(ptr);
319351c0b2f7Stbbdev }
319451c0b2f7Stbbdev
319551c0b2f7Stbbdev /********* end code for aligned allocation API **********/
319651c0b2f7Stbbdev
319751c0b2f7Stbbdev /********* Code for scalable_msize ***********/
319851c0b2f7Stbbdev
319951c0b2f7Stbbdev /*
320051c0b2f7Stbbdev * Returns the size of a memory block allocated in the heap.
320151c0b2f7Stbbdev */
scalable_msize(void * ptr)320251c0b2f7Stbbdev extern "C" size_t scalable_msize(void* ptr)
320351c0b2f7Stbbdev {
320451c0b2f7Stbbdev if (ptr) {
320551c0b2f7Stbbdev MALLOC_ASSERT(isRecognized(ptr), "Invalid pointer in scalable_msize detected.");
320651c0b2f7Stbbdev return internalMsize(ptr);
320751c0b2f7Stbbdev }
320851c0b2f7Stbbdev errno = EINVAL;
320951c0b2f7Stbbdev // Unlike _msize, return 0 in case of parameter error.
321051c0b2f7Stbbdev // Returning size_t(-1) would be more likely to cause trouble.
321151c0b2f7Stbbdev return 0;
321251c0b2f7Stbbdev }
321351c0b2f7Stbbdev
321451c0b2f7Stbbdev /*
321551c0b2f7Stbbdev * A variant that provides additional memory safety, by checking whether the given address
321651c0b2f7Stbbdev * was obtained with this allocator, and if not redirecting to the provided alternative call.
321751c0b2f7Stbbdev */
__TBB_malloc_safer_msize(void * object,size_t (* original_msize)(void *))32188827ea7dSLong Nguyen extern "C" TBBMALLOC_EXPORT size_t __TBB_malloc_safer_msize(void *object, size_t (*original_msize)(void*))
321951c0b2f7Stbbdev {
322051c0b2f7Stbbdev if (object) {
322151c0b2f7Stbbdev // Check if the memory was allocated by scalable_malloc
322251c0b2f7Stbbdev if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object))
322351c0b2f7Stbbdev return internalMsize(object);
322451c0b2f7Stbbdev else if (original_msize)
322551c0b2f7Stbbdev return original_msize(object);
322651c0b2f7Stbbdev }
322757f524caSIlya Isaev // object is nullptr or unknown, or foreign and no original_msize
322851c0b2f7Stbbdev #if USE_WINTHREAD
322951c0b2f7Stbbdev errno = EINVAL; // errno expected to be set only on this platform
323051c0b2f7Stbbdev #endif
323151c0b2f7Stbbdev return 0;
323251c0b2f7Stbbdev }
323351c0b2f7Stbbdev
323451c0b2f7Stbbdev /*
323551c0b2f7Stbbdev * The same as above but for _aligned_msize case
323651c0b2f7Stbbdev */
__TBB_malloc_safer_aligned_msize(void * object,size_t alignment,size_t offset,size_t (* orig_aligned_msize)(void *,size_t,size_t))32378827ea7dSLong Nguyen extern "C" TBBMALLOC_EXPORT size_t __TBB_malloc_safer_aligned_msize(void *object, size_t alignment, size_t offset, size_t (*orig_aligned_msize)(void*,size_t,size_t))
323851c0b2f7Stbbdev {
323951c0b2f7Stbbdev if (object) {
324051c0b2f7Stbbdev // Check if the memory was allocated by scalable_malloc
324151c0b2f7Stbbdev if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object))
324251c0b2f7Stbbdev return internalMsize(object);
324351c0b2f7Stbbdev else if (orig_aligned_msize)
324451c0b2f7Stbbdev return orig_aligned_msize(object,alignment,offset);
324551c0b2f7Stbbdev }
324657f524caSIlya Isaev // object is nullptr or unknown
324751c0b2f7Stbbdev errno = EINVAL;
324851c0b2f7Stbbdev return 0;
324951c0b2f7Stbbdev }
325051c0b2f7Stbbdev
325151c0b2f7Stbbdev /********* End code for scalable_msize ***********/
325251c0b2f7Stbbdev
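/* Example (sketch): cap the soft heap limit at 1 GB and request huge pages where supported:
     scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, (intptr_t)1024*1024*1024);
     scalable_allocation_mode(USE_HUGE_PAGES, 1);
*/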
scalable_allocation_mode(int param,intptr_t value)325351c0b2f7Stbbdev extern "C" int scalable_allocation_mode(int param, intptr_t value)
325451c0b2f7Stbbdev {
325551c0b2f7Stbbdev if (param == TBBMALLOC_SET_SOFT_HEAP_LIMIT) {
325651c0b2f7Stbbdev defaultMemPool->extMemPool.backend.setRecommendedMaxSize((size_t)value);
325751c0b2f7Stbbdev return TBBMALLOC_OK;
325851c0b2f7Stbbdev } else if (param == USE_HUGE_PAGES) {
3259734f0bc0SPablo Romero #if __unix__
326051c0b2f7Stbbdev switch (value) {
326151c0b2f7Stbbdev case 0:
326251c0b2f7Stbbdev case 1:
326351c0b2f7Stbbdev hugePages.setMode(value);
326451c0b2f7Stbbdev return TBBMALLOC_OK;
326551c0b2f7Stbbdev default:
326651c0b2f7Stbbdev return TBBMALLOC_INVALID_PARAM;
326751c0b2f7Stbbdev }
326851c0b2f7Stbbdev #else
326951c0b2f7Stbbdev return TBBMALLOC_NO_EFFECT;
327051c0b2f7Stbbdev #endif
327151c0b2f7Stbbdev #if __TBB_SOURCE_DIRECTLY_INCLUDED
327251c0b2f7Stbbdev } else if (param == TBBMALLOC_INTERNAL_SOURCE_INCLUDED) {
327351c0b2f7Stbbdev switch (value) {
327451c0b2f7Stbbdev case 0: // used by dynamic library
327551c0b2f7Stbbdev case 1: // used by static library or directly included sources
327651c0b2f7Stbbdev usedBySrcIncluded = value;
327751c0b2f7Stbbdev return TBBMALLOC_OK;
327851c0b2f7Stbbdev default:
327951c0b2f7Stbbdev return TBBMALLOC_INVALID_PARAM;
328051c0b2f7Stbbdev }
328151c0b2f7Stbbdev #endif
328251c0b2f7Stbbdev } else if (param == TBBMALLOC_SET_HUGE_SIZE_THRESHOLD) {
328351c0b2f7Stbbdev defaultMemPool->extMemPool.loc.setHugeSizeThreshold((size_t)value);
328451c0b2f7Stbbdev return TBBMALLOC_OK;
328551c0b2f7Stbbdev }
328651c0b2f7Stbbdev return TBBMALLOC_INVALID_PARAM;
328751c0b2f7Stbbdev }
328851c0b2f7Stbbdev
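/* Example (sketch): clean all internal buffers; note that the second argument must be nullptr:
     scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr);
*/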
scalable_allocation_command(int cmd,void * param)328951c0b2f7Stbbdev extern "C" int scalable_allocation_command(int cmd, void *param)
329051c0b2f7Stbbdev {
329151c0b2f7Stbbdev if (param)
329251c0b2f7Stbbdev return TBBMALLOC_INVALID_PARAM;
329351c0b2f7Stbbdev
329451c0b2f7Stbbdev bool released = false;
329551c0b2f7Stbbdev switch(cmd) {
329651c0b2f7Stbbdev case TBBMALLOC_CLEAN_THREAD_BUFFERS:
329751c0b2f7Stbbdev if (TLSData *tls = defaultMemPool->getTLS(/*create=*/false))
329851c0b2f7Stbbdev released = tls->externalCleanup(/*cleanOnlyUnused*/false, /*cleanBins=*/true);
329951c0b2f7Stbbdev break;
330051c0b2f7Stbbdev case TBBMALLOC_CLEAN_ALL_BUFFERS:
330132d5ec1fSŁukasz Plewa released = defaultMemPool->extMemPool.hardCachesCleanup(true);
330251c0b2f7Stbbdev break;
330351c0b2f7Stbbdev default:
330451c0b2f7Stbbdev return TBBMALLOC_INVALID_PARAM;
330551c0b2f7Stbbdev }
330651c0b2f7Stbbdev return released ? TBBMALLOC_OK : TBBMALLOC_NO_EFFECT;
330751c0b2f7Stbbdev }
3308