151c0b2f7Stbbdev /* 22110128eSsarathnandu Copyright (c) 2005-2023 Intel Corporation 351c0b2f7Stbbdev 451c0b2f7Stbbdev Licensed under the Apache License, Version 2.0 (the "License"); 551c0b2f7Stbbdev you may not use this file except in compliance with the License. 651c0b2f7Stbbdev You may obtain a copy of the License at 751c0b2f7Stbbdev 851c0b2f7Stbbdev http://www.apache.org/licenses/LICENSE-2.0 951c0b2f7Stbbdev 1051c0b2f7Stbbdev Unless required by applicable law or agreed to in writing, software 1151c0b2f7Stbbdev distributed under the License is distributed on an "AS IS" BASIS, 1251c0b2f7Stbbdev WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1351c0b2f7Stbbdev See the License for the specific language governing permissions and 1451c0b2f7Stbbdev limitations under the License. 1551c0b2f7Stbbdev */ 1651c0b2f7Stbbdev 1751c0b2f7Stbbdev #include "tbbmalloc_internal.h" 1851c0b2f7Stbbdev #include <errno.h> 1951c0b2f7Stbbdev #include <new> /* for placement new */ 2051c0b2f7Stbbdev #include <string.h> /* for memset */ 2151c0b2f7Stbbdev 2249e08aacStbbdev #include "oneapi/tbb/version.h" 2351c0b2f7Stbbdev #include "../tbb/environment.h" 2451c0b2f7Stbbdev #include "../tbb/itt_notify.h" // for __TBB_load_ittnotify() 2551c0b2f7Stbbdev 2651c0b2f7Stbbdev #if USE_PTHREAD 2751c0b2f7Stbbdev #define TlsSetValue_func pthread_setspecific 2851c0b2f7Stbbdev #define TlsGetValue_func pthread_getspecific 2951c0b2f7Stbbdev #define GetMyTID() pthread_self() 3051c0b2f7Stbbdev #include <sched.h> 3151c0b2f7Stbbdev extern "C" { static void mallocThreadShutdownNotification(void*); } 3251c0b2f7Stbbdev #if __sun || __SUNPRO_CC 3351c0b2f7Stbbdev #define __asm__ asm 3451c0b2f7Stbbdev #endif 3551c0b2f7Stbbdev #include <unistd.h> // sysconf(_SC_PAGESIZE) 3651c0b2f7Stbbdev #elif USE_WINTHREAD 3751c0b2f7Stbbdev #define GetMyTID() GetCurrentThreadId() 3851c0b2f7Stbbdev #if __TBB_WIN8UI_SUPPORT 3951c0b2f7Stbbdev #include<thread> 4051c0b2f7Stbbdev #define TlsSetValue_func FlsSetValue 4151c0b2f7Stbbdev #define TlsGetValue_func FlsGetValue 4257f524caSIlya Isaev #define TlsAlloc() FlsAlloc(nullptr) 4351c0b2f7Stbbdev #define TLS_ALLOC_FAILURE FLS_OUT_OF_INDEXES 4451c0b2f7Stbbdev #define TlsFree FlsFree 4551c0b2f7Stbbdev #else 4651c0b2f7Stbbdev #define TlsSetValue_func TlsSetValue 4751c0b2f7Stbbdev #define TlsGetValue_func TlsGetValue 4851c0b2f7Stbbdev #define TLS_ALLOC_FAILURE TLS_OUT_OF_INDEXES 4951c0b2f7Stbbdev #endif 5051c0b2f7Stbbdev #else 5151c0b2f7Stbbdev #error Must define USE_PTHREAD or USE_WINTHREAD 5251c0b2f7Stbbdev #endif 5351c0b2f7Stbbdev 5451c0b2f7Stbbdev #define FREELIST_NONBLOCKING 1 5551c0b2f7Stbbdev 5651c0b2f7Stbbdev namespace rml { 5751c0b2f7Stbbdev class MemoryPool; 5851c0b2f7Stbbdev namespace internal { 5951c0b2f7Stbbdev 6051c0b2f7Stbbdev class Block; 6151c0b2f7Stbbdev class MemoryPool; 6251c0b2f7Stbbdev 6351c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION 6451c0b2f7Stbbdev 6551c0b2f7Stbbdev inline bool isMallocInitialized(); 6651c0b2f7Stbbdev 6751c0b2f7Stbbdev #endif // MALLOC_CHECK_RECURSION 6851c0b2f7Stbbdev 6951c0b2f7Stbbdev /** Support for handling the special UNUSABLE pointer state **/ 7051c0b2f7Stbbdev const intptr_t UNUSABLE = 0x1; 7151c0b2f7Stbbdev inline bool isSolidPtr( void* ptr ) { 7251c0b2f7Stbbdev return (UNUSABLE|(intptr_t)ptr)!=UNUSABLE; 7351c0b2f7Stbbdev } 7451c0b2f7Stbbdev inline bool isNotForUse( void* ptr ) { 7551c0b2f7Stbbdev return (intptr_t)ptr==UNUSABLE; 7651c0b2f7Stbbdev } 7751c0b2f7Stbbdev 7851c0b2f7Stbbdev /* 7951c0b2f7Stbbdev * Block::objectSize value used to mark blocks 
allocated by startupAlloc 8051c0b2f7Stbbdev */ 8151c0b2f7Stbbdev const uint16_t startupAllocObjSizeMark = ~(uint16_t)0; 8251c0b2f7Stbbdev 8351c0b2f7Stbbdev /* 8451c0b2f7Stbbdev * The following constant is used to define the size of struct Block, the block header. 8551c0b2f7Stbbdev * The intent is to have the size of a Block multiple of the cache line size, this allows us to 8651c0b2f7Stbbdev * get good alignment at the cost of some overhead equal to the amount of padding included in the Block. 8751c0b2f7Stbbdev */ 8851c0b2f7Stbbdev const int blockHeaderAlignment = estimatedCacheLineSize; 8951c0b2f7Stbbdev 9051c0b2f7Stbbdev /********* The data structures and global objects **************/ 9151c0b2f7Stbbdev 9251c0b2f7Stbbdev /* 9351c0b2f7Stbbdev * The malloc routines themselves need to be able to occasionally malloc some space, 9451c0b2f7Stbbdev * in order to set up the structures used by the thread local structures. This 9551c0b2f7Stbbdev * routine performs that functions. 9651c0b2f7Stbbdev */ 9751c0b2f7Stbbdev class BootStrapBlocks { 9851c0b2f7Stbbdev MallocMutex bootStrapLock; 9951c0b2f7Stbbdev Block *bootStrapBlock; 10051c0b2f7Stbbdev Block *bootStrapBlockUsed; 10151c0b2f7Stbbdev FreeObject *bootStrapObjectList; 10251c0b2f7Stbbdev public: 10351c0b2f7Stbbdev void *allocate(MemoryPool *memPool, size_t size); 10451c0b2f7Stbbdev void free(void* ptr); 10551c0b2f7Stbbdev void reset(); 10651c0b2f7Stbbdev }; 10751c0b2f7Stbbdev 10851c0b2f7Stbbdev #if USE_INTERNAL_TID 10951c0b2f7Stbbdev class ThreadId { 11051c0b2f7Stbbdev static tls_key_t Tid_key; 11151c0b2f7Stbbdev std::atomic<intptr_t> ThreadCount; 11251c0b2f7Stbbdev 11351c0b2f7Stbbdev unsigned int id; 11451c0b2f7Stbbdev 11551c0b2f7Stbbdev static unsigned int tlsNumber() { 11651c0b2f7Stbbdev unsigned int result = reinterpret_cast<intptr_t>(TlsGetValue_func(Tid_key)); 11751c0b2f7Stbbdev if( !result ) { 11851c0b2f7Stbbdev RecursiveMallocCallProtector scoped; 11951c0b2f7Stbbdev // Thread-local value is zero -> first call from this thread, 12051c0b2f7Stbbdev // need to initialize with next ID value (IDs start from 1) 12151c0b2f7Stbbdev result = ++ThreadCount; // returned new value! 
12251c0b2f7Stbbdev TlsSetValue_func( Tid_key, reinterpret_cast<void*>(result) ); 12351c0b2f7Stbbdev } 12451c0b2f7Stbbdev return result; 12551c0b2f7Stbbdev } 12651c0b2f7Stbbdev public: 12751c0b2f7Stbbdev static bool init() { 12851c0b2f7Stbbdev #if USE_WINTHREAD 12951c0b2f7Stbbdev Tid_key = TlsAlloc(); 13051c0b2f7Stbbdev if (Tid_key == TLS_ALLOC_FAILURE) 13151c0b2f7Stbbdev return false; 13251c0b2f7Stbbdev #else 13357f524caSIlya Isaev int status = pthread_key_create( &Tid_key, nullptr ); 13451c0b2f7Stbbdev if ( status ) { 13551c0b2f7Stbbdev fprintf (stderr, "The memory manager cannot create tls key during initialization\n"); 13651c0b2f7Stbbdev return false; 13751c0b2f7Stbbdev } 13851c0b2f7Stbbdev #endif /* USE_WINTHREAD */ 13951c0b2f7Stbbdev return true; 14051c0b2f7Stbbdev } 141478de5b1Stbbdev #if __TBB_SOURCE_DIRECTLY_INCLUDED 14251c0b2f7Stbbdev static void destroy() { 14351c0b2f7Stbbdev if( Tid_key ) { 14451c0b2f7Stbbdev #if USE_WINTHREAD 14551c0b2f7Stbbdev BOOL status = !(TlsFree( Tid_key )); // fail is zero 14651c0b2f7Stbbdev #else 14751c0b2f7Stbbdev int status = pthread_key_delete( Tid_key ); 14851c0b2f7Stbbdev #endif /* USE_WINTHREAD */ 14951c0b2f7Stbbdev if ( status ) 15051c0b2f7Stbbdev fprintf (stderr, "The memory manager cannot delete tls key\n"); 15151c0b2f7Stbbdev Tid_key = 0; 15251c0b2f7Stbbdev } 15351c0b2f7Stbbdev } 154478de5b1Stbbdev #endif 15551c0b2f7Stbbdev 15651c0b2f7Stbbdev ThreadId() : id(ThreadId::tlsNumber()) {} 15751c0b2f7Stbbdev bool isCurrentThreadId() const { return id == ThreadId::tlsNumber(); } 15851c0b2f7Stbbdev 15951c0b2f7Stbbdev #if COLLECT_STATISTICS || MALLOC_TRACE 16051c0b2f7Stbbdev friend unsigned int getThreadId() { return ThreadId::tlsNumber(); } 16151c0b2f7Stbbdev #endif 16251c0b2f7Stbbdev #if COLLECT_STATISTICS 16351c0b2f7Stbbdev static unsigned getMaxThreadId() { return ThreadCount.load(std::memory_order_relaxed); } 16451c0b2f7Stbbdev 16551c0b2f7Stbbdev friend int STAT_increment(ThreadId tid, int bin, int ctr); 16651c0b2f7Stbbdev #endif 16751c0b2f7Stbbdev }; 16851c0b2f7Stbbdev 16951c0b2f7Stbbdev tls_key_t ThreadId::Tid_key; 17051c0b2f7Stbbdev intptr_t ThreadId::ThreadCount; 17151c0b2f7Stbbdev 17251c0b2f7Stbbdev #if COLLECT_STATISTICS 17351c0b2f7Stbbdev int STAT_increment(ThreadId tid, int bin, int ctr) 17451c0b2f7Stbbdev { 17551c0b2f7Stbbdev return ::STAT_increment(tid.id, bin, ctr); 17651c0b2f7Stbbdev } 17751c0b2f7Stbbdev #endif 17851c0b2f7Stbbdev 17951c0b2f7Stbbdev #else // USE_INTERNAL_TID 18051c0b2f7Stbbdev 18151c0b2f7Stbbdev class ThreadId { 18251c0b2f7Stbbdev #if USE_PTHREAD 183478de5b1Stbbdev std::atomic<pthread_t> tid; 18451c0b2f7Stbbdev #else 185478de5b1Stbbdev std::atomic<DWORD> tid; 18651c0b2f7Stbbdev #endif 18751c0b2f7Stbbdev public: 18851c0b2f7Stbbdev ThreadId() : tid(GetMyTID()) {} 18951c0b2f7Stbbdev #if USE_PTHREAD 190478de5b1Stbbdev bool isCurrentThreadId() const { return pthread_equal(pthread_self(), tid.load(std::memory_order_relaxed)); } 19151c0b2f7Stbbdev #else 192478de5b1Stbbdev bool isCurrentThreadId() const { return GetCurrentThreadId() == tid.load(std::memory_order_relaxed); } 19351c0b2f7Stbbdev #endif 194478de5b1Stbbdev ThreadId& operator=(const ThreadId& other) { 195478de5b1Stbbdev tid.store(other.tid.load(std::memory_order_relaxed), std::memory_order_relaxed); 196478de5b1Stbbdev return *this; 197478de5b1Stbbdev } 19851c0b2f7Stbbdev static bool init() { return true; } 199478de5b1Stbbdev #if __TBB_SOURCE_DIRECTLY_INCLUDED 20051c0b2f7Stbbdev static void destroy() {} 201478de5b1Stbbdev #endif 20251c0b2f7Stbbdev }; 
20351c0b2f7Stbbdev 20451c0b2f7Stbbdev #endif // USE_INTERNAL_TID 20551c0b2f7Stbbdev 20651c0b2f7Stbbdev /*********** Code to provide thread ID and a thread-local void pointer **********/ 20751c0b2f7Stbbdev 20851c0b2f7Stbbdev bool TLSKey::init() 20951c0b2f7Stbbdev { 21051c0b2f7Stbbdev #if USE_WINTHREAD 21151c0b2f7Stbbdev TLS_pointer_key = TlsAlloc(); 21251c0b2f7Stbbdev if (TLS_pointer_key == TLS_ALLOC_FAILURE) 21351c0b2f7Stbbdev return false; 21451c0b2f7Stbbdev #else 21551c0b2f7Stbbdev int status = pthread_key_create( &TLS_pointer_key, mallocThreadShutdownNotification ); 21651c0b2f7Stbbdev if ( status ) 21751c0b2f7Stbbdev return false; 21851c0b2f7Stbbdev #endif /* USE_WINTHREAD */ 21951c0b2f7Stbbdev return true; 22051c0b2f7Stbbdev } 22151c0b2f7Stbbdev 22251c0b2f7Stbbdev bool TLSKey::destroy() 22351c0b2f7Stbbdev { 22451c0b2f7Stbbdev #if USE_WINTHREAD 22551c0b2f7Stbbdev BOOL status1 = !(TlsFree(TLS_pointer_key)); // fail is zero 22651c0b2f7Stbbdev #else 22751c0b2f7Stbbdev int status1 = pthread_key_delete(TLS_pointer_key); 22851c0b2f7Stbbdev #endif /* USE_WINTHREAD */ 22951c0b2f7Stbbdev MALLOC_ASSERT(!status1, "The memory manager cannot delete tls key."); 23051c0b2f7Stbbdev return status1==0; 23151c0b2f7Stbbdev } 23251c0b2f7Stbbdev 23351c0b2f7Stbbdev inline TLSData* TLSKey::getThreadMallocTLS() const 23451c0b2f7Stbbdev { 23551c0b2f7Stbbdev return (TLSData *)TlsGetValue_func( TLS_pointer_key ); 23651c0b2f7Stbbdev } 23751c0b2f7Stbbdev 23851c0b2f7Stbbdev inline void TLSKey::setThreadMallocTLS( TLSData * newvalue ) { 23951c0b2f7Stbbdev RecursiveMallocCallProtector scoped; 24051c0b2f7Stbbdev TlsSetValue_func( TLS_pointer_key, newvalue ); 24151c0b2f7Stbbdev } 24251c0b2f7Stbbdev 24351c0b2f7Stbbdev /* The 'next' field in the block header has to maintain some invariants: 24451c0b2f7Stbbdev * it needs to be on a 16K boundary and the first field in the block. 24551c0b2f7Stbbdev * Any value stored there needs to have the lower 14 bits set to 0 24651c0b2f7Stbbdev * so that various assert work. This means that if you want to smash this memory 24751c0b2f7Stbbdev * for debugging purposes you will need to obey this invariant. 24851c0b2f7Stbbdev * The total size of the header needs to be a power of 2 to simplify 24951c0b2f7Stbbdev * the alignment requirements. For now it is a 128 byte structure. 25051c0b2f7Stbbdev * To avoid false sharing, the fields changed only locally are separated 25151c0b2f7Stbbdev * from the fields changed by foreign threads. 25251c0b2f7Stbbdev * Changing the size of the block header would require to change 25351c0b2f7Stbbdev * some bin allocation sizes, in particular "fitting" sizes (see above). 
25451c0b2f7Stbbdev */ 25551c0b2f7Stbbdev class Bin; 25651c0b2f7Stbbdev class StartupBlock; 25751c0b2f7Stbbdev 25851c0b2f7Stbbdev class MemoryPool { 25951c0b2f7Stbbdev // if no explicit grainsize, expect to see malloc in user's pAlloc 26051c0b2f7Stbbdev // and set reasonable low granularity 26151c0b2f7Stbbdev static const size_t defaultGranularity = estimatedCacheLineSize; 26251c0b2f7Stbbdev 263ba947f18SIlya Isaev MemoryPool() = delete; // deny 26451c0b2f7Stbbdev public: 26551c0b2f7Stbbdev static MallocMutex memPoolListLock; 26651c0b2f7Stbbdev 26751c0b2f7Stbbdev // list of all active pools is used to release 26851c0b2f7Stbbdev // all TLS data on thread termination or library unload 26951c0b2f7Stbbdev MemoryPool *next, 27051c0b2f7Stbbdev *prev; 27151c0b2f7Stbbdev ExtMemoryPool extMemPool; 27251c0b2f7Stbbdev BootStrapBlocks bootStrapBlocks; 27351c0b2f7Stbbdev 27451c0b2f7Stbbdev static void initDefaultPool(); 27551c0b2f7Stbbdev 27651c0b2f7Stbbdev bool init(intptr_t poolId, const MemPoolPolicy* memPoolPolicy); 27751c0b2f7Stbbdev bool reset(); 27851c0b2f7Stbbdev bool destroy(); 27951c0b2f7Stbbdev void onThreadShutdown(TLSData *tlsData); 28051c0b2f7Stbbdev 28151c0b2f7Stbbdev inline TLSData *getTLS(bool create); 28257f524caSIlya Isaev void clearTLS() { extMemPool.tlsPointerKey.setThreadMallocTLS(nullptr); } 28351c0b2f7Stbbdev 28451c0b2f7Stbbdev Block *getEmptyBlock(size_t size); 28551c0b2f7Stbbdev void returnEmptyBlock(Block *block, bool poolTheBlock); 28651c0b2f7Stbbdev 28751c0b2f7Stbbdev // get/put large object to/from local large object cache 28851c0b2f7Stbbdev void *getFromLLOCache(TLSData *tls, size_t size, size_t alignment); 28951c0b2f7Stbbdev void putToLLOCache(TLSData *tls, void *object); 29051c0b2f7Stbbdev }; 29151c0b2f7Stbbdev 29251c0b2f7Stbbdev static intptr_t defaultMemPool_space[sizeof(MemoryPool)/sizeof(intptr_t) + 29351c0b2f7Stbbdev (sizeof(MemoryPool)%sizeof(intptr_t)? 1 : 0)]; 29451c0b2f7Stbbdev static MemoryPool *defaultMemPool = (MemoryPool*)defaultMemPool_space; 29551c0b2f7Stbbdev const size_t MemoryPool::defaultGranularity; 29651c0b2f7Stbbdev // zero-initialized 29751c0b2f7Stbbdev MallocMutex MemoryPool::memPoolListLock; 29851c0b2f7Stbbdev // TODO: move huge page status to default pool, because that's its states 29951c0b2f7Stbbdev HugePagesStatus hugePages; 30051c0b2f7Stbbdev static bool usedBySrcIncluded = false; 30151c0b2f7Stbbdev 30251c0b2f7Stbbdev // Padding helpers 30351c0b2f7Stbbdev template<size_t padd> 30451c0b2f7Stbbdev struct PaddingImpl { 30551c0b2f7Stbbdev size_t __padding[padd]; 30651c0b2f7Stbbdev }; 30751c0b2f7Stbbdev 30851c0b2f7Stbbdev template<> 30951c0b2f7Stbbdev struct PaddingImpl<0> {}; 31051c0b2f7Stbbdev 31151c0b2f7Stbbdev template<int N> 31251c0b2f7Stbbdev struct Padding : PaddingImpl<N/sizeof(size_t)> {}; 31351c0b2f7Stbbdev 31451c0b2f7Stbbdev // Slab block is 16KB-aligned. To prevent false sharing, separate locally-accessed 31551c0b2f7Stbbdev // fields and fields commonly accessed by not owner threads. 
31651c0b2f7Stbbdev class GlobalBlockFields : public BlockI { 31751c0b2f7Stbbdev protected: 31851c0b2f7Stbbdev std::atomic<FreeObject*> publicFreeList; 319478de5b1Stbbdev std::atomic<Block*> nextPrivatizable; 32051c0b2f7Stbbdev MemoryPool *poolPtr; 32151c0b2f7Stbbdev }; 32251c0b2f7Stbbdev 32351c0b2f7Stbbdev class LocalBlockFields : public GlobalBlockFields, Padding<blockHeaderAlignment - sizeof(GlobalBlockFields)> { 32451c0b2f7Stbbdev protected: 32551c0b2f7Stbbdev Block *next; 32651c0b2f7Stbbdev Block *previous; /* Use double linked list to speed up removal */ 32751c0b2f7Stbbdev FreeObject *bumpPtr; /* Bump pointer moves from the end to the beginning of a block */ 32851c0b2f7Stbbdev FreeObject *freeList; 32951c0b2f7Stbbdev /* Pointer to local data for the owner thread. Used for fast finding tls 33051c0b2f7Stbbdev when releasing object from a block that current thread owned. 33157f524caSIlya Isaev nullptr for orphaned blocks. */ 332478de5b1Stbbdev std::atomic<TLSData*> tlsPtr; 33351c0b2f7Stbbdev ThreadId ownerTid; /* the ID of the thread that owns or last owned the block */ 33451c0b2f7Stbbdev BackRefIdx backRefIdx; 33551c0b2f7Stbbdev uint16_t allocatedCount; /* Number of objects allocated (obviously by the owning thread) */ 33651c0b2f7Stbbdev uint16_t objectSize; 33751c0b2f7Stbbdev bool isFull; 33851c0b2f7Stbbdev 33951c0b2f7Stbbdev friend class FreeBlockPool; 34051c0b2f7Stbbdev friend class StartupBlock; 34151c0b2f7Stbbdev friend class LifoList; 34251c0b2f7Stbbdev friend void *BootStrapBlocks::allocate(MemoryPool *, size_t); 34351c0b2f7Stbbdev friend bool OrphanedBlocks::cleanup(Backend*); 34451c0b2f7Stbbdev friend Block *MemoryPool::getEmptyBlock(size_t); 34551c0b2f7Stbbdev }; 34651c0b2f7Stbbdev 34751c0b2f7Stbbdev // Use inheritance to guarantee that a user data start on next cache line. 34851c0b2f7Stbbdev // Can't use member for it, because when LocalBlockFields already on cache line, 34951c0b2f7Stbbdev // we must have no additional memory consumption for all compilers. 35051c0b2f7Stbbdev class Block : public LocalBlockFields, 35151c0b2f7Stbbdev Padding<2*blockHeaderAlignment - sizeof(LocalBlockFields)> { 35251c0b2f7Stbbdev public: 35351c0b2f7Stbbdev bool empty() const { 35451c0b2f7Stbbdev if (allocatedCount > 0) return false; 35551c0b2f7Stbbdev MALLOC_ASSERT(!isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT); 35651c0b2f7Stbbdev return true; 35751c0b2f7Stbbdev } 35851c0b2f7Stbbdev inline FreeObject* allocate(); 35951c0b2f7Stbbdev inline FreeObject *allocateFromFreeList(); 36051c0b2f7Stbbdev 36151c0b2f7Stbbdev inline bool adjustFullness(); 36257f524caSIlya Isaev void adjustPositionInBin(Bin* bin = nullptr); 363478de5b1Stbbdev #if MALLOC_DEBUG 36451c0b2f7Stbbdev bool freeListNonNull() { return freeList; } 365478de5b1Stbbdev #endif 36651c0b2f7Stbbdev void freePublicObject(FreeObject *objectToFree); 36751c0b2f7Stbbdev inline void freeOwnObject(void *object); 36851c0b2f7Stbbdev void reset(); 36951c0b2f7Stbbdev void privatizePublicFreeList( bool reset = true ); 37051c0b2f7Stbbdev void restoreBumpPtr(); 37151c0b2f7Stbbdev void privatizeOrphaned(TLSData *tls, unsigned index); 37251c0b2f7Stbbdev bool readyToShare(); 37351c0b2f7Stbbdev void shareOrphaned(intptr_t binTag, unsigned index); 37451c0b2f7Stbbdev unsigned int getSize() const { 37551c0b2f7Stbbdev MALLOC_ASSERT(isStartupAllocObject() || objectSize<minLargeObjectSize, 37651c0b2f7Stbbdev "Invalid object size"); 37751c0b2f7Stbbdev return isStartupAllocObject()? 
0 : objectSize; 37851c0b2f7Stbbdev } 37951c0b2f7Stbbdev const BackRefIdx *getBackRefIdx() const { return &backRefIdx; } 38051c0b2f7Stbbdev inline bool isOwnedByCurrentThread() const; 38151c0b2f7Stbbdev bool isStartupAllocObject() const { return objectSize == startupAllocObjSizeMark; } 38251c0b2f7Stbbdev inline FreeObject *findObjectToFree(const void *object) const; 38351c0b2f7Stbbdev void checkFreePrecond(const void *object) const { 38451c0b2f7Stbbdev #if MALLOC_DEBUG 38551c0b2f7Stbbdev const char *msg = "Possible double free or heap corruption."; 38651c0b2f7Stbbdev // small objects are always at least sizeof(size_t) Byte aligned, 38751c0b2f7Stbbdev // try to check this before this dereference as for invalid objects 38851c0b2f7Stbbdev // this may be unreadable 38951c0b2f7Stbbdev MALLOC_ASSERT(isAligned(object, sizeof(size_t)), "Try to free invalid small object"); 390478de5b1Stbbdev #if !__TBB_USE_THREAD_SANITIZER 39151c0b2f7Stbbdev // releasing to free slab 39251c0b2f7Stbbdev MALLOC_ASSERT(allocatedCount>0, msg); 393478de5b1Stbbdev #endif 39451c0b2f7Stbbdev // must not point to slab's header 39551c0b2f7Stbbdev MALLOC_ASSERT((uintptr_t)object - (uintptr_t)this >= sizeof(Block), msg); 39651c0b2f7Stbbdev if (startupAllocObjSizeMark == objectSize) // startup block 39751c0b2f7Stbbdev MALLOC_ASSERT(object<=bumpPtr, msg); 39851c0b2f7Stbbdev else { 39951c0b2f7Stbbdev // non-startup objects are 8 Byte aligned 40051c0b2f7Stbbdev MALLOC_ASSERT(isAligned(object, 8), "Try to free invalid small object"); 401478de5b1Stbbdev FreeObject *toFree = findObjectToFree(object); 402478de5b1Stbbdev #if !__TBB_USE_THREAD_SANITIZER 40351c0b2f7Stbbdev MALLOC_ASSERT(allocatedCount <= (slabSize-sizeof(Block))/objectSize 40451c0b2f7Stbbdev && (!bumpPtr || object>bumpPtr), msg); 40551c0b2f7Stbbdev // check against head of freeList, as this is mostly 40651c0b2f7Stbbdev // expected after double free 40751c0b2f7Stbbdev MALLOC_ASSERT(toFree != freeList, msg); 408478de5b1Stbbdev #endif 40951c0b2f7Stbbdev // check against head of publicFreeList, to detect double free 41051c0b2f7Stbbdev // involving foreign thread 41151c0b2f7Stbbdev MALLOC_ASSERT(toFree != publicFreeList.load(std::memory_order_relaxed), msg); 41251c0b2f7Stbbdev } 41351c0b2f7Stbbdev #else 41451c0b2f7Stbbdev suppress_unused_warning(object); 41551c0b2f7Stbbdev #endif 41651c0b2f7Stbbdev } 41751c0b2f7Stbbdev void initEmptyBlock(TLSData *tls, size_t size); 41851c0b2f7Stbbdev size_t findObjectSize(void *object) const; 41951c0b2f7Stbbdev MemoryPool *getMemPool() const { return poolPtr; } // do not use on the hot path! 
42051c0b2f7Stbbdev 42151c0b2f7Stbbdev protected: 42251c0b2f7Stbbdev void cleanBlockHeader(); 42351c0b2f7Stbbdev 42451c0b2f7Stbbdev private: 42551c0b2f7Stbbdev static const float emptyEnoughRatio; /* Threshold on free space needed to "reactivate" a block */ 42651c0b2f7Stbbdev 42751c0b2f7Stbbdev inline FreeObject *allocateFromBumpPtr(); 42851c0b2f7Stbbdev inline FreeObject *findAllocatedObject(const void *address) const; 429478de5b1Stbbdev #if MALLOC_DEBUG 43051c0b2f7Stbbdev inline bool isProperlyPlaced(const void *object) const; 431478de5b1Stbbdev #endif 43251c0b2f7Stbbdev inline void markOwned(TLSData *tls) { 433478de5b1Stbbdev MALLOC_ASSERT(!tlsPtr.load(std::memory_order_relaxed), ASSERT_TEXT); 43451c0b2f7Stbbdev ownerTid = ThreadId(); /* save the ID of the current thread */ 435478de5b1Stbbdev tlsPtr.store(tls, std::memory_order_relaxed); 43651c0b2f7Stbbdev } 43751c0b2f7Stbbdev inline void markOrphaned() { 438478de5b1Stbbdev MALLOC_ASSERT(tlsPtr.load(std::memory_order_relaxed), ASSERT_TEXT); 439478de5b1Stbbdev tlsPtr.store(nullptr, std::memory_order_relaxed); 44051c0b2f7Stbbdev } 44151c0b2f7Stbbdev 44251c0b2f7Stbbdev friend class Bin; 44351c0b2f7Stbbdev friend class TLSData; 44451c0b2f7Stbbdev friend bool MemoryPool::destroy(); 44551c0b2f7Stbbdev }; 44651c0b2f7Stbbdev 44751c0b2f7Stbbdev const float Block::emptyEnoughRatio = 1.0 / 4.0; 44851c0b2f7Stbbdev 44951c0b2f7Stbbdev static_assert(sizeof(Block) <= 2*estimatedCacheLineSize, 45051c0b2f7Stbbdev "The class Block does not fit into 2 cache lines on this platform. " 45151c0b2f7Stbbdev "Defining USE_INTERNAL_TID may help to fix it."); 45251c0b2f7Stbbdev 45351c0b2f7Stbbdev class Bin { 45451c0b2f7Stbbdev private: 455478de5b1Stbbdev public: 45651c0b2f7Stbbdev Block *activeBlk; 45751c0b2f7Stbbdev std::atomic<Block*> mailbox; 45851c0b2f7Stbbdev MallocMutex mailLock; 45951c0b2f7Stbbdev 46051c0b2f7Stbbdev public: 46151c0b2f7Stbbdev inline Block* getActiveBlock() const { return activeBlk; } 46257f524caSIlya Isaev void resetActiveBlock() { activeBlk = nullptr; } 46351c0b2f7Stbbdev inline void setActiveBlock(Block *block); 46451c0b2f7Stbbdev inline Block* setPreviousBlockActive(); 46551c0b2f7Stbbdev Block* getPrivatizedFreeListBlock(); 46651c0b2f7Stbbdev void moveBlockToFront(Block *block); 46751c0b2f7Stbbdev bool cleanPublicFreeLists(); 46851c0b2f7Stbbdev void processEmptyBlock(Block *block, bool poolTheBlock); 46951c0b2f7Stbbdev void addPublicFreeListBlock(Block* block); 47051c0b2f7Stbbdev 47151c0b2f7Stbbdev void outofTLSBin(Block* block); 47251c0b2f7Stbbdev void verifyTLSBin(size_t size) const; 47351c0b2f7Stbbdev void pushTLSBin(Block* block); 47451c0b2f7Stbbdev 475478de5b1Stbbdev #if MALLOC_DEBUG 47651c0b2f7Stbbdev void verifyInitState() const { 47751c0b2f7Stbbdev MALLOC_ASSERT( !activeBlk, ASSERT_TEXT ); 47851c0b2f7Stbbdev MALLOC_ASSERT( !mailbox.load(std::memory_order_relaxed), ASSERT_TEXT ); 47951c0b2f7Stbbdev } 480478de5b1Stbbdev #endif 48151c0b2f7Stbbdev 48251c0b2f7Stbbdev friend void Block::freePublicObject (FreeObject *objectToFree); 48351c0b2f7Stbbdev }; 48451c0b2f7Stbbdev 48551c0b2f7Stbbdev /********* End of the data structures **************/ 48651c0b2f7Stbbdev 48751c0b2f7Stbbdev /* 48851c0b2f7Stbbdev * There are bins for all 8 byte aligned objects less than this segregated size; 8 bins in total 48951c0b2f7Stbbdev */ 49051c0b2f7Stbbdev const uint32_t minSmallObjectIndex = 0; 49151c0b2f7Stbbdev const uint32_t numSmallObjectBins = 8; 49251c0b2f7Stbbdev const uint32_t maxSmallObjectSize = 64; 49351c0b2f7Stbbdev 49451c0b2f7Stbbdev /* 
49551c0b2f7Stbbdev * There are 4 bins between each couple of powers of 2 [64-128-256-...] 49651c0b2f7Stbbdev * from maxSmallObjectSize till this size; 16 bins in total 49751c0b2f7Stbbdev */ 49851c0b2f7Stbbdev const uint32_t minSegregatedObjectIndex = minSmallObjectIndex+numSmallObjectBins; 49951c0b2f7Stbbdev const uint32_t numSegregatedObjectBins = 16; 50051c0b2f7Stbbdev const uint32_t maxSegregatedObjectSize = 1024; 50151c0b2f7Stbbdev 50251c0b2f7Stbbdev /* 50351c0b2f7Stbbdev * And there are 5 bins with allocation sizes that are multiples of estimatedCacheLineSize 50451c0b2f7Stbbdev * and selected to fit 9, 6, 4, 3, and 2 allocations in a block. 50551c0b2f7Stbbdev */ 50651c0b2f7Stbbdev const uint32_t minFittingIndex = minSegregatedObjectIndex+numSegregatedObjectBins; 50751c0b2f7Stbbdev const uint32_t numFittingBins = 5; 50851c0b2f7Stbbdev 50951c0b2f7Stbbdev const uint32_t fittingAlignment = estimatedCacheLineSize; 51051c0b2f7Stbbdev 51151c0b2f7Stbbdev #define SET_FITTING_SIZE(N) ( (slabSize-sizeof(Block))/N ) & ~(fittingAlignment-1) 51251c0b2f7Stbbdev // For blockSize=16*1024, sizeof(Block)=2*estimatedCacheLineSize and fittingAlignment=estimatedCacheLineSize, 51351c0b2f7Stbbdev // the comments show the fitting sizes and the amounts left unused for estimatedCacheLineSize=64/128: 51451c0b2f7Stbbdev const uint32_t fittingSize1 = SET_FITTING_SIZE(9); // 1792/1792 128/000 51551c0b2f7Stbbdev const uint32_t fittingSize2 = SET_FITTING_SIZE(6); // 2688/2688 128/000 51651c0b2f7Stbbdev const uint32_t fittingSize3 = SET_FITTING_SIZE(4); // 4032/3968 128/256 51751c0b2f7Stbbdev const uint32_t fittingSize4 = SET_FITTING_SIZE(3); // 5376/5376 128/000 51851c0b2f7Stbbdev const uint32_t fittingSize5 = SET_FITTING_SIZE(2); // 8128/8064 000/000 51951c0b2f7Stbbdev #undef SET_FITTING_SIZE 52051c0b2f7Stbbdev 52151c0b2f7Stbbdev /* 52251c0b2f7Stbbdev * The total number of thread-specific Block-based bins 52351c0b2f7Stbbdev */ 52451c0b2f7Stbbdev const uint32_t numBlockBins = minFittingIndex+numFittingBins; 52551c0b2f7Stbbdev 52651c0b2f7Stbbdev /* 52751c0b2f7Stbbdev * Objects of this size and larger are considered large objects. 52851c0b2f7Stbbdev */ 52951c0b2f7Stbbdev const uint32_t minLargeObjectSize = fittingSize5 + 1; 53051c0b2f7Stbbdev 53151c0b2f7Stbbdev /* 53251c0b2f7Stbbdev * Per-thread pool of slab blocks. Idea behind it is to not share with other 53351c0b2f7Stbbdev * threads memory that are likely in local cache(s) of our CPU. 
53451c0b2f7Stbbdev */ 53551c0b2f7Stbbdev class FreeBlockPool { 53651c0b2f7Stbbdev private: 53751c0b2f7Stbbdev std::atomic<Block*> head; 53851c0b2f7Stbbdev int size; 53951c0b2f7Stbbdev Backend *backend; 54051c0b2f7Stbbdev bool lastAccessMiss; 54151c0b2f7Stbbdev public: 54251c0b2f7Stbbdev static const int POOL_HIGH_MARK = 32; 54351c0b2f7Stbbdev static const int POOL_LOW_MARK = 8; 54451c0b2f7Stbbdev 54551c0b2f7Stbbdev class ResOfGet { 546ba947f18SIlya Isaev ResOfGet() = delete; 54751c0b2f7Stbbdev public: 54851c0b2f7Stbbdev Block* block; 54951c0b2f7Stbbdev bool lastAccMiss; 55051c0b2f7Stbbdev ResOfGet(Block *b, bool lastMiss) : block(b), lastAccMiss(lastMiss) {} 55151c0b2f7Stbbdev }; 55251c0b2f7Stbbdev 55351c0b2f7Stbbdev // allocated in zero-initialized memory 55451c0b2f7Stbbdev FreeBlockPool(Backend *bknd) : backend(bknd) {} 55551c0b2f7Stbbdev ResOfGet getBlock(); 55651c0b2f7Stbbdev void returnBlock(Block *block); 55751c0b2f7Stbbdev bool externalCleanup(); // can be called by another thread 55851c0b2f7Stbbdev }; 55951c0b2f7Stbbdev 56051c0b2f7Stbbdev template<int LOW_MARK, int HIGH_MARK> 56151c0b2f7Stbbdev class LocalLOCImpl { 56251c0b2f7Stbbdev private: 56351c0b2f7Stbbdev static const size_t MAX_TOTAL_SIZE = 4*1024*1024; 56451c0b2f7Stbbdev // TODO: can single-linked list be faster here? 56551c0b2f7Stbbdev LargeMemoryBlock *tail; // need it when do releasing on overflow 56651c0b2f7Stbbdev std::atomic<LargeMemoryBlock*> head; 56751c0b2f7Stbbdev size_t totalSize; 56851c0b2f7Stbbdev int numOfBlocks; 56951c0b2f7Stbbdev public: 57051c0b2f7Stbbdev bool put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool); 57151c0b2f7Stbbdev LargeMemoryBlock *get(size_t size); 57251c0b2f7Stbbdev bool externalCleanup(ExtMemoryPool *extMemPool); 57351c0b2f7Stbbdev #if __TBB_MALLOC_WHITEBOX_TEST 57457f524caSIlya Isaev LocalLOCImpl() : tail(nullptr), head(nullptr), totalSize(0), numOfBlocks(0) {} 57551c0b2f7Stbbdev static size_t getMaxSize() { return MAX_TOTAL_SIZE; } 57651c0b2f7Stbbdev static const int LOC_HIGH_MARK = HIGH_MARK; 57751c0b2f7Stbbdev #else 57851c0b2f7Stbbdev // no ctor, object must be created in zero-initialized memory 57951c0b2f7Stbbdev #endif 58051c0b2f7Stbbdev }; 58151c0b2f7Stbbdev 58251c0b2f7Stbbdev typedef LocalLOCImpl<8,32> LocalLOC; // set production code parameters 58351c0b2f7Stbbdev 58451c0b2f7Stbbdev class TLSData : public TLSRemote { 58551c0b2f7Stbbdev MemoryPool *memPool; 58651c0b2f7Stbbdev public: 58751c0b2f7Stbbdev Bin bin[numBlockBinLimit]; 58851c0b2f7Stbbdev FreeBlockPool freeSlabBlocks; 58951c0b2f7Stbbdev LocalLOC lloc; 59051c0b2f7Stbbdev unsigned currCacheIdx; 59151c0b2f7Stbbdev private: 592478de5b1Stbbdev std::atomic<bool> unused; 59351c0b2f7Stbbdev public: 59451c0b2f7Stbbdev TLSData(MemoryPool *mPool, Backend *bknd) : memPool(mPool), freeSlabBlocks(bknd) {} 59551c0b2f7Stbbdev MemoryPool *getMemPool() const { return memPool; } 59651c0b2f7Stbbdev Bin* getAllocationBin(size_t size); 59751c0b2f7Stbbdev void release(); 59851c0b2f7Stbbdev bool externalCleanup(bool cleanOnlyUnused, bool cleanBins) { 599478de5b1Stbbdev if (!unused.load(std::memory_order_relaxed) && cleanOnlyUnused) return false; 60051c0b2f7Stbbdev // Heavy operation in terms of synchronization complexity, 60151c0b2f7Stbbdev // should be called only for the current thread 60251c0b2f7Stbbdev bool released = cleanBins ? 
cleanupBlockBins() : false; 60351c0b2f7Stbbdev // both cleanups to be called, and the order is not important 604*a96a032fSVladislav Shchapov bool lloc_cleaned = lloc.externalCleanup(&memPool->extMemPool); 605*a96a032fSVladislav Shchapov bool free_slab_blocks_cleaned = freeSlabBlocks.externalCleanup(); 606*a96a032fSVladislav Shchapov return released || lloc_cleaned || free_slab_blocks_cleaned; 60751c0b2f7Stbbdev } 60851c0b2f7Stbbdev bool cleanupBlockBins(); 609478de5b1Stbbdev void markUsed() { unused.store(false, std::memory_order_relaxed); } // called by owner when TLS touched 610478de5b1Stbbdev void markUnused() { unused.store(true, std::memory_order_relaxed); } // can be called by not owner thread 61151c0b2f7Stbbdev }; 61251c0b2f7Stbbdev 61351c0b2f7Stbbdev TLSData *TLSKey::createTLS(MemoryPool *memPool, Backend *backend) 61451c0b2f7Stbbdev { 61551c0b2f7Stbbdev MALLOC_ASSERT( sizeof(TLSData) >= sizeof(Bin) * numBlockBins + sizeof(FreeBlockPool), ASSERT_TEXT ); 61651c0b2f7Stbbdev TLSData* tls = (TLSData*) memPool->bootStrapBlocks.allocate(memPool, sizeof(TLSData)); 61751c0b2f7Stbbdev if ( !tls ) 61857f524caSIlya Isaev return nullptr; 61951c0b2f7Stbbdev new(tls) TLSData(memPool, backend); 62051c0b2f7Stbbdev /* the block contains zeroes after bootStrapMalloc, so bins are initialized */ 62151c0b2f7Stbbdev #if MALLOC_DEBUG 62251c0b2f7Stbbdev for (uint32_t i = 0; i < numBlockBinLimit; i++) 62351c0b2f7Stbbdev tls->bin[i].verifyInitState(); 62451c0b2f7Stbbdev #endif 62551c0b2f7Stbbdev setThreadMallocTLS(tls); 62651c0b2f7Stbbdev memPool->extMemPool.allLocalCaches.registerThread(tls); 62751c0b2f7Stbbdev return tls; 62851c0b2f7Stbbdev } 62951c0b2f7Stbbdev 63051c0b2f7Stbbdev bool TLSData::cleanupBlockBins() 63151c0b2f7Stbbdev { 63251c0b2f7Stbbdev bool released = false; 63351c0b2f7Stbbdev for (uint32_t i = 0; i < numBlockBinLimit; i++) { 63451c0b2f7Stbbdev released |= bin[i].cleanPublicFreeLists(); 63551c0b2f7Stbbdev // After cleaning public free lists, only the active block might be empty. 63651c0b2f7Stbbdev // Do not use processEmptyBlock because it will just restore bumpPtr. 
63751c0b2f7Stbbdev Block *block = bin[i].getActiveBlock(); 63851c0b2f7Stbbdev if (block && block->empty()) { 63951c0b2f7Stbbdev bin[i].outofTLSBin(block); 64051c0b2f7Stbbdev memPool->returnEmptyBlock(block, /*poolTheBlock=*/false); 64151c0b2f7Stbbdev released = true; 64251c0b2f7Stbbdev } 64351c0b2f7Stbbdev } 64451c0b2f7Stbbdev return released; 64551c0b2f7Stbbdev } 64651c0b2f7Stbbdev 64751c0b2f7Stbbdev bool ExtMemoryPool::releaseAllLocalCaches() 64851c0b2f7Stbbdev { 64951c0b2f7Stbbdev // Iterate all registered TLS data and clean LLOC and Slab pools 65051c0b2f7Stbbdev bool released = allLocalCaches.cleanup(/*cleanOnlyUnused=*/false); 65151c0b2f7Stbbdev 65251c0b2f7Stbbdev // Bins privatization is done only for the current thread 65351c0b2f7Stbbdev if (TLSData *tlsData = tlsPointerKey.getThreadMallocTLS()) 65451c0b2f7Stbbdev released |= tlsData->cleanupBlockBins(); 65551c0b2f7Stbbdev 65651c0b2f7Stbbdev return released; 65751c0b2f7Stbbdev } 65851c0b2f7Stbbdev 65951c0b2f7Stbbdev void AllLocalCaches::registerThread(TLSRemote *tls) 66051c0b2f7Stbbdev { 66157f524caSIlya Isaev tls->prev = nullptr; 66251c0b2f7Stbbdev MallocMutex::scoped_lock lock(listLock); 66351c0b2f7Stbbdev MALLOC_ASSERT(head!=tls, ASSERT_TEXT); 66451c0b2f7Stbbdev tls->next = head; 66551c0b2f7Stbbdev if (head) 66651c0b2f7Stbbdev head->prev = tls; 66751c0b2f7Stbbdev head = tls; 66851c0b2f7Stbbdev MALLOC_ASSERT(head->next!=head, ASSERT_TEXT); 66951c0b2f7Stbbdev } 67051c0b2f7Stbbdev 67151c0b2f7Stbbdev void AllLocalCaches::unregisterThread(TLSRemote *tls) 67251c0b2f7Stbbdev { 67351c0b2f7Stbbdev MallocMutex::scoped_lock lock(listLock); 67451c0b2f7Stbbdev MALLOC_ASSERT(head, "Can't unregister thread: no threads are registered."); 67551c0b2f7Stbbdev if (head == tls) 67651c0b2f7Stbbdev head = tls->next; 67751c0b2f7Stbbdev if (tls->next) 67851c0b2f7Stbbdev tls->next->prev = tls->prev; 67951c0b2f7Stbbdev if (tls->prev) 68051c0b2f7Stbbdev tls->prev->next = tls->next; 68151c0b2f7Stbbdev MALLOC_ASSERT(!tls->next || tls->next->next!=tls->next, ASSERT_TEXT); 68251c0b2f7Stbbdev } 68351c0b2f7Stbbdev 68451c0b2f7Stbbdev bool AllLocalCaches::cleanup(bool cleanOnlyUnused) 68551c0b2f7Stbbdev { 68651c0b2f7Stbbdev bool released = false; 68751c0b2f7Stbbdev { 68851c0b2f7Stbbdev MallocMutex::scoped_lock lock(listLock); 68951c0b2f7Stbbdev for (TLSRemote *curr=head; curr; curr=curr->next) 69051c0b2f7Stbbdev released |= static_cast<TLSData*>(curr)->externalCleanup(cleanOnlyUnused, /*cleanBins=*/false); 69151c0b2f7Stbbdev } 69251c0b2f7Stbbdev return released; 69351c0b2f7Stbbdev } 69451c0b2f7Stbbdev 69551c0b2f7Stbbdev void AllLocalCaches::markUnused() 69651c0b2f7Stbbdev { 69751c0b2f7Stbbdev bool locked; 69851c0b2f7Stbbdev MallocMutex::scoped_lock lock(listLock, /*block=*/false, &locked); 69951c0b2f7Stbbdev if (!locked) // not wait for marking if someone doing something with it 70051c0b2f7Stbbdev return; 70151c0b2f7Stbbdev 70251c0b2f7Stbbdev for (TLSRemote *curr=head; curr; curr=curr->next) 70351c0b2f7Stbbdev static_cast<TLSData*>(curr)->markUnused(); 70451c0b2f7Stbbdev } 70551c0b2f7Stbbdev 70651c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION 70751c0b2f7Stbbdev MallocMutex RecursiveMallocCallProtector::rmc_mutex; 7088b6f831cStbbdev std::atomic<pthread_t> RecursiveMallocCallProtector::owner_thread; 709478de5b1Stbbdev std::atomic<void*> RecursiveMallocCallProtector::autoObjPtr; 71051c0b2f7Stbbdev bool RecursiveMallocCallProtector::mallocRecursionDetected; 71151c0b2f7Stbbdev #if __FreeBSD__ 71251c0b2f7Stbbdev bool RecursiveMallocCallProtector::canUsePthread; 
71351c0b2f7Stbbdev #endif 71451c0b2f7Stbbdev 71551c0b2f7Stbbdev #endif 71651c0b2f7Stbbdev 71751c0b2f7Stbbdev /*********** End code to provide thread ID and a TLS pointer **********/ 71851c0b2f7Stbbdev 71951c0b2f7Stbbdev // Parameter for isLargeObject, keeps our expectations on memory origin. 72051c0b2f7Stbbdev // Assertions must use unknownMem to reliably report object invalidity. 72151c0b2f7Stbbdev enum MemoryOrigin { 72251c0b2f7Stbbdev ourMem, // allocated by TBB allocator 72351c0b2f7Stbbdev unknownMem // can be allocated by system allocator or TBB allocator 72451c0b2f7Stbbdev }; 72551c0b2f7Stbbdev 726478de5b1Stbbdev template<MemoryOrigin> 727478de5b1Stbbdev #if __TBB_USE_THREAD_SANITIZER 728478de5b1Stbbdev // We have a real race when accessing the large object header for 729478de5b1Stbbdev // non large objects (e.g. small or foreign objects). 730478de5b1Stbbdev // Therefore, we need to hide this access from the thread sanitizer 731478de5b1Stbbdev __attribute__((no_sanitize("thread"))) 732478de5b1Stbbdev #endif 733478de5b1Stbbdev bool isLargeObject(void *object); 73451c0b2f7Stbbdev static void *internalMalloc(size_t size); 73551c0b2f7Stbbdev static void internalFree(void *object); 73651c0b2f7Stbbdev static void *internalPoolMalloc(MemoryPool* mPool, size_t size); 73751c0b2f7Stbbdev static bool internalPoolFree(MemoryPool *mPool, void *object, size_t size); 73851c0b2f7Stbbdev 73951c0b2f7Stbbdev #if !MALLOC_DEBUG 74051c0b2f7Stbbdev #if __INTEL_COMPILER || _MSC_VER 74151c0b2f7Stbbdev #define NOINLINE(decl) __declspec(noinline) decl 74251c0b2f7Stbbdev #define ALWAYSINLINE(decl) __forceinline decl 74351c0b2f7Stbbdev #elif __GNUC__ 74451c0b2f7Stbbdev #define NOINLINE(decl) decl __attribute__ ((noinline)) 74551c0b2f7Stbbdev #define ALWAYSINLINE(decl) decl __attribute__ ((always_inline)) 74651c0b2f7Stbbdev #else 74751c0b2f7Stbbdev #define NOINLINE(decl) decl 74851c0b2f7Stbbdev #define ALWAYSINLINE(decl) decl 74951c0b2f7Stbbdev #endif 75051c0b2f7Stbbdev 75151c0b2f7Stbbdev static NOINLINE( bool doInitialization() ); 75251c0b2f7Stbbdev ALWAYSINLINE( bool isMallocInitialized() ); 75351c0b2f7Stbbdev 75451c0b2f7Stbbdev #undef ALWAYSINLINE 75551c0b2f7Stbbdev #undef NOINLINE 75651c0b2f7Stbbdev #endif /* !MALLOC_DEBUG */ 75751c0b2f7Stbbdev 75851c0b2f7Stbbdev 75951c0b2f7Stbbdev /********* Now some rough utility code to deal with indexing the size bins. **************/ 76051c0b2f7Stbbdev 76151c0b2f7Stbbdev /* 76251c0b2f7Stbbdev * Given a number return the highest non-zero bit in it. It is intended to work with 32-bit values only. 7639e15720bStbbdev * Moreover, on some platforms, for sake of simplicity and performance, it is narrowed to only serve for 64 to 1023. 76451c0b2f7Stbbdev * This is enough for current algorithm of distribution of sizes among bins. 76551c0b2f7Stbbdev * __TBB_Log2 is not used here to minimize dependencies on TBB specific sources. 
76651c0b2f7Stbbdev */ 76751c0b2f7Stbbdev #if _WIN64 && _MSC_VER>=1400 && !__INTEL_COMPILER 76851c0b2f7Stbbdev extern "C" unsigned char _BitScanReverse( unsigned long* i, unsigned long w ); 76951c0b2f7Stbbdev #pragma intrinsic(_BitScanReverse) 77051c0b2f7Stbbdev #endif 77151c0b2f7Stbbdev static inline unsigned int highestBitPos(unsigned int n) 77251c0b2f7Stbbdev { 77351c0b2f7Stbbdev MALLOC_ASSERT( n>=64 && n<1024, ASSERT_TEXT ); // only needed for bsr array lookup, but always true 77451c0b2f7Stbbdev unsigned int pos; 77551c0b2f7Stbbdev #if __ARCH_x86_32||__ARCH_x86_64 77651c0b2f7Stbbdev 777734f0bc0SPablo Romero # if __unix__||__APPLE__||__MINGW32__ 77851c0b2f7Stbbdev __asm__ ("bsr %1,%0" : "=r"(pos) : "r"(n)); 77951c0b2f7Stbbdev # elif (_WIN32 && (!_WIN64 || __INTEL_COMPILER)) 78051c0b2f7Stbbdev __asm 78151c0b2f7Stbbdev { 78251c0b2f7Stbbdev bsr eax, n 78351c0b2f7Stbbdev mov pos, eax 78451c0b2f7Stbbdev } 78551c0b2f7Stbbdev # elif _WIN64 && _MSC_VER>=1400 78651c0b2f7Stbbdev _BitScanReverse((unsigned long*)&pos, (unsigned long)n); 78751c0b2f7Stbbdev # else 78851c0b2f7Stbbdev # error highestBitPos() not implemented for this platform 78951c0b2f7Stbbdev # endif 79051c0b2f7Stbbdev #elif __arm__ 79151c0b2f7Stbbdev __asm__ __volatile__ 79251c0b2f7Stbbdev ( 79351c0b2f7Stbbdev "clz %0, %1\n" 79451c0b2f7Stbbdev "rsb %0, %0, %2\n" 79551c0b2f7Stbbdev :"=r" (pos) :"r" (n), "I" (31) 79651c0b2f7Stbbdev ); 79751c0b2f7Stbbdev #else 79851c0b2f7Stbbdev static unsigned int bsr[16] = {0/*N/A*/,6,7,7,8,8,8,8,9,9,9,9,9,9,9,9}; 79951c0b2f7Stbbdev pos = bsr[ n>>6 ]; 80051c0b2f7Stbbdev #endif /* __ARCH_* */ 80151c0b2f7Stbbdev return pos; 80251c0b2f7Stbbdev } 80351c0b2f7Stbbdev 80451c0b2f7Stbbdev unsigned int getSmallObjectIndex(unsigned int size) 80551c0b2f7Stbbdev { 80651c0b2f7Stbbdev unsigned int result = (size-1)>>3; 8072110128eSsarathnandu constexpr bool is_64bit = (8 == sizeof(void*)); 8082110128eSsarathnandu if (is_64bit) { 809fa944e19SMircho Rodozov // For 64-bit malloc, 16 byte alignment is needed except for bin 0. 81051c0b2f7Stbbdev if (result) result |= 1; // 0,1,3,5,7; bins 2,4,6 are not aligned to 16 bytes 811fa944e19SMircho Rodozov } 81251c0b2f7Stbbdev return result; 81351c0b2f7Stbbdev } 814478de5b1Stbbdev 81551c0b2f7Stbbdev /* 81651c0b2f7Stbbdev * Depending on indexRequest, for a given size return either the index into the bin 81751c0b2f7Stbbdev * for objects of this size, or the actual size of objects in this bin. 81851c0b2f7Stbbdev */ 81951c0b2f7Stbbdev template<bool indexRequest> 82051c0b2f7Stbbdev static unsigned int getIndexOrObjectSize (unsigned int size) 82151c0b2f7Stbbdev { 82251c0b2f7Stbbdev if (size <= maxSmallObjectSize) { // selection from 8/16/24/32/40/48/56/64 823478de5b1Stbbdev unsigned int index = getSmallObjectIndex( size ); 82451c0b2f7Stbbdev /* Bin 0 is for 8 bytes, bin 1 is for 16, and so forth */ 82551c0b2f7Stbbdev return indexRequest ? index : (index+1)<<3; 82651c0b2f7Stbbdev } 82751c0b2f7Stbbdev else if (size <= maxSegregatedObjectSize ) { // 80/96/112/128 / 160/192/224/256 / 320/384/448/512 / 640/768/896/1024 82851c0b2f7Stbbdev unsigned int order = highestBitPos(size-1); // which group of bin sizes? 
82951c0b2f7Stbbdev MALLOC_ASSERT( 6<=order && order<=9, ASSERT_TEXT ); 83051c0b2f7Stbbdev if (indexRequest) 83151c0b2f7Stbbdev return minSegregatedObjectIndex - (4*6) - 4 + (4*order) + ((size-1)>>(order-2)); 83251c0b2f7Stbbdev else { 83351c0b2f7Stbbdev unsigned int alignment = 128 >> (9-order); // alignment in the group 83451c0b2f7Stbbdev MALLOC_ASSERT( alignment==16 || alignment==32 || alignment==64 || alignment==128, ASSERT_TEXT ); 83551c0b2f7Stbbdev return alignUp(size,alignment); 83651c0b2f7Stbbdev } 83751c0b2f7Stbbdev } 83851c0b2f7Stbbdev else { 83951c0b2f7Stbbdev if( size <= fittingSize3 ) { 84051c0b2f7Stbbdev if( size <= fittingSize2 ) { 84151c0b2f7Stbbdev if( size <= fittingSize1 ) 84251c0b2f7Stbbdev return indexRequest ? minFittingIndex : fittingSize1; 84351c0b2f7Stbbdev else 84451c0b2f7Stbbdev return indexRequest ? minFittingIndex+1 : fittingSize2; 84551c0b2f7Stbbdev } else 84651c0b2f7Stbbdev return indexRequest ? minFittingIndex+2 : fittingSize3; 84751c0b2f7Stbbdev } else { 84851c0b2f7Stbbdev if( size <= fittingSize5 ) { 84951c0b2f7Stbbdev if( size <= fittingSize4 ) 85051c0b2f7Stbbdev return indexRequest ? minFittingIndex+3 : fittingSize4; 85151c0b2f7Stbbdev else 85251c0b2f7Stbbdev return indexRequest ? minFittingIndex+4 : fittingSize5; 85351c0b2f7Stbbdev } else { 85451c0b2f7Stbbdev MALLOC_ASSERT( 0,ASSERT_TEXT ); // this should not happen 85551c0b2f7Stbbdev return ~0U; 85651c0b2f7Stbbdev } 85751c0b2f7Stbbdev } 85851c0b2f7Stbbdev } 85951c0b2f7Stbbdev } 86051c0b2f7Stbbdev 86151c0b2f7Stbbdev static unsigned int getIndex (unsigned int size) 86251c0b2f7Stbbdev { 86351c0b2f7Stbbdev return getIndexOrObjectSize</*indexRequest=*/true>(size); 86451c0b2f7Stbbdev } 86551c0b2f7Stbbdev 86651c0b2f7Stbbdev static unsigned int getObjectSize (unsigned int size) 86751c0b2f7Stbbdev { 86851c0b2f7Stbbdev return getIndexOrObjectSize</*indexRequest=*/false>(size); 86951c0b2f7Stbbdev } 87051c0b2f7Stbbdev 87151c0b2f7Stbbdev 87251c0b2f7Stbbdev void *BootStrapBlocks::allocate(MemoryPool *memPool, size_t size) 87351c0b2f7Stbbdev { 87451c0b2f7Stbbdev FreeObject *result; 87551c0b2f7Stbbdev 87651c0b2f7Stbbdev MALLOC_ASSERT( size == sizeof(TLSData), ASSERT_TEXT ); 87751c0b2f7Stbbdev 87851c0b2f7Stbbdev { // Lock with acquire 87951c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(bootStrapLock); 88051c0b2f7Stbbdev 88151c0b2f7Stbbdev if( bootStrapObjectList) { 88251c0b2f7Stbbdev result = bootStrapObjectList; 88351c0b2f7Stbbdev bootStrapObjectList = bootStrapObjectList->next; 88451c0b2f7Stbbdev } else { 88551c0b2f7Stbbdev if (!bootStrapBlock) { 88651c0b2f7Stbbdev bootStrapBlock = memPool->getEmptyBlock(size); 88757f524caSIlya Isaev if (!bootStrapBlock) return nullptr; 88851c0b2f7Stbbdev } 88951c0b2f7Stbbdev result = bootStrapBlock->bumpPtr; 89051c0b2f7Stbbdev bootStrapBlock->bumpPtr = (FreeObject *)((uintptr_t)bootStrapBlock->bumpPtr - bootStrapBlock->objectSize); 89151c0b2f7Stbbdev if ((uintptr_t)bootStrapBlock->bumpPtr < (uintptr_t)bootStrapBlock+sizeof(Block)) { 89257f524caSIlya Isaev bootStrapBlock->bumpPtr = nullptr; 89351c0b2f7Stbbdev bootStrapBlock->next = bootStrapBlockUsed; 89451c0b2f7Stbbdev bootStrapBlockUsed = bootStrapBlock; 89557f524caSIlya Isaev bootStrapBlock = nullptr; 89651c0b2f7Stbbdev } 89751c0b2f7Stbbdev } 89851c0b2f7Stbbdev } // Unlock with release 89951c0b2f7Stbbdev memset (result, 0, size); 90051c0b2f7Stbbdev return (void*)result; 90151c0b2f7Stbbdev } 90251c0b2f7Stbbdev 90351c0b2f7Stbbdev void BootStrapBlocks::free(void* ptr) 90451c0b2f7Stbbdev { 90551c0b2f7Stbbdev MALLOC_ASSERT( ptr, 
ASSERT_TEXT ); 90651c0b2f7Stbbdev { // Lock with acquire 90751c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(bootStrapLock); 90851c0b2f7Stbbdev ((FreeObject*)ptr)->next = bootStrapObjectList; 90951c0b2f7Stbbdev bootStrapObjectList = (FreeObject*)ptr; 91051c0b2f7Stbbdev } // Unlock with release 91151c0b2f7Stbbdev } 91251c0b2f7Stbbdev 91351c0b2f7Stbbdev void BootStrapBlocks::reset() 91451c0b2f7Stbbdev { 91557f524caSIlya Isaev bootStrapBlock = bootStrapBlockUsed = nullptr; 91657f524caSIlya Isaev bootStrapObjectList = nullptr; 91751c0b2f7Stbbdev } 91851c0b2f7Stbbdev 91951c0b2f7Stbbdev #if !(FREELIST_NONBLOCKING) 92051c0b2f7Stbbdev static MallocMutex publicFreeListLock; // lock for changes of publicFreeList 92151c0b2f7Stbbdev #endif 92251c0b2f7Stbbdev 92351c0b2f7Stbbdev /********* End rough utility code **************/ 92451c0b2f7Stbbdev 92551c0b2f7Stbbdev /* LifoList assumes zero initialization so a vector of it can be created 92651c0b2f7Stbbdev * by just allocating some space with no call to constructor. 92751c0b2f7Stbbdev * On Linux, it seems to be necessary to avoid linking with C++ libraries. 92851c0b2f7Stbbdev * 92951c0b2f7Stbbdev * By usage convention there is no race on the initialization. */ 930478de5b1Stbbdev LifoList::LifoList( ) : top(nullptr) 93151c0b2f7Stbbdev { 93251c0b2f7Stbbdev // MallocMutex assumes zero initialization 9332110128eSsarathnandu memset(static_cast<void*>(&lock), 0, sizeof(MallocMutex)); 93451c0b2f7Stbbdev } 93551c0b2f7Stbbdev 93651c0b2f7Stbbdev void LifoList::push(Block *block) 93751c0b2f7Stbbdev { 93851c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(lock); 939478de5b1Stbbdev block->next = top.load(std::memory_order_relaxed); 940478de5b1Stbbdev top.store(block, std::memory_order_relaxed); 94151c0b2f7Stbbdev } 94251c0b2f7Stbbdev 94351c0b2f7Stbbdev Block *LifoList::pop() 94451c0b2f7Stbbdev { 945478de5b1Stbbdev Block* block = nullptr; 946478de5b1Stbbdev if (top.load(std::memory_order_relaxed)) { 94751c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(lock); 948478de5b1Stbbdev block = top.load(std::memory_order_relaxed); 949478de5b1Stbbdev if (block) { 950478de5b1Stbbdev top.store(block->next, std::memory_order_relaxed); 95151c0b2f7Stbbdev } 95251c0b2f7Stbbdev } 95351c0b2f7Stbbdev return block; 95451c0b2f7Stbbdev } 95551c0b2f7Stbbdev 95651c0b2f7Stbbdev Block *LifoList::grab() 95751c0b2f7Stbbdev { 958478de5b1Stbbdev Block *block = nullptr; 959478de5b1Stbbdev if (top.load(std::memory_order_relaxed)) { 96051c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(lock); 961478de5b1Stbbdev block = top.load(std::memory_order_relaxed); 962478de5b1Stbbdev top.store(nullptr, std::memory_order_relaxed); 96351c0b2f7Stbbdev } 96451c0b2f7Stbbdev return block; 96551c0b2f7Stbbdev } 96651c0b2f7Stbbdev 96751c0b2f7Stbbdev /********* Thread and block related code *************/ 96851c0b2f7Stbbdev 96951c0b2f7Stbbdev template<bool poolDestroy> void AllLargeBlocksList::releaseAll(Backend *backend) { 97051c0b2f7Stbbdev LargeMemoryBlock *next, *lmb = loHead; 97157f524caSIlya Isaev loHead = nullptr; 97251c0b2f7Stbbdev 97351c0b2f7Stbbdev for (; lmb; lmb = next) { 97451c0b2f7Stbbdev next = lmb->gNext; 97551c0b2f7Stbbdev if (poolDestroy) { 97651c0b2f7Stbbdev // as it's pool destruction, no need to return object to backend, 97751c0b2f7Stbbdev // only remove backrefs, as they are global 97851c0b2f7Stbbdev removeBackRef(lmb->backRefIdx); 97951c0b2f7Stbbdev } else { 98051c0b2f7Stbbdev // clean g(Next|Prev) to prevent removing lmb 98151c0b2f7Stbbdev // from AllLargeBlocksList inside returnLargeObject 
98257f524caSIlya Isaev lmb->gNext = lmb->gPrev = nullptr; 98351c0b2f7Stbbdev backend->returnLargeObject(lmb); 98451c0b2f7Stbbdev } 98551c0b2f7Stbbdev } 98651c0b2f7Stbbdev } 98751c0b2f7Stbbdev 98851c0b2f7Stbbdev TLSData* MemoryPool::getTLS(bool create) 98951c0b2f7Stbbdev { 99051c0b2f7Stbbdev TLSData* tls = extMemPool.tlsPointerKey.getThreadMallocTLS(); 99151c0b2f7Stbbdev if (create && !tls) 99251c0b2f7Stbbdev tls = extMemPool.tlsPointerKey.createTLS(this, &extMemPool.backend); 99351c0b2f7Stbbdev return tls; 99451c0b2f7Stbbdev } 99551c0b2f7Stbbdev 99651c0b2f7Stbbdev /* 99751c0b2f7Stbbdev * Return the bin for the given size. 99851c0b2f7Stbbdev */ 99951c0b2f7Stbbdev inline Bin* TLSData::getAllocationBin(size_t size) 100051c0b2f7Stbbdev { 100151c0b2f7Stbbdev return bin + getIndex(size); 100251c0b2f7Stbbdev } 100351c0b2f7Stbbdev 100451c0b2f7Stbbdev /* Return an empty uninitialized block in a non-blocking fashion. */ 100551c0b2f7Stbbdev Block *MemoryPool::getEmptyBlock(size_t size) 100651c0b2f7Stbbdev { 100751c0b2f7Stbbdev TLSData* tls = getTLS(/*create=*/false); 100851c0b2f7Stbbdev // try to use per-thread cache, if TLS available 100951c0b2f7Stbbdev FreeBlockPool::ResOfGet resOfGet = tls? 101057f524caSIlya Isaev tls->freeSlabBlocks.getBlock() : FreeBlockPool::ResOfGet(nullptr, false); 101151c0b2f7Stbbdev Block *result = resOfGet.block; 101251c0b2f7Stbbdev 101351c0b2f7Stbbdev if (!result) { // not found in local cache, asks backend for slabs 101451c0b2f7Stbbdev int num = resOfGet.lastAccMiss? Backend::numOfSlabAllocOnMiss : 1; 101551c0b2f7Stbbdev BackRefIdx backRefIdx[Backend::numOfSlabAllocOnMiss]; 101651c0b2f7Stbbdev 101751c0b2f7Stbbdev result = static_cast<Block*>(extMemPool.backend.getSlabBlock(num)); 101857f524caSIlya Isaev if (!result) return nullptr; 101951c0b2f7Stbbdev 102051c0b2f7Stbbdev if (!extMemPool.userPool()) 102151c0b2f7Stbbdev for (int i=0; i<num; i++) { 102251c0b2f7Stbbdev backRefIdx[i] = BackRefIdx::newBackRef(/*largeObj=*/false); 102351c0b2f7Stbbdev if (backRefIdx[i].isInvalid()) { 102451c0b2f7Stbbdev // roll back resource allocation 102551c0b2f7Stbbdev for (int j=0; j<i; j++) 102651c0b2f7Stbbdev removeBackRef(backRefIdx[j]); 102751c0b2f7Stbbdev Block *b = result; 102851c0b2f7Stbbdev for (int j=0; j<num; b=(Block*)((uintptr_t)b+slabSize), j++) 102951c0b2f7Stbbdev extMemPool.backend.putSlabBlock(b); 103057f524caSIlya Isaev return nullptr; 103151c0b2f7Stbbdev } 103251c0b2f7Stbbdev } 103351c0b2f7Stbbdev // resources were allocated, register blocks 103451c0b2f7Stbbdev Block *b = result; 103551c0b2f7Stbbdev for (int i=0; i<num; b=(Block*)((uintptr_t)b+slabSize), i++) { 103651c0b2f7Stbbdev // slab block in user's pool must have invalid backRefIdx 103751c0b2f7Stbbdev if (extMemPool.userPool()) { 103851c0b2f7Stbbdev new (&b->backRefIdx) BackRefIdx(); 103951c0b2f7Stbbdev } else { 104051c0b2f7Stbbdev setBackRef(backRefIdx[i], b); 104151c0b2f7Stbbdev b->backRefIdx = backRefIdx[i]; 104251c0b2f7Stbbdev } 1043478de5b1Stbbdev b->tlsPtr.store(tls, std::memory_order_relaxed); 104451c0b2f7Stbbdev b->poolPtr = this; 104551c0b2f7Stbbdev // all but first one go to per-thread pool 104651c0b2f7Stbbdev if (i > 0) { 104751c0b2f7Stbbdev MALLOC_ASSERT(tls, ASSERT_TEXT); 104851c0b2f7Stbbdev tls->freeSlabBlocks.returnBlock(b); 104951c0b2f7Stbbdev } 105051c0b2f7Stbbdev } 105151c0b2f7Stbbdev } 105251c0b2f7Stbbdev MALLOC_ASSERT(result, ASSERT_TEXT); 105351c0b2f7Stbbdev result->initEmptyBlock(tls, size); 105451c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(result->objectSize), allocBlockNew); 
105551c0b2f7Stbbdev return result; 105651c0b2f7Stbbdev } 105751c0b2f7Stbbdev 105851c0b2f7Stbbdev void MemoryPool::returnEmptyBlock(Block *block, bool poolTheBlock) 105951c0b2f7Stbbdev { 106051c0b2f7Stbbdev block->reset(); 106151c0b2f7Stbbdev if (poolTheBlock) { 106251c0b2f7Stbbdev getTLS(/*create=*/false)->freeSlabBlocks.returnBlock(block); 106351c0b2f7Stbbdev } else { 106451c0b2f7Stbbdev // slab blocks in user's pools do not have valid backRefIdx 106551c0b2f7Stbbdev if (!extMemPool.userPool()) 106651c0b2f7Stbbdev removeBackRef(*(block->getBackRefIdx())); 106751c0b2f7Stbbdev extMemPool.backend.putSlabBlock(block); 106851c0b2f7Stbbdev } 106951c0b2f7Stbbdev } 107051c0b2f7Stbbdev 107151c0b2f7Stbbdev bool ExtMemoryPool::init(intptr_t poolId, rawAllocType rawAlloc, 107251c0b2f7Stbbdev rawFreeType rawFree, size_t granularity, 107351c0b2f7Stbbdev bool keepAllMemory, bool fixedPool) 107451c0b2f7Stbbdev { 107551c0b2f7Stbbdev this->poolId = poolId; 107651c0b2f7Stbbdev this->rawAlloc = rawAlloc; 107751c0b2f7Stbbdev this->rawFree = rawFree; 107851c0b2f7Stbbdev this->granularity = granularity; 107951c0b2f7Stbbdev this->keepAllMemory = keepAllMemory; 108051c0b2f7Stbbdev this->fixedPool = fixedPool; 108151c0b2f7Stbbdev this->delayRegsReleasing = false; 108251c0b2f7Stbbdev if (!initTLS()) 108351c0b2f7Stbbdev return false; 108451c0b2f7Stbbdev loc.init(this); 108551c0b2f7Stbbdev backend.init(this); 108657f524caSIlya Isaev MALLOC_ASSERT(isPoolValid(), nullptr); 108751c0b2f7Stbbdev return true; 108851c0b2f7Stbbdev } 108951c0b2f7Stbbdev 109051c0b2f7Stbbdev bool ExtMemoryPool::initTLS() { return tlsPointerKey.init(); } 109151c0b2f7Stbbdev 109251c0b2f7Stbbdev bool MemoryPool::init(intptr_t poolId, const MemPoolPolicy *policy) 109351c0b2f7Stbbdev { 109451c0b2f7Stbbdev if (!extMemPool.init(poolId, policy->pAlloc, policy->pFree, 109551c0b2f7Stbbdev policy->granularity? 
policy->granularity : defaultGranularity, 109651c0b2f7Stbbdev policy->keepAllMemory, policy->fixedPool)) 109751c0b2f7Stbbdev return false; 109851c0b2f7Stbbdev { 109951c0b2f7Stbbdev MallocMutex::scoped_lock lock(memPoolListLock); 110051c0b2f7Stbbdev next = defaultMemPool->next; 110151c0b2f7Stbbdev defaultMemPool->next = this; 110251c0b2f7Stbbdev prev = defaultMemPool; 110351c0b2f7Stbbdev if (next) 110451c0b2f7Stbbdev next->prev = this; 110551c0b2f7Stbbdev } 110651c0b2f7Stbbdev return true; 110751c0b2f7Stbbdev } 110851c0b2f7Stbbdev 110951c0b2f7Stbbdev bool MemoryPool::reset() 111051c0b2f7Stbbdev { 111151c0b2f7Stbbdev MALLOC_ASSERT(extMemPool.userPool(), "No reset for the system pool."); 111251c0b2f7Stbbdev // memory is not releasing during pool reset 111351c0b2f7Stbbdev // TODO: mark regions to release unused on next reset() 111451c0b2f7Stbbdev extMemPool.delayRegionsReleasing(true); 111551c0b2f7Stbbdev 111651c0b2f7Stbbdev bootStrapBlocks.reset(); 111751c0b2f7Stbbdev extMemPool.lmbList.releaseAll</*poolDestroy=*/false>(&extMemPool.backend); 111851c0b2f7Stbbdev if (!extMemPool.reset()) 111951c0b2f7Stbbdev return false; 112051c0b2f7Stbbdev 112151c0b2f7Stbbdev if (!extMemPool.initTLS()) 112251c0b2f7Stbbdev return false; 112351c0b2f7Stbbdev extMemPool.delayRegionsReleasing(false); 112451c0b2f7Stbbdev return true; 112551c0b2f7Stbbdev } 112651c0b2f7Stbbdev 112751c0b2f7Stbbdev bool MemoryPool::destroy() 112851c0b2f7Stbbdev { 112951c0b2f7Stbbdev #if __TBB_MALLOC_LOCACHE_STAT 113051c0b2f7Stbbdev extMemPool.loc.reportStat(stdout); 113151c0b2f7Stbbdev #endif 113251c0b2f7Stbbdev #if __TBB_MALLOC_BACKEND_STAT 113351c0b2f7Stbbdev extMemPool.backend.reportStat(stdout); 113451c0b2f7Stbbdev #endif 113551c0b2f7Stbbdev { 113651c0b2f7Stbbdev MallocMutex::scoped_lock lock(memPoolListLock); 113751c0b2f7Stbbdev // remove itself from global pool list 113851c0b2f7Stbbdev if (prev) 113951c0b2f7Stbbdev prev->next = next; 114051c0b2f7Stbbdev if (next) 114151c0b2f7Stbbdev next->prev = prev; 114251c0b2f7Stbbdev } 114351c0b2f7Stbbdev // slab blocks in non-default pool do not have backreferences, 114451c0b2f7Stbbdev // only large objects do 114551c0b2f7Stbbdev if (extMemPool.userPool()) 114651c0b2f7Stbbdev extMemPool.lmbList.releaseAll</*poolDestroy=*/true>(&extMemPool.backend); 114751c0b2f7Stbbdev else { 114851c0b2f7Stbbdev // only one non-userPool() is supported now 114957f524caSIlya Isaev MALLOC_ASSERT(this==defaultMemPool, nullptr); 115051c0b2f7Stbbdev // There and below in extMemPool.destroy(), do not restore initial state 115151c0b2f7Stbbdev // for user pool, because it's just about to be released. But for system 115251c0b2f7Stbbdev // pool restoring, we do not want to do zeroing of it on subsequent reload. 
115351c0b2f7Stbbdev bootStrapBlocks.reset(); 115451c0b2f7Stbbdev extMemPool.orphanedBlocks.reset(); 115551c0b2f7Stbbdev } 115651c0b2f7Stbbdev return extMemPool.destroy(); 115751c0b2f7Stbbdev } 115851c0b2f7Stbbdev 115951c0b2f7Stbbdev void MemoryPool::onThreadShutdown(TLSData *tlsData) 116051c0b2f7Stbbdev { 116151c0b2f7Stbbdev if (tlsData) { // might be called for "empty" TLS 116251c0b2f7Stbbdev tlsData->release(); 116351c0b2f7Stbbdev bootStrapBlocks.free(tlsData); 116451c0b2f7Stbbdev clearTLS(); 116551c0b2f7Stbbdev } 116651c0b2f7Stbbdev } 116751c0b2f7Stbbdev 116851c0b2f7Stbbdev #if MALLOC_DEBUG 116951c0b2f7Stbbdev void Bin::verifyTLSBin (size_t size) const 117051c0b2f7Stbbdev { 117151c0b2f7Stbbdev /* The debug version verifies the TLSBin as needed */ 117251c0b2f7Stbbdev uint32_t objSize = getObjectSize(size); 117351c0b2f7Stbbdev 117451c0b2f7Stbbdev if (activeBlk) { 117551c0b2f7Stbbdev MALLOC_ASSERT( activeBlk->isOwnedByCurrentThread(), ASSERT_TEXT ); 117651c0b2f7Stbbdev MALLOC_ASSERT( activeBlk->objectSize == objSize, ASSERT_TEXT ); 117751c0b2f7Stbbdev #if MALLOC_DEBUG>1 117851c0b2f7Stbbdev for (Block* temp = activeBlk->next; temp; temp=temp->next) { 117951c0b2f7Stbbdev MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT ); 118051c0b2f7Stbbdev MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT ); 118151c0b2f7Stbbdev MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT ); 118251c0b2f7Stbbdev MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT ); 118351c0b2f7Stbbdev if (temp->next) { 118451c0b2f7Stbbdev MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT ); 118551c0b2f7Stbbdev } 118651c0b2f7Stbbdev } 118751c0b2f7Stbbdev for (Block* temp = activeBlk->previous; temp; temp=temp->previous) { 118851c0b2f7Stbbdev MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT ); 118951c0b2f7Stbbdev MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT ); 119051c0b2f7Stbbdev MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT ); 119151c0b2f7Stbbdev MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT ); 119251c0b2f7Stbbdev if (temp->previous) { 119351c0b2f7Stbbdev MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT ); 119451c0b2f7Stbbdev } 119551c0b2f7Stbbdev } 119651c0b2f7Stbbdev #endif /* MALLOC_DEBUG>1 */ 119751c0b2f7Stbbdev } 119851c0b2f7Stbbdev } 119951c0b2f7Stbbdev #else /* MALLOC_DEBUG */ 120051c0b2f7Stbbdev inline void Bin::verifyTLSBin (size_t) const { } 120151c0b2f7Stbbdev #endif /* MALLOC_DEBUG */ 120251c0b2f7Stbbdev 120351c0b2f7Stbbdev /* 120451c0b2f7Stbbdev * Add a block to the start of this tls bin list. 
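 * (More precisely, the new block is linked immediately in front of the current active block;
 * activeBlk itself changes only when the bin was empty. See the body below.)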
120551c0b2f7Stbbdev */ 120651c0b2f7Stbbdev void Bin::pushTLSBin(Block* block) 120751c0b2f7Stbbdev { 120851c0b2f7Stbbdev /* The objectSize should be defined and not a parameter 120951c0b2f7Stbbdev because the function is applied to partially filled blocks as well */ 121051c0b2f7Stbbdev unsigned int size = block->objectSize; 121151c0b2f7Stbbdev 121251c0b2f7Stbbdev MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT ); 121351c0b2f7Stbbdev MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT ); 121457f524caSIlya Isaev MALLOC_ASSERT( block->next == nullptr, ASSERT_TEXT ); 121557f524caSIlya Isaev MALLOC_ASSERT( block->previous == nullptr, ASSERT_TEXT ); 121651c0b2f7Stbbdev 121751c0b2f7Stbbdev MALLOC_ASSERT( this, ASSERT_TEXT ); 121851c0b2f7Stbbdev verifyTLSBin(size); 121951c0b2f7Stbbdev 122051c0b2f7Stbbdev block->next = activeBlk; 122151c0b2f7Stbbdev if( activeBlk ) { 122251c0b2f7Stbbdev block->previous = activeBlk->previous; 122351c0b2f7Stbbdev activeBlk->previous = block; 122451c0b2f7Stbbdev if( block->previous ) 122551c0b2f7Stbbdev block->previous->next = block; 122651c0b2f7Stbbdev } else { 122751c0b2f7Stbbdev activeBlk = block; 122851c0b2f7Stbbdev } 122951c0b2f7Stbbdev 123051c0b2f7Stbbdev verifyTLSBin(size); 123151c0b2f7Stbbdev } 123251c0b2f7Stbbdev 123351c0b2f7Stbbdev /* 123451c0b2f7Stbbdev * Take a block out of its tls bin (e.g. before removal). 123551c0b2f7Stbbdev */ 123651c0b2f7Stbbdev void Bin::outofTLSBin(Block* block) 123751c0b2f7Stbbdev { 123851c0b2f7Stbbdev unsigned int size = block->objectSize; 123951c0b2f7Stbbdev 124051c0b2f7Stbbdev MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT ); 124151c0b2f7Stbbdev MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT ); 124251c0b2f7Stbbdev 124351c0b2f7Stbbdev MALLOC_ASSERT( this, ASSERT_TEXT ); 124451c0b2f7Stbbdev verifyTLSBin(size); 124551c0b2f7Stbbdev 124651c0b2f7Stbbdev if (block == activeBlk) { 124751c0b2f7Stbbdev activeBlk = block->previous? 
block->previous : block->next; 124851c0b2f7Stbbdev } 124951c0b2f7Stbbdev /* Unlink the block */ 125051c0b2f7Stbbdev if (block->previous) { 125151c0b2f7Stbbdev MALLOC_ASSERT( block->previous->next == block, ASSERT_TEXT ); 125251c0b2f7Stbbdev block->previous->next = block->next; 125351c0b2f7Stbbdev } 125451c0b2f7Stbbdev if (block->next) { 125551c0b2f7Stbbdev MALLOC_ASSERT( block->next->previous == block, ASSERT_TEXT ); 125651c0b2f7Stbbdev block->next->previous = block->previous; 125751c0b2f7Stbbdev } 125857f524caSIlya Isaev block->next = nullptr; 125957f524caSIlya Isaev block->previous = nullptr; 126051c0b2f7Stbbdev 126151c0b2f7Stbbdev verifyTLSBin(size); 126251c0b2f7Stbbdev } 126351c0b2f7Stbbdev 126451c0b2f7Stbbdev Block* Bin::getPrivatizedFreeListBlock() 126551c0b2f7Stbbdev { 126651c0b2f7Stbbdev Block* block; 126751c0b2f7Stbbdev MALLOC_ASSERT( this, ASSERT_TEXT ); 126851c0b2f7Stbbdev // if this method is called, active block usage must be unsuccessful 12692110128eSsarathnandu MALLOC_ASSERT( (!activeBlk && !mailbox.load(std::memory_order_relaxed)) || (activeBlk && activeBlk->isFull), ASSERT_TEXT ); 127051c0b2f7Stbbdev 127151c0b2f7Stbbdev // the counter should be changed STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList); 127251c0b2f7Stbbdev if (!mailbox.load(std::memory_order_acquire)) // hotpath is empty mailbox 127357f524caSIlya Isaev return nullptr; 127451c0b2f7Stbbdev else { // mailbox is not empty, take lock and inspect it 127551c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(mailLock); 127651c0b2f7Stbbdev block = mailbox.load(std::memory_order_relaxed); 127751c0b2f7Stbbdev if( block ) { 127851c0b2f7Stbbdev MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT ); 1279478de5b1Stbbdev MALLOC_ASSERT( !isNotForUse(block->nextPrivatizable.load(std::memory_order_relaxed)), ASSERT_TEXT ); 1280478de5b1Stbbdev mailbox.store(block->nextPrivatizable.load(std::memory_order_relaxed), std::memory_order_relaxed); 1281478de5b1Stbbdev block->nextPrivatizable.store((Block*)this, std::memory_order_relaxed); 128251c0b2f7Stbbdev } 128351c0b2f7Stbbdev } 128451c0b2f7Stbbdev if( block ) { 128551c0b2f7Stbbdev MALLOC_ASSERT( isSolidPtr(block->publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT ); 128651c0b2f7Stbbdev block->privatizePublicFreeList(); 128751c0b2f7Stbbdev block->adjustPositionInBin(this); 128851c0b2f7Stbbdev } 128951c0b2f7Stbbdev return block; 129051c0b2f7Stbbdev } 129151c0b2f7Stbbdev 129251c0b2f7Stbbdev void Bin::addPublicFreeListBlock(Block* block) 129351c0b2f7Stbbdev { 129451c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(mailLock); 1295478de5b1Stbbdev block->nextPrivatizable.store(mailbox.load(std::memory_order_relaxed), std::memory_order_relaxed); 129651c0b2f7Stbbdev mailbox.store(block, std::memory_order_relaxed); 129751c0b2f7Stbbdev } 129851c0b2f7Stbbdev 129951c0b2f7Stbbdev // Process publicly freed objects in all blocks and return empty blocks 130051c0b2f7Stbbdev // to the backend in order to reduce overall footprint. 
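// (Background for the next function: the mailbox drained by Bin::cleanPublicFreeLists() is
// filled through the lock-free publicFreeList hand-off used above. The sketch below is an
// illustration only and is kept under "#if 0", so it is never compiled: a simplified,
// hypothetical model in which a non-owning thread pushes a freed object with a CAS loop and
// the owning thread later privatizes the whole list with a single exchange. All names here
// (DemoNode, demoPublicList, demoPush, demoPrivatize) are invented for the illustration; the
// real code, Block::freePublicObject and Block::privatizePublicFreeList below, additionally
// deals with the UNUSABLE marker and the nextPrivatizable/mailbox machinery.)
#if 0
struct DemoNode { DemoNode* next; };

// Shared LIFO head; std::atomic is already available in this translation unit.
static std::atomic<DemoNode*> demoPublicList{nullptr};

// A thread that does NOT own the block returns an object: lock-free LIFO push.
static void demoPush(DemoNode* n) {
    DemoNode* head = demoPublicList.load(std::memory_order_relaxed);
    do {
        n->next = head; // link in front of the currently observed head
    } while (!demoPublicList.compare_exchange_weak(head, n,
                 std::memory_order_release, std::memory_order_relaxed));
}

// The owning thread grabs the whole list at once and then walks it privately.
static DemoNode* demoPrivatize() {
    return demoPublicList.exchange(nullptr, std::memory_order_acquire);
}
#endif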
130151c0b2f7Stbbdev bool Bin::cleanPublicFreeLists() 130251c0b2f7Stbbdev { 130351c0b2f7Stbbdev Block* block; 130451c0b2f7Stbbdev if (!mailbox.load(std::memory_order_acquire)) 130551c0b2f7Stbbdev return false; 130651c0b2f7Stbbdev else { 130751c0b2f7Stbbdev // Grab all the blocks in the mailbox 130851c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(mailLock); 130951c0b2f7Stbbdev block = mailbox.load(std::memory_order_relaxed); 131057f524caSIlya Isaev mailbox.store(nullptr, std::memory_order_relaxed); 131151c0b2f7Stbbdev } 131251c0b2f7Stbbdev bool released = false; 131351c0b2f7Stbbdev while (block) { 131451c0b2f7Stbbdev MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT ); 1315478de5b1Stbbdev Block* tmp = block->nextPrivatizable.load(std::memory_order_relaxed); 1316478de5b1Stbbdev block->nextPrivatizable.store((Block*)this, std::memory_order_relaxed); 131751c0b2f7Stbbdev block->privatizePublicFreeList(); 131851c0b2f7Stbbdev if (block->empty()) { 131951c0b2f7Stbbdev processEmptyBlock(block, /*poolTheBlock=*/false); 132051c0b2f7Stbbdev released = true; 132151c0b2f7Stbbdev } else 132251c0b2f7Stbbdev block->adjustPositionInBin(this); 132351c0b2f7Stbbdev block = tmp; 132451c0b2f7Stbbdev } 132551c0b2f7Stbbdev return released; 132651c0b2f7Stbbdev } 132751c0b2f7Stbbdev 132851c0b2f7Stbbdev bool Block::adjustFullness() 132951c0b2f7Stbbdev { 133051c0b2f7Stbbdev if (bumpPtr) { 133151c0b2f7Stbbdev /* If we are still using a bump ptr for this block it is empty enough to use. */ 133251c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough); 133351c0b2f7Stbbdev isFull = false; 133451c0b2f7Stbbdev } else { 133551c0b2f7Stbbdev const float threshold = (slabSize - sizeof(Block)) * (1 - emptyEnoughRatio); 133651c0b2f7Stbbdev /* allocatedCount shows how many objects in the block are in use; however it still counts 133751c0b2f7Stbbdev * blocks freed by other threads; so prior call to privatizePublicFreeList() is recommended */ 133851c0b2f7Stbbdev isFull = (allocatedCount*objectSize > threshold) ? 
true : false; 133951c0b2f7Stbbdev #if COLLECT_STATISTICS 134051c0b2f7Stbbdev if (isFull) 134151c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), examineNotEmpty); 134251c0b2f7Stbbdev else 134351c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough); 134451c0b2f7Stbbdev #endif 134551c0b2f7Stbbdev } 134651c0b2f7Stbbdev return isFull; 134751c0b2f7Stbbdev } 134851c0b2f7Stbbdev 134951c0b2f7Stbbdev // This method resides in class Block, and not in class Bin, in order to avoid 135051c0b2f7Stbbdev // calling getAllocationBin on a reasonably hot path in Block::freeOwnObject 135157f524caSIlya Isaev void Block::adjustPositionInBin(Bin* bin/*=nullptr*/) 135251c0b2f7Stbbdev { 135351c0b2f7Stbbdev // If the block were full, but became empty enough to use, 135451c0b2f7Stbbdev // move it to the front of the list 135551c0b2f7Stbbdev if (isFull && !adjustFullness()) { 135651c0b2f7Stbbdev if (!bin) 1357478de5b1Stbbdev bin = tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize); 135851c0b2f7Stbbdev bin->moveBlockToFront(this); 135951c0b2f7Stbbdev } 136051c0b2f7Stbbdev } 136151c0b2f7Stbbdev 136251c0b2f7Stbbdev /* Restore the bump pointer for an empty block that is planned to use */ 136351c0b2f7Stbbdev void Block::restoreBumpPtr() 136451c0b2f7Stbbdev { 136551c0b2f7Stbbdev MALLOC_ASSERT( allocatedCount == 0, ASSERT_TEXT ); 136651c0b2f7Stbbdev MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT ); 136751c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), freeRestoreBumpPtr); 136851c0b2f7Stbbdev bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize); 136957f524caSIlya Isaev freeList = nullptr; 137051c0b2f7Stbbdev isFull = false; 137151c0b2f7Stbbdev } 137251c0b2f7Stbbdev 137351c0b2f7Stbbdev void Block::freeOwnObject(void *object) 137451c0b2f7Stbbdev { 1375478de5b1Stbbdev tlsPtr.load(std::memory_order_relaxed)->markUsed(); 137651c0b2f7Stbbdev allocatedCount--; 137751c0b2f7Stbbdev MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 137851c0b2f7Stbbdev #if COLLECT_STATISTICS 137951c0b2f7Stbbdev // Note that getAllocationBin is not called on the hottest path with statistics off. 
1380478de5b1Stbbdev if (tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize)->getActiveBlock() != this) 138151c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), freeToInactiveBlock); 138251c0b2f7Stbbdev else 138351c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), freeToActiveBlock); 138451c0b2f7Stbbdev #endif 138551c0b2f7Stbbdev if (empty()) { 138651c0b2f7Stbbdev // If the last object of a slab is freed, the slab cannot be marked full 138751c0b2f7Stbbdev MALLOC_ASSERT(!isFull, ASSERT_TEXT); 1388478de5b1Stbbdev tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize)->processEmptyBlock(this, /*poolTheBlock=*/true); 138951c0b2f7Stbbdev } else { // hot path 139051c0b2f7Stbbdev FreeObject *objectToFree = findObjectToFree(object); 139151c0b2f7Stbbdev objectToFree->next = freeList; 139251c0b2f7Stbbdev freeList = objectToFree; 139351c0b2f7Stbbdev adjustPositionInBin(); 139451c0b2f7Stbbdev } 139551c0b2f7Stbbdev } 139651c0b2f7Stbbdev 139751c0b2f7Stbbdev void Block::freePublicObject (FreeObject *objectToFree) 139851c0b2f7Stbbdev { 1399478de5b1Stbbdev FreeObject* localPublicFreeList{}; 140051c0b2f7Stbbdev 140151c0b2f7Stbbdev MALLOC_ITT_SYNC_RELEASING(&publicFreeList); 140251c0b2f7Stbbdev #if FREELIST_NONBLOCKING 140351c0b2f7Stbbdev // TBB_REVAMP_TODO: make it non atomic in non-blocking scenario 1404478de5b1Stbbdev localPublicFreeList = publicFreeList.load(std::memory_order_relaxed); 140551c0b2f7Stbbdev do { 1406478de5b1Stbbdev objectToFree->next = localPublicFreeList; 140751c0b2f7Stbbdev // no backoff necessary because trying to make change, not waiting for a change 1408478de5b1Stbbdev } while( !publicFreeList.compare_exchange_strong(localPublicFreeList, objectToFree) ); 140951c0b2f7Stbbdev #else 141051c0b2f7Stbbdev STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList); 141151c0b2f7Stbbdev { 141251c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(publicFreeListLock); 141351c0b2f7Stbbdev localPublicFreeList = objectToFree->next = publicFreeList; 141451c0b2f7Stbbdev publicFreeList = objectToFree; 141551c0b2f7Stbbdev } 141651c0b2f7Stbbdev #endif 141751c0b2f7Stbbdev 141857f524caSIlya Isaev if( localPublicFreeList==nullptr ) { 141951c0b2f7Stbbdev // if the block is abandoned, its nextPrivatizable pointer should be UNUSABLE 142051c0b2f7Stbbdev // otherwise, it should point to the bin the block belongs to. 142151c0b2f7Stbbdev // reading nextPrivatizable is thread-safe below, because: 142257f524caSIlya Isaev // 1) the executing thread atomically got publicFreeList==nullptr and changed it to non-nullptr; 142357f524caSIlya Isaev // 2) only owning thread can change it back to nullptr, 142451c0b2f7Stbbdev // 3) but it can not be done until the block is put to the mailbox 142551c0b2f7Stbbdev // So the executing thread is now the only one that can change nextPrivatizable 1426478de5b1Stbbdev Block* next = nextPrivatizable.load(std::memory_order_acquire); 1427478de5b1Stbbdev if( !isNotForUse(next) ) { 1428478de5b1Stbbdev MALLOC_ASSERT( next!=nullptr, ASSERT_TEXT ); 1429478de5b1Stbbdev Bin* theBin = (Bin*) next; 143051c0b2f7Stbbdev #if MALLOC_DEBUG && TBB_REVAMP_TODO 143151c0b2f7Stbbdev // FIXME: The thread that returns the block is not the block's owner. 143251c0b2f7Stbbdev // The below assertion compares 'theBin' against the caller's local bin, thus, it always fails. 143351c0b2f7Stbbdev // Need to find a way to get the correct remote bin for comparison. 
143451c0b2f7Stbbdev { // check that nextPrivatizable points to the bin the block belongs to 143551c0b2f7Stbbdev uint32_t index = getIndex( objectSize ); 143651c0b2f7Stbbdev TLSData* tls = getThreadMallocTLS(); 143751c0b2f7Stbbdev MALLOC_ASSERT( theBin==tls->bin+index, ASSERT_TEXT ); 143851c0b2f7Stbbdev } 143951c0b2f7Stbbdev #endif // MALLOC_DEBUG 144051c0b2f7Stbbdev theBin->addPublicFreeListBlock(this); 144151c0b2f7Stbbdev } 144251c0b2f7Stbbdev } 144351c0b2f7Stbbdev STAT_increment(getThreadId(), ThreadCommonCounters, freeToOtherThread); 1444478de5b1Stbbdev STAT_increment(ownerTid.load(std::memory_order_relaxed), getIndex(objectSize), freeByOtherThread); 144551c0b2f7Stbbdev } 144651c0b2f7Stbbdev 144751c0b2f7Stbbdev // Make objects freed by other threads available for use again 144851c0b2f7Stbbdev void Block::privatizePublicFreeList( bool reset ) 144951c0b2f7Stbbdev { 145051c0b2f7Stbbdev FreeObject *localPublicFreeList; 145151c0b2f7Stbbdev // If reset is false, publicFreeList should not be zeroed but set to UNUSABLE 145251c0b2f7Stbbdev // to properly synchronize with other threads freeing objects to this slab. 145351c0b2f7Stbbdev const intptr_t endMarker = reset ? 0 : UNUSABLE; 145451c0b2f7Stbbdev 145557f524caSIlya Isaev // Only the owner thread may reset the pointer to nullptr 145651c0b2f7Stbbdev MALLOC_ASSERT( isOwnedByCurrentThread() || !reset, ASSERT_TEXT ); 145751c0b2f7Stbbdev #if FREELIST_NONBLOCKING 145851c0b2f7Stbbdev localPublicFreeList = publicFreeList.exchange((FreeObject*)endMarker); 145951c0b2f7Stbbdev #else 146051c0b2f7Stbbdev STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList); 146151c0b2f7Stbbdev { 146251c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(publicFreeListLock); 146351c0b2f7Stbbdev localPublicFreeList = publicFreeList; 146451c0b2f7Stbbdev publicFreeList = endMarker; 146551c0b2f7Stbbdev } 146651c0b2f7Stbbdev #endif 146751c0b2f7Stbbdev MALLOC_ITT_SYNC_ACQUIRED(&publicFreeList); 146851c0b2f7Stbbdev MALLOC_ASSERT( !(reset && isNotForUse(publicFreeList)), ASSERT_TEXT ); 146951c0b2f7Stbbdev 147057f524caSIlya Isaev // publicFreeList must have been UNUSABLE or valid, but not nullptr 147157f524caSIlya Isaev MALLOC_ASSERT( localPublicFreeList!=nullptr, ASSERT_TEXT ); 147251c0b2f7Stbbdev if( isSolidPtr(localPublicFreeList) ) { 147351c0b2f7Stbbdev MALLOC_ASSERT( allocatedCount <= (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 147451c0b2f7Stbbdev /* other threads did not change the counter freeing our blocks */ 147551c0b2f7Stbbdev allocatedCount--; 147651c0b2f7Stbbdev FreeObject *temp = localPublicFreeList; 147757f524caSIlya Isaev while( isSolidPtr(temp->next) ){ // the list will end with either nullptr or UNUSABLE 147851c0b2f7Stbbdev temp = temp->next; 147951c0b2f7Stbbdev allocatedCount--; 148051c0b2f7Stbbdev MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 148151c0b2f7Stbbdev } 148251c0b2f7Stbbdev /* merge with local freeList */ 148351c0b2f7Stbbdev temp->next = freeList; 148451c0b2f7Stbbdev freeList = localPublicFreeList; 148551c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), allocPrivatized); 148651c0b2f7Stbbdev } 148751c0b2f7Stbbdev } 148851c0b2f7Stbbdev 148951c0b2f7Stbbdev void Block::privatizeOrphaned(TLSData *tls, unsigned index) 149051c0b2f7Stbbdev { 149151c0b2f7Stbbdev Bin* bin = tls->bin + index; 149251c0b2f7Stbbdev STAT_increment(getThreadId(), index, allocBlockPublic); 149357f524caSIlya Isaev next = nullptr; 149457f524caSIlya Isaev previous = nullptr; 149557f524caSIlya Isaev MALLOC_ASSERT( 
publicFreeList.load(std::memory_order_relaxed) != nullptr, ASSERT_TEXT ); 149651c0b2f7Stbbdev /* There is not a race here since no other thread owns this block */ 149751c0b2f7Stbbdev markOwned(tls); 149851c0b2f7Stbbdev // It is safe to change nextPrivatizable, as publicFreeList is not null 1499478de5b1Stbbdev MALLOC_ASSERT( isNotForUse(nextPrivatizable.load(std::memory_order_relaxed)), ASSERT_TEXT ); 1500478de5b1Stbbdev nextPrivatizable.store((Block*)bin, std::memory_order_relaxed); 150151c0b2f7Stbbdev // the next call is required to change publicFreeList to 0 150251c0b2f7Stbbdev privatizePublicFreeList(); 150351c0b2f7Stbbdev if( empty() ) { 150451c0b2f7Stbbdev restoreBumpPtr(); 150551c0b2f7Stbbdev } else { 150651c0b2f7Stbbdev adjustFullness(); // check the block fullness and set isFull 150751c0b2f7Stbbdev } 150851c0b2f7Stbbdev MALLOC_ASSERT( !isNotForUse(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT ); 150951c0b2f7Stbbdev } 151051c0b2f7Stbbdev 151151c0b2f7Stbbdev 151251c0b2f7Stbbdev bool Block::readyToShare() 151351c0b2f7Stbbdev { 151457f524caSIlya Isaev FreeObject* oldVal = nullptr; 151551c0b2f7Stbbdev #if FREELIST_NONBLOCKING 151651c0b2f7Stbbdev publicFreeList.compare_exchange_strong(oldVal, (FreeObject*)UNUSABLE); 151751c0b2f7Stbbdev #else 151851c0b2f7Stbbdev STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList); 151951c0b2f7Stbbdev { 152051c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(publicFreeListLock); 152157f524caSIlya Isaev if ( (oldVal=publicFreeList)==nullptr ) 152251c0b2f7Stbbdev (intptr_t&)(publicFreeList) = UNUSABLE; 152351c0b2f7Stbbdev } 152451c0b2f7Stbbdev #endif 152557f524caSIlya Isaev return oldVal==nullptr; 152651c0b2f7Stbbdev } 152751c0b2f7Stbbdev 152851c0b2f7Stbbdev void Block::shareOrphaned(intptr_t binTag, unsigned index) 152951c0b2f7Stbbdev { 153051c0b2f7Stbbdev MALLOC_ASSERT( binTag, ASSERT_TEXT ); 153151c0b2f7Stbbdev // unreferenced formal parameter warning 153251c0b2f7Stbbdev tbb::detail::suppress_unused_warning(index); 153351c0b2f7Stbbdev STAT_increment(getThreadId(), index, freeBlockPublic); 153451c0b2f7Stbbdev markOrphaned(); 1535478de5b1Stbbdev if ((intptr_t)nextPrivatizable.load(std::memory_order_relaxed) == binTag) { 153651c0b2f7Stbbdev // First check passed: the block is not in mailbox yet. 153751c0b2f7Stbbdev // Need to set publicFreeList to non-zero, so other threads 153851c0b2f7Stbbdev // will not change nextPrivatizable and it can be zeroed. 153951c0b2f7Stbbdev if ( !readyToShare() ) { 154051c0b2f7Stbbdev // another thread freed an object; we need to wait until it finishes. 154151c0b2f7Stbbdev // There is no need for exponential backoff, as the wait here is not for a lock; 154251c0b2f7Stbbdev // but need to yield, so the thread we wait has a chance to run. 
154351c0b2f7Stbbdev // TODO: add a pause to also be friendly to hyperthreads 154451c0b2f7Stbbdev int count = 256; 1545478de5b1Stbbdev while ((intptr_t)nextPrivatizable.load(std::memory_order_relaxed) == binTag) { 154651c0b2f7Stbbdev if (--count==0) { 154751c0b2f7Stbbdev do_yield(); 154851c0b2f7Stbbdev count = 256; 154951c0b2f7Stbbdev } 155051c0b2f7Stbbdev } 155151c0b2f7Stbbdev } 155251c0b2f7Stbbdev } 155357f524caSIlya Isaev MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) !=nullptr, ASSERT_TEXT ); 155451c0b2f7Stbbdev // now it is safe to change our data 155557f524caSIlya Isaev previous = nullptr; 155651c0b2f7Stbbdev // it is caller responsibility to ensure that the list of blocks 155751c0b2f7Stbbdev // formed by nextPrivatizable pointers is kept consistent if required. 155851c0b2f7Stbbdev // if only called from thread shutdown code, it does not matter. 1559478de5b1Stbbdev nextPrivatizable.store((Block*)UNUSABLE, std::memory_order_relaxed); 156051c0b2f7Stbbdev } 156151c0b2f7Stbbdev 156251c0b2f7Stbbdev void Block::cleanBlockHeader() 156351c0b2f7Stbbdev { 1564478de5b1Stbbdev next = nullptr; 1565478de5b1Stbbdev previous = nullptr; 1566478de5b1Stbbdev freeList = nullptr; 156751c0b2f7Stbbdev allocatedCount = 0; 156851c0b2f7Stbbdev isFull = false; 1569478de5b1Stbbdev tlsPtr.store(nullptr, std::memory_order_relaxed); 157051c0b2f7Stbbdev 1571478de5b1Stbbdev publicFreeList.store(nullptr, std::memory_order_relaxed); 157251c0b2f7Stbbdev } 157351c0b2f7Stbbdev 157451c0b2f7Stbbdev void Block::initEmptyBlock(TLSData *tls, size_t size) 157551c0b2f7Stbbdev { 157651c0b2f7Stbbdev // Having getIndex and getObjectSize called next to each other 157751c0b2f7Stbbdev // allows better compiler optimization as they basically share the code. 157851c0b2f7Stbbdev unsigned int index = getIndex(size); 157951c0b2f7Stbbdev unsigned int objSz = getObjectSize(size); 158051c0b2f7Stbbdev 158151c0b2f7Stbbdev cleanBlockHeader(); 158251c0b2f7Stbbdev objectSize = objSz; 158351c0b2f7Stbbdev markOwned(tls); 158451c0b2f7Stbbdev // bump pointer should be prepared for first allocation - thus mode it down to objectSize 158551c0b2f7Stbbdev bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize); 158651c0b2f7Stbbdev 158751c0b2f7Stbbdev // each block should have the address where the head of the list of "privatizable" blocks is kept 158857f524caSIlya Isaev // the only exception is a block for boot strap which is initialized when TLS is yet nullptr 1589478de5b1Stbbdev nextPrivatizable.store( tls? (Block*)(tls->bin + index) : nullptr, std::memory_order_relaxed); 159051c0b2f7Stbbdev TRACEF(( "[ScalableMalloc trace] Empty block %p is initialized, owner is %ld, objectSize is %d, bumpPtr is %p\n", 1591478de5b1Stbbdev this, tlsPtr.load(std::memory_order_relaxed) ? 
getThreadId() : -1, objectSize, bumpPtr )); 159251c0b2f7Stbbdev } 159351c0b2f7Stbbdev 159451c0b2f7Stbbdev Block *OrphanedBlocks::get(TLSData *tls, unsigned int size) 159551c0b2f7Stbbdev { 159651c0b2f7Stbbdev // TODO: try to use index from getAllocationBin 159751c0b2f7Stbbdev unsigned int index = getIndex(size); 159851c0b2f7Stbbdev Block *block = bins[index].pop(); 159951c0b2f7Stbbdev if (block) { 160051c0b2f7Stbbdev MALLOC_ITT_SYNC_ACQUIRED(bins+index); 160151c0b2f7Stbbdev block->privatizeOrphaned(tls, index); 160251c0b2f7Stbbdev } 160351c0b2f7Stbbdev return block; 160451c0b2f7Stbbdev } 160551c0b2f7Stbbdev 160651c0b2f7Stbbdev void OrphanedBlocks::put(intptr_t binTag, Block *block) 160751c0b2f7Stbbdev { 160851c0b2f7Stbbdev unsigned int index = getIndex(block->getSize()); 160951c0b2f7Stbbdev block->shareOrphaned(binTag, index); 161051c0b2f7Stbbdev MALLOC_ITT_SYNC_RELEASING(bins+index); 161151c0b2f7Stbbdev bins[index].push(block); 161251c0b2f7Stbbdev } 161351c0b2f7Stbbdev 161451c0b2f7Stbbdev void OrphanedBlocks::reset() 161551c0b2f7Stbbdev { 161651c0b2f7Stbbdev for (uint32_t i=0; i<numBlockBinLimit; i++) 161751c0b2f7Stbbdev new (bins+i) LifoList(); 161851c0b2f7Stbbdev } 161951c0b2f7Stbbdev 162051c0b2f7Stbbdev bool OrphanedBlocks::cleanup(Backend* backend) 162151c0b2f7Stbbdev { 162251c0b2f7Stbbdev bool released = false; 162351c0b2f7Stbbdev for (uint32_t i=0; i<numBlockBinLimit; i++) { 162451c0b2f7Stbbdev Block* block = bins[i].grab(); 162551c0b2f7Stbbdev MALLOC_ITT_SYNC_ACQUIRED(bins+i); 162651c0b2f7Stbbdev while (block) { 162751c0b2f7Stbbdev Block* next = block->next; 162857f524caSIlya Isaev block->privatizePublicFreeList( /*reset=*/false ); // do not set publicFreeList to nullptr 162951c0b2f7Stbbdev if (block->empty()) { 163051c0b2f7Stbbdev block->reset(); 163151c0b2f7Stbbdev // slab blocks in user's pools do not have valid backRefIdx 163251c0b2f7Stbbdev if (!backend->inUserPool()) 163351c0b2f7Stbbdev removeBackRef(*(block->getBackRefIdx())); 163451c0b2f7Stbbdev backend->putSlabBlock(block); 163551c0b2f7Stbbdev released = true; 163651c0b2f7Stbbdev } else { 163751c0b2f7Stbbdev MALLOC_ITT_SYNC_RELEASING(bins+i); 163851c0b2f7Stbbdev bins[i].push(block); 163951c0b2f7Stbbdev } 164051c0b2f7Stbbdev block = next; 164151c0b2f7Stbbdev } 164251c0b2f7Stbbdev } 164351c0b2f7Stbbdev return released; 164451c0b2f7Stbbdev } 164551c0b2f7Stbbdev 164651c0b2f7Stbbdev FreeBlockPool::ResOfGet FreeBlockPool::getBlock() 164751c0b2f7Stbbdev { 164857f524caSIlya Isaev Block *b = head.exchange(nullptr); 164951c0b2f7Stbbdev 165051c0b2f7Stbbdev if (b) { 165151c0b2f7Stbbdev size--; 165251c0b2f7Stbbdev Block *newHead = b->next; 165351c0b2f7Stbbdev lastAccessMiss = false; 165451c0b2f7Stbbdev head.store(newHead, std::memory_order_release); 165551c0b2f7Stbbdev } else { 165651c0b2f7Stbbdev lastAccessMiss = true; 165751c0b2f7Stbbdev } 165851c0b2f7Stbbdev return ResOfGet(b, lastAccessMiss); 165951c0b2f7Stbbdev } 166051c0b2f7Stbbdev 166151c0b2f7Stbbdev void FreeBlockPool::returnBlock(Block *block) 166251c0b2f7Stbbdev { 166351c0b2f7Stbbdev MALLOC_ASSERT( size <= POOL_HIGH_MARK, ASSERT_TEXT ); 166457f524caSIlya Isaev Block *localHead = head.exchange(nullptr); 166551c0b2f7Stbbdev 166651c0b2f7Stbbdev if (!localHead) { 166751c0b2f7Stbbdev size = 0; // head was stolen by externalClean, correct size accordingly 166851c0b2f7Stbbdev } else if (size == POOL_HIGH_MARK) { 166951c0b2f7Stbbdev // release cold blocks and add hot one, 167051c0b2f7Stbbdev // so keep POOL_LOW_MARK-1 blocks and add new block to head 167151c0b2f7Stbbdev Block 
*headToFree = localHead, *helper; 167251c0b2f7Stbbdev for (int i=0; i<POOL_LOW_MARK-2; i++) 167351c0b2f7Stbbdev headToFree = headToFree->next; 167451c0b2f7Stbbdev Block *last = headToFree; 167551c0b2f7Stbbdev headToFree = headToFree->next; 167657f524caSIlya Isaev last->next = nullptr; 167751c0b2f7Stbbdev size = POOL_LOW_MARK-1; 167851c0b2f7Stbbdev for (Block *currBl = headToFree; currBl; currBl = helper) { 167951c0b2f7Stbbdev helper = currBl->next; 168051c0b2f7Stbbdev // slab blocks in user's pools do not have valid backRefIdx 168151c0b2f7Stbbdev if (!backend->inUserPool()) 168251c0b2f7Stbbdev removeBackRef(currBl->backRefIdx); 168351c0b2f7Stbbdev backend->putSlabBlock(currBl); 168451c0b2f7Stbbdev } 168551c0b2f7Stbbdev } 168651c0b2f7Stbbdev size++; 168751c0b2f7Stbbdev block->next = localHead; 168851c0b2f7Stbbdev head.store(block, std::memory_order_release); 168951c0b2f7Stbbdev } 169051c0b2f7Stbbdev 169151c0b2f7Stbbdev bool FreeBlockPool::externalCleanup() 169251c0b2f7Stbbdev { 169351c0b2f7Stbbdev Block *helper; 169451c0b2f7Stbbdev bool released = false; 169551c0b2f7Stbbdev 169657f524caSIlya Isaev for (Block *currBl=head.exchange(nullptr); currBl; currBl=helper) { 169751c0b2f7Stbbdev helper = currBl->next; 169851c0b2f7Stbbdev // slab blocks in user's pools do not have valid backRefIdx 169951c0b2f7Stbbdev if (!backend->inUserPool()) 170051c0b2f7Stbbdev removeBackRef(currBl->backRefIdx); 170151c0b2f7Stbbdev backend->putSlabBlock(currBl); 170251c0b2f7Stbbdev released = true; 170351c0b2f7Stbbdev } 170451c0b2f7Stbbdev return released; 170551c0b2f7Stbbdev } 170651c0b2f7Stbbdev 170751c0b2f7Stbbdev /* Prepare the block for returning to FreeBlockPool */ 170851c0b2f7Stbbdev void Block::reset() 170951c0b2f7Stbbdev { 171051c0b2f7Stbbdev // it is caller's responsibility to ensure no data is lost before calling this 171151c0b2f7Stbbdev MALLOC_ASSERT( allocatedCount==0, ASSERT_TEXT ); 171251c0b2f7Stbbdev MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT ); 171351c0b2f7Stbbdev if (!isStartupAllocObject()) 171451c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), freeBlockBack); 171551c0b2f7Stbbdev 171651c0b2f7Stbbdev cleanBlockHeader(); 171751c0b2f7Stbbdev 1718478de5b1Stbbdev nextPrivatizable.store(nullptr, std::memory_order_relaxed); 171951c0b2f7Stbbdev 172051c0b2f7Stbbdev objectSize = 0; 172151c0b2f7Stbbdev // for an empty block, bump pointer should point right after the end of the block 172251c0b2f7Stbbdev bumpPtr = (FreeObject *)((uintptr_t)this + slabSize); 172351c0b2f7Stbbdev } 172451c0b2f7Stbbdev 172551c0b2f7Stbbdev inline void Bin::setActiveBlock (Block *block) 172651c0b2f7Stbbdev { 172751c0b2f7Stbbdev // MALLOC_ASSERT( bin, ASSERT_TEXT ); 172851c0b2f7Stbbdev MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT ); 172951c0b2f7Stbbdev // it is the caller responsibility to keep bin consistence (i.e. 
ensure this block is in the bin list) 173051c0b2f7Stbbdev activeBlk = block; 173151c0b2f7Stbbdev } 173251c0b2f7Stbbdev 173351c0b2f7Stbbdev inline Block* Bin::setPreviousBlockActive() 173451c0b2f7Stbbdev { 173551c0b2f7Stbbdev MALLOC_ASSERT( activeBlk, ASSERT_TEXT ); 173651c0b2f7Stbbdev Block* temp = activeBlk->previous; 173751c0b2f7Stbbdev if( temp ) { 173851c0b2f7Stbbdev MALLOC_ASSERT( !(temp->isFull), ASSERT_TEXT ); 173951c0b2f7Stbbdev activeBlk = temp; 174051c0b2f7Stbbdev } 174151c0b2f7Stbbdev return temp; 174251c0b2f7Stbbdev } 174351c0b2f7Stbbdev 174451c0b2f7Stbbdev inline bool Block::isOwnedByCurrentThread() const { 1745478de5b1Stbbdev return tlsPtr.load(std::memory_order_relaxed) && ownerTid.isCurrentThreadId(); 174651c0b2f7Stbbdev } 174751c0b2f7Stbbdev 174851c0b2f7Stbbdev FreeObject *Block::findObjectToFree(const void *object) const 174951c0b2f7Stbbdev { 175051c0b2f7Stbbdev FreeObject *objectToFree; 175151c0b2f7Stbbdev // Due to aligned allocations, a pointer passed to scalable_free 175251c0b2f7Stbbdev // might differ from the address of internally allocated object. 175351c0b2f7Stbbdev // Small objects however should always be fine. 175451c0b2f7Stbbdev if (objectSize <= maxSegregatedObjectSize) 175551c0b2f7Stbbdev objectToFree = (FreeObject*)object; 175651c0b2f7Stbbdev // "Fitting size" allocations are suspicious if aligned higher than naturally 175751c0b2f7Stbbdev else { 175851c0b2f7Stbbdev if ( ! isAligned(object,2*fittingAlignment) ) 175951c0b2f7Stbbdev // TODO: the above check is questionable - it gives false negatives in ~50% cases, 176051c0b2f7Stbbdev // so might even be slower in average than unconditional use of findAllocatedObject. 176151c0b2f7Stbbdev // here it should be a "real" object 176251c0b2f7Stbbdev objectToFree = (FreeObject*)object; 176351c0b2f7Stbbdev else 176451c0b2f7Stbbdev // here object can be an aligned address, so applying additional checks 176551c0b2f7Stbbdev objectToFree = findAllocatedObject(object); 176651c0b2f7Stbbdev MALLOC_ASSERT( isAligned(objectToFree,fittingAlignment), ASSERT_TEXT ); 176751c0b2f7Stbbdev } 176851c0b2f7Stbbdev MALLOC_ASSERT( isProperlyPlaced(objectToFree), ASSERT_TEXT ); 176951c0b2f7Stbbdev 177051c0b2f7Stbbdev return objectToFree; 177151c0b2f7Stbbdev } 177251c0b2f7Stbbdev 177351c0b2f7Stbbdev void TLSData::release() 177451c0b2f7Stbbdev { 177551c0b2f7Stbbdev memPool->extMemPool.allLocalCaches.unregisterThread(this); 177651c0b2f7Stbbdev externalCleanup(/*cleanOnlyUnused=*/false, /*cleanBins=*/false); 177751c0b2f7Stbbdev 177851c0b2f7Stbbdev for (unsigned index = 0; index < numBlockBins; index++) { 177951c0b2f7Stbbdev Block *activeBlk = bin[index].getActiveBlock(); 178051c0b2f7Stbbdev if (!activeBlk) 178151c0b2f7Stbbdev continue; 178251c0b2f7Stbbdev Block *threadlessBlock = activeBlk->previous; 1783478de5b1Stbbdev bool syncOnMailbox = false; 178451c0b2f7Stbbdev while (threadlessBlock) { 178551c0b2f7Stbbdev Block *threadBlock = threadlessBlock->previous; 178651c0b2f7Stbbdev if (threadlessBlock->empty()) { 178751c0b2f7Stbbdev /* we destroy the thread, so not use its block pool */ 178851c0b2f7Stbbdev memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false); 178951c0b2f7Stbbdev } else { 179051c0b2f7Stbbdev memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock); 1791478de5b1Stbbdev syncOnMailbox = true; 179251c0b2f7Stbbdev } 179351c0b2f7Stbbdev threadlessBlock = threadBlock; 179451c0b2f7Stbbdev } 179551c0b2f7Stbbdev threadlessBlock = activeBlk; 179651c0b2f7Stbbdev while (threadlessBlock) { 179751c0b2f7Stbbdev 
Block *threadBlock = threadlessBlock->next; 179851c0b2f7Stbbdev if (threadlessBlock->empty()) { 179951c0b2f7Stbbdev /* we destroy the thread, so not use its block pool */ 180051c0b2f7Stbbdev memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false); 180151c0b2f7Stbbdev } else { 180251c0b2f7Stbbdev memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock); 1803478de5b1Stbbdev syncOnMailbox = true; 180451c0b2f7Stbbdev } 180551c0b2f7Stbbdev threadlessBlock = threadBlock; 180651c0b2f7Stbbdev } 180751c0b2f7Stbbdev bin[index].resetActiveBlock(); 1808478de5b1Stbbdev 1809478de5b1Stbbdev if (syncOnMailbox) { 1810478de5b1Stbbdev // Although, we synchronized on nextPrivatizable inside a block, we still need to 1811478de5b1Stbbdev // synchronize on the bin lifetime because the thread releasing an object into the public 1812478de5b1Stbbdev // free list is touching the bin (mailbox and mailLock) 1813478de5b1Stbbdev MallocMutex::scoped_lock scoped_cs(bin[index].mailLock); 1814478de5b1Stbbdev } 181551c0b2f7Stbbdev } 181651c0b2f7Stbbdev } 181751c0b2f7Stbbdev 181851c0b2f7Stbbdev 181951c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION 182051c0b2f7Stbbdev // TODO: Use dedicated heap for this 182151c0b2f7Stbbdev 182251c0b2f7Stbbdev /* 182351c0b2f7Stbbdev * It's a special kind of allocation that can be used when malloc is 182451c0b2f7Stbbdev * not available (either during startup or when malloc was already called and 182551c0b2f7Stbbdev * we are, say, inside pthread_setspecific's call). 182651c0b2f7Stbbdev * Block can contain objects of different sizes, 182751c0b2f7Stbbdev * allocations are performed by moving bump pointer and increasing of object counter, 182851c0b2f7Stbbdev * releasing is done via counter of objects allocated in the block 182951c0b2f7Stbbdev * or moving bump pointer if releasing object is on a bound. 183051c0b2f7Stbbdev * TODO: make bump pointer to grow to the same backward direction as all the others. 
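 * Note for readers: regular slab blocks bump their pointer downward from the end of the slab
 * (see Block::initEmptyBlock above), whereas StartupBlock currently bumps upward from just
 * past its header; that asymmetry is what the TODO above refers to.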
183151c0b2f7Stbbdev */ 183251c0b2f7Stbbdev 183351c0b2f7Stbbdev class StartupBlock : public Block { 183451c0b2f7Stbbdev size_t availableSize() const { 183551c0b2f7Stbbdev return slabSize - ((uintptr_t)bumpPtr - (uintptr_t)this); 183651c0b2f7Stbbdev } 183751c0b2f7Stbbdev static StartupBlock *getBlock(); 183851c0b2f7Stbbdev public: 183951c0b2f7Stbbdev static FreeObject *allocate(size_t size); 184051c0b2f7Stbbdev static size_t msize(void *ptr) { return *((size_t*)ptr - 1); } 184151c0b2f7Stbbdev void free(void *ptr); 184251c0b2f7Stbbdev }; 184351c0b2f7Stbbdev 184451c0b2f7Stbbdev static MallocMutex startupMallocLock; 184551c0b2f7Stbbdev static StartupBlock *firstStartupBlock; 184651c0b2f7Stbbdev 184751c0b2f7Stbbdev StartupBlock *StartupBlock::getBlock() 184851c0b2f7Stbbdev { 184951c0b2f7Stbbdev BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/false); 185057f524caSIlya Isaev if (backRefIdx.isInvalid()) return nullptr; 185151c0b2f7Stbbdev 185251c0b2f7Stbbdev StartupBlock *block = static_cast<StartupBlock*>( 185351c0b2f7Stbbdev defaultMemPool->extMemPool.backend.getSlabBlock(1)); 185457f524caSIlya Isaev if (!block) return nullptr; 185551c0b2f7Stbbdev 185651c0b2f7Stbbdev block->cleanBlockHeader(); 185751c0b2f7Stbbdev setBackRef(backRefIdx, block); 185851c0b2f7Stbbdev block->backRefIdx = backRefIdx; 185951c0b2f7Stbbdev // use startupAllocObjSizeMark to mark objects from startup block marker 186051c0b2f7Stbbdev block->objectSize = startupAllocObjSizeMark; 186151c0b2f7Stbbdev block->bumpPtr = (FreeObject *)((uintptr_t)block + sizeof(StartupBlock)); 186251c0b2f7Stbbdev return block; 186351c0b2f7Stbbdev } 186451c0b2f7Stbbdev 186551c0b2f7Stbbdev FreeObject *StartupBlock::allocate(size_t size) 186651c0b2f7Stbbdev { 186751c0b2f7Stbbdev FreeObject *result; 186857f524caSIlya Isaev StartupBlock *newBlock = nullptr; 186951c0b2f7Stbbdev 187051c0b2f7Stbbdev /* Objects must be aligned on their natural bounds, 187151c0b2f7Stbbdev and objects bigger than word on word's bound. */ 187251c0b2f7Stbbdev size = alignUp(size, sizeof(size_t)); 187351c0b2f7Stbbdev // We need size of an object to implement msize. 
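    // Resulting layout of a single startup allocation (illustration):
    //     [ size_t size ][ size bytes of payload ... ]
    //                      ^-- pointer returned to the caller
    // msize(ptr) reads the size_t stored immediately below ptr, and StartupBlock::free()
    // uses that same word to roll the bump pointer back when the last object is released.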
187451c0b2f7Stbbdev size_t reqSize = size + sizeof(size_t); 187551c0b2f7Stbbdev { 187651c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(startupMallocLock); 187751c0b2f7Stbbdev // Re-check whether we need a new block (conditions might have changed) 187851c0b2f7Stbbdev if (!firstStartupBlock || firstStartupBlock->availableSize() < reqSize) { 187951c0b2f7Stbbdev if (!newBlock) { 188051c0b2f7Stbbdev newBlock = StartupBlock::getBlock(); 188157f524caSIlya Isaev if (!newBlock) return nullptr; 188251c0b2f7Stbbdev } 188351c0b2f7Stbbdev newBlock->next = (Block*)firstStartupBlock; 188451c0b2f7Stbbdev if (firstStartupBlock) 188551c0b2f7Stbbdev firstStartupBlock->previous = (Block*)newBlock; 188651c0b2f7Stbbdev firstStartupBlock = newBlock; 1887478de5b1Stbbdev } 188851c0b2f7Stbbdev result = firstStartupBlock->bumpPtr; 188951c0b2f7Stbbdev firstStartupBlock->allocatedCount++; 189051c0b2f7Stbbdev firstStartupBlock->bumpPtr = 189151c0b2f7Stbbdev (FreeObject *)((uintptr_t)firstStartupBlock->bumpPtr + reqSize); 189251c0b2f7Stbbdev } 189351c0b2f7Stbbdev 189451c0b2f7Stbbdev // keep object size at the negative offset 189551c0b2f7Stbbdev *((size_t*)result) = size; 189651c0b2f7Stbbdev return (FreeObject*)((size_t*)result+1); 189751c0b2f7Stbbdev } 189851c0b2f7Stbbdev 189951c0b2f7Stbbdev void StartupBlock::free(void *ptr) 190051c0b2f7Stbbdev { 190157f524caSIlya Isaev Block* blockToRelease = nullptr; 190251c0b2f7Stbbdev { 190351c0b2f7Stbbdev MallocMutex::scoped_lock scoped_cs(startupMallocLock); 190451c0b2f7Stbbdev 190551c0b2f7Stbbdev MALLOC_ASSERT(firstStartupBlock, ASSERT_TEXT); 190651c0b2f7Stbbdev MALLOC_ASSERT(startupAllocObjSizeMark==objectSize 190751c0b2f7Stbbdev && allocatedCount>0, ASSERT_TEXT); 190851c0b2f7Stbbdev MALLOC_ASSERT((uintptr_t)ptr>=(uintptr_t)this+sizeof(StartupBlock) 190951c0b2f7Stbbdev && (uintptr_t)ptr+StartupBlock::msize(ptr)<=(uintptr_t)this+slabSize, 191051c0b2f7Stbbdev ASSERT_TEXT); 191151c0b2f7Stbbdev if (0 == --allocatedCount) { 191251c0b2f7Stbbdev if (this == firstStartupBlock) 191351c0b2f7Stbbdev firstStartupBlock = (StartupBlock*)firstStartupBlock->next; 191451c0b2f7Stbbdev if (previous) 191551c0b2f7Stbbdev previous->next = next; 191651c0b2f7Stbbdev if (next) 191751c0b2f7Stbbdev next->previous = previous; 191851c0b2f7Stbbdev blockToRelease = this; 191951c0b2f7Stbbdev } else if ((uintptr_t)ptr + StartupBlock::msize(ptr) == (uintptr_t)bumpPtr) { 192051c0b2f7Stbbdev // last object in the block released 192151c0b2f7Stbbdev FreeObject *newBump = (FreeObject*)((size_t*)ptr - 1); 192251c0b2f7Stbbdev MALLOC_ASSERT((uintptr_t)newBump>(uintptr_t)this+sizeof(StartupBlock), 192351c0b2f7Stbbdev ASSERT_TEXT); 192451c0b2f7Stbbdev bumpPtr = newBump; 192551c0b2f7Stbbdev } 192651c0b2f7Stbbdev } 192751c0b2f7Stbbdev if (blockToRelease) { 192857f524caSIlya Isaev blockToRelease->previous = blockToRelease->next = nullptr; 192951c0b2f7Stbbdev defaultMemPool->returnEmptyBlock(blockToRelease, /*poolTheBlock=*/false); 193051c0b2f7Stbbdev } 193151c0b2f7Stbbdev } 193251c0b2f7Stbbdev 193351c0b2f7Stbbdev #endif /* MALLOC_CHECK_RECURSION */ 193451c0b2f7Stbbdev 193551c0b2f7Stbbdev /********* End thread related code *************/ 193651c0b2f7Stbbdev 193751c0b2f7Stbbdev /********* Library initialization *************/ 193851c0b2f7Stbbdev 193951c0b2f7Stbbdev //! Value indicating the state of initialization. 194051c0b2f7Stbbdev /* 0 = initialization not started. 194151c0b2f7Stbbdev * 1 = initialization started but not finished. 194251c0b2f7Stbbdev * 2 = initialization finished. 
194351c0b2f7Stbbdev * In theory, we only need values 0 and 2. But value 1 is nonetheless 194451c0b2f7Stbbdev * useful for detecting errors in the double-check pattern. 194551c0b2f7Stbbdev */ 194651c0b2f7Stbbdev static std::atomic<intptr_t> mallocInitialized{0}; // implicitly initialized to 0 194751c0b2f7Stbbdev static MallocMutex initMutex; 194851c0b2f7Stbbdev 194951c0b2f7Stbbdev /** The leading "\0" is here so that applying "strings" to the binary 195051c0b2f7Stbbdev delivers a clean result. */ 195151c0b2f7Stbbdev static char VersionString[] = "\0" TBBMALLOC_VERSION_STRINGS; 195251c0b2f7Stbbdev 1953112076d0SIlya Isaev #if USE_PTHREAD && __TBB_SOURCE_DIRECTLY_INCLUDED 195451c0b2f7Stbbdev 195551c0b2f7Stbbdev /* Decrease race interval between dynamic library unloading and pthread key 195651c0b2f7Stbbdev destructor. Protect only Pthreads with supported unloading. */ 195751c0b2f7Stbbdev class ShutdownSync { 195851c0b2f7Stbbdev /* flag is the number of threads in pthread key dtor body 195951c0b2f7Stbbdev (i.e., between threadDtorStart() and threadDtorDone()) 196051c0b2f7Stbbdev or the signal to skip dtor, if flag < 0 */ 196151c0b2f7Stbbdev std::atomic<intptr_t> flag; 196251c0b2f7Stbbdev static const intptr_t skipDtor = INTPTR_MIN/2; 196351c0b2f7Stbbdev public: 196451c0b2f7Stbbdev void init() { flag.store(0, std::memory_order_release); } 196551c0b2f7Stbbdev /* Suppose that 2*abs(skipDtor) or more threads never call threadDtorStart() 196651c0b2f7Stbbdev simultaneously, so flag never becomes negative because of that. */ 196751c0b2f7Stbbdev bool threadDtorStart() { 196851c0b2f7Stbbdev if (flag.load(std::memory_order_acquire) < 0) 196951c0b2f7Stbbdev return false; 197051c0b2f7Stbbdev if (++flag <= 0) { // note that new value returned 197151c0b2f7Stbbdev flag.fetch_sub(1); // flag is spoiled by us, restore it 197251c0b2f7Stbbdev return false; 197351c0b2f7Stbbdev } 197451c0b2f7Stbbdev return true; 197551c0b2f7Stbbdev } 197651c0b2f7Stbbdev void threadDtorDone() { 197751c0b2f7Stbbdev flag.fetch_sub(1); 197851c0b2f7Stbbdev } 197951c0b2f7Stbbdev void processExit() { 198051c0b2f7Stbbdev if (flag.fetch_add(skipDtor) != 0) { 198151c0b2f7Stbbdev SpinWaitUntilEq(flag, skipDtor); 198251c0b2f7Stbbdev } 198351c0b2f7Stbbdev } 198451c0b2f7Stbbdev }; 198551c0b2f7Stbbdev 198651c0b2f7Stbbdev #else 198751c0b2f7Stbbdev 198851c0b2f7Stbbdev class ShutdownSync { 198951c0b2f7Stbbdev public: 199051c0b2f7Stbbdev void init() { } 199151c0b2f7Stbbdev bool threadDtorStart() { return true; } 199251c0b2f7Stbbdev void threadDtorDone() { } 199351c0b2f7Stbbdev void processExit() { } 199451c0b2f7Stbbdev }; 199551c0b2f7Stbbdev 1996112076d0SIlya Isaev #endif // USE_PTHREAD && __TBB_SOURCE_DIRECTLY_INCLUDED 199751c0b2f7Stbbdev 199851c0b2f7Stbbdev static ShutdownSync shutdownSync; 199951c0b2f7Stbbdev 200051c0b2f7Stbbdev inline bool isMallocInitialized() { 200151c0b2f7Stbbdev // Load must have acquire fence; otherwise thread taking "initialized" path 200251c0b2f7Stbbdev // might perform textually later loads *before* mallocInitialized becomes 2. 200351c0b2f7Stbbdev return 2 == mallocInitialized.load(std::memory_order_acquire); 200451c0b2f7Stbbdev } 200551c0b2f7Stbbdev 200651c0b2f7Stbbdev /* Caller is responsible for ensuring this routine is called exactly once. 
*/ 200751c0b2f7Stbbdev extern "C" void MallocInitializeITT() { 200851c0b2f7Stbbdev #if __TBB_USE_ITT_NOTIFY 200951c0b2f7Stbbdev if (!usedBySrcIncluded) 201051c0b2f7Stbbdev tbb::detail::r1::__TBB_load_ittnotify(); 201151c0b2f7Stbbdev #endif 201251c0b2f7Stbbdev } 201351c0b2f7Stbbdev 201451c0b2f7Stbbdev void MemoryPool::initDefaultPool() { 201551c0b2f7Stbbdev hugePages.init(); 201651c0b2f7Stbbdev } 201751c0b2f7Stbbdev 201851c0b2f7Stbbdev /* 201951c0b2f7Stbbdev * Allocator initialization routine; 202051c0b2f7Stbbdev * it is called lazily on the very first scalable_malloc call. 202151c0b2f7Stbbdev */ 202251c0b2f7Stbbdev static bool initMemoryManager() 202351c0b2f7Stbbdev { 202451c0b2f7Stbbdev TRACEF(( "[ScalableMalloc trace] sizeof(Block) is %d (expected 128); sizeof(uintptr_t) is %d\n", 202551c0b2f7Stbbdev sizeof(Block), sizeof(uintptr_t) )); 202651c0b2f7Stbbdev MALLOC_ASSERT( 2*blockHeaderAlignment == sizeof(Block), ASSERT_TEXT ); 202751c0b2f7Stbbdev MALLOC_ASSERT( sizeof(FreeObject) == sizeof(void*), ASSERT_TEXT ); 202851c0b2f7Stbbdev MALLOC_ASSERT( isAligned(defaultMemPool, sizeof(intptr_t)), 202951c0b2f7Stbbdev "Memory pool must be void*-aligned for atomic to work over aligned arguments."); 203051c0b2f7Stbbdev 203151c0b2f7Stbbdev #if USE_WINTHREAD 203251c0b2f7Stbbdev const size_t granularity = 64*1024; // granulatity of VirtualAlloc 203351c0b2f7Stbbdev #else 203451c0b2f7Stbbdev // POSIX.1-2001-compliant way to get page size 203551c0b2f7Stbbdev const size_t granularity = sysconf(_SC_PAGESIZE); 203651c0b2f7Stbbdev #endif 203751c0b2f7Stbbdev if (!defaultMemPool) { 203851c0b2f7Stbbdev // Do not rely on static constructors and do the assignment in case 203951c0b2f7Stbbdev // of library static section not initialized at this call yet. 204051c0b2f7Stbbdev defaultMemPool = (MemoryPool*)defaultMemPool_space; 204151c0b2f7Stbbdev } 204251c0b2f7Stbbdev bool initOk = defaultMemPool-> 204357f524caSIlya Isaev extMemPool.init(0, nullptr, nullptr, granularity, 204451c0b2f7Stbbdev /*keepAllMemory=*/false, /*fixedPool=*/false); 204551c0b2f7Stbbdev // TODO: extMemPool.init() to not allocate memory 20461ecde27fSIlya Mishin if (!initOk || !initBackRefMain(&defaultMemPool->extMemPool.backend) || !ThreadId::init()) 204751c0b2f7Stbbdev return false; 204851c0b2f7Stbbdev MemoryPool::initDefaultPool(); 204951c0b2f7Stbbdev // init() is required iff initMemoryManager() is called 205051c0b2f7Stbbdev // after mallocProcessShutdownNotification() 205151c0b2f7Stbbdev shutdownSync.init(); 205251c0b2f7Stbbdev #if COLLECT_STATISTICS 205351c0b2f7Stbbdev initStatisticsCollection(); 205451c0b2f7Stbbdev #endif 205551c0b2f7Stbbdev return true; 205651c0b2f7Stbbdev } 205751c0b2f7Stbbdev 205851c0b2f7Stbbdev static bool GetBoolEnvironmentVariable(const char* name) { 205951c0b2f7Stbbdev return tbb::detail::r1::GetBoolEnvironmentVariable(name); 206051c0b2f7Stbbdev } 206151c0b2f7Stbbdev 206251c0b2f7Stbbdev //! Ensures that initMemoryManager() is called once and only once. 206351c0b2f7Stbbdev /** Does not return until initMemoryManager() has been completed by a thread. 206451c0b2f7Stbbdev There is no need to call this routine if mallocInitialized==2 . 
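    Typical callers (see allocateAligned() below) test isMallocInitialized() first and fall
    back to doInitialization() only on the cold path:
        if (!isMallocInitialized())
            if (!doInitialization()) return nullptr;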
*/ 206551c0b2f7Stbbdev static bool doInitialization() 206651c0b2f7Stbbdev { 206751c0b2f7Stbbdev MallocMutex::scoped_lock lock( initMutex ); 206851c0b2f7Stbbdev if (mallocInitialized.load(std::memory_order_relaxed)!=2) { 206951c0b2f7Stbbdev MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==0, ASSERT_TEXT ); 207051c0b2f7Stbbdev mallocInitialized.store(1, std::memory_order_relaxed); 207151c0b2f7Stbbdev RecursiveMallocCallProtector scoped; 207251c0b2f7Stbbdev if (!initMemoryManager()) { 207351c0b2f7Stbbdev mallocInitialized.store(0, std::memory_order_relaxed); // restore and out 207451c0b2f7Stbbdev return false; 207551c0b2f7Stbbdev } 207651c0b2f7Stbbdev #ifdef MALLOC_EXTRA_INITIALIZATION 207751c0b2f7Stbbdev MALLOC_EXTRA_INITIALIZATION; 207851c0b2f7Stbbdev #endif 207951c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION 208051c0b2f7Stbbdev RecursiveMallocCallProtector::detectNaiveOverload(); 208151c0b2f7Stbbdev #endif 208251c0b2f7Stbbdev MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==1, ASSERT_TEXT ); 208351c0b2f7Stbbdev // Store must have release fence, otherwise mallocInitialized==2 208451c0b2f7Stbbdev // might become remotely visible before side effects of 208551c0b2f7Stbbdev // initMemoryManager() become remotely visible. 208651c0b2f7Stbbdev mallocInitialized.store(2, std::memory_order_release); 208751c0b2f7Stbbdev if( GetBoolEnvironmentVariable("TBB_VERSION") ) { 208851c0b2f7Stbbdev fputs(VersionString+1,stderr); 208951c0b2f7Stbbdev hugePages.printStatus(); 209051c0b2f7Stbbdev } 209151c0b2f7Stbbdev } 209251c0b2f7Stbbdev /* It can't be 0 or I would have initialized it */ 209351c0b2f7Stbbdev MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==2, ASSERT_TEXT ); 209451c0b2f7Stbbdev return true; 209551c0b2f7Stbbdev } 209651c0b2f7Stbbdev 209751c0b2f7Stbbdev /********* End library initialization *************/ 209851c0b2f7Stbbdev 209951c0b2f7Stbbdev /********* The malloc show begins *************/ 210051c0b2f7Stbbdev 210151c0b2f7Stbbdev 210251c0b2f7Stbbdev FreeObject *Block::allocateFromFreeList() 210351c0b2f7Stbbdev { 210451c0b2f7Stbbdev FreeObject *result; 210551c0b2f7Stbbdev 210657f524caSIlya Isaev if (!freeList) return nullptr; 210751c0b2f7Stbbdev 210851c0b2f7Stbbdev result = freeList; 210951c0b2f7Stbbdev MALLOC_ASSERT( result, ASSERT_TEXT ); 211051c0b2f7Stbbdev 211151c0b2f7Stbbdev freeList = result->next; 211251c0b2f7Stbbdev MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 211351c0b2f7Stbbdev allocatedCount++; 211451c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), allocFreeListUsed); 211551c0b2f7Stbbdev 211651c0b2f7Stbbdev return result; 211751c0b2f7Stbbdev } 211851c0b2f7Stbbdev 211951c0b2f7Stbbdev FreeObject *Block::allocateFromBumpPtr() 212051c0b2f7Stbbdev { 212151c0b2f7Stbbdev FreeObject *result = bumpPtr; 212251c0b2f7Stbbdev if (result) { 212351c0b2f7Stbbdev bumpPtr = (FreeObject *) ((uintptr_t) bumpPtr - objectSize); 212451c0b2f7Stbbdev if ( (uintptr_t)bumpPtr < (uintptr_t)this+sizeof(Block) ) { 212557f524caSIlya Isaev bumpPtr = nullptr; 212651c0b2f7Stbbdev } 212751c0b2f7Stbbdev MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 212851c0b2f7Stbbdev allocatedCount++; 212951c0b2f7Stbbdev STAT_increment(getThreadId(), getIndex(objectSize), allocBumpPtrUsed); 213051c0b2f7Stbbdev } 213151c0b2f7Stbbdev return result; 213251c0b2f7Stbbdev } 213351c0b2f7Stbbdev 213451c0b2f7Stbbdev inline FreeObject* Block::allocate() 213551c0b2f7Stbbdev { 213651c0b2f7Stbbdev MALLOC_ASSERT( 
isOwnedByCurrentThread(), ASSERT_TEXT ); 213751c0b2f7Stbbdev 213851c0b2f7Stbbdev /* for better cache locality, first looking in the free list. */ 213951c0b2f7Stbbdev if ( FreeObject *result = allocateFromFreeList() ) { 214051c0b2f7Stbbdev return result; 214151c0b2f7Stbbdev } 214251c0b2f7Stbbdev MALLOC_ASSERT( !freeList, ASSERT_TEXT ); 214351c0b2f7Stbbdev 214451c0b2f7Stbbdev /* if free list is empty, try thread local bump pointer allocation. */ 214551c0b2f7Stbbdev if ( FreeObject *result = allocateFromBumpPtr() ) { 214651c0b2f7Stbbdev return result; 214751c0b2f7Stbbdev } 214851c0b2f7Stbbdev MALLOC_ASSERT( !bumpPtr, ASSERT_TEXT ); 214951c0b2f7Stbbdev 215051c0b2f7Stbbdev /* the block is considered full. */ 215151c0b2f7Stbbdev isFull = true; 215257f524caSIlya Isaev return nullptr; 215351c0b2f7Stbbdev } 215451c0b2f7Stbbdev 215551c0b2f7Stbbdev size_t Block::findObjectSize(void *object) const 215651c0b2f7Stbbdev { 215751c0b2f7Stbbdev size_t blSize = getSize(); 215851c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION 215951c0b2f7Stbbdev // Currently, there is no aligned allocations from startup blocks, 216051c0b2f7Stbbdev // so we can return just StartupBlock::msize(). 216151c0b2f7Stbbdev // TODO: This must be extended if we add aligned allocation from startup blocks. 216251c0b2f7Stbbdev if (!blSize) 216351c0b2f7Stbbdev return StartupBlock::msize(object); 216451c0b2f7Stbbdev #endif 216551c0b2f7Stbbdev // object can be aligned, so real size can be less than block's 216651c0b2f7Stbbdev size_t size = 216751c0b2f7Stbbdev blSize - ((uintptr_t)object - (uintptr_t)findObjectToFree(object)); 216851c0b2f7Stbbdev MALLOC_ASSERT(size>0 && size<minLargeObjectSize, ASSERT_TEXT); 216951c0b2f7Stbbdev return size; 217051c0b2f7Stbbdev } 217151c0b2f7Stbbdev 217251c0b2f7Stbbdev void Bin::moveBlockToFront(Block *block) 217351c0b2f7Stbbdev { 217451c0b2f7Stbbdev /* move the block to the front of the bin */ 217551c0b2f7Stbbdev if (block == activeBlk) return; 217651c0b2f7Stbbdev outofTLSBin(block); 217751c0b2f7Stbbdev pushTLSBin(block); 217851c0b2f7Stbbdev } 217951c0b2f7Stbbdev 218051c0b2f7Stbbdev void Bin::processEmptyBlock(Block *block, bool poolTheBlock) 218151c0b2f7Stbbdev { 218251c0b2f7Stbbdev if (block != activeBlk) { 218351c0b2f7Stbbdev /* We are not using this block; return it to the pool */ 218451c0b2f7Stbbdev outofTLSBin(block); 218551c0b2f7Stbbdev block->getMemPool()->returnEmptyBlock(block, poolTheBlock); 218651c0b2f7Stbbdev } else { 218751c0b2f7Stbbdev /* all objects are free - let's restore the bump pointer */ 218851c0b2f7Stbbdev block->restoreBumpPtr(); 218951c0b2f7Stbbdev } 219051c0b2f7Stbbdev } 219151c0b2f7Stbbdev 219251c0b2f7Stbbdev template<int LOW_MARK, int HIGH_MARK> 219351c0b2f7Stbbdev bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool) 219451c0b2f7Stbbdev { 219551c0b2f7Stbbdev const size_t size = object->unalignedSize; 219651c0b2f7Stbbdev // not spoil cache with too large object, that can cause its total cleanup 219751c0b2f7Stbbdev if (size > MAX_TOTAL_SIZE) 219851c0b2f7Stbbdev return false; 219957f524caSIlya Isaev LargeMemoryBlock *localHead = head.exchange(nullptr); 220051c0b2f7Stbbdev 220157f524caSIlya Isaev object->prev = nullptr; 220251c0b2f7Stbbdev object->next = localHead; 220351c0b2f7Stbbdev if (localHead) 220451c0b2f7Stbbdev localHead->prev = object; 220551c0b2f7Stbbdev else { 220651c0b2f7Stbbdev // those might not be cleaned during local cache stealing, correct them 220751c0b2f7Stbbdev totalSize = 0; 220851c0b2f7Stbbdev numOfBlocks = 0; 
220951c0b2f7Stbbdev tail = object; 221051c0b2f7Stbbdev } 221151c0b2f7Stbbdev localHead = object; 221251c0b2f7Stbbdev totalSize += size; 221351c0b2f7Stbbdev numOfBlocks++; 221451c0b2f7Stbbdev // must meet both size and number of cached objects constrains 221551c0b2f7Stbbdev if (totalSize > MAX_TOTAL_SIZE || numOfBlocks >= HIGH_MARK) { 221651c0b2f7Stbbdev // scanning from tail until meet conditions 221751c0b2f7Stbbdev while (totalSize > MAX_TOTAL_SIZE || numOfBlocks > LOW_MARK) { 221851c0b2f7Stbbdev totalSize -= tail->unalignedSize; 221951c0b2f7Stbbdev numOfBlocks--; 222051c0b2f7Stbbdev tail = tail->prev; 222151c0b2f7Stbbdev } 222251c0b2f7Stbbdev LargeMemoryBlock *headToRelease = tail->next; 222357f524caSIlya Isaev tail->next = nullptr; 222451c0b2f7Stbbdev 222551c0b2f7Stbbdev extMemPool->freeLargeObjectList(headToRelease); 222651c0b2f7Stbbdev } 222751c0b2f7Stbbdev 222851c0b2f7Stbbdev head.store(localHead, std::memory_order_release); 222951c0b2f7Stbbdev return true; 223051c0b2f7Stbbdev } 223151c0b2f7Stbbdev 223251c0b2f7Stbbdev template<int LOW_MARK, int HIGH_MARK> 223351c0b2f7Stbbdev LargeMemoryBlock *LocalLOCImpl<LOW_MARK, HIGH_MARK>::get(size_t size) 223451c0b2f7Stbbdev { 223557f524caSIlya Isaev LargeMemoryBlock *localHead, *res = nullptr; 223651c0b2f7Stbbdev 223751c0b2f7Stbbdev if (size > MAX_TOTAL_SIZE) 223857f524caSIlya Isaev return nullptr; 223951c0b2f7Stbbdev 224051c0b2f7Stbbdev // TBB_REVAMP_TODO: review this line 224157f524caSIlya Isaev if (!head.load(std::memory_order_acquire) || (localHead = head.exchange(nullptr)) == nullptr) { 224251c0b2f7Stbbdev // do not restore totalSize, numOfBlocks and tail at this point, 224351c0b2f7Stbbdev // as they are used only in put(), where they must be restored 224457f524caSIlya Isaev return nullptr; 224551c0b2f7Stbbdev } 224651c0b2f7Stbbdev 224751c0b2f7Stbbdev for (LargeMemoryBlock *curr = localHead; curr; curr=curr->next) { 224851c0b2f7Stbbdev if (curr->unalignedSize == size) { 224951c0b2f7Stbbdev res = curr; 225051c0b2f7Stbbdev if (curr->next) 225151c0b2f7Stbbdev curr->next->prev = curr->prev; 225251c0b2f7Stbbdev else 225351c0b2f7Stbbdev tail = curr->prev; 225451c0b2f7Stbbdev if (curr != localHead) 225551c0b2f7Stbbdev curr->prev->next = curr->next; 225651c0b2f7Stbbdev else 225751c0b2f7Stbbdev localHead = curr->next; 225851c0b2f7Stbbdev totalSize -= size; 225951c0b2f7Stbbdev numOfBlocks--; 226051c0b2f7Stbbdev break; 226151c0b2f7Stbbdev } 226251c0b2f7Stbbdev } 226351c0b2f7Stbbdev 226451c0b2f7Stbbdev head.store(localHead, std::memory_order_release); 226551c0b2f7Stbbdev return res; 226651c0b2f7Stbbdev } 226751c0b2f7Stbbdev 226851c0b2f7Stbbdev template<int LOW_MARK, int HIGH_MARK> 226951c0b2f7Stbbdev bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::externalCleanup(ExtMemoryPool *extMemPool) 227051c0b2f7Stbbdev { 227157f524caSIlya Isaev if (LargeMemoryBlock *localHead = head.exchange(nullptr)) { 227251c0b2f7Stbbdev extMemPool->freeLargeObjectList(localHead); 227351c0b2f7Stbbdev return true; 227451c0b2f7Stbbdev } 227551c0b2f7Stbbdev return false; 227651c0b2f7Stbbdev } 227751c0b2f7Stbbdev 227851c0b2f7Stbbdev void *MemoryPool::getFromLLOCache(TLSData* tls, size_t size, size_t alignment) 227951c0b2f7Stbbdev { 228057f524caSIlya Isaev LargeMemoryBlock *lmb = nullptr; 228151c0b2f7Stbbdev 228251c0b2f7Stbbdev size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr); 228351c0b2f7Stbbdev size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+alignment); 228451c0b2f7Stbbdev if (allocationSize < size) // allocationSize is wrapped around after 
alignToBin 228557f524caSIlya Isaev return nullptr; 228651c0b2f7Stbbdev MALLOC_ASSERT(allocationSize >= alignment, "Overflow must be checked before."); 228751c0b2f7Stbbdev 228851c0b2f7Stbbdev if (tls) { 228951c0b2f7Stbbdev tls->markUsed(); 229051c0b2f7Stbbdev lmb = tls->lloc.get(allocationSize); 229151c0b2f7Stbbdev } 229251c0b2f7Stbbdev if (!lmb) 229351c0b2f7Stbbdev lmb = extMemPool.mallocLargeObject(this, allocationSize); 229451c0b2f7Stbbdev 229551c0b2f7Stbbdev if (lmb) { 229651c0b2f7Stbbdev // doing shuffle we suppose that alignment offset guarantees 229751c0b2f7Stbbdev // that different cache lines are in use 229851c0b2f7Stbbdev MALLOC_ASSERT(alignment >= estimatedCacheLineSize, ASSERT_TEXT); 229951c0b2f7Stbbdev 230051c0b2f7Stbbdev void *alignedArea = (void*)alignUp((uintptr_t)lmb+headersSize, alignment); 230151c0b2f7Stbbdev uintptr_t alignedRight = 230251c0b2f7Stbbdev alignDown((uintptr_t)lmb+lmb->unalignedSize - size, alignment); 230351c0b2f7Stbbdev // Has some room to shuffle object between cache lines? 230451c0b2f7Stbbdev // Note that alignedRight and alignedArea are aligned at alignment. 230551c0b2f7Stbbdev unsigned ptrDelta = alignedRight - (uintptr_t)alignedArea; 230651c0b2f7Stbbdev if (ptrDelta && tls) { // !tls is cold path 230751c0b2f7Stbbdev // for the hot path of alignment==estimatedCacheLineSize, 230851c0b2f7Stbbdev // allow compilers to use shift for division 230951c0b2f7Stbbdev // (since estimatedCacheLineSize is a power-of-2 constant) 231051c0b2f7Stbbdev unsigned numOfPossibleOffsets = alignment == estimatedCacheLineSize? 231151c0b2f7Stbbdev ptrDelta / estimatedCacheLineSize : 231251c0b2f7Stbbdev ptrDelta / alignment; 231351c0b2f7Stbbdev unsigned myCacheIdx = ++tls->currCacheIdx; 231451c0b2f7Stbbdev unsigned offset = myCacheIdx % numOfPossibleOffsets; 231551c0b2f7Stbbdev 231651c0b2f7Stbbdev // Move object to a cache line with an offset that is different from 231751c0b2f7Stbbdev // previous allocation. This supposedly allows us to use cache 231851c0b2f7Stbbdev // associativity more efficiently. 
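            // Illustrative example of the shuffle above (hypothetical numbers): with alignment
            // equal to estimatedCacheLineSize (say 64 bytes) and ptrDelta == 256,
            // numOfPossibleOffsets == 4, so successive allocations from this thread rotate
            // through offsets 0, 64, 128 and 192 bytes as currCacheIdx grows, spreading
            // same-sized objects across different cache sets.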
231951c0b2f7Stbbdev alignedArea = (void*)((uintptr_t)alignedArea + offset*alignment); 232051c0b2f7Stbbdev } 232151c0b2f7Stbbdev MALLOC_ASSERT((uintptr_t)lmb+lmb->unalignedSize >= 232251c0b2f7Stbbdev (uintptr_t)alignedArea+size, "Object doesn't fit the block."); 232351c0b2f7Stbbdev LargeObjectHdr *header = (LargeObjectHdr*)alignedArea-1; 232451c0b2f7Stbbdev header->memoryBlock = lmb; 232551c0b2f7Stbbdev header->backRefIdx = lmb->backRefIdx; 232651c0b2f7Stbbdev setBackRef(header->backRefIdx, header); 232751c0b2f7Stbbdev 232851c0b2f7Stbbdev lmb->objectSize = size; 232951c0b2f7Stbbdev 233051c0b2f7Stbbdev MALLOC_ASSERT( isLargeObject<unknownMem>(alignedArea), ASSERT_TEXT ); 233151c0b2f7Stbbdev MALLOC_ASSERT( isAligned(alignedArea, alignment), ASSERT_TEXT ); 233251c0b2f7Stbbdev 233351c0b2f7Stbbdev return alignedArea; 233451c0b2f7Stbbdev } 233557f524caSIlya Isaev return nullptr; 233651c0b2f7Stbbdev } 233751c0b2f7Stbbdev 233851c0b2f7Stbbdev void MemoryPool::putToLLOCache(TLSData *tls, void *object) 233951c0b2f7Stbbdev { 234051c0b2f7Stbbdev LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 234151c0b2f7Stbbdev // overwrite backRefIdx to simplify double free detection 234251c0b2f7Stbbdev header->backRefIdx = BackRefIdx(); 234351c0b2f7Stbbdev 234451c0b2f7Stbbdev if (tls) { 234551c0b2f7Stbbdev tls->markUsed(); 234651c0b2f7Stbbdev if (tls->lloc.put(header->memoryBlock, &extMemPool)) 234751c0b2f7Stbbdev return; 234851c0b2f7Stbbdev } 234951c0b2f7Stbbdev extMemPool.freeLargeObject(header->memoryBlock); 235051c0b2f7Stbbdev } 235151c0b2f7Stbbdev 235251c0b2f7Stbbdev /* 235351c0b2f7Stbbdev * All aligned allocations fall into one of the following categories: 235451c0b2f7Stbbdev * 1. if both request size and alignment are <= maxSegregatedObjectSize, 235551c0b2f7Stbbdev * we just align the size up, and request this amount, because for every size 235651c0b2f7Stbbdev * aligned to some power of 2, the allocated object is at least that aligned. 235751c0b2f7Stbbdev * 2. for size<minLargeObjectSize, check if already guaranteed fittingAlignment is enough. 235851c0b2f7Stbbdev * 3. if size+alignment<minLargeObjectSize, we take an object of fittingSizeN and align 235951c0b2f7Stbbdev * its address up; given such pointer, scalable_free could find the real object. 236051c0b2f7Stbbdev * Wrapping of size+alignment is impossible because maximal allowed 236151c0b2f7Stbbdev * alignment plus minLargeObjectSize can't lead to wrapping. 236251c0b2f7Stbbdev * 4. otherwise, aligned large object is allocated. 236351c0b2f7Stbbdev */ 236451c0b2f7Stbbdev static void *allocateAligned(MemoryPool *memPool, size_t size, size_t alignment) 236551c0b2f7Stbbdev { 236651c0b2f7Stbbdev MALLOC_ASSERT( isPowerOfTwo(alignment), ASSERT_TEXT ); 236751c0b2f7Stbbdev 236851c0b2f7Stbbdev if (!isMallocInitialized()) 236951c0b2f7Stbbdev if (!doInitialization()) 237057f524caSIlya Isaev return nullptr; 237151c0b2f7Stbbdev 237251c0b2f7Stbbdev void *result; 237351c0b2f7Stbbdev if (size<=maxSegregatedObjectSize && alignment<=maxSegregatedObjectSize) 237451c0b2f7Stbbdev result = internalPoolMalloc(memPool, alignUp(size? 
size: sizeof(size_t), alignment)); 237551c0b2f7Stbbdev else if (size<minLargeObjectSize) { 237651c0b2f7Stbbdev if (alignment<=fittingAlignment) 237751c0b2f7Stbbdev result = internalPoolMalloc(memPool, size); 237851c0b2f7Stbbdev else if (size+alignment < minLargeObjectSize) { 237951c0b2f7Stbbdev void *unaligned = internalPoolMalloc(memPool, size+alignment); 238057f524caSIlya Isaev if (!unaligned) return nullptr; 238151c0b2f7Stbbdev result = alignUp(unaligned, alignment); 238251c0b2f7Stbbdev } else 238351c0b2f7Stbbdev goto LargeObjAlloc; 238451c0b2f7Stbbdev } else { 238551c0b2f7Stbbdev LargeObjAlloc: 238651c0b2f7Stbbdev TLSData *tls = memPool->getTLS(/*create=*/true); 238751c0b2f7Stbbdev // take into account only alignment that are higher then natural 238851c0b2f7Stbbdev result = 238951c0b2f7Stbbdev memPool->getFromLLOCache(tls, size, largeObjectAlignment>alignment? 239051c0b2f7Stbbdev largeObjectAlignment: alignment); 239151c0b2f7Stbbdev } 239251c0b2f7Stbbdev 239351c0b2f7Stbbdev MALLOC_ASSERT( isAligned(result, alignment), ASSERT_TEXT ); 239451c0b2f7Stbbdev return result; 239551c0b2f7Stbbdev } 239651c0b2f7Stbbdev 239751c0b2f7Stbbdev static void *reallocAligned(MemoryPool *memPool, void *ptr, 239851c0b2f7Stbbdev size_t newSize, size_t alignment = 0) 239951c0b2f7Stbbdev { 240051c0b2f7Stbbdev void *result; 240151c0b2f7Stbbdev size_t copySize; 240251c0b2f7Stbbdev 240351c0b2f7Stbbdev if (isLargeObject<ourMem>(ptr)) { 240451c0b2f7Stbbdev LargeMemoryBlock* lmb = ((LargeObjectHdr *)ptr - 1)->memoryBlock; 240551c0b2f7Stbbdev copySize = lmb->unalignedSize-((uintptr_t)ptr-(uintptr_t)lmb); 240651c0b2f7Stbbdev 240751c0b2f7Stbbdev // Apply different strategies if size decreases 240851c0b2f7Stbbdev if (newSize <= copySize && (0 == alignment || isAligned(ptr, alignment))) { 240951c0b2f7Stbbdev 241051c0b2f7Stbbdev // For huge objects (that do not fit in backend cache), keep the same space unless 241151c0b2f7Stbbdev // the new size is at least twice smaller 241251c0b2f7Stbbdev bool isMemoryBlockHuge = copySize > memPool->extMemPool.backend.getMaxBinnedSize(); 241351c0b2f7Stbbdev size_t threshold = isMemoryBlockHuge ? copySize / 2 : 0; 241451c0b2f7Stbbdev if (newSize > threshold) { 241551c0b2f7Stbbdev lmb->objectSize = newSize; 241651c0b2f7Stbbdev return ptr; 241751c0b2f7Stbbdev } 241851c0b2f7Stbbdev // TODO: For large objects suitable for the backend cache, 241951c0b2f7Stbbdev // split out the excessive part and put it to the backend. 242051c0b2f7Stbbdev } 242151c0b2f7Stbbdev // Reallocate for real 242251c0b2f7Stbbdev copySize = lmb->objectSize; 242351c0b2f7Stbbdev #if BACKEND_HAS_MREMAP 242451c0b2f7Stbbdev if (void *r = memPool->extMemPool.remap(ptr, copySize, newSize, 242551c0b2f7Stbbdev alignment < largeObjectAlignment ? largeObjectAlignment : alignment)) 242651c0b2f7Stbbdev return r; 242751c0b2f7Stbbdev #endif 242851c0b2f7Stbbdev result = alignment ? allocateAligned(memPool, newSize, alignment) : 242951c0b2f7Stbbdev internalPoolMalloc(memPool, newSize); 243051c0b2f7Stbbdev 243151c0b2f7Stbbdev } else { 243251c0b2f7Stbbdev Block* block = (Block *)alignDown(ptr, slabSize); 243351c0b2f7Stbbdev copySize = block->findObjectSize(ptr); 243451c0b2f7Stbbdev 243551c0b2f7Stbbdev // TODO: Move object to another bin if size decreases and the current bin is "empty enough". 
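        // Illustrative case (hypothetical sizes): shrinking a 1 KB slab object to 512 B keeps
        // the original pointer and leaves the slack inside the current bin, rather than
        // migrating the data into a smaller bin.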
243651c0b2f7Stbbdev // Currently, in case of size decreasing, old pointer is returned 243751c0b2f7Stbbdev if (newSize <= copySize && (0==alignment || isAligned(ptr, alignment))) { 243851c0b2f7Stbbdev return ptr; 243951c0b2f7Stbbdev } else { 244051c0b2f7Stbbdev result = alignment ? allocateAligned(memPool, newSize, alignment) : 244151c0b2f7Stbbdev internalPoolMalloc(memPool, newSize); 244251c0b2f7Stbbdev } 244351c0b2f7Stbbdev } 244451c0b2f7Stbbdev if (result) { 244551c0b2f7Stbbdev memcpy(result, ptr, copySize < newSize ? copySize : newSize); 244651c0b2f7Stbbdev internalPoolFree(memPool, ptr, 0); 244751c0b2f7Stbbdev } 244851c0b2f7Stbbdev return result; 244951c0b2f7Stbbdev } 245051c0b2f7Stbbdev 2451478de5b1Stbbdev #if MALLOC_DEBUG 245251c0b2f7Stbbdev /* A predicate checks if an object is properly placed inside its block */ 245351c0b2f7Stbbdev inline bool Block::isProperlyPlaced(const void *object) const 245451c0b2f7Stbbdev { 245551c0b2f7Stbbdev return 0 == ((uintptr_t)this + slabSize - (uintptr_t)object) % objectSize; 245651c0b2f7Stbbdev } 2457478de5b1Stbbdev #endif 245851c0b2f7Stbbdev 245951c0b2f7Stbbdev /* Finds the real object inside the block */ 246051c0b2f7Stbbdev FreeObject *Block::findAllocatedObject(const void *address) const 246151c0b2f7Stbbdev { 246251c0b2f7Stbbdev // calculate offset from the end of the block space 246351c0b2f7Stbbdev uint16_t offset = (uintptr_t)this + slabSize - (uintptr_t)address; 246451c0b2f7Stbbdev MALLOC_ASSERT( offset<=slabSize-sizeof(Block), ASSERT_TEXT ); 246551c0b2f7Stbbdev // find offset difference from a multiple of allocation size 246651c0b2f7Stbbdev offset %= objectSize; 246751c0b2f7Stbbdev // and move the address down to where the real object starts. 246851c0b2f7Stbbdev return (FreeObject*)((uintptr_t)address - (offset? objectSize-offset: 0)); 246951c0b2f7Stbbdev } 247051c0b2f7Stbbdev 247151c0b2f7Stbbdev /* 247251c0b2f7Stbbdev * Bad dereference caused by a foreign pointer is possible only here, not earlier in call chain. 247351c0b2f7Stbbdev * Separate function isolates SEH code, as it has bad influence on compiler optimization. 247451c0b2f7Stbbdev */ 247551c0b2f7Stbbdev static inline BackRefIdx safer_dereference (const BackRefIdx *ptr) 247651c0b2f7Stbbdev { 247751c0b2f7Stbbdev BackRefIdx id; 247851c0b2f7Stbbdev #if _MSC_VER 247951c0b2f7Stbbdev __try { 248051c0b2f7Stbbdev #endif 2481478de5b1Stbbdev id = dereference(ptr); 248251c0b2f7Stbbdev #if _MSC_VER 248351c0b2f7Stbbdev } __except( GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION? 248451c0b2f7Stbbdev EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ) { 248551c0b2f7Stbbdev id = BackRefIdx(); 248651c0b2f7Stbbdev } 248751c0b2f7Stbbdev #endif 248851c0b2f7Stbbdev return id; 248951c0b2f7Stbbdev } 249051c0b2f7Stbbdev 249151c0b2f7Stbbdev template<MemoryOrigin memOrigin> 249251c0b2f7Stbbdev bool isLargeObject(void *object) 249351c0b2f7Stbbdev { 249451c0b2f7Stbbdev if (!isAligned(object, largeObjectAlignment)) 249551c0b2f7Stbbdev return false; 249651c0b2f7Stbbdev LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 249751c0b2f7Stbbdev BackRefIdx idx = (memOrigin == unknownMem) ? 
2498478de5b1Stbbdev safer_dereference(&header->backRefIdx) : dereference(&header->backRefIdx); 249951c0b2f7Stbbdev 250051c0b2f7Stbbdev return idx.isLargeObject() 250157f524caSIlya Isaev // in valid LargeObjectHdr memoryBlock is not nullptr 250251c0b2f7Stbbdev && header->memoryBlock 250351c0b2f7Stbbdev // in valid LargeObjectHdr memoryBlock points somewhere before header 250451c0b2f7Stbbdev // TODO: more strict check 250551c0b2f7Stbbdev && (uintptr_t)header->memoryBlock < (uintptr_t)header 250651c0b2f7Stbbdev && getBackRef(idx) == header; 250751c0b2f7Stbbdev } 250851c0b2f7Stbbdev 250951c0b2f7Stbbdev static inline bool isSmallObject (void *ptr) 251051c0b2f7Stbbdev { 251151c0b2f7Stbbdev Block* expectedBlock = (Block*)alignDown(ptr, slabSize); 251251c0b2f7Stbbdev const BackRefIdx* idx = expectedBlock->getBackRefIdx(); 251351c0b2f7Stbbdev 251451c0b2f7Stbbdev bool isSmall = expectedBlock == getBackRef(safer_dereference(idx)); 251551c0b2f7Stbbdev if (isSmall) 251651c0b2f7Stbbdev expectedBlock->checkFreePrecond(ptr); 251751c0b2f7Stbbdev return isSmall; 251851c0b2f7Stbbdev } 251951c0b2f7Stbbdev 252051c0b2f7Stbbdev /**** Check if an object was allocated by scalable_malloc ****/ 252151c0b2f7Stbbdev static inline bool isRecognized (void* ptr) 252251c0b2f7Stbbdev { 252351c0b2f7Stbbdev return defaultMemPool->extMemPool.backend.ptrCanBeValid(ptr) && 252451c0b2f7Stbbdev (isLargeObject<unknownMem>(ptr) || isSmallObject(ptr)); 252551c0b2f7Stbbdev } 252651c0b2f7Stbbdev 252751c0b2f7Stbbdev static inline void freeSmallObject(void *object) 252851c0b2f7Stbbdev { 252951c0b2f7Stbbdev /* mask low bits to get the block */ 253051c0b2f7Stbbdev Block *block = (Block *)alignDown(object, slabSize); 253151c0b2f7Stbbdev block->checkFreePrecond(object); 253251c0b2f7Stbbdev 253351c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION 253451c0b2f7Stbbdev if (block->isStartupAllocObject()) { 253551c0b2f7Stbbdev ((StartupBlock *)block)->free(object); 253651c0b2f7Stbbdev return; 253751c0b2f7Stbbdev } 253851c0b2f7Stbbdev #endif 253951c0b2f7Stbbdev if (block->isOwnedByCurrentThread()) { 254051c0b2f7Stbbdev block->freeOwnObject(object); 254151c0b2f7Stbbdev } else { /* Slower path to add to the shared list, the allocatedCount is updated by the owner thread in malloc. 
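                 Such publicly freed objects are later reclaimed by the owner thread via
                 getPrivatizedFreeListBlock() (see internalPoolMalloc() below).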
*/ 254251c0b2f7Stbbdev FreeObject *objectToFree = block->findObjectToFree(object); 254351c0b2f7Stbbdev block->freePublicObject(objectToFree); 254451c0b2f7Stbbdev } 254551c0b2f7Stbbdev } 254651c0b2f7Stbbdev 254751c0b2f7Stbbdev static void *internalPoolMalloc(MemoryPool* memPool, size_t size) 254851c0b2f7Stbbdev { 254951c0b2f7Stbbdev Bin* bin; 255051c0b2f7Stbbdev Block * mallocBlock; 255151c0b2f7Stbbdev 255257f524caSIlya Isaev if (!memPool) return nullptr; 255351c0b2f7Stbbdev 255451c0b2f7Stbbdev if (!size) size = sizeof(size_t); 255551c0b2f7Stbbdev 255651c0b2f7Stbbdev TLSData *tls = memPool->getTLS(/*create=*/true); 255751c0b2f7Stbbdev 255851c0b2f7Stbbdev /* Allocate a large object */ 255951c0b2f7Stbbdev if (size >= minLargeObjectSize) 256051c0b2f7Stbbdev return memPool->getFromLLOCache(tls, size, largeObjectAlignment); 256151c0b2f7Stbbdev 256257f524caSIlya Isaev if (!tls) return nullptr; 256351c0b2f7Stbbdev 256451c0b2f7Stbbdev tls->markUsed(); 256551c0b2f7Stbbdev /* 256651c0b2f7Stbbdev * Get an element in thread-local array corresponding to the given size; 256751c0b2f7Stbbdev * It keeps ptr to the active block for allocations of this size 256851c0b2f7Stbbdev */ 256951c0b2f7Stbbdev bin = tls->getAllocationBin(size); 257057f524caSIlya Isaev if ( !bin ) return nullptr; 257151c0b2f7Stbbdev 257251c0b2f7Stbbdev /* Get a block to try to allocate in. */ 257351c0b2f7Stbbdev for( mallocBlock = bin->getActiveBlock(); mallocBlock; 257451c0b2f7Stbbdev mallocBlock = bin->setPreviousBlockActive() ) // the previous block should be empty enough 257551c0b2f7Stbbdev { 257651c0b2f7Stbbdev if( FreeObject *result = mallocBlock->allocate() ) 257751c0b2f7Stbbdev return result; 257851c0b2f7Stbbdev } 257951c0b2f7Stbbdev 258051c0b2f7Stbbdev /* 258151c0b2f7Stbbdev * else privatize publicly freed objects in some block and allocate from it 258251c0b2f7Stbbdev */ 258351c0b2f7Stbbdev mallocBlock = bin->getPrivatizedFreeListBlock(); 258451c0b2f7Stbbdev if (mallocBlock) { 258551c0b2f7Stbbdev MALLOC_ASSERT( mallocBlock->freeListNonNull(), ASSERT_TEXT ); 258651c0b2f7Stbbdev if ( FreeObject *result = mallocBlock->allocateFromFreeList() ) 258751c0b2f7Stbbdev return result; 258851c0b2f7Stbbdev /* Else something strange happened, need to retry from the beginning; */ 258951c0b2f7Stbbdev TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in public free list; reentering.\n" )); 259051c0b2f7Stbbdev return internalPoolMalloc(memPool, size); 259151c0b2f7Stbbdev } 259251c0b2f7Stbbdev 259351c0b2f7Stbbdev /* 259451c0b2f7Stbbdev * no suitable own blocks, try to get a partial block that some other thread has discarded. 259551c0b2f7Stbbdev */ 259651c0b2f7Stbbdev mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size); 259751c0b2f7Stbbdev while (mallocBlock) { 259851c0b2f7Stbbdev bin->pushTLSBin(mallocBlock); 259951c0b2f7Stbbdev bin->setActiveBlock(mallocBlock); // TODO: move under the below condition? 
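        // Descriptive note: each orphaned block is adopted into this thread's bin first
        // (pushTLSBin/setActiveBlock above); if it yields no object, it simply stays in the
        // bin and the loop asks orphanedBlocks for the next block of this size class.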
260051c0b2f7Stbbdev if( FreeObject *result = mallocBlock->allocate() ) 260151c0b2f7Stbbdev return result; 260251c0b2f7Stbbdev mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size); 260351c0b2f7Stbbdev } 260451c0b2f7Stbbdev 260551c0b2f7Stbbdev /* 260651c0b2f7Stbbdev * else try to get a new empty block 260751c0b2f7Stbbdev */ 260851c0b2f7Stbbdev mallocBlock = memPool->getEmptyBlock(size); 260951c0b2f7Stbbdev if (mallocBlock) { 261051c0b2f7Stbbdev bin->pushTLSBin(mallocBlock); 261151c0b2f7Stbbdev bin->setActiveBlock(mallocBlock); 261251c0b2f7Stbbdev if( FreeObject *result = mallocBlock->allocate() ) 261351c0b2f7Stbbdev return result; 261451c0b2f7Stbbdev /* Else something strange happened, need to retry from the beginning; */ 261551c0b2f7Stbbdev TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in empty block; reentering.\n" )); 261651c0b2f7Stbbdev return internalPoolMalloc(memPool, size); 261751c0b2f7Stbbdev } 261851c0b2f7Stbbdev /* 261957f524caSIlya Isaev * else nothing works so return nullptr 262051c0b2f7Stbbdev */ 262157f524caSIlya Isaev TRACEF(( "[ScalableMalloc trace] No memory found, returning nullptr.\n" )); 262257f524caSIlya Isaev return nullptr; 262351c0b2f7Stbbdev } 262451c0b2f7Stbbdev 262551c0b2f7Stbbdev // When size==0 (i.e. unknown), detect here whether the object is large. 262651c0b2f7Stbbdev // For size is known and < minLargeObjectSize, we still need to check 262751c0b2f7Stbbdev // if the actual object is large, because large objects might be used 262851c0b2f7Stbbdev // for aligned small allocations. 262951c0b2f7Stbbdev static bool internalPoolFree(MemoryPool *memPool, void *object, size_t size) 263051c0b2f7Stbbdev { 263151c0b2f7Stbbdev if (!memPool || !object) return false; 263251c0b2f7Stbbdev 263351c0b2f7Stbbdev // The library is initialized at allocation call, so releasing while 263451c0b2f7Stbbdev // not initialized means foreign object is releasing. 263551c0b2f7Stbbdev MALLOC_ASSERT(isMallocInitialized(), ASSERT_TEXT); 263651c0b2f7Stbbdev MALLOC_ASSERT(memPool->extMemPool.userPool() || isRecognized(object), 263751c0b2f7Stbbdev "Invalid pointer during object releasing is detected."); 263851c0b2f7Stbbdev 263951c0b2f7Stbbdev if (size >= minLargeObjectSize || isLargeObject<ourMem>(object)) 264051c0b2f7Stbbdev memPool->putToLLOCache(memPool->getTLS(/*create=*/false), object); 264151c0b2f7Stbbdev else 264251c0b2f7Stbbdev freeSmallObject(object); 264351c0b2f7Stbbdev return true; 264451c0b2f7Stbbdev } 264551c0b2f7Stbbdev 264651c0b2f7Stbbdev static void *internalMalloc(size_t size) 264751c0b2f7Stbbdev { 264851c0b2f7Stbbdev if (!size) size = sizeof(size_t); 264951c0b2f7Stbbdev 265051c0b2f7Stbbdev #if MALLOC_CHECK_RECURSION 265151c0b2f7Stbbdev if (RecursiveMallocCallProtector::sameThreadActive()) 265251c0b2f7Stbbdev return size<minLargeObjectSize? 
StartupBlock::allocate(size) : 265351c0b2f7Stbbdev // nested allocation, so skip tls 265457f524caSIlya Isaev (FreeObject*)defaultMemPool->getFromLLOCache(nullptr, size, slabSize); 265551c0b2f7Stbbdev #endif 265651c0b2f7Stbbdev 265751c0b2f7Stbbdev if (!isMallocInitialized()) 265851c0b2f7Stbbdev if (!doInitialization()) 265957f524caSIlya Isaev return nullptr; 266051c0b2f7Stbbdev return internalPoolMalloc(defaultMemPool, size); 266151c0b2f7Stbbdev } 266251c0b2f7Stbbdev 266351c0b2f7Stbbdev static void internalFree(void *object) 266451c0b2f7Stbbdev { 266551c0b2f7Stbbdev internalPoolFree(defaultMemPool, object, 0); 266651c0b2f7Stbbdev } 266751c0b2f7Stbbdev 266851c0b2f7Stbbdev static size_t internalMsize(void* ptr) 266951c0b2f7Stbbdev { 267051c0b2f7Stbbdev MALLOC_ASSERT(ptr, "Invalid pointer passed to internalMsize"); 267151c0b2f7Stbbdev if (isLargeObject<ourMem>(ptr)) { 267251c0b2f7Stbbdev // TODO: return the maximum memory size, that can be written to this object 267351c0b2f7Stbbdev LargeMemoryBlock* lmb = ((LargeObjectHdr*)ptr - 1)->memoryBlock; 267451c0b2f7Stbbdev return lmb->objectSize; 267551c0b2f7Stbbdev } else { 267651c0b2f7Stbbdev Block *block = (Block*)alignDown(ptr, slabSize); 267751c0b2f7Stbbdev return block->findObjectSize(ptr); 267851c0b2f7Stbbdev } 267951c0b2f7Stbbdev } 268051c0b2f7Stbbdev 268151c0b2f7Stbbdev } // namespace internal 268251c0b2f7Stbbdev 268351c0b2f7Stbbdev using namespace rml::internal; 268451c0b2f7Stbbdev 268551c0b2f7Stbbdev // legacy entry point saved for compatibility with binaries complied 268651c0b2f7Stbbdev // with pre-6003 versions of TBB 26878827ea7dSLong Nguyen TBBMALLOC_EXPORT rml::MemoryPool *pool_create(intptr_t pool_id, const MemPoolPolicy *policy) 268851c0b2f7Stbbdev { 268951c0b2f7Stbbdev rml::MemoryPool *pool; 269051c0b2f7Stbbdev MemPoolPolicy pol(policy->pAlloc, policy->pFree, policy->granularity); 269151c0b2f7Stbbdev 269251c0b2f7Stbbdev pool_create_v1(pool_id, &pol, &pool); 269351c0b2f7Stbbdev return pool; 269451c0b2f7Stbbdev } 269551c0b2f7Stbbdev 269651c0b2f7Stbbdev rml::MemPoolError pool_create_v1(intptr_t pool_id, const MemPoolPolicy *policy, 269751c0b2f7Stbbdev rml::MemoryPool **pool) 269851c0b2f7Stbbdev { 269951c0b2f7Stbbdev if ( !policy->pAlloc || policy->version<MemPoolPolicy::TBBMALLOC_POOL_VERSION 270051c0b2f7Stbbdev // empty pFree allowed only for fixed pools 270151c0b2f7Stbbdev || !(policy->fixedPool || policy->pFree)) { 270257f524caSIlya Isaev *pool = nullptr; 270351c0b2f7Stbbdev return INVALID_POLICY; 270451c0b2f7Stbbdev } 270551c0b2f7Stbbdev if ( policy->version>MemPoolPolicy::TBBMALLOC_POOL_VERSION // future versions are not supported 270651c0b2f7Stbbdev // new flags can be added in place of reserved, but default 270751c0b2f7Stbbdev // behaviour must be supported by this version 270851c0b2f7Stbbdev || policy->reserved ) { 270957f524caSIlya Isaev *pool = nullptr; 271051c0b2f7Stbbdev return UNSUPPORTED_POLICY; 271151c0b2f7Stbbdev } 271251c0b2f7Stbbdev if (!isMallocInitialized()) 271351c0b2f7Stbbdev if (!doInitialization()) { 271457f524caSIlya Isaev *pool = nullptr; 271551c0b2f7Stbbdev return NO_MEMORY; 271651c0b2f7Stbbdev } 271751c0b2f7Stbbdev rml::internal::MemoryPool *memPool = 271851c0b2f7Stbbdev (rml::internal::MemoryPool*)internalMalloc((sizeof(rml::internal::MemoryPool))); 271951c0b2f7Stbbdev if (!memPool) { 272057f524caSIlya Isaev *pool = nullptr; 272151c0b2f7Stbbdev return NO_MEMORY; 272251c0b2f7Stbbdev } 27232110128eSsarathnandu memset(static_cast<void*>(memPool), 0, sizeof(rml::internal::MemoryPool)); 272451c0b2f7Stbbdev if 
(!memPool->init(pool_id, policy)) { 272551c0b2f7Stbbdev internalFree(memPool); 272657f524caSIlya Isaev *pool = nullptr; 272751c0b2f7Stbbdev return NO_MEMORY; 272851c0b2f7Stbbdev } 272951c0b2f7Stbbdev 273051c0b2f7Stbbdev *pool = (rml::MemoryPool*)memPool; 273151c0b2f7Stbbdev return POOL_OK; 273251c0b2f7Stbbdev } 273351c0b2f7Stbbdev 273451c0b2f7Stbbdev bool pool_destroy(rml::MemoryPool* memPool) 273551c0b2f7Stbbdev { 273651c0b2f7Stbbdev if (!memPool) return false; 273751c0b2f7Stbbdev bool ret = ((rml::internal::MemoryPool*)memPool)->destroy(); 273851c0b2f7Stbbdev internalFree(memPool); 273951c0b2f7Stbbdev 274051c0b2f7Stbbdev return ret; 274151c0b2f7Stbbdev } 274251c0b2f7Stbbdev 274351c0b2f7Stbbdev bool pool_reset(rml::MemoryPool* memPool) 274451c0b2f7Stbbdev { 274551c0b2f7Stbbdev if (!memPool) return false; 274651c0b2f7Stbbdev 274751c0b2f7Stbbdev return ((rml::internal::MemoryPool*)memPool)->reset(); 274851c0b2f7Stbbdev } 274951c0b2f7Stbbdev 275051c0b2f7Stbbdev void *pool_malloc(rml::MemoryPool* mPool, size_t size) 275151c0b2f7Stbbdev { 275251c0b2f7Stbbdev return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size); 275351c0b2f7Stbbdev } 275451c0b2f7Stbbdev 275551c0b2f7Stbbdev void *pool_realloc(rml::MemoryPool* mPool, void *object, size_t size) 275651c0b2f7Stbbdev { 275751c0b2f7Stbbdev if (!object) 275851c0b2f7Stbbdev return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size); 275951c0b2f7Stbbdev if (!size) { 276051c0b2f7Stbbdev internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0); 276157f524caSIlya Isaev return nullptr; 276251c0b2f7Stbbdev } 276351c0b2f7Stbbdev return reallocAligned((rml::internal::MemoryPool*)mPool, object, size, 0); 276451c0b2f7Stbbdev } 276551c0b2f7Stbbdev 276651c0b2f7Stbbdev void *pool_aligned_malloc(rml::MemoryPool* mPool, size_t size, size_t alignment) 276751c0b2f7Stbbdev { 276851c0b2f7Stbbdev if (!isPowerOfTwo(alignment) || 0==size) 276957f524caSIlya Isaev return nullptr; 277051c0b2f7Stbbdev 277151c0b2f7Stbbdev return allocateAligned((rml::internal::MemoryPool*)mPool, size, alignment); 277251c0b2f7Stbbdev } 277351c0b2f7Stbbdev 277451c0b2f7Stbbdev void *pool_aligned_realloc(rml::MemoryPool* memPool, void *ptr, size_t size, size_t alignment) 277551c0b2f7Stbbdev { 277651c0b2f7Stbbdev if (!isPowerOfTwo(alignment)) 277757f524caSIlya Isaev return nullptr; 277851c0b2f7Stbbdev rml::internal::MemoryPool *mPool = (rml::internal::MemoryPool*)memPool; 277951c0b2f7Stbbdev void *tmp; 278051c0b2f7Stbbdev 278151c0b2f7Stbbdev if (!ptr) 278251c0b2f7Stbbdev tmp = allocateAligned(mPool, size, alignment); 278351c0b2f7Stbbdev else if (!size) { 278451c0b2f7Stbbdev internalPoolFree(mPool, ptr, 0); 278557f524caSIlya Isaev return nullptr; 278651c0b2f7Stbbdev } else 278751c0b2f7Stbbdev tmp = reallocAligned(mPool, ptr, size, alignment); 278851c0b2f7Stbbdev 278951c0b2f7Stbbdev return tmp; 279051c0b2f7Stbbdev } 279151c0b2f7Stbbdev 279251c0b2f7Stbbdev bool pool_free(rml::MemoryPool *mPool, void *object) 279351c0b2f7Stbbdev { 279451c0b2f7Stbbdev return internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0); 279551c0b2f7Stbbdev } 279651c0b2f7Stbbdev 279751c0b2f7Stbbdev rml::MemoryPool *pool_identify(void *object) 279851c0b2f7Stbbdev { 279951c0b2f7Stbbdev rml::internal::MemoryPool *pool; 280051c0b2f7Stbbdev if (isLargeObject<ourMem>(object)) { 280151c0b2f7Stbbdev LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 280251c0b2f7Stbbdev pool = header->memoryBlock->pool; 280351c0b2f7Stbbdev } else { 280451c0b2f7Stbbdev Block *block = (Block*)alignDown(object, 
slabSize); 280551c0b2f7Stbbdev pool = block->getMemPool(); 280651c0b2f7Stbbdev } 280751c0b2f7Stbbdev // do not return defaultMemPool, as it can't be used in pool_free() etc 280851c0b2f7Stbbdev __TBB_ASSERT_RELEASE(pool!=defaultMemPool, 280951c0b2f7Stbbdev "rml::pool_identify() can't be used for scalable_malloc() etc results."); 281051c0b2f7Stbbdev return (rml::MemoryPool*)pool; 281151c0b2f7Stbbdev } 281251c0b2f7Stbbdev 281351c0b2f7Stbbdev size_t pool_msize(rml::MemoryPool *mPool, void* object) 281451c0b2f7Stbbdev { 281551c0b2f7Stbbdev if (object) { 281651c0b2f7Stbbdev // No assert for object recognition, cause objects allocated from non-default 281751c0b2f7Stbbdev // memory pool do not participate in range checking and do not have valid backreferences for 281851c0b2f7Stbbdev // small objects. Instead, check that an object belong to the certain memory pool. 281951c0b2f7Stbbdev MALLOC_ASSERT_EX(mPool == pool_identify(object), "Object does not belong to the specified pool"); 282051c0b2f7Stbbdev return internalMsize(object); 282151c0b2f7Stbbdev } 282251c0b2f7Stbbdev errno = EINVAL; 282351c0b2f7Stbbdev // Unlike _msize, return 0 in case of parameter error. 282451c0b2f7Stbbdev // Returning size_t(-1) looks more like the way to troubles. 282551c0b2f7Stbbdev return 0; 282651c0b2f7Stbbdev } 282751c0b2f7Stbbdev 282851c0b2f7Stbbdev } // namespace rml 282951c0b2f7Stbbdev 283051c0b2f7Stbbdev using namespace rml::internal; 283151c0b2f7Stbbdev 283251c0b2f7Stbbdev #if MALLOC_TRACE 283351c0b2f7Stbbdev static unsigned int threadGoingDownCount = 0; 283451c0b2f7Stbbdev #endif 283551c0b2f7Stbbdev 283651c0b2f7Stbbdev /* 283751c0b2f7Stbbdev * When a thread is shutting down this routine should be called to remove all the thread ids 283857f524caSIlya Isaev * from the malloc blocks and replace them with a nullptr thread id. 283951c0b2f7Stbbdev * 284051c0b2f7Stbbdev * For pthreads, the function is set as a callback in pthread_key_create for TLS bin. 284151c0b2f7Stbbdev * It will be automatically called at thread exit with the key value as the argument, 284257f524caSIlya Isaev * unless that value is nullptr. 284351c0b2f7Stbbdev * For Windows, it is called from DllMain( DLL_THREAD_DETACH ). 284451c0b2f7Stbbdev * 284551c0b2f7Stbbdev * However neither of the above is called for the main process thread, so the routine 284651c0b2f7Stbbdev * also needs to be called during the process shutdown. 284751c0b2f7Stbbdev * 284851c0b2f7Stbbdev */ 284951c0b2f7Stbbdev // TODO: Consider making this function part of class MemoryPool. 285051c0b2f7Stbbdev void doThreadShutdownNotification(TLSData* tls, bool main_thread) 285151c0b2f7Stbbdev { 285251c0b2f7Stbbdev TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return start %d\n", 285351c0b2f7Stbbdev getThreadId(), threadGoingDownCount++ )); 285451c0b2f7Stbbdev 285551c0b2f7Stbbdev #if USE_PTHREAD 285651c0b2f7Stbbdev if (tls) { 285751c0b2f7Stbbdev if (!shutdownSync.threadDtorStart()) return; 285851c0b2f7Stbbdev tls->getMemPool()->onThreadShutdown(tls); 285951c0b2f7Stbbdev shutdownSync.threadDtorDone(); 286051c0b2f7Stbbdev } else 286151c0b2f7Stbbdev #endif 286251c0b2f7Stbbdev { 286351c0b2f7Stbbdev suppress_unused_warning(tls); // not used on Windows 286451c0b2f7Stbbdev // The default pool is safe to use at this point: 286551c0b2f7Stbbdev // on Linux, only the main thread can go here before destroying defaultMemPool; 286651c0b2f7Stbbdev // on Windows, shutdown is synchronized via loader lock and isMallocInitialized(). 
286751c0b2f7Stbbdev // See also __TBB_mallocProcessShutdownNotification() 286851c0b2f7Stbbdev defaultMemPool->onThreadShutdown(defaultMemPool->getTLS(/*create=*/false)); 286951c0b2f7Stbbdev // Take lock to walk through other pools; but waiting might be dangerous at this point 287051c0b2f7Stbbdev // (e.g. on Windows the main thread might deadlock) 287151c0b2f7Stbbdev bool locked; 287251c0b2f7Stbbdev MallocMutex::scoped_lock lock(MemoryPool::memPoolListLock, /*wait=*/!main_thread, &locked); 287351c0b2f7Stbbdev if (locked) { // the list is safe to process 287451c0b2f7Stbbdev for (MemoryPool *memPool = defaultMemPool->next; memPool; memPool = memPool->next) 287551c0b2f7Stbbdev memPool->onThreadShutdown(memPool->getTLS(/*create=*/false)); 287651c0b2f7Stbbdev } 287751c0b2f7Stbbdev } 287851c0b2f7Stbbdev 287951c0b2f7Stbbdev TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return end\n", getThreadId() )); 288051c0b2f7Stbbdev } 288151c0b2f7Stbbdev 288251c0b2f7Stbbdev #if USE_PTHREAD 288351c0b2f7Stbbdev void mallocThreadShutdownNotification(void* arg) 288451c0b2f7Stbbdev { 288551c0b2f7Stbbdev // The routine is called for each pool (as TLS dtor) on each thread, except for the main thread 288651c0b2f7Stbbdev if (!isMallocInitialized()) return; 288751c0b2f7Stbbdev doThreadShutdownNotification((TLSData*)arg, false); 288851c0b2f7Stbbdev } 288951c0b2f7Stbbdev #else 289051c0b2f7Stbbdev extern "C" void __TBB_mallocThreadShutdownNotification() 289151c0b2f7Stbbdev { 289251c0b2f7Stbbdev // The routine is called once per thread on Windows 289351c0b2f7Stbbdev if (!isMallocInitialized()) return; 289457f524caSIlya Isaev doThreadShutdownNotification(nullptr, false); 289551c0b2f7Stbbdev } 289651c0b2f7Stbbdev #endif 289751c0b2f7Stbbdev 289851c0b2f7Stbbdev extern "C" void __TBB_mallocProcessShutdownNotification(bool windows_process_dying) 289951c0b2f7Stbbdev { 290051c0b2f7Stbbdev if (!isMallocInitialized()) return; 290151c0b2f7Stbbdev 290251c0b2f7Stbbdev // Don't clean allocator internals if the entire process is exiting 290351c0b2f7Stbbdev if (!windows_process_dying) { 290457f524caSIlya Isaev doThreadShutdownNotification(nullptr, /*main_thread=*/true); 290551c0b2f7Stbbdev } 290651c0b2f7Stbbdev #if __TBB_MALLOC_LOCACHE_STAT 290751c0b2f7Stbbdev printf("cache hit ratio %f, size hit %f\n", 290851c0b2f7Stbbdev 1.*cacheHits/mallocCalls, 1.*memHitKB/memAllocKB); 290951c0b2f7Stbbdev defaultMemPool->extMemPool.loc.reportStat(stdout); 291051c0b2f7Stbbdev #endif 291151c0b2f7Stbbdev 291251c0b2f7Stbbdev shutdownSync.processExit(); 291351c0b2f7Stbbdev #if __TBB_SOURCE_DIRECTLY_INCLUDED 291451c0b2f7Stbbdev /* Pthread keys must be deleted as soon as possible to not call key dtor 291551c0b2f7Stbbdev on thread termination when then the tbbmalloc code can be already unloaded. 
291651c0b2f7Stbbdev */ 291751c0b2f7Stbbdev defaultMemPool->destroy(); 29181ecde27fSIlya Mishin destroyBackRefMain(&defaultMemPool->extMemPool.backend); 291951c0b2f7Stbbdev ThreadId::destroy(); // Delete key for thread id 292051c0b2f7Stbbdev hugePages.reset(); 292151c0b2f7Stbbdev // new total malloc initialization is possible after this point 292251c0b2f7Stbbdev mallocInitialized.store(0, std::memory_order_release); 292351c0b2f7Stbbdev #endif // __TBB_SOURCE_DIRECTLY_INCLUDED 292451c0b2f7Stbbdev 292551c0b2f7Stbbdev #if COLLECT_STATISTICS 292651c0b2f7Stbbdev unsigned nThreads = ThreadId::getMaxThreadId(); 292751c0b2f7Stbbdev for( int i=1; i<=nThreads && i<MAX_THREADS; ++i ) 292851c0b2f7Stbbdev STAT_print(i); 292951c0b2f7Stbbdev #endif 29300e6d4699SAnuya Welling if (!usedBySrcIncluded) { 293151c0b2f7Stbbdev MALLOC_ITT_FINI_ITTLIB(); 29320e6d4699SAnuya Welling MALLOC_ITT_RELEASE_RESOURCES(); 29330e6d4699SAnuya Welling } 293451c0b2f7Stbbdev } 293551c0b2f7Stbbdev 293651c0b2f7Stbbdev extern "C" void * scalable_malloc(size_t size) 293751c0b2f7Stbbdev { 293851c0b2f7Stbbdev void *ptr = internalMalloc(size); 293951c0b2f7Stbbdev if (!ptr) errno = ENOMEM; 294051c0b2f7Stbbdev return ptr; 294151c0b2f7Stbbdev } 294251c0b2f7Stbbdev 294351c0b2f7Stbbdev extern "C" void scalable_free(void *object) 294451c0b2f7Stbbdev { 294551c0b2f7Stbbdev internalFree(object); 294651c0b2f7Stbbdev } 294751c0b2f7Stbbdev 294851c0b2f7Stbbdev #if MALLOC_ZONE_OVERLOAD_ENABLED 294951c0b2f7Stbbdev extern "C" void __TBB_malloc_free_definite_size(void *object, size_t size) 295051c0b2f7Stbbdev { 295151c0b2f7Stbbdev internalPoolFree(defaultMemPool, object, size); 295251c0b2f7Stbbdev } 295351c0b2f7Stbbdev #endif 295451c0b2f7Stbbdev 295551c0b2f7Stbbdev /* 295651c0b2f7Stbbdev * A variant that provides additional memory safety, by checking whether the given address 295751c0b2f7Stbbdev * was obtained with this allocator, and if not redirecting to the provided alternative call. 295851c0b2f7Stbbdev */ 29598827ea7dSLong Nguyen extern "C" TBBMALLOC_EXPORT void __TBB_malloc_safer_free(void *object, void (*original_free)(void*)) 296051c0b2f7Stbbdev { 296151c0b2f7Stbbdev if (!object) 296251c0b2f7Stbbdev return; 296351c0b2f7Stbbdev 296451c0b2f7Stbbdev // tbbmalloc can allocate object only when tbbmalloc has been initialized 296551c0b2f7Stbbdev if (mallocInitialized.load(std::memory_order_acquire) && defaultMemPool->extMemPool.backend.ptrCanBeValid(object)) { 296651c0b2f7Stbbdev if (isLargeObject<unknownMem>(object)) { 296751c0b2f7Stbbdev // must check 1st for large object, because small object check touches 4 pages on left, 296851c0b2f7Stbbdev // and it can be inaccessible 296951c0b2f7Stbbdev TLSData *tls = defaultMemPool->getTLS(/*create=*/false); 297051c0b2f7Stbbdev 297151c0b2f7Stbbdev defaultMemPool->putToLLOCache(tls, object); 297251c0b2f7Stbbdev return; 297351c0b2f7Stbbdev } else if (isSmallObject(object)) { 297451c0b2f7Stbbdev freeSmallObject(object); 297551c0b2f7Stbbdev return; 297651c0b2f7Stbbdev } 297751c0b2f7Stbbdev } 297851c0b2f7Stbbdev if (original_free) 297951c0b2f7Stbbdev original_free(object); 298051c0b2f7Stbbdev } 298151c0b2f7Stbbdev 298251c0b2f7Stbbdev /********* End the free code *************/ 298351c0b2f7Stbbdev 298451c0b2f7Stbbdev /********* Code for scalable_realloc ***********/ 298551c0b2f7Stbbdev 298651c0b2f7Stbbdev /* 298751c0b2f7Stbbdev * From K&R 298851c0b2f7Stbbdev * "realloc changes the size of the object pointed to by p to size. 
The contents will 298951c0b2f7Stbbdev * be unchanged up to the minimum of the old and the new sizes. If the new size is larger, 299051c0b2f7Stbbdev * the new space is uninitialized. realloc returns a pointer to the new space, or 299157f524caSIlya Isaev * nullptr if the request cannot be satisfied, in which case *p is unchanged." 299251c0b2f7Stbbdev * 299351c0b2f7Stbbdev */ 299451c0b2f7Stbbdev extern "C" void* scalable_realloc(void* ptr, size_t size) 299551c0b2f7Stbbdev { 299651c0b2f7Stbbdev void *tmp; 299751c0b2f7Stbbdev 299851c0b2f7Stbbdev if (!ptr) 299951c0b2f7Stbbdev tmp = internalMalloc(size); 300051c0b2f7Stbbdev else if (!size) { 300151c0b2f7Stbbdev internalFree(ptr); 300257f524caSIlya Isaev return nullptr; 300351c0b2f7Stbbdev } else 300451c0b2f7Stbbdev tmp = reallocAligned(defaultMemPool, ptr, size, 0); 300551c0b2f7Stbbdev 300651c0b2f7Stbbdev if (!tmp) errno = ENOMEM; 300751c0b2f7Stbbdev return tmp; 300851c0b2f7Stbbdev } 300951c0b2f7Stbbdev 301051c0b2f7Stbbdev /* 301151c0b2f7Stbbdev * A variant that provides additional memory safety, by checking whether the given address 301251c0b2f7Stbbdev * was obtained with this allocator, and if not redirecting to the provided alternative call. 301351c0b2f7Stbbdev */ 30148827ea7dSLong Nguyen extern "C" TBBMALLOC_EXPORT void* __TBB_malloc_safer_realloc(void* ptr, size_t sz, void* original_realloc) 301551c0b2f7Stbbdev { 301651c0b2f7Stbbdev void *tmp; // TODO: fix warnings about uninitialized use of tmp 301751c0b2f7Stbbdev 301851c0b2f7Stbbdev if (!ptr) { 301951c0b2f7Stbbdev tmp = internalMalloc(sz); 302051c0b2f7Stbbdev } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) { 302151c0b2f7Stbbdev if (!sz) { 302251c0b2f7Stbbdev internalFree(ptr); 302357f524caSIlya Isaev return nullptr; 302451c0b2f7Stbbdev } else { 302551c0b2f7Stbbdev tmp = reallocAligned(defaultMemPool, ptr, sz, 0); 302651c0b2f7Stbbdev } 302751c0b2f7Stbbdev } 302851c0b2f7Stbbdev #if USE_WINTHREAD 302951c0b2f7Stbbdev else if (original_realloc && sz) { 303051c0b2f7Stbbdev orig_ptrs *original_ptrs = static_cast<orig_ptrs*>(original_realloc); 303151c0b2f7Stbbdev if ( original_ptrs->msize ){ 303251c0b2f7Stbbdev size_t oldSize = original_ptrs->msize(ptr); 303351c0b2f7Stbbdev tmp = internalMalloc(sz); 303451c0b2f7Stbbdev if (tmp) { 303551c0b2f7Stbbdev memcpy(tmp, ptr, sz<oldSize? 
sz : oldSize); 303651c0b2f7Stbbdev if ( original_ptrs->free ){ 303751c0b2f7Stbbdev original_ptrs->free( ptr ); 303851c0b2f7Stbbdev } 303951c0b2f7Stbbdev } 304051c0b2f7Stbbdev } else 304157f524caSIlya Isaev tmp = nullptr; 304251c0b2f7Stbbdev } 304351c0b2f7Stbbdev #else 304451c0b2f7Stbbdev else if (original_realloc) { 304551c0b2f7Stbbdev typedef void* (*realloc_ptr_t)(void*,size_t); 304651c0b2f7Stbbdev realloc_ptr_t original_realloc_ptr; 304751c0b2f7Stbbdev (void *&)original_realloc_ptr = original_realloc; 304851c0b2f7Stbbdev tmp = original_realloc_ptr(ptr,sz); 304951c0b2f7Stbbdev } 305051c0b2f7Stbbdev #endif 305157f524caSIlya Isaev else tmp = nullptr; 305251c0b2f7Stbbdev 305351c0b2f7Stbbdev if (!tmp) errno = ENOMEM; 305451c0b2f7Stbbdev return tmp; 305551c0b2f7Stbbdev } 305651c0b2f7Stbbdev 305751c0b2f7Stbbdev /********* End code for scalable_realloc ***********/ 305851c0b2f7Stbbdev 305951c0b2f7Stbbdev /********* Code for scalable_calloc ***********/ 306051c0b2f7Stbbdev 306151c0b2f7Stbbdev /* 306251c0b2f7Stbbdev * From K&R 306351c0b2f7Stbbdev * calloc returns a pointer to space for an array of nobj objects, 306457f524caSIlya Isaev * each of size size, or nullptr if the request cannot be satisfied. 306551c0b2f7Stbbdev * The space is initialized to zero bytes. 306651c0b2f7Stbbdev * 306751c0b2f7Stbbdev */ 306851c0b2f7Stbbdev 306951c0b2f7Stbbdev extern "C" void * scalable_calloc(size_t nobj, size_t size) 307051c0b2f7Stbbdev { 307151c0b2f7Stbbdev // it's square root of maximal size_t value 307251c0b2f7Stbbdev const size_t mult_not_overflow = size_t(1) << (sizeof(size_t)*CHAR_BIT/2); 307351c0b2f7Stbbdev const size_t arraySize = nobj * size; 307451c0b2f7Stbbdev 307551c0b2f7Stbbdev // check for overflow during multiplication: 307651c0b2f7Stbbdev if (nobj>=mult_not_overflow || size>=mult_not_overflow) // 1) heuristic check 307751c0b2f7Stbbdev if (nobj && arraySize / nobj != size) { // 2) exact check 307851c0b2f7Stbbdev errno = ENOMEM; 307957f524caSIlya Isaev return nullptr; 308051c0b2f7Stbbdev } 308151c0b2f7Stbbdev void* result = internalMalloc(arraySize); 308251c0b2f7Stbbdev if (result) 308351c0b2f7Stbbdev memset(result, 0, arraySize); 308451c0b2f7Stbbdev else 308551c0b2f7Stbbdev errno = ENOMEM; 308651c0b2f7Stbbdev return result; 308751c0b2f7Stbbdev } 308851c0b2f7Stbbdev 308951c0b2f7Stbbdev /********* End code for scalable_calloc ***********/ 309051c0b2f7Stbbdev 309151c0b2f7Stbbdev /********* Code for aligned allocation API **********/ 309251c0b2f7Stbbdev 309351c0b2f7Stbbdev extern "C" int scalable_posix_memalign(void **memptr, size_t alignment, size_t size) 309451c0b2f7Stbbdev { 309551c0b2f7Stbbdev if ( !isPowerOfTwoAtLeast(alignment, sizeof(void*)) ) 309651c0b2f7Stbbdev return EINVAL; 309751c0b2f7Stbbdev void *result = allocateAligned(defaultMemPool, size, alignment); 309851c0b2f7Stbbdev if (!result) 309951c0b2f7Stbbdev return ENOMEM; 310051c0b2f7Stbbdev *memptr = result; 310151c0b2f7Stbbdev return 0; 310251c0b2f7Stbbdev } 310351c0b2f7Stbbdev 310451c0b2f7Stbbdev extern "C" void * scalable_aligned_malloc(size_t size, size_t alignment) 310551c0b2f7Stbbdev { 310651c0b2f7Stbbdev if (!isPowerOfTwo(alignment) || 0==size) { 310751c0b2f7Stbbdev errno = EINVAL; 310857f524caSIlya Isaev return nullptr; 310951c0b2f7Stbbdev } 311051c0b2f7Stbbdev void *tmp = allocateAligned(defaultMemPool, size, alignment); 311151c0b2f7Stbbdev if (!tmp) errno = ENOMEM; 311251c0b2f7Stbbdev return tmp; 311351c0b2f7Stbbdev } 311451c0b2f7Stbbdev 311551c0b2f7Stbbdev extern "C" void * scalable_aligned_realloc(void *ptr, size_t size, 
size_t alignment) 311651c0b2f7Stbbdev { 311751c0b2f7Stbbdev if (!isPowerOfTwo(alignment)) { 311851c0b2f7Stbbdev errno = EINVAL; 311957f524caSIlya Isaev return nullptr; 312051c0b2f7Stbbdev } 312151c0b2f7Stbbdev void *tmp; 312251c0b2f7Stbbdev 312351c0b2f7Stbbdev if (!ptr) 312451c0b2f7Stbbdev tmp = allocateAligned(defaultMemPool, size, alignment); 312551c0b2f7Stbbdev else if (!size) { 312651c0b2f7Stbbdev scalable_free(ptr); 312757f524caSIlya Isaev return nullptr; 312851c0b2f7Stbbdev } else 312951c0b2f7Stbbdev tmp = reallocAligned(defaultMemPool, ptr, size, alignment); 313051c0b2f7Stbbdev 313151c0b2f7Stbbdev if (!tmp) errno = ENOMEM; 313251c0b2f7Stbbdev return tmp; 313351c0b2f7Stbbdev } 313451c0b2f7Stbbdev 31358827ea7dSLong Nguyen extern "C" TBBMALLOC_EXPORT void * __TBB_malloc_safer_aligned_realloc(void *ptr, size_t size, size_t alignment, void* orig_function) 313651c0b2f7Stbbdev { 313751c0b2f7Stbbdev /* corner cases left out of reallocAligned to not deal with errno there */ 313851c0b2f7Stbbdev if (!isPowerOfTwo(alignment)) { 313951c0b2f7Stbbdev errno = EINVAL; 314057f524caSIlya Isaev return nullptr; 314151c0b2f7Stbbdev } 314257f524caSIlya Isaev void *tmp = nullptr; 314351c0b2f7Stbbdev 314451c0b2f7Stbbdev if (!ptr) { 314551c0b2f7Stbbdev tmp = allocateAligned(defaultMemPool, size, alignment); 314651c0b2f7Stbbdev } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) { 314751c0b2f7Stbbdev if (!size) { 314851c0b2f7Stbbdev internalFree(ptr); 314957f524caSIlya Isaev return nullptr; 315051c0b2f7Stbbdev } else { 315151c0b2f7Stbbdev tmp = reallocAligned(defaultMemPool, ptr, size, alignment); 315251c0b2f7Stbbdev } 315351c0b2f7Stbbdev } 315451c0b2f7Stbbdev #if USE_WINTHREAD 315551c0b2f7Stbbdev else { 315651c0b2f7Stbbdev orig_aligned_ptrs *original_ptrs = static_cast<orig_aligned_ptrs*>(orig_function); 315751c0b2f7Stbbdev if (size) { 315851c0b2f7Stbbdev // Without orig_msize, we can't do anything with this. 315951c0b2f7Stbbdev // Just keeping old pointer. 316051c0b2f7Stbbdev if ( original_ptrs->aligned_msize ){ 316151c0b2f7Stbbdev // set alignment and offset to have possibly correct oldSize 316251c0b2f7Stbbdev size_t oldSize = original_ptrs->aligned_msize(ptr, sizeof(void*), 0); 316351c0b2f7Stbbdev tmp = allocateAligned(defaultMemPool, size, alignment); 316451c0b2f7Stbbdev if (tmp) { 316551c0b2f7Stbbdev memcpy(tmp, ptr, size<oldSize? size : oldSize); 316651c0b2f7Stbbdev if ( original_ptrs->aligned_free ){ 316751c0b2f7Stbbdev original_ptrs->aligned_free( ptr ); 316851c0b2f7Stbbdev } 316951c0b2f7Stbbdev } 317051c0b2f7Stbbdev } 317151c0b2f7Stbbdev } else { 317251c0b2f7Stbbdev if ( original_ptrs->aligned_free ){ 317351c0b2f7Stbbdev original_ptrs->aligned_free( ptr ); 317451c0b2f7Stbbdev } 317557f524caSIlya Isaev return nullptr; 317651c0b2f7Stbbdev } 317751c0b2f7Stbbdev } 317851c0b2f7Stbbdev #else 317951c0b2f7Stbbdev // As original_realloc can't align result, and there is no way to find 318051c0b2f7Stbbdev // size of reallocating object, we are giving up. 
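    // Consequently, on non-Windows builds there is no fallback branch here: a pointer that is
    // not recognized as ours is left untouched, tmp stays nullptr, and ENOMEM is reported below.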
318151c0b2f7Stbbdev suppress_unused_warning(orig_function); 318251c0b2f7Stbbdev #endif 318351c0b2f7Stbbdev if (!tmp) errno = ENOMEM; 318451c0b2f7Stbbdev return tmp; 318551c0b2f7Stbbdev } 318651c0b2f7Stbbdev 318751c0b2f7Stbbdev extern "C" void scalable_aligned_free(void *ptr) 318851c0b2f7Stbbdev { 318951c0b2f7Stbbdev internalFree(ptr); 319051c0b2f7Stbbdev } 319151c0b2f7Stbbdev 319251c0b2f7Stbbdev /********* end code for aligned allocation API **********/ 319351c0b2f7Stbbdev 319451c0b2f7Stbbdev /********* Code for scalable_msize ***********/ 319551c0b2f7Stbbdev 319651c0b2f7Stbbdev /* 319751c0b2f7Stbbdev * Returns the size of a memory block allocated in the heap. 319851c0b2f7Stbbdev */ 319951c0b2f7Stbbdev extern "C" size_t scalable_msize(void* ptr) 320051c0b2f7Stbbdev { 320151c0b2f7Stbbdev if (ptr) { 320251c0b2f7Stbbdev MALLOC_ASSERT(isRecognized(ptr), "Invalid pointer in scalable_msize detected."); 320351c0b2f7Stbbdev return internalMsize(ptr); 320451c0b2f7Stbbdev } 320551c0b2f7Stbbdev errno = EINVAL; 320651c0b2f7Stbbdev // Unlike _msize, return 0 in case of parameter error. 320751c0b2f7Stbbdev // Returning size_t(-1) looks more like the way to troubles. 320851c0b2f7Stbbdev return 0; 320951c0b2f7Stbbdev } 321051c0b2f7Stbbdev 321151c0b2f7Stbbdev /* 321251c0b2f7Stbbdev * A variant that provides additional memory safety, by checking whether the given address 321351c0b2f7Stbbdev * was obtained with this allocator, and if not redirecting to the provided alternative call. 321451c0b2f7Stbbdev */ 32158827ea7dSLong Nguyen extern "C" TBBMALLOC_EXPORT size_t __TBB_malloc_safer_msize(void *object, size_t (*original_msize)(void*)) 321651c0b2f7Stbbdev { 321751c0b2f7Stbbdev if (object) { 321851c0b2f7Stbbdev // Check if the memory was allocated by scalable_malloc 321951c0b2f7Stbbdev if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object)) 322051c0b2f7Stbbdev return internalMsize(object); 322151c0b2f7Stbbdev else if (original_msize) 322251c0b2f7Stbbdev return original_msize(object); 322351c0b2f7Stbbdev } 322457f524caSIlya Isaev // object is nullptr or unknown, or foreign and no original_msize 322551c0b2f7Stbbdev #if USE_WINTHREAD 322651c0b2f7Stbbdev errno = EINVAL; // errno expected to be set only on this platform 322751c0b2f7Stbbdev #endif 322851c0b2f7Stbbdev return 0; 322951c0b2f7Stbbdev } 323051c0b2f7Stbbdev 323151c0b2f7Stbbdev /* 323251c0b2f7Stbbdev * The same as above but for _aligned_msize case 323351c0b2f7Stbbdev */ 32348827ea7dSLong Nguyen extern "C" TBBMALLOC_EXPORT size_t __TBB_malloc_safer_aligned_msize(void *object, size_t alignment, size_t offset, size_t (*orig_aligned_msize)(void*,size_t,size_t)) 323551c0b2f7Stbbdev { 323651c0b2f7Stbbdev if (object) { 323751c0b2f7Stbbdev // Check if the memory was allocated by scalable_malloc 323851c0b2f7Stbbdev if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object)) 323951c0b2f7Stbbdev return internalMsize(object); 324051c0b2f7Stbbdev else if (orig_aligned_msize) 324151c0b2f7Stbbdev return orig_aligned_msize(object,alignment,offset); 324251c0b2f7Stbbdev } 324357f524caSIlya Isaev // object is nullptr or unknown 324451c0b2f7Stbbdev errno = EINVAL; 324551c0b2f7Stbbdev return 0; 324651c0b2f7Stbbdev } 324751c0b2f7Stbbdev 324851c0b2f7Stbbdev /********* End code for scalable_msize ***********/ 324951c0b2f7Stbbdev 325051c0b2f7Stbbdev extern "C" int scalable_allocation_mode(int param, intptr_t value) 325151c0b2f7Stbbdev { 325251c0b2f7Stbbdev if (param == TBBMALLOC_SET_SOFT_HEAP_LIMIT) { 325351c0b2f7Stbbdev 
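        // Illustrative call (256 MB is a hypothetical threshold, in bytes):
        //     scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 256*1024*1024);
        // As the statement below shows, the value only becomes a recommended maximum for the
        // backend, not a hard cap on allocation.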
defaultMemPool->extMemPool.backend.setRecommendedMaxSize((size_t)value); 325451c0b2f7Stbbdev return TBBMALLOC_OK; 325551c0b2f7Stbbdev } else if (param == USE_HUGE_PAGES) { 3256734f0bc0SPablo Romero #if __unix__ 325751c0b2f7Stbbdev switch (value) { 325851c0b2f7Stbbdev case 0: 325951c0b2f7Stbbdev case 1: 326051c0b2f7Stbbdev hugePages.setMode(value); 326151c0b2f7Stbbdev return TBBMALLOC_OK; 326251c0b2f7Stbbdev default: 326351c0b2f7Stbbdev return TBBMALLOC_INVALID_PARAM; 326451c0b2f7Stbbdev } 326551c0b2f7Stbbdev #else 326651c0b2f7Stbbdev return TBBMALLOC_NO_EFFECT; 326751c0b2f7Stbbdev #endif 326851c0b2f7Stbbdev #if __TBB_SOURCE_DIRECTLY_INCLUDED 326951c0b2f7Stbbdev } else if (param == TBBMALLOC_INTERNAL_SOURCE_INCLUDED) { 327051c0b2f7Stbbdev switch (value) { 327151c0b2f7Stbbdev case 0: // used by dynamic library 327251c0b2f7Stbbdev case 1: // used by static library or directly included sources 327351c0b2f7Stbbdev usedBySrcIncluded = value; 327451c0b2f7Stbbdev return TBBMALLOC_OK; 327551c0b2f7Stbbdev default: 327651c0b2f7Stbbdev return TBBMALLOC_INVALID_PARAM; 327751c0b2f7Stbbdev } 327851c0b2f7Stbbdev #endif 327951c0b2f7Stbbdev } else if (param == TBBMALLOC_SET_HUGE_SIZE_THRESHOLD) { 328051c0b2f7Stbbdev defaultMemPool->extMemPool.loc.setHugeSizeThreshold((size_t)value); 328151c0b2f7Stbbdev return TBBMALLOC_OK; 328251c0b2f7Stbbdev } 328351c0b2f7Stbbdev return TBBMALLOC_INVALID_PARAM; 328451c0b2f7Stbbdev } 328551c0b2f7Stbbdev 328651c0b2f7Stbbdev extern "C" int scalable_allocation_command(int cmd, void *param) 328751c0b2f7Stbbdev { 328851c0b2f7Stbbdev if (param) 328951c0b2f7Stbbdev return TBBMALLOC_INVALID_PARAM; 329051c0b2f7Stbbdev 329151c0b2f7Stbbdev bool released = false; 329251c0b2f7Stbbdev switch(cmd) { 329351c0b2f7Stbbdev case TBBMALLOC_CLEAN_THREAD_BUFFERS: 329451c0b2f7Stbbdev if (TLSData *tls = defaultMemPool->getTLS(/*create=*/false)) 329551c0b2f7Stbbdev released = tls->externalCleanup(/*cleanOnlyUnused*/false, /*cleanBins=*/true); 329651c0b2f7Stbbdev break; 329751c0b2f7Stbbdev case TBBMALLOC_CLEAN_ALL_BUFFERS: 329851c0b2f7Stbbdev released = defaultMemPool->extMemPool.hardCachesCleanup(); 329951c0b2f7Stbbdev break; 330051c0b2f7Stbbdev default: 330151c0b2f7Stbbdev return TBBMALLOC_INVALID_PARAM; 330251c0b2f7Stbbdev } 330351c0b2f7Stbbdev return released ? TBBMALLOC_OK : TBBMALLOC_NO_EFFECT; 330451c0b2f7Stbbdev } 3305
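/*
 * Illustrative usage sketch (kept as a comment; not compiled as part of the library).
 * It shows how a client is expected to drive the C entry points defined above. The pool
 * callbacks getRawMem/putRawMem are hypothetical user functions, and the MemPoolPolicy
 * constructor arguments mirror the (pAlloc, pFree, granularity) triple used by pool_create()
 * above; check oneapi/tbb/scalable_allocator.h for the exact callback signatures and defaults.
 *
 *   #include <oneapi/tbb/scalable_allocator.h>
 *   #include <assert.h>
 *
 *   void basic_usage() {
 *       void *p = scalable_malloc(100);               // nullptr and errno==ENOMEM on failure
 *       p = scalable_realloc(p, 1000);
 *       size_t sz = scalable_msize(p);                // usable size of the allocation
 *       scalable_free(p);
 *
 *       void *a = scalable_aligned_malloc(4096, 128); // alignment must be a power of two
 *       scalable_aligned_free(a);
 *
 *       // release cached memory from all per-thread and global buffers where possible
 *       scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr);
 *   }
 *
 *   void pool_usage() {
 *       rml::MemPoolPolicy policy(getRawMem, putRawMem, 0); // user-supplied raw memory callbacks
 *       rml::MemoryPool *pool;
 *       if (pool_create_v1(0, &policy, &pool) != rml::POOL_OK)   // 0 is the user's pool_id
 *           return;
 *       void *obj = pool_malloc(pool, 256);
 *       assert(pool_identify(obj) == pool);           // objects map back to their pool
 *       pool_free(pool, obj);
 *       pool_destroy(pool);
 *   }
 */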