/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include <errno.h>
#include <new>        /* for placement new */
#include <string.h>   /* for memset */

#include "oneapi/tbb/version.h"
#include "../tbb/environment.h"
#include "../tbb/itt_notify.h" // for __TBB_load_ittnotify()

#if USE_PTHREAD
    #define TlsSetValue_func pthread_setspecific
    #define TlsGetValue_func pthread_getspecific
    #define GetMyTID() pthread_self()
    #include <sched.h>
    extern "C" { static void mallocThreadShutdownNotification(void*); }
    #if __sun || __SUNPRO_CC
    #define __asm__ asm
    #endif
    #include <unistd.h> // sysconf(_SC_PAGESIZE)
#elif USE_WINTHREAD
    #define GetMyTID() GetCurrentThreadId()
#if __TBB_WIN8UI_SUPPORT
    #include <thread>
    #define TlsSetValue_func FlsSetValue
    #define TlsGetValue_func FlsGetValue
    #define TlsAlloc() FlsAlloc(nullptr)
    #define TLS_ALLOC_FAILURE FLS_OUT_OF_INDEXES
    #define TlsFree FlsFree
#else
    #define TlsSetValue_func TlsSetValue
    #define TlsGetValue_func TlsGetValue
    #define TLS_ALLOC_FAILURE TLS_OUT_OF_INDEXES
#endif
#else
    #error Must define USE_PTHREAD or USE_WINTHREAD
#endif

#define FREELIST_NONBLOCKING 1

namespace rml {
class MemoryPool;
namespace internal {

class Block;
class MemoryPool;

#if MALLOC_CHECK_RECURSION

inline bool isMallocInitialized();

#endif // MALLOC_CHECK_RECURSION

/** Support for handling the special UNUSABLE pointer state **/
const intptr_t UNUSABLE = 0x1;
inline bool isSolidPtr( void* ptr ) {
    return (UNUSABLE|(intptr_t)ptr)!=UNUSABLE;
}
inline bool isNotForUse( void* ptr ) {
    return (intptr_t)ptr==UNUSABLE;
}

/*
 * Block::objectSize value used to mark blocks allocated by startupAlloc
 */
const uint16_t startupAllocObjSizeMark = ~(uint16_t)0;

/*
 * The following constant is used to define the size of struct Block, the block header.
 * The intent is to have the size of a Block be a multiple of the cache line size, which allows us to
 * get good alignment at the cost of some overhead equal to the amount of padding included in the Block.
 */
const int blockHeaderAlignment = estimatedCacheLineSize;

/********* The data structures and global objects **************/

/*
 * The malloc routines themselves need to be able to occasionally malloc some space,
 * in order to set up the structures used by the thread local structures. This
 * routine performs that function.
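 * (In practice this bootstrap path only carves out TLSData objects before the per-thread
 * bins exist; BootStrapBlocks::allocate below asserts that the requested size is sizeof(TLSData).)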
 */
class BootStrapBlocks {
    MallocMutex bootStrapLock;
    Block      *bootStrapBlock;
    Block      *bootStrapBlockUsed;
    FreeObject *bootStrapObjectList;
public:
    void *allocate(MemoryPool *memPool, size_t size);
    void free(void* ptr);
    void reset();
};

#if USE_INTERNAL_TID
class ThreadId {
    static tls_key_t Tid_key;
    static std::atomic<intptr_t> ThreadCount;

    unsigned int id;

    static unsigned int tlsNumber() {
        unsigned int result = reinterpret_cast<intptr_t>(TlsGetValue_func(Tid_key));
        if( !result ) {
            RecursiveMallocCallProtector scoped;
            // Thread-local value is zero -> first call from this thread,
            // need to initialize with next ID value (IDs start from 1)
            result = ++ThreadCount; // returned new value!
            TlsSetValue_func( Tid_key, reinterpret_cast<void*>(result) );
        }
        return result;
    }
public:
    static bool init() {
#if USE_WINTHREAD
        Tid_key = TlsAlloc();
        if (Tid_key == TLS_ALLOC_FAILURE)
            return false;
#else
        int status = pthread_key_create( &Tid_key, nullptr );
        if ( status ) {
            fprintf (stderr, "The memory manager cannot create tls key during initialization\n");
            return false;
        }
#endif /* USE_WINTHREAD */
        return true;
    }
#if __TBB_SOURCE_DIRECTLY_INCLUDED
    static void destroy() {
        if( Tid_key ) {
#if USE_WINTHREAD
            BOOL status = !(TlsFree( Tid_key ));  // fail is zero
#else
            int status = pthread_key_delete( Tid_key );
#endif /* USE_WINTHREAD */
            if ( status )
                fprintf (stderr, "The memory manager cannot delete tls key\n");
            Tid_key = 0;
        }
    }
#endif

    ThreadId() : id(ThreadId::tlsNumber()) {}
    bool isCurrentThreadId() const { return id == ThreadId::tlsNumber(); }

#if COLLECT_STATISTICS || MALLOC_TRACE
    friend unsigned int getThreadId() { return ThreadId::tlsNumber(); }
#endif
#if COLLECT_STATISTICS
    static unsigned getMaxThreadId() { return ThreadCount.load(std::memory_order_relaxed); }

    friend int STAT_increment(ThreadId tid, int bin, int ctr);
#endif
};

tls_key_t ThreadId::Tid_key;
std::atomic<intptr_t> ThreadId::ThreadCount;

#if COLLECT_STATISTICS
int STAT_increment(ThreadId tid, int bin, int ctr)
{
    return ::STAT_increment(tid.id, bin, ctr);
}
#endif

#else // USE_INTERNAL_TID

class ThreadId {
#if USE_PTHREAD
    std::atomic<pthread_t> tid;
#else
    std::atomic<DWORD>     tid;
#endif
public:
    ThreadId() : tid(GetMyTID()) {}
    ThreadId(ThreadId &other) = delete;
    ~ThreadId() = default;

#if USE_PTHREAD
    bool isCurrentThreadId() const { return pthread_equal(pthread_self(), tid.load(std::memory_order_relaxed)); }
#else
    bool isCurrentThreadId() const { return GetCurrentThreadId() == tid.load(std::memory_order_relaxed); }
#endif
    ThreadId& operator=(const ThreadId& other) {
        tid.store(other.tid.load(std::memory_order_relaxed), std::memory_order_relaxed);
        return *this;
    }
    static bool init() { return true; }
#if __TBB_SOURCE_DIRECTLY_INCLUDED
    static void destroy() {}
#endif
};

#endif // USE_INTERNAL_TID

/*********** Code to provide thread ID and a thread-local void pointer **********/

bool TLSKey::init()
{
#if USE_WINTHREAD
    TLS_pointer_key = TlsAlloc();
    if (TLS_pointer_key == TLS_ALLOC_FAILURE)
        return false;
#else
    int status = pthread_key_create( &TLS_pointer_key, mallocThreadShutdownNotification );
    if ( status )
        return false;
#endif /* USE_WINTHREAD */
    return true;
}

bool TLSKey::destroy()
{
#if USE_WINTHREAD
    BOOL status1 = !(TlsFree(TLS_pointer_key));  // fail is zero
#else
    int status1 = pthread_key_delete(TLS_pointer_key);
#endif /* USE_WINTHREAD */
    MALLOC_ASSERT(!status1, "The memory manager cannot delete tls key.");
    return status1==0;
}

inline TLSData* TLSKey::getThreadMallocTLS() const
{
    return (TLSData *)TlsGetValue_func( TLS_pointer_key );
}

inline void TLSKey::setThreadMallocTLS( TLSData * newvalue ) {
    RecursiveMallocCallProtector scoped;
    TlsSetValue_func( TLS_pointer_key, newvalue );
}

/* The 'next' field in the block header has to maintain some invariants:
 *   it needs to be on a 16K boundary and the first field in the block.
 *   Any value stored there needs to have the lower 14 bits set to 0
 *   so that various asserts work. This means that if you want to smash this memory
 *   for debugging purposes you will need to obey this invariant.
 * The total size of the header needs to be a power of 2 to simplify
 * the alignment requirements. For now it is a 128 byte structure.
 * To avoid false sharing, the fields changed only locally are separated
 * from the fields changed by foreign threads.
 * Changing the size of the block header would require changing
 * some bin allocation sizes, in particular "fitting" sizes (see above).
 */
class Bin;
class StartupBlock;

class MemoryPool {
    // if no explicit grainsize, expect to see malloc in user's pAlloc
    // and set reasonably low granularity
    static const size_t defaultGranularity = estimatedCacheLineSize;

    MemoryPool() = delete;                  // deny
public:
    static MallocMutex  memPoolListLock;

    // list of all active pools is used to release
    // all TLS data on thread termination or library unload
    MemoryPool     *next,
                   *prev;
    ExtMemoryPool   extMemPool;
    BootStrapBlocks bootStrapBlocks;

    static void initDefaultPool();

    bool init(intptr_t poolId, const MemPoolPolicy* memPoolPolicy);
    bool reset();
    bool destroy();
    void onThreadShutdown(TLSData *tlsData);

    inline TLSData *getTLS(bool create);
    void clearTLS() { extMemPool.tlsPointerKey.setThreadMallocTLS(nullptr); }

    Block *getEmptyBlock(size_t size);
    void returnEmptyBlock(Block *block, bool poolTheBlock);

    // get/put large object to/from local large object cache
    void *getFromLLOCache(TLSData *tls, size_t size, size_t alignment);
    void putToLLOCache(TLSData *tls, void *object);
};

static intptr_t defaultMemPool_space[sizeof(MemoryPool)/sizeof(intptr_t) +
                                     (sizeof(MemoryPool)%sizeof(intptr_t)? 1 : 0)];
static MemoryPool *defaultMemPool = (MemoryPool*)defaultMemPool_space;
const size_t MemoryPool::defaultGranularity;
// zero-initialized
MallocMutex  MemoryPool::memPoolListLock;
// TODO: move huge page status to the default pool, because that's its state
HugePagesStatus hugePages;
static bool usedBySrcIncluded = false;

// Padding helpers
template<size_t padd>
struct PaddingImpl {
    size_t       __padding[padd];
};

template<>
struct PaddingImpl<0> {};

template<int N>
struct Padding : PaddingImpl<N/sizeof(size_t)> {};

// Slab block is 16KB-aligned. To prevent false sharing, separate locally-accessed
// fields and fields commonly accessed by non-owner threads.
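// GlobalBlockFields groups the fields that other threads write (publicFreeList and the
// nextPrivatizable link); the Padding used below pushes the owner-thread fields in
// LocalBlockFields onto a separate cache line.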
class GlobalBlockFields : public BlockI {
protected:
    std::atomic<FreeObject*> publicFreeList;
    std::atomic<Block*> nextPrivatizable;
    MemoryPool  *poolPtr;
};

class LocalBlockFields : public GlobalBlockFields, Padding<blockHeaderAlignment - sizeof(GlobalBlockFields)> {
protected:
    Block       *next;
    Block       *previous;        /* Use double linked list to speed up removal */
    FreeObject  *bumpPtr;         /* Bump pointer moves from the end to the beginning of a block */
    FreeObject  *freeList;
    /* Pointer to local data for the owner thread. Used for fast finding tls
       when releasing object from a block that current thread owned.
       nullptr for orphaned blocks. */
    std::atomic<TLSData*> tlsPtr;
    ThreadId     ownerTid;        /* the ID of the thread that owns or last owned the block */
    BackRefIdx   backRefIdx;
    uint16_t     allocatedCount;  /* Number of objects allocated (obviously by the owning thread) */
    uint16_t     objectSize;
    bool         isFull;

    friend class FreeBlockPool;
    friend class StartupBlock;
    friend class LifoList;
    friend void *BootStrapBlocks::allocate(MemoryPool *, size_t);
    friend bool OrphanedBlocks::cleanup(Backend*);
    friend Block *MemoryPool::getEmptyBlock(size_t);
};

// Use inheritance to guarantee that user data starts on the next cache line.
// Can't use a member for that, because when LocalBlockFields already ends on a cache line,
// there must be no additional memory consumption with any compiler.
class Block : public LocalBlockFields,
              Padding<2*blockHeaderAlignment - sizeof(LocalBlockFields)> {
public:
    bool empty() const {
        if (allocatedCount > 0) return false;
        MALLOC_ASSERT(!isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT);
        return true;
    }
    inline FreeObject* allocate();
    inline FreeObject *allocateFromFreeList();

    inline bool adjustFullness();
    void adjustPositionInBin(Bin* bin = nullptr);
#if MALLOC_DEBUG
    bool freeListNonNull() { return freeList; }
#endif
    void freePublicObject(FreeObject *objectToFree);
    inline void freeOwnObject(void *object);
    void reset();
    void privatizePublicFreeList( bool reset = true );
    void restoreBumpPtr();
    void privatizeOrphaned(TLSData *tls, unsigned index);
    bool readyToShare();
    void shareOrphaned(intptr_t binTag, unsigned index);
    unsigned int getSize() const {
        MALLOC_ASSERT(isStartupAllocObject() || objectSize<minLargeObjectSize,
                      "Invalid object size");
        return isStartupAllocObject()? 0 : objectSize;
    }
    const BackRefIdx *getBackRefIdx() const { return &backRefIdx; }
    inline bool isOwnedByCurrentThread() const;
    bool isStartupAllocObject() const { return objectSize == startupAllocObjSizeMark; }
    inline FreeObject *findObjectToFree(const void *object) const;
    void checkFreePrecond(const void *object) const {
#if MALLOC_DEBUG
        const char *msg = "Possible double free or heap corruption.";
        // small objects are always at least sizeof(size_t) bytes aligned,
        // try to check this before the dereference below, as for invalid objects
        // this memory may be unreadable
        MALLOC_ASSERT(isAligned(object, sizeof(size_t)), "Try to free invalid small object");
#if !__TBB_USE_THREAD_SANITIZER
        // releasing to free slab
        MALLOC_ASSERT(allocatedCount>0, msg);
#endif
        // must not point to slab's header
        MALLOC_ASSERT((uintptr_t)object - (uintptr_t)this >= sizeof(Block), msg);
        if (startupAllocObjSizeMark == objectSize) // startup block
            MALLOC_ASSERT(object<=bumpPtr, msg);
        else {
            // non-startup objects are 8 bytes aligned
            MALLOC_ASSERT(isAligned(object, 8), "Try to free invalid small object");
            FreeObject *toFree = findObjectToFree(object);
#if !__TBB_USE_THREAD_SANITIZER
            MALLOC_ASSERT(allocatedCount <= (slabSize-sizeof(Block))/objectSize
                          && (!bumpPtr || object>bumpPtr), msg);
            // check against head of freeList, as this is mostly
            // expected after double free
            MALLOC_ASSERT(toFree != freeList, msg);
#endif
            // check against head of publicFreeList, to detect double free
            // involving a foreign thread
            MALLOC_ASSERT(toFree != publicFreeList.load(std::memory_order_relaxed), msg);
        }
#else
        suppress_unused_warning(object);
#endif
    }
    void initEmptyBlock(TLSData *tls, size_t size);
    size_t findObjectSize(void *object) const;
    MemoryPool *getMemPool() const { return poolPtr; } // do not use on the hot path!

protected:
    void cleanBlockHeader();

private:
    static const float emptyEnoughRatio; /* Threshold on free space needed to "reactivate" a block */

    inline FreeObject *allocateFromBumpPtr();
    inline FreeObject *findAllocatedObject(const void *address) const;
#if MALLOC_DEBUG
    inline bool isProperlyPlaced(const void *object) const;
#endif
    inline void markOwned(TLSData *tls) {
        MALLOC_ASSERT(!tlsPtr.load(std::memory_order_relaxed), ASSERT_TEXT);
        ownerTid = ThreadId(); /* save the ID of the current thread */
        tlsPtr.store(tls, std::memory_order_relaxed);
    }
    inline void markOrphaned() {
        MALLOC_ASSERT(tlsPtr.load(std::memory_order_relaxed), ASSERT_TEXT);
        tlsPtr.store(nullptr, std::memory_order_relaxed);
    }

    friend class Bin;
    friend class TLSData;
    friend bool MemoryPool::destroy();
};

const float Block::emptyEnoughRatio = 1.0 / 4.0;

static_assert(sizeof(Block) <= 2*estimatedCacheLineSize,
    "The class Block does not fit into 2 cache lines on this platform. "
" 454 "Defining USE_INTERNAL_TID may help to fix it."); 455 456 class Bin { 457 private: 458 public: 459 Block *activeBlk; 460 std::atomic<Block*> mailbox; 461 MallocMutex mailLock; 462 463 public: 464 inline Block* getActiveBlock() const { return activeBlk; } 465 void resetActiveBlock() { activeBlk = nullptr; } 466 inline void setActiveBlock(Block *block); 467 inline Block* setPreviousBlockActive(); 468 Block* getPrivatizedFreeListBlock(); 469 void moveBlockToFront(Block *block); 470 bool cleanPublicFreeLists(); 471 void processEmptyBlock(Block *block, bool poolTheBlock); 472 void addPublicFreeListBlock(Block* block); 473 474 void outofTLSBin(Block* block); 475 void verifyTLSBin(size_t size) const; 476 void pushTLSBin(Block* block); 477 478 #if MALLOC_DEBUG 479 void verifyInitState() const { 480 MALLOC_ASSERT( !activeBlk, ASSERT_TEXT ); 481 MALLOC_ASSERT( !mailbox.load(std::memory_order_relaxed), ASSERT_TEXT ); 482 } 483 #endif 484 485 friend void Block::freePublicObject (FreeObject *objectToFree); 486 }; 487 488 /********* End of the data structures **************/ 489 490 /* 491 * There are bins for all 8 byte aligned objects less than this segregated size; 8 bins in total 492 */ 493 const uint32_t minSmallObjectIndex = 0; 494 const uint32_t numSmallObjectBins = 8; 495 const uint32_t maxSmallObjectSize = 64; 496 497 /* 498 * There are 4 bins between each couple of powers of 2 [64-128-256-...] 499 * from maxSmallObjectSize till this size; 16 bins in total 500 */ 501 const uint32_t minSegregatedObjectIndex = minSmallObjectIndex+numSmallObjectBins; 502 const uint32_t numSegregatedObjectBins = 16; 503 const uint32_t maxSegregatedObjectSize = 1024; 504 505 /* 506 * And there are 5 bins with allocation sizes that are multiples of estimatedCacheLineSize 507 * and selected to fit 9, 6, 4, 3, and 2 allocations in a block. 508 */ 509 const uint32_t minFittingIndex = minSegregatedObjectIndex+numSegregatedObjectBins; 510 const uint32_t numFittingBins = 5; 511 512 const uint32_t fittingAlignment = estimatedCacheLineSize; 513 514 #define SET_FITTING_SIZE(N) ( (slabSize-sizeof(Block))/N ) & ~(fittingAlignment-1) 515 // For blockSize=16*1024, sizeof(Block)=2*estimatedCacheLineSize and fittingAlignment=estimatedCacheLineSize, 516 // the comments show the fitting sizes and the amounts left unused for estimatedCacheLineSize=64/128: 517 const uint32_t fittingSize1 = SET_FITTING_SIZE(9); // 1792/1792 128/000 518 const uint32_t fittingSize2 = SET_FITTING_SIZE(6); // 2688/2688 128/000 519 const uint32_t fittingSize3 = SET_FITTING_SIZE(4); // 4032/3968 128/256 520 const uint32_t fittingSize4 = SET_FITTING_SIZE(3); // 5376/5376 128/000 521 const uint32_t fittingSize5 = SET_FITTING_SIZE(2); // 8128/8064 000/000 522 #undef SET_FITTING_SIZE 523 524 /* 525 * The total number of thread-specific Block-based bins 526 */ 527 const uint32_t numBlockBins = minFittingIndex+numFittingBins; 528 529 /* 530 * Objects of this size and larger are considered large objects. 531 */ 532 const uint32_t minLargeObjectSize = fittingSize5 + 1; 533 534 /* 535 * Per-thread pool of slab blocks. Idea behind it is to not share with other 536 * threads memory that are likely in local cache(s) of our CPU. 
 */
class FreeBlockPool {
private:
    std::atomic<Block*> head;
    int         size;
    Backend    *backend;
public:
    static const int POOL_HIGH_MARK = 32;
    static const int POOL_LOW_MARK  = 8;

    class ResOfGet {
        ResOfGet() = delete;
    public:
        Block* block;
        bool   lastAccMiss;
        ResOfGet(Block *b, bool lastMiss) : block(b), lastAccMiss(lastMiss) {}
    };

    // allocated in zero-initialized memory
    FreeBlockPool(Backend *bknd) : backend(bknd) {}
    ResOfGet getBlock();
    void returnBlock(Block *block);
    bool externalCleanup(); // can be called by another thread
};

template<int LOW_MARK, int HIGH_MARK>
class LocalLOCImpl {
private:
    static const size_t MAX_TOTAL_SIZE = 4*1024*1024;
    // TODO: can a singly-linked list be faster here?
    LargeMemoryBlock *tail; // needed when releasing on overflow
    std::atomic<LargeMemoryBlock*> head;
    size_t            totalSize;
    int               numOfBlocks;
public:
    bool put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool);
    LargeMemoryBlock *get(size_t size);
    bool externalCleanup(ExtMemoryPool *extMemPool);
#if __TBB_MALLOC_WHITEBOX_TEST
    LocalLOCImpl() : tail(nullptr), head(nullptr), totalSize(0), numOfBlocks(0) {}
    static size_t getMaxSize() { return MAX_TOTAL_SIZE; }
    static const int LOC_HIGH_MARK = HIGH_MARK;
#else
    // no ctor, object must be created in zero-initialized memory
#endif
};

typedef LocalLOCImpl<8,32> LocalLOC; // set production code parameters

class TLSData : public TLSRemote {
    MemoryPool   *memPool;
public:
    Bin           bin[numBlockBinLimit];
    FreeBlockPool freeSlabBlocks;
    LocalLOC      lloc;
    unsigned      currCacheIdx;
private:
    std::atomic<bool> unused;
public:
    TLSData(MemoryPool *mPool, Backend *bknd) : memPool(mPool), freeSlabBlocks(bknd), currCacheIdx(0) {}
    MemoryPool *getMemPool() const { return memPool; }
    Bin* getAllocationBin(size_t size);
    void release();
    bool externalCleanup(bool cleanOnlyUnused, bool cleanBins) {
        if (!unused.load(std::memory_order_relaxed) && cleanOnlyUnused) return false;
        // Heavy operation in terms of synchronization complexity,
        // should be called only for the current thread
        bool released = cleanBins ? cleanupBlockBins() : false;
        // both cleanups are to be called, and the order is not important
        bool lloc_cleaned = lloc.externalCleanup(&memPool->extMemPool);
        bool free_slab_blocks_cleaned = freeSlabBlocks.externalCleanup();
        return released || lloc_cleaned || free_slab_blocks_cleaned;
    }
    bool cleanupBlockBins();
    void markUsed() { unused.store(false, std::memory_order_relaxed); }  // called by owner when TLS touched
    void markUnused() { unused.store(true, std::memory_order_relaxed); } // can be called by a non-owner thread
};

TLSData *TLSKey::createTLS(MemoryPool *memPool, Backend *backend)
{
    MALLOC_ASSERT( sizeof(TLSData) >= sizeof(Bin) * numBlockBins + sizeof(FreeBlockPool), ASSERT_TEXT );
    TLSData* tls = (TLSData*) memPool->bootStrapBlocks.allocate(memPool, sizeof(TLSData));
    if ( !tls )
        return nullptr;
    new(tls) TLSData(memPool, backend);
    /* the block contains zeroes after bootStrapMalloc, so bins are initialized */
#if MALLOC_DEBUG
    for (uint32_t i = 0; i < numBlockBinLimit; i++)
        tls->bin[i].verifyInitState();
#endif
    setThreadMallocTLS(tls);
    memPool->extMemPool.allLocalCaches.registerThread(tls);
    return tls;
}

bool TLSData::cleanupBlockBins()
{
    bool released = false;
    for (uint32_t i = 0; i < numBlockBinLimit; i++) {
        released |= bin[i].cleanPublicFreeLists();
        // After cleaning public free lists, only the active block might be empty.
        // Do not use processEmptyBlock because it will just restore bumpPtr.
        Block *block = bin[i].getActiveBlock();
        if (block && block->empty()) {
            bin[i].outofTLSBin(block);
            memPool->returnEmptyBlock(block, /*poolTheBlock=*/false);
            released = true;
        }
    }
    return released;
}

bool ExtMemoryPool::releaseAllLocalCaches()
{
    // Iterate all registered TLS data and clean LLOC and Slab pools
    bool released = allLocalCaches.cleanup(/*cleanOnlyUnused=*/false);

    // Bins privatization is done only for the current thread
    if (TLSData *tlsData = tlsPointerKey.getThreadMallocTLS())
        released |= tlsData->cleanupBlockBins();

    return released;
}

void AllLocalCaches::registerThread(TLSRemote *tls)
{
    tls->prev = nullptr;
    MallocMutex::scoped_lock lock(listLock);
    MALLOC_ASSERT(head!=tls, ASSERT_TEXT);
    tls->next = head;
    if (head)
        head->prev = tls;
    head = tls;
    MALLOC_ASSERT(head->next!=head, ASSERT_TEXT);
}

void AllLocalCaches::unregisterThread(TLSRemote *tls)
{
    MallocMutex::scoped_lock lock(listLock);
    MALLOC_ASSERT(head, "Can't unregister thread: no threads are registered.");
    if (head == tls)
        head = tls->next;
    if (tls->next)
        tls->next->prev = tls->prev;
    if (tls->prev)
        tls->prev->next = tls->next;
    MALLOC_ASSERT(!tls->next || tls->next->next!=tls->next, ASSERT_TEXT);
}

bool AllLocalCaches::cleanup(bool cleanOnlyUnused)
{
    bool released = false;
    {
        MallocMutex::scoped_lock lock(listLock);
        for (TLSRemote *curr=head; curr; curr=curr->next)
            released |= static_cast<TLSData*>(curr)->externalCleanup(cleanOnlyUnused, /*cleanBins=*/false);
    }
    return released;
}

void AllLocalCaches::markUnused()
{
    bool locked = false;
    MallocMutex::scoped_lock lock(listLock, /*block=*/false, &locked);
    if (!locked) // do not wait for the marking if someone is doing something with the list
        return;

    for (TLSRemote *curr=head; curr; curr=curr->next)
        static_cast<TLSData*>(curr)->markUnused();
}

#if MALLOC_CHECK_RECURSION
MallocMutex RecursiveMallocCallProtector::rmc_mutex;
std::atomic<pthread_t> RecursiveMallocCallProtector::owner_thread;
std::atomic<void*> RecursiveMallocCallProtector::autoObjPtr;
bool        RecursiveMallocCallProtector::mallocRecursionDetected;
#if __FreeBSD__
bool        RecursiveMallocCallProtector::canUsePthread;
#endif

#endif

/*********** End code to provide thread ID and a TLS pointer **********/

// Parameter for isLargeObject, keeps our expectations on memory origin.
// Assertions must use unknownMem to reliably report object invalidity.
enum MemoryOrigin {
    ourMem,    // allocated by the TBB allocator
    unknownMem // can be allocated by the system allocator or the TBB allocator
};

template<MemoryOrigin>
#if __TBB_USE_THREAD_SANITIZER
// We have a real race when accessing the large object header for
// non large objects (e.g. small or foreign objects).
// Therefore, we need to hide this access from the thread sanitizer
__attribute__((no_sanitize("thread")))
#endif
bool isLargeObject(void *object);
static void *internalMalloc(size_t size);
static void internalFree(void *object);
static void *internalPoolMalloc(MemoryPool* mPool, size_t size);
static bool internalPoolFree(MemoryPool *mPool, void *object, size_t size);

#if !MALLOC_DEBUG
#if __INTEL_COMPILER || _MSC_VER
#define NOINLINE(decl) __declspec(noinline) decl
#define ALWAYSINLINE(decl) __forceinline decl
#elif __GNUC__
#define NOINLINE(decl) decl __attribute__ ((noinline))
#define ALWAYSINLINE(decl) decl __attribute__ ((always_inline))
#else
#define NOINLINE(decl) decl
#define ALWAYSINLINE(decl) decl
#endif

static NOINLINE( bool doInitialization() );
ALWAYSINLINE( bool isMallocInitialized() );

#undef ALWAYSINLINE
#undef NOINLINE
#endif /* !MALLOC_DEBUG */


/********* Now some rough utility code to deal with indexing the size bins. **************/

/*
 * Given a number return the highest non-zero bit in it. It is intended to work with 32-bit values only.
 * Moreover, on some platforms, for the sake of simplicity and performance, it is narrowed to only serve 64 to 1023.
 * This is enough for the current algorithm of distribution of sizes among bins.
 * __TBB_Log2 is not used here to minimize dependencies on TBB specific sources.
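 * (The generic fallback below therefore only needs a 16-entry table indexed by n>>6,
 * since the argument is guaranteed to be in the [64;1024) range.)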
 */
#if _WIN64 && _MSC_VER>=1400 && !__INTEL_COMPILER
extern "C" unsigned char _BitScanReverse( unsigned long* i, unsigned long w );
#pragma intrinsic(_BitScanReverse)
#endif
static inline unsigned int highestBitPos(unsigned int n)
{
    MALLOC_ASSERT( n>=64 && n<1024, ASSERT_TEXT ); // only needed for bsr array lookup, but always true
    unsigned int pos;
#if __ARCH_x86_32||__ARCH_x86_64

# if __unix__||__APPLE__||__MINGW32__
    __asm__ ("bsr %1,%0" : "=r"(pos) : "r"(n));
# elif (_WIN32 && (!_WIN64 || __INTEL_COMPILER))
    __asm
    {
        bsr eax, n
        mov pos, eax
    }
# elif _WIN64 && _MSC_VER>=1400
    _BitScanReverse((unsigned long*)&pos, (unsigned long)n);
# else
#   error highestBitPos() not implemented for this platform
# endif
#elif __arm__
    __asm__ __volatile__
    (
       "clz %0, %1\n"
       "rsb %0, %0, %2\n"
       :"=r" (pos) :"r" (n), "I" (31)
    );
#else
    static unsigned int bsr[16] = {0/*N/A*/,6,7,7,8,8,8,8,9,9,9,9,9,9,9,9};
    pos = bsr[ n>>6 ];
#endif /* __ARCH_* */
    return pos;
}

unsigned int getSmallObjectIndex(unsigned int size)
{
    unsigned int result = (size-1)>>3;
    constexpr bool is_64bit = (8 == sizeof(void*));
    if (is_64bit) {
        // For 64-bit malloc, 16 byte alignment is needed except for bin 0.
        if (result) result |= 1; // 0,1,3,5,7; bins 2,4,6 are skipped because their sizes are not 16-byte aligned
    }
    return result;
}

/*
 * Depending on indexRequest, for a given size return either the index into the bin
 * for objects of this size, or the actual size of objects in this bin.
 */
template<bool indexRequest>
static unsigned int getIndexOrObjectSize (unsigned int size)
{
    if (size <= maxSmallObjectSize) { // selection from 8/16/24/32/40/48/56/64
        unsigned int index = getSmallObjectIndex( size );
        /* Bin 0 is for 8 bytes, bin 1 is for 16, and so forth */
        return indexRequest ? index : (index+1)<<3;
    }
    else if (size <= maxSegregatedObjectSize ) { // 80/96/112/128 / 160/192/224/256 / 320/384/448/512 / 640/768/896/1024
        unsigned int order = highestBitPos(size-1); // which group of bin sizes?
        MALLOC_ASSERT( 6<=order && order<=9, ASSERT_TEXT );
        if (indexRequest)
            return minSegregatedObjectIndex - (4*6) - 4 + (4*order) + ((size-1)>>(order-2));
        else {
            unsigned int alignment = 128 >> (9-order); // alignment in the group
            MALLOC_ASSERT( alignment==16 || alignment==32 || alignment==64 || alignment==128, ASSERT_TEXT );
            return alignUp(size,alignment);
        }
    }
    else {
        if( size <= fittingSize3 ) {
            if( size <= fittingSize2 ) {
                if( size <= fittingSize1 )
                    return indexRequest ? minFittingIndex : fittingSize1;
                else
                    return indexRequest ? minFittingIndex+1 : fittingSize2;
            } else
                return indexRequest ? minFittingIndex+2 : fittingSize3;
        } else {
            if( size <= fittingSize5 ) {
                if( size <= fittingSize4 )
                    return indexRequest ? minFittingIndex+3 : fittingSize4;
                else
                    return indexRequest ? minFittingIndex+4 : fittingSize5;
            } else {
                MALLOC_ASSERT( 0,ASSERT_TEXT ); // this should not happen
                return ~0U;
            }
        }
    }
}

static unsigned int getIndex (unsigned int size)
{
    return getIndexOrObjectSize</*indexRequest=*/true>(size);
}

static unsigned int getObjectSize (unsigned int size)
{
    return getIndexOrObjectSize</*indexRequest=*/false>(size);
}


void *BootStrapBlocks::allocate(MemoryPool *memPool, size_t size)
{
    FreeObject *result;

    MALLOC_ASSERT( size == sizeof(TLSData), ASSERT_TEXT );

    { // Lock with acquire
        MallocMutex::scoped_lock scoped_cs(bootStrapLock);

        if( bootStrapObjectList) {
            result = bootStrapObjectList;
            bootStrapObjectList = bootStrapObjectList->next;
        } else {
            if (!bootStrapBlock) {
                bootStrapBlock = memPool->getEmptyBlock(size);
                if (!bootStrapBlock) return nullptr;
            }
            result = bootStrapBlock->bumpPtr;
            bootStrapBlock->bumpPtr = (FreeObject *)((uintptr_t)bootStrapBlock->bumpPtr - bootStrapBlock->objectSize);
            if ((uintptr_t)bootStrapBlock->bumpPtr < (uintptr_t)bootStrapBlock+sizeof(Block)) {
                bootStrapBlock->bumpPtr = nullptr;
                bootStrapBlock->next = bootStrapBlockUsed;
                bootStrapBlockUsed = bootStrapBlock;
                bootStrapBlock = nullptr;
            }
        }
    } // Unlock with release
    memset (result, 0, size);
    return (void*)result;
}

void BootStrapBlocks::free(void* ptr)
{
    MALLOC_ASSERT( ptr, ASSERT_TEXT );
    { // Lock with acquire
        MallocMutex::scoped_lock scoped_cs(bootStrapLock);
        ((FreeObject*)ptr)->next = bootStrapObjectList;
        bootStrapObjectList = (FreeObject*)ptr;
    } // Unlock with release
}

void BootStrapBlocks::reset()
{
    bootStrapBlock = bootStrapBlockUsed = nullptr;
    bootStrapObjectList = nullptr;
}

#if !(FREELIST_NONBLOCKING)
static MallocMutex publicFreeListLock; // lock for changes of publicFreeList
#endif

/********* End rough utility code **************/

/* LifoList assumes zero initialization so a vector of it can be created
 * by just allocating some space with no call to constructor.
 * On Linux, it seems to be necessary to avoid linking with C++ libraries.
 *
 * By usage convention there is no race on the initialization.
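 * (pop and grab below check the atomic top before taking the embedded MallocMutex;
 * all modifications of the list happen under that lock.)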
 */
LifoList::LifoList( ) : top(nullptr)
{
    // MallocMutex assumes zero initialization
    memset(static_cast<void*>(&lock), 0, sizeof(MallocMutex));
}

void LifoList::push(Block *block)
{
    MallocMutex::scoped_lock scoped_cs(lock);
    block->next = top.load(std::memory_order_relaxed);
    top.store(block, std::memory_order_relaxed);
}

Block *LifoList::pop()
{
    Block* block = nullptr;
    if (top.load(std::memory_order_relaxed)) {
        MallocMutex::scoped_lock scoped_cs(lock);
        block = top.load(std::memory_order_relaxed);
        if (block) {
            top.store(block->next, std::memory_order_relaxed);
        }
    }
    return block;
}

Block *LifoList::grab()
{
    Block *block = nullptr;
    if (top.load(std::memory_order_relaxed)) {
        MallocMutex::scoped_lock scoped_cs(lock);
        block = top.load(std::memory_order_relaxed);
        top.store(nullptr, std::memory_order_relaxed);
    }
    return block;
}

/********* Thread and block related code *************/

template<bool poolDestroy> void AllLargeBlocksList::releaseAll(Backend *backend) {
    LargeMemoryBlock *next, *lmb = loHead;
    loHead = nullptr;

    for (; lmb; lmb = next) {
        next = lmb->gNext;
        if (poolDestroy) {
            // as it's pool destruction, no need to return the object to the backend,
            // only remove backrefs, as they are global
            removeBackRef(lmb->backRefIdx);
        } else {
            // clean g(Next|Prev) to prevent removing lmb
            // from AllLargeBlocksList inside returnLargeObject
            lmb->gNext = lmb->gPrev = nullptr;
            backend->returnLargeObject(lmb);
        }
    }
}

TLSData* MemoryPool::getTLS(bool create)
{
    TLSData* tls = extMemPool.tlsPointerKey.getThreadMallocTLS();
    if (create && !tls)
        tls = extMemPool.tlsPointerKey.createTLS(this, &extMemPool.backend);
    return tls;
}

/*
 * Return the bin for the given size.
 */
inline Bin* TLSData::getAllocationBin(size_t size)
{
    return bin + getIndex(size);
}

/* Return an empty uninitialized block in a non-blocking fashion. */
Block *MemoryPool::getEmptyBlock(size_t size)
{
    TLSData* tls = getTLS(/*create=*/false);
    // try to use the per-thread cache, if TLS is available
    FreeBlockPool::ResOfGet resOfGet = tls?
        tls->freeSlabBlocks.getBlock() : FreeBlockPool::ResOfGet(nullptr, false);
    Block *result = resOfGet.block;

    if (!result) { // not found in the local cache, ask the backend for slabs
        int num = resOfGet.lastAccMiss? Backend::numOfSlabAllocOnMiss : 1;
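        // On a miss in the per-thread cache, several slabs are requested from the backend
        // at once; all but the first one are handed to the per-thread free pool in the loop below.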
        BackRefIdx backRefIdx[Backend::numOfSlabAllocOnMiss];

        result = static_cast<Block*>(extMemPool.backend.getSlabBlock(num));
        if (!result) return nullptr;

        if (!extMemPool.userPool())
            for (int i=0; i<num; i++) {
                backRefIdx[i] = BackRefIdx::newBackRef(/*largeObj=*/false);
                if (backRefIdx[i].isInvalid()) {
                    // roll back resource allocation
                    for (int j=0; j<i; j++)
                        removeBackRef(backRefIdx[j]);
                    Block *b = result;
                    for (int j=0; j<num; b=(Block*)((uintptr_t)b+slabSize), j++)
                        extMemPool.backend.putSlabBlock(b);
                    return nullptr;
                }
            }
        // resources were allocated, register blocks
        Block *b = result;
        for (int i=0; i<num; b=(Block*)((uintptr_t)b+slabSize), i++) {
            // slab block in user's pool must have invalid backRefIdx
            if (extMemPool.userPool()) {
                new (&b->backRefIdx) BackRefIdx();
            } else {
                setBackRef(backRefIdx[i], b);
                b->backRefIdx = backRefIdx[i];
            }
            b->tlsPtr.store(tls, std::memory_order_relaxed);
            b->poolPtr = this;
            // all but first one go to per-thread pool
            if (i > 0) {
                MALLOC_ASSERT(tls, ASSERT_TEXT);
                tls->freeSlabBlocks.returnBlock(b);
            }
        }
    }
    MALLOC_ASSERT(result, ASSERT_TEXT);
    result->initEmptyBlock(tls, size);
    STAT_increment(getThreadId(), getIndex(result->objectSize), allocBlockNew);
    return result;
}

void MemoryPool::returnEmptyBlock(Block *block, bool poolTheBlock)
{
    block->reset();
    if (poolTheBlock) {
        getTLS(/*create=*/false)->freeSlabBlocks.returnBlock(block);
    } else {
        // slab blocks in user's pools do not have valid backRefIdx
        if (!extMemPool.userPool())
            removeBackRef(*(block->getBackRefIdx()));
        extMemPool.backend.putSlabBlock(block);
    }
}

bool ExtMemoryPool::init(intptr_t poolId, rawAllocType rawAlloc,
                         rawFreeType rawFree, size_t granularity,
                         bool keepAllMemory, bool fixedPool)
{
    this->poolId = poolId;
    this->rawAlloc = rawAlloc;
    this->rawFree = rawFree;
    this->granularity = granularity;
    this->keepAllMemory = keepAllMemory;
    this->fixedPool = fixedPool;
    this->delayRegsReleasing = false;
    if (!initTLS())
        return false;
    loc.init(this);
    backend.init(this);
    MALLOC_ASSERT(isPoolValid(), nullptr);
    return true;
}

bool ExtMemoryPool::initTLS() { return tlsPointerKey.init(); }

bool MemoryPool::init(intptr_t poolId, const MemPoolPolicy *policy)
{
    if (!extMemPool.init(poolId, policy->pAlloc, policy->pFree,
                         policy->granularity? policy->granularity : defaultGranularity,
                         policy->keepAllMemory, policy->fixedPool))
        return false;
    {
        MallocMutex::scoped_lock lock(memPoolListLock);
        next = defaultMemPool->next;
        defaultMemPool->next = this;
        prev = defaultMemPool;
        if (next)
            next->prev = this;
    }
    return true;
}

bool MemoryPool::reset()
{
    MALLOC_ASSERT(extMemPool.userPool(), "No reset for the system pool.");
    // memory is not released during pool reset
    // TODO: mark regions to release unused ones on the next reset()
    extMemPool.delayRegionsReleasing(true);

    bootStrapBlocks.reset();
    extMemPool.lmbList.releaseAll</*poolDestroy=*/false>(&extMemPool.backend);
    if (!extMemPool.reset())
        return false;

    if (!extMemPool.initTLS())
        return false;
    extMemPool.delayRegionsReleasing(false);
    return true;
}

bool MemoryPool::destroy()
{
#if __TBB_MALLOC_LOCACHE_STAT
    extMemPool.loc.reportStat(stdout);
#endif
#if __TBB_MALLOC_BACKEND_STAT
    extMemPool.backend.reportStat(stdout);
#endif
    {
        MallocMutex::scoped_lock lock(memPoolListLock);
        // remove itself from the global pool list
        if (prev)
            prev->next = next;
        if (next)
            next->prev = prev;
    }
    // slab blocks in a non-default pool do not have backreferences,
    // only large objects do
    if (extMemPool.userPool())
        extMemPool.lmbList.releaseAll</*poolDestroy=*/true>(&extMemPool.backend);
    else {
        // only one non-userPool() is supported now
        MALLOC_ASSERT(this==defaultMemPool, nullptr);
        // Here and below in extMemPool.destroy(), the initial state is not restored
        // for a user pool, because it is just about to be released. For the system
        // pool, however, we restore it, so that it need not be re-zeroed on a subsequent reload.
        bootStrapBlocks.reset();
        extMemPool.orphanedBlocks.reset();
    }
    return extMemPool.destroy();
}

void MemoryPool::onThreadShutdown(TLSData *tlsData)
{
    if (tlsData) { // might be called for "empty" TLS
        tlsData->release();
        bootStrapBlocks.free(tlsData);
        clearTLS();
    }
}

#if MALLOC_DEBUG
void Bin::verifyTLSBin (size_t size) const
{
    /* The debug version verifies the TLSBin as needed */
    uint32_t objSize = getObjectSize(size);

    if (activeBlk) {
        MALLOC_ASSERT( activeBlk->isOwnedByCurrentThread(), ASSERT_TEXT );
        MALLOC_ASSERT( activeBlk->objectSize == objSize, ASSERT_TEXT );
#if MALLOC_DEBUG>1
        for (Block* temp = activeBlk->next; temp; temp=temp->next) {
            MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT );
            MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT );
            MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
            MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
            if (temp->next) {
                MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
            }
        }
        for (Block* temp = activeBlk->previous; temp; temp=temp->previous) {
            MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT );
            MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT );
            MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
            MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
            if (temp->previous) {
                MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
            }
        }
#endif /* MALLOC_DEBUG>1 */
    }
}
#else /* MALLOC_DEBUG */
inline void Bin::verifyTLSBin (size_t) const { }
#endif /* MALLOC_DEBUG */

/*
 * Add a block to the start of this tls bin list.
 */
void Bin::pushTLSBin(Block* block)
{
    /* The objectSize should be defined and not a parameter
       because the function is applied to partially filled blocks as well */
    unsigned int size = block->objectSize;

    MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
    MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );
    MALLOC_ASSERT( block->next == nullptr, ASSERT_TEXT );
    MALLOC_ASSERT( block->previous == nullptr, ASSERT_TEXT );

    MALLOC_ASSERT( this, ASSERT_TEXT );
    verifyTLSBin(size);

    block->next = activeBlk;
    if( activeBlk ) {
        block->previous = activeBlk->previous;
        activeBlk->previous = block;
        if( block->previous )
            block->previous->next = block;
    } else {
        activeBlk = block;
    }

    verifyTLSBin(size);
}

/*
 * Take a block out of its tls bin (e.g. before removal).
 */
void Bin::outofTLSBin(Block* block)
{
    unsigned int size = block->objectSize;

    MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
    MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );

    MALLOC_ASSERT( this, ASSERT_TEXT );
    verifyTLSBin(size);

    if (block == activeBlk) {
        activeBlk = block->previous? block->previous : block->next;
    }
    /* Unlink the block */
    if (block->previous) {
        MALLOC_ASSERT( block->previous->next == block, ASSERT_TEXT );
        block->previous->next = block->next;
    }
    if (block->next) {
        MALLOC_ASSERT( block->next->previous == block, ASSERT_TEXT );
        block->next->previous = block->previous;
    }
    block->next = nullptr;
    block->previous = nullptr;

    verifyTLSBin(size);
}

Block* Bin::getPrivatizedFreeListBlock()
{
    Block* block;
    MALLOC_ASSERT( this, ASSERT_TEXT );
    // if this method is called, active block usage must be unsuccessful
    MALLOC_ASSERT( (!activeBlk && !mailbox.load(std::memory_order_relaxed)) || (activeBlk && activeBlk->isFull), ASSERT_TEXT );

    // the counter should be changed: STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    if (!mailbox.load(std::memory_order_acquire)) // hot path is an empty mailbox
        return nullptr;
    else { // mailbox is not empty, take the lock and inspect it
        MallocMutex::scoped_lock scoped_cs(mailLock);
        block = mailbox.load(std::memory_order_relaxed);
        if( block ) {
            MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
            MALLOC_ASSERT( !isNotForUse(block->nextPrivatizable.load(std::memory_order_relaxed)), ASSERT_TEXT );
            mailbox.store(block->nextPrivatizable.load(std::memory_order_relaxed), std::memory_order_relaxed);
            block->nextPrivatizable.store((Block*)this, std::memory_order_relaxed);
        }
    }
    if( block ) {
        MALLOC_ASSERT( isSolidPtr(block->publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
        block->privatizePublicFreeList();
        block->adjustPositionInBin(this);
    }
    return block;
}

void Bin::addPublicFreeListBlock(Block* block)
{
    MallocMutex::scoped_lock scoped_cs(mailLock);
    block->nextPrivatizable.store(mailbox.load(std::memory_order_relaxed), std::memory_order_relaxed);
    mailbox.store(block, std::memory_order_relaxed);
}

// Process publicly freed objects in all blocks and return empty blocks
// to the backend in order to reduce overall footprint.
bool Bin::cleanPublicFreeLists()
{
    Block* block;
    if (!mailbox.load(std::memory_order_acquire))
        return false;
    else {
        // Grab all the blocks in the mailbox
        MallocMutex::scoped_lock scoped_cs(mailLock);
        block = mailbox.load(std::memory_order_relaxed);
        mailbox.store(nullptr, std::memory_order_relaxed);
    }
    bool released = false;
    while (block) {
        MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
        Block* tmp = block->nextPrivatizable.load(std::memory_order_relaxed);
        block->nextPrivatizable.store((Block*)this, std::memory_order_relaxed);
        block->privatizePublicFreeList();
        if (block->empty()) {
            processEmptyBlock(block, /*poolTheBlock=*/false);
            released = true;
        } else
            block->adjustPositionInBin(this);
        block = tmp;
    }
    return released;
}

bool Block::adjustFullness()
{
    if (bumpPtr) {
        /* If we are still using a bump ptr for this block it is empty enough to use. */
        STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
        isFull = false;
    } else {
        const float threshold = (slabSize - sizeof(Block)) * (1 - emptyEnoughRatio);
        /* allocatedCount shows how many objects in the block are in use; however it still counts
         * objects freed by other threads; so a prior call to privatizePublicFreeList() is recommended. */
        isFull = (allocatedCount*objectSize > threshold) ? true : false;
#if COLLECT_STATISTICS
        if (isFull)
            STAT_increment(getThreadId(), getIndex(objectSize), examineNotEmpty);
        else
            STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
#endif
    }
    return isFull;
}

// This method resides in class Block, and not in class Bin, in order to avoid
// calling getAllocationBin on a reasonably hot path in Block::freeOwnObject
void Block::adjustPositionInBin(Bin* bin/*=nullptr*/)
{
    // If the block was full, but became empty enough to use,
    // move it to the front of the list
    if (isFull && !adjustFullness()) {
        if (!bin)
            bin = tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize);
        bin->moveBlockToFront(this);
    }
}

/* Restore the bump pointer for an empty block that is planned to be used */
void Block::restoreBumpPtr()
{
    MALLOC_ASSERT( allocatedCount == 0, ASSERT_TEXT );
    MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
    STAT_increment(getThreadId(), getIndex(objectSize), freeRestoreBumpPtr);
    bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);
    freeList = nullptr;
    isFull = false;
}

void Block::freeOwnObject(void *object)
{
    tlsPtr.load(std::memory_order_relaxed)->markUsed();
    allocatedCount--;
    MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
#if COLLECT_STATISTICS
    // Note that getAllocationBin is not called on the hottest path with statistics off.
    if (tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize)->getActiveBlock() != this)
        STAT_increment(getThreadId(), getIndex(objectSize), freeToInactiveBlock);
    else
        STAT_increment(getThreadId(), getIndex(objectSize), freeToActiveBlock);
#endif
    if (empty()) {
        // If the last object of a slab is freed, the slab cannot be marked full
        MALLOC_ASSERT(!isFull, ASSERT_TEXT);
        tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize)->processEmptyBlock(this, /*poolTheBlock=*/true);
    } else { // hot path
        FreeObject *objectToFree = findObjectToFree(object);
        objectToFree->next = freeList;
        freeList = objectToFree;
        adjustPositionInBin();
    }
}

void Block::freePublicObject (FreeObject *objectToFree)
{
    FreeObject* localPublicFreeList{};

    MALLOC_ITT_SYNC_RELEASING(&publicFreeList);
#if FREELIST_NONBLOCKING
    // TBB_REVAMP_TODO: make it non-atomic in the non-blocking scenario
    localPublicFreeList = publicFreeList.load(std::memory_order_relaxed);
    do {
        objectToFree->next = localPublicFreeList;
        // no backoff necessary because we are trying to make a change, not waiting for a change
    } while( !publicFreeList.compare_exchange_strong(localPublicFreeList, objectToFree) );
#else
    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    {
        MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
        localPublicFreeList = objectToFree->next = publicFreeList;
        publicFreeList = objectToFree;
    }
#endif

    if( localPublicFreeList==nullptr ) {
        // if the block is abandoned, its nextPrivatizable pointer should be UNUSABLE;
        // otherwise, it should point to the bin the block belongs to.
        // reading nextPrivatizable is thread-safe below, because:
        // 1) the executing thread atomically got publicFreeList==nullptr and changed it to non-nullptr;
        // 2) only the owning thread can change it back to nullptr,
        // 3) but that cannot be done until the block is put to the mailbox.
        // So the executing thread is now the only one that can change nextPrivatizable
        Block* next = nextPrivatizable.load(std::memory_order_acquire);
        if( !isNotForUse(next) ) {
            MALLOC_ASSERT( next!=nullptr, ASSERT_TEXT );
            Bin* theBin = (Bin*) next;
#if MALLOC_DEBUG && TBB_REVAMP_TODO
            // FIXME: The thread that returns the block is not the block's owner.
            // The below assertion compares 'theBin' against the caller's local bin, thus, it always fails.
            // Need to find a way to get the correct remote bin for comparison.
            { // check that nextPrivatizable points to the bin the block belongs to
                uint32_t index = getIndex( objectSize );
                TLSData* tls = getThreadMallocTLS();
                MALLOC_ASSERT( theBin==tls->bin+index, ASSERT_TEXT );
            }
#endif // MALLOC_DEBUG
            theBin->addPublicFreeListBlock(this);
        }
    }
    STAT_increment(getThreadId(), ThreadCommonCounters, freeToOtherThread);
    STAT_increment(ownerTid.load(std::memory_order_relaxed), getIndex(objectSize), freeByOtherThread);
}

// Make objects freed by other threads available for use again
void Block::privatizePublicFreeList( bool reset )
{
    FreeObject *localPublicFreeList;
    // If reset is false, publicFreeList should not be zeroed but set to UNUSABLE
    // to properly synchronize with other threads freeing objects to this slab.
    const intptr_t endMarker = reset ? 0 : UNUSABLE;
    // Only the owner thread may reset the pointer to nullptr
    MALLOC_ASSERT( isOwnedByCurrentThread() || !reset, ASSERT_TEXT );
#if FREELIST_NONBLOCKING
    localPublicFreeList = publicFreeList.exchange((FreeObject*)endMarker);
#else
    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    {
        MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
        localPublicFreeList = publicFreeList;
        publicFreeList = endMarker;
    }
#endif
    MALLOC_ITT_SYNC_ACQUIRED(&publicFreeList);
    MALLOC_ASSERT( !(reset && isNotForUse(publicFreeList)), ASSERT_TEXT );

    // publicFreeList must have been UNUSABLE or valid, but not nullptr
    MALLOC_ASSERT( localPublicFreeList!=nullptr, ASSERT_TEXT );
    if( isSolidPtr(localPublicFreeList) ) {
        MALLOC_ASSERT( allocatedCount <= (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
        /* other threads did not change the counter freeing our blocks */
        allocatedCount--;
        FreeObject *temp = localPublicFreeList;
        while( isSolidPtr(temp->next) ){ // the list will end with either nullptr or UNUSABLE
            temp = temp->next;
            allocatedCount--;
            MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
        }
        /* merge with local freeList */
        temp->next = freeList;
        freeList = localPublicFreeList;
        STAT_increment(getThreadId(), getIndex(objectSize), allocPrivatized);
    }
}

void Block::privatizeOrphaned(TLSData *tls, unsigned index)
{
    Bin* bin = tls->bin + index;
    STAT_increment(getThreadId(), index, allocBlockPublic);
    next = nullptr;
    previous = nullptr;
    MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) != nullptr, ASSERT_TEXT );
    /* There is not a race here since no other thread owns this block */
    markOwned(tls);
    // It is safe to change nextPrivatizable, as publicFreeList is not null
    MALLOC_ASSERT( isNotForUse(nextPrivatizable.load(std::memory_order_relaxed)), ASSERT_TEXT );
    nextPrivatizable.store((Block*)bin, std::memory_order_relaxed);
    // the next call is required to change publicFreeList to 0
    privatizePublicFreeList();
    if( empty() ) {
        restoreBumpPtr();
    } else {
        adjustFullness(); // check the block fullness and set isFull
    }
    MALLOC_ASSERT( !isNotForUse(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
}


bool Block::readyToShare()
{
    FreeObject* oldVal = nullptr;
#if FREELIST_NONBLOCKING
    publicFreeList.compare_exchange_strong(oldVal, (FreeObject*)UNUSABLE);
#else
    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    {
        MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
        if ( (oldVal=publicFreeList)==nullptr )
            (intptr_t&)(publicFreeList) = UNUSABLE;
    }
#endif
    return oldVal==nullptr;
}

void Block::shareOrphaned(intptr_t binTag, unsigned index)
{
    MALLOC_ASSERT( binTag, ASSERT_TEXT );
    // unreferenced formal parameter warning
    tbb::detail::suppress_unused_warning(index);
    STAT_increment(getThreadId(), index, freeBlockPublic);
    markOrphaned();
    if ((intptr_t)nextPrivatizable.load(std::memory_order_relaxed) == binTag) {
        // First check passed: the block is not in mailbox yet.
        // Need to set publicFreeList to non-zero, so other threads
        // will not change nextPrivatizable and it can be zeroed.
        if ( !readyToShare() ) {
            // another thread freed an object; we need to wait until it finishes.
            // There is no need for exponential backoff, as the wait here is not for a lock;
            // but we need to yield, so the thread we wait for has a chance to run.
            // TODO: add a pause to also be friendly to hyperthreads
            int count = 256;
            while ((intptr_t)nextPrivatizable.load(std::memory_order_relaxed) == binTag) {
                if (--count==0) {
                    do_yield();
                    count = 256;
                }
            }
        }
    }
    MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) !=nullptr, ASSERT_TEXT );
    // now it is safe to change our data
    previous = nullptr;
    // it is the caller's responsibility to ensure that the list of blocks
    // formed by nextPrivatizable pointers is kept consistent if required.
    // if only called from thread shutdown code, it does not matter.
    nextPrivatizable.store((Block*)UNUSABLE, std::memory_order_relaxed);
}

void Block::cleanBlockHeader()
{
    next = nullptr;
    previous = nullptr;
    freeList = nullptr;
    allocatedCount = 0;
    isFull = false;
    tlsPtr.store(nullptr, std::memory_order_relaxed);

    publicFreeList.store(nullptr, std::memory_order_relaxed);
}

void Block::initEmptyBlock(TLSData *tls, size_t size)
{
    // Having getIndex and getObjectSize called next to each other
    // allows better compiler optimization as they basically share the code.
    unsigned int index = getIndex(size);
    unsigned int objSz = getObjectSize(size);

    cleanBlockHeader();
    objectSize = objSz;
    markOwned(tls);
    // the bump pointer should be prepared for the first allocation - thus move it down by objectSize
    bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);

    // each block should have the address where the head of the list of "privatizable" blocks is kept;
    // the only exception is a block for bootstrap, which is initialized when TLS is yet nullptr
    nextPrivatizable.store( tls? (Block*)(tls->bin + index) : nullptr, std::memory_order_relaxed);
    TRACEF(( "[ScalableMalloc trace] Empty block %p is initialized, owner is %ld, objectSize is %d, bumpPtr is %p\n",
             this, tlsPtr.load(std::memory_order_relaxed) ? getThreadId() : -1, objectSize, bumpPtr ));
getThreadId() : -1, objectSize, bumpPtr )); 1594 } 1595 1596 Block *OrphanedBlocks::get(TLSData *tls, unsigned int size) 1597 { 1598 // TODO: try to use index from getAllocationBin 1599 unsigned int index = getIndex(size); 1600 Block *block = bins[index].pop(); 1601 if (block) { 1602 MALLOC_ITT_SYNC_ACQUIRED(bins+index); 1603 block->privatizeOrphaned(tls, index); 1604 } 1605 return block; 1606 } 1607 1608 void OrphanedBlocks::put(intptr_t binTag, Block *block) 1609 { 1610 unsigned int index = getIndex(block->getSize()); 1611 block->shareOrphaned(binTag, index); 1612 MALLOC_ITT_SYNC_RELEASING(bins+index); 1613 bins[index].push(block); 1614 } 1615 1616 void OrphanedBlocks::reset() 1617 { 1618 for (uint32_t i=0; i<numBlockBinLimit; i++) 1619 new (bins+i) LifoList(); 1620 } 1621 1622 bool OrphanedBlocks::cleanup(Backend* backend) 1623 { 1624 bool released = false; 1625 for (uint32_t i=0; i<numBlockBinLimit; i++) { 1626 Block* block = bins[i].grab(); 1627 MALLOC_ITT_SYNC_ACQUIRED(bins+i); 1628 while (block) { 1629 Block* next = block->next; 1630 block->privatizePublicFreeList( /*reset=*/false ); // do not set publicFreeList to nullptr 1631 if (block->empty()) { 1632 block->reset(); 1633 // slab blocks in user's pools do not have valid backRefIdx 1634 if (!backend->inUserPool()) 1635 removeBackRef(*(block->getBackRefIdx())); 1636 backend->putSlabBlock(block); 1637 released = true; 1638 } else { 1639 MALLOC_ITT_SYNC_RELEASING(bins+i); 1640 bins[i].push(block); 1641 } 1642 block = next; 1643 } 1644 } 1645 return released; 1646 } 1647 1648 FreeBlockPool::ResOfGet FreeBlockPool::getBlock() 1649 { 1650 Block *b = head.exchange(nullptr); 1651 bool lastAccessMiss; 1652 1653 if (b) { 1654 size--; 1655 Block *newHead = b->next; 1656 lastAccessMiss = false; 1657 head.store(newHead, std::memory_order_release); 1658 } else { 1659 lastAccessMiss = true; 1660 } 1661 return ResOfGet(b, lastAccessMiss); 1662 } 1663 1664 void FreeBlockPool::returnBlock(Block *block) 1665 { 1666 MALLOC_ASSERT( size <= POOL_HIGH_MARK, ASSERT_TEXT ); 1667 Block *localHead = head.exchange(nullptr); 1668 1669 if (!localHead) { 1670 size = 0; // head was stolen by externalClean, correct size accordingly 1671 } else if (size == POOL_HIGH_MARK) { 1672 // release cold blocks and add hot one, 1673 // so keep POOL_LOW_MARK-1 blocks and add new block to head 1674 Block *headToFree = localHead, *helper; 1675 for (int i=0; i<POOL_LOW_MARK-2; i++) 1676 headToFree = headToFree->next; 1677 Block *last = headToFree; 1678 headToFree = headToFree->next; 1679 last->next = nullptr; 1680 size = POOL_LOW_MARK-1; 1681 for (Block *currBl = headToFree; currBl; currBl = helper) { 1682 helper = currBl->next; 1683 // slab blocks in user's pools do not have valid backRefIdx 1684 if (!backend->inUserPool()) 1685 removeBackRef(currBl->backRefIdx); 1686 backend->putSlabBlock(currBl); 1687 } 1688 } 1689 size++; 1690 block->next = localHead; 1691 head.store(block, std::memory_order_release); 1692 } 1693 1694 bool FreeBlockPool::externalCleanup() 1695 { 1696 Block *helper; 1697 bool released = false; 1698 1699 for (Block *currBl=head.exchange(nullptr); currBl; currBl=helper) { 1700 helper = currBl->next; 1701 // slab blocks in user's pools do not have valid backRefIdx 1702 if (!backend->inUserPool()) 1703 removeBackRef(currBl->backRefIdx); 1704 backend->putSlabBlock(currBl); 1705 released = true; 1706 } 1707 return released; 1708 } 1709 1710 /* Prepare the block for returning to FreeBlockPool */ 1711 void Block::reset() 1712 { 1713 // it is caller's responsibility 
to ensure no data is lost before calling this 1714 MALLOC_ASSERT( allocatedCount==0, ASSERT_TEXT ); 1715 MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT ); 1716 if (!isStartupAllocObject()) 1717 STAT_increment(getThreadId(), getIndex(objectSize), freeBlockBack); 1718 1719 cleanBlockHeader(); 1720 1721 nextPrivatizable.store(nullptr, std::memory_order_relaxed); 1722 1723 objectSize = 0; 1724 // for an empty block, bump pointer should point right after the end of the block 1725 bumpPtr = (FreeObject *)((uintptr_t)this + slabSize); 1726 } 1727 1728 inline void Bin::setActiveBlock (Block *block) 1729 { 1730 // MALLOC_ASSERT( bin, ASSERT_TEXT ); 1731 MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT ); 1732 // it is the caller's responsibility to keep the bin consistent (i.e. ensure this block is in the bin list) 1733 activeBlk = block; 1734 } 1735 1736 inline Block* Bin::setPreviousBlockActive() 1737 { 1738 MALLOC_ASSERT( activeBlk, ASSERT_TEXT ); 1739 Block* temp = activeBlk->previous; 1740 if( temp ) { 1741 MALLOC_ASSERT( !(temp->isFull), ASSERT_TEXT ); 1742 activeBlk = temp; 1743 } 1744 return temp; 1745 } 1746 1747 inline bool Block::isOwnedByCurrentThread() const { 1748 return tlsPtr.load(std::memory_order_relaxed) && ownerTid.isCurrentThreadId(); 1749 } 1750 1751 FreeObject *Block::findObjectToFree(const void *object) const 1752 { 1753 FreeObject *objectToFree; 1754 // Due to aligned allocations, a pointer passed to scalable_free 1755 // might differ from the address of the internally allocated object. 1756 // Small objects, however, should always be fine. 1757 if (objectSize <= maxSegregatedObjectSize) 1758 objectToFree = (FreeObject*)object; 1759 // "Fitting size" allocations are suspicious if aligned higher than naturally 1760 else { 1761 if ( ! isAligned(object,2*fittingAlignment) ) 1762 // TODO: the above check is questionable - it gives false negatives in ~50% of cases, 1763 // so it might even be slower on average than unconditional use of findAllocatedObject.
1764 // here it should be a "real" object 1765 objectToFree = (FreeObject*)object; 1766 else 1767 // here object can be an aligned address, so applying additional checks 1768 objectToFree = findAllocatedObject(object); 1769 MALLOC_ASSERT( isAligned(objectToFree,fittingAlignment), ASSERT_TEXT ); 1770 } 1771 MALLOC_ASSERT( isProperlyPlaced(objectToFree), ASSERT_TEXT ); 1772 1773 return objectToFree; 1774 } 1775 1776 void TLSData::release() 1777 { 1778 memPool->extMemPool.allLocalCaches.unregisterThread(this); 1779 externalCleanup(/*cleanOnlyUnused=*/false, /*cleanBins=*/false); 1780 1781 for (unsigned index = 0; index < numBlockBins; index++) { 1782 Block *activeBlk = bin[index].getActiveBlock(); 1783 if (!activeBlk) 1784 continue; 1785 Block *threadlessBlock = activeBlk->previous; 1786 bool syncOnMailbox = false; 1787 while (threadlessBlock) { 1788 Block *threadBlock = threadlessBlock->previous; 1789 if (threadlessBlock->empty()) { 1790 /* we destroy the thread, so not use its block pool */ 1791 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false); 1792 } else { 1793 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock); 1794 syncOnMailbox = true; 1795 } 1796 threadlessBlock = threadBlock; 1797 } 1798 threadlessBlock = activeBlk; 1799 while (threadlessBlock) { 1800 Block *threadBlock = threadlessBlock->next; 1801 if (threadlessBlock->empty()) { 1802 /* we destroy the thread, so not use its block pool */ 1803 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false); 1804 } else { 1805 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock); 1806 syncOnMailbox = true; 1807 } 1808 threadlessBlock = threadBlock; 1809 } 1810 bin[index].resetActiveBlock(); 1811 1812 if (syncOnMailbox) { 1813 // Although, we synchronized on nextPrivatizable inside a block, we still need to 1814 // synchronize on the bin lifetime because the thread releasing an object into the public 1815 // free list is touching the bin (mailbox and mailLock) 1816 MallocMutex::scoped_lock scoped_cs(bin[index].mailLock); 1817 } 1818 } 1819 } 1820 1821 1822 #if MALLOC_CHECK_RECURSION 1823 // TODO: Use dedicated heap for this 1824 1825 /* 1826 * It's a special kind of allocation that can be used when malloc is 1827 * not available (either during startup or when malloc was already called and 1828 * we are, say, inside pthread_setspecific's call). 1829 * Block can contain objects of different sizes, 1830 * allocations are performed by moving bump pointer and increasing of object counter, 1831 * releasing is done via counter of objects allocated in the block 1832 * or moving bump pointer if releasing object is on a bound. 1833 * TODO: make bump pointer to grow to the same backward direction as all the others. 
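 *
 * Illustrative layout sketch (derived from StartupBlock::allocate()/msize() below;
 * object sizes are already aligned up to sizeof(size_t)):
 *
 *   | StartupBlock header | size0 | obj0 ... | size1 | obj1 ... |   -> bumpPtr currently grows upward
 *                                  ^ pointer returned to the caller;
 *                                    msize(ptr) reads the size_t stored just before it.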
1834 */ 1835 1836 class StartupBlock : public Block { 1837 size_t availableSize() const { 1838 return slabSize - ((uintptr_t)bumpPtr - (uintptr_t)this); 1839 } 1840 static StartupBlock *getBlock(); 1841 public: 1842 static FreeObject *allocate(size_t size); 1843 static size_t msize(void *ptr) { return *((size_t*)ptr - 1); } 1844 void free(void *ptr); 1845 }; 1846 1847 static MallocMutex startupMallocLock; 1848 static StartupBlock *firstStartupBlock; 1849 1850 StartupBlock *StartupBlock::getBlock() 1851 { 1852 BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/false); 1853 if (backRefIdx.isInvalid()) return nullptr; 1854 1855 StartupBlock *block = static_cast<StartupBlock*>( 1856 defaultMemPool->extMemPool.backend.getSlabBlock(1)); 1857 if (!block) return nullptr; 1858 1859 block->cleanBlockHeader(); 1860 setBackRef(backRefIdx, block); 1861 block->backRefIdx = backRefIdx; 1862 // use startupAllocObjSizeMark to mark objects from startup block marker 1863 block->objectSize = startupAllocObjSizeMark; 1864 block->bumpPtr = (FreeObject *)((uintptr_t)block + sizeof(StartupBlock)); 1865 return block; 1866 } 1867 1868 FreeObject *StartupBlock::allocate(size_t size) 1869 { 1870 FreeObject *result; 1871 StartupBlock *newBlock = nullptr; 1872 1873 /* Objects must be aligned on their natural bounds, 1874 and objects bigger than word on word's bound. */ 1875 size = alignUp(size, sizeof(size_t)); 1876 // We need size of an object to implement msize. 1877 size_t reqSize = size + sizeof(size_t); 1878 { 1879 MallocMutex::scoped_lock scoped_cs(startupMallocLock); 1880 // Re-check whether we need a new block (conditions might have changed) 1881 if (!firstStartupBlock || firstStartupBlock->availableSize() < reqSize) { 1882 if (!newBlock) { 1883 newBlock = StartupBlock::getBlock(); 1884 if (!newBlock) return nullptr; 1885 } 1886 newBlock->next = (Block*)firstStartupBlock; 1887 if (firstStartupBlock) 1888 firstStartupBlock->previous = (Block*)newBlock; 1889 firstStartupBlock = newBlock; 1890 } 1891 result = firstStartupBlock->bumpPtr; 1892 firstStartupBlock->allocatedCount++; 1893 firstStartupBlock->bumpPtr = 1894 (FreeObject *)((uintptr_t)firstStartupBlock->bumpPtr + reqSize); 1895 } 1896 1897 // keep object size at the negative offset 1898 *((size_t*)result) = size; 1899 return (FreeObject*)((size_t*)result+1); 1900 } 1901 1902 void StartupBlock::free(void *ptr) 1903 { 1904 Block* blockToRelease = nullptr; 1905 { 1906 MallocMutex::scoped_lock scoped_cs(startupMallocLock); 1907 1908 MALLOC_ASSERT(firstStartupBlock, ASSERT_TEXT); 1909 MALLOC_ASSERT(startupAllocObjSizeMark==objectSize 1910 && allocatedCount>0, ASSERT_TEXT); 1911 MALLOC_ASSERT((uintptr_t)ptr>=(uintptr_t)this+sizeof(StartupBlock) 1912 && (uintptr_t)ptr+StartupBlock::msize(ptr)<=(uintptr_t)this+slabSize, 1913 ASSERT_TEXT); 1914 if (0 == --allocatedCount) { 1915 if (this == firstStartupBlock) 1916 firstStartupBlock = (StartupBlock*)firstStartupBlock->next; 1917 if (previous) 1918 previous->next = next; 1919 if (next) 1920 next->previous = previous; 1921 blockToRelease = this; 1922 } else if ((uintptr_t)ptr + StartupBlock::msize(ptr) == (uintptr_t)bumpPtr) { 1923 // last object in the block released 1924 FreeObject *newBump = (FreeObject*)((size_t*)ptr - 1); 1925 MALLOC_ASSERT((uintptr_t)newBump>(uintptr_t)this+sizeof(StartupBlock), 1926 ASSERT_TEXT); 1927 bumpPtr = newBump; 1928 } 1929 } 1930 if (blockToRelease) { 1931 blockToRelease->previous = blockToRelease->next = nullptr; 1932 defaultMemPool->returnEmptyBlock(blockToRelease, 
/*poolTheBlock=*/false); 1933 } 1934 } 1935 1936 #endif /* MALLOC_CHECK_RECURSION */ 1937 1938 /********* End thread related code *************/ 1939 1940 /********* Library initialization *************/ 1941 1942 //! Value indicating the state of initialization. 1943 /* 0 = initialization not started. 1944 * 1 = initialization started but not finished. 1945 * 2 = initialization finished. 1946 * In theory, we only need values 0 and 2. But value 1 is nonetheless 1947 * useful for detecting errors in the double-check pattern. 1948 */ 1949 static std::atomic<intptr_t> mallocInitialized{0}; // implicitly initialized to 0 1950 static MallocMutex initMutex; 1951 1952 /** The leading "\0" is here so that applying "strings" to the binary 1953 delivers a clean result. */ 1954 static char VersionString[] = "\0" TBBMALLOC_VERSION_STRINGS; 1955 1956 #if USE_PTHREAD && __TBB_SOURCE_DIRECTLY_INCLUDED 1957 1958 /* Decrease race interval between dynamic library unloading and pthread key 1959 destructor. Protect only Pthreads with supported unloading. */ 1960 class ShutdownSync { 1961 /* flag is the number of threads in pthread key dtor body 1962 (i.e., between threadDtorStart() and threadDtorDone()) 1963 or the signal to skip dtor, if flag < 0 */ 1964 std::atomic<intptr_t> flag; 1965 static const intptr_t skipDtor = INTPTR_MIN/2; 1966 public: 1967 void init() { flag.store(0, std::memory_order_release); } 1968 /* Suppose that 2*abs(skipDtor) or more threads never call threadDtorStart() 1969 simultaneously, so flag never becomes negative because of that. */ 1970 bool threadDtorStart() { 1971 if (flag.load(std::memory_order_acquire) < 0) 1972 return false; 1973 if (++flag <= 0) { // note that new value returned 1974 flag.fetch_sub(1); // flag is spoiled by us, restore it 1975 return false; 1976 } 1977 return true; 1978 } 1979 void threadDtorDone() { 1980 flag.fetch_sub(1); 1981 } 1982 void processExit() { 1983 if (flag.fetch_add(skipDtor) != 0) { 1984 SpinWaitUntilEq(flag, skipDtor); 1985 } 1986 } 1987 }; 1988 1989 #else 1990 1991 class ShutdownSync { 1992 public: 1993 void init() { } 1994 bool threadDtorStart() { return true; } 1995 void threadDtorDone() { } 1996 void processExit() { } 1997 }; 1998 1999 #endif // USE_PTHREAD && __TBB_SOURCE_DIRECTLY_INCLUDED 2000 2001 static ShutdownSync shutdownSync; 2002 2003 inline bool isMallocInitialized() { 2004 // Load must have acquire fence; otherwise thread taking "initialized" path 2005 // might perform textually later loads *before* mallocInitialized becomes 2. 2006 return 2 == mallocInitialized.load(std::memory_order_acquire); 2007 } 2008 2009 /* Caller is responsible for ensuring this routine is called exactly once. */ 2010 extern "C" void MallocInitializeITT() { 2011 #if __TBB_USE_ITT_NOTIFY 2012 if (!usedBySrcIncluded) 2013 tbb::detail::r1::__TBB_load_ittnotify(); 2014 #endif 2015 } 2016 2017 void MemoryPool::initDefaultPool() { 2018 hugePages.init(); 2019 } 2020 2021 /* 2022 * Allocator initialization routine; 2023 * it is called lazily on the very first scalable_malloc call. 
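 *
 * Minimal usage sketch (illustrative only, not part of this translation unit):
 * there is no explicit setup entry point; the first allocation triggers it.
 *
 *   #include <oneapi/tbb/scalable_allocator.h>  // assumed public header
 *   int main() {
 *       void *p = scalable_malloc(64); // lazily initializes the allocator
 *       scalable_free(p);
 *       return 0;
 *   }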
2024 */ 2025 static bool initMemoryManager() 2026 { 2027 TRACEF(( "[ScalableMalloc trace] sizeof(Block) is %d (expected 128); sizeof(uintptr_t) is %d\n", 2028 sizeof(Block), sizeof(uintptr_t) )); 2029 MALLOC_ASSERT( 2*blockHeaderAlignment == sizeof(Block), ASSERT_TEXT ); 2030 MALLOC_ASSERT( sizeof(FreeObject) == sizeof(void*), ASSERT_TEXT ); 2031 MALLOC_ASSERT( isAligned(defaultMemPool, sizeof(intptr_t)), 2032 "Memory pool must be void*-aligned for atomic to work over aligned arguments."); 2033 2034 #if USE_WINTHREAD 2035 const size_t granularity = 64*1024; // granularity of VirtualAlloc 2036 #else 2037 // POSIX.1-2001-compliant way to get page size 2038 const size_t granularity = sysconf(_SC_PAGESIZE); 2039 #endif 2040 if (!defaultMemPool) { 2041 // Do not rely on static constructors; do the assignment explicitly in case 2042 // the library's static data section has not been initialized yet at this call. 2043 defaultMemPool = (MemoryPool*)defaultMemPool_space; 2044 } 2045 bool initOk = defaultMemPool-> 2046 extMemPool.init(0, nullptr, nullptr, granularity, 2047 /*keepAllMemory=*/false, /*fixedPool=*/false); 2048 // TODO: extMemPool.init() to not allocate memory 2049 if (!initOk || !initBackRefMain(&defaultMemPool->extMemPool.backend) || !ThreadId::init()) 2050 return false; 2051 MemoryPool::initDefaultPool(); 2052 // init() is required iff initMemoryManager() is called 2053 // after mallocProcessShutdownNotification() 2054 shutdownSync.init(); 2055 #if COLLECT_STATISTICS 2056 initStatisticsCollection(); 2057 #endif 2058 return true; 2059 } 2060 2061 static bool GetBoolEnvironmentVariable(const char* name) { 2062 return tbb::detail::r1::GetBoolEnvironmentVariable(name); 2063 } 2064 2065 //! Ensures that initMemoryManager() is called once and only once. 2066 /** Does not return until initMemoryManager() has been completed by a thread. 2067 There is no need to call this routine if mallocInitialized==2. */ 2068 static bool doInitialization() 2069 { 2070 MallocMutex::scoped_lock lock( initMutex ); 2071 if (mallocInitialized.load(std::memory_order_relaxed)!=2) { 2072 MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==0, ASSERT_TEXT ); 2073 mallocInitialized.store(1, std::memory_order_relaxed); 2074 RecursiveMallocCallProtector scoped; 2075 if (!initMemoryManager()) { 2076 mallocInitialized.store(0, std::memory_order_relaxed); // restore the flag and bail out 2077 return false; 2078 } 2079 #ifdef MALLOC_EXTRA_INITIALIZATION 2080 MALLOC_EXTRA_INITIALIZATION; 2081 #endif 2082 #if MALLOC_CHECK_RECURSION 2083 RecursiveMallocCallProtector::detectNaiveOverload(); 2084 #endif 2085 MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==1, ASSERT_TEXT ); 2086 // Store must have release fence, otherwise mallocInitialized==2 2087 // might become remotely visible before side effects of 2088 // initMemoryManager() become remotely visible.
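// (This release store pairs with the acquire load in isMallocInitialized().)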
2089 mallocInitialized.store(2, std::memory_order_release); 2090 if( GetBoolEnvironmentVariable("TBB_VERSION") ) { 2091 fputs(VersionString+1,stderr); 2092 hugePages.printStatus(); 2093 } 2094 } 2095 /* It can't be 0 or I would have initialized it */ 2096 MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==2, ASSERT_TEXT ); 2097 return true; 2098 } 2099 2100 /********* End library initialization *************/ 2101 2102 /********* The malloc show begins *************/ 2103 2104 2105 FreeObject *Block::allocateFromFreeList() 2106 { 2107 FreeObject *result; 2108 2109 if (!freeList) return nullptr; 2110 2111 result = freeList; 2112 MALLOC_ASSERT( result, ASSERT_TEXT ); 2113 2114 freeList = result->next; 2115 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 2116 allocatedCount++; 2117 STAT_increment(getThreadId(), getIndex(objectSize), allocFreeListUsed); 2118 2119 return result; 2120 } 2121 2122 FreeObject *Block::allocateFromBumpPtr() 2123 { 2124 FreeObject *result = bumpPtr; 2125 if (result) { 2126 bumpPtr = (FreeObject *) ((uintptr_t) bumpPtr - objectSize); 2127 if ( (uintptr_t)bumpPtr < (uintptr_t)this+sizeof(Block) ) { 2128 bumpPtr = nullptr; 2129 } 2130 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 2131 allocatedCount++; 2132 STAT_increment(getThreadId(), getIndex(objectSize), allocBumpPtrUsed); 2133 } 2134 return result; 2135 } 2136 2137 inline FreeObject* Block::allocate() 2138 { 2139 MALLOC_ASSERT( isOwnedByCurrentThread(), ASSERT_TEXT ); 2140 2141 /* for better cache locality, first looking in the free list. */ 2142 if ( FreeObject *result = allocateFromFreeList() ) { 2143 return result; 2144 } 2145 MALLOC_ASSERT( !freeList, ASSERT_TEXT ); 2146 2147 /* if free list is empty, try thread local bump pointer allocation. */ 2148 if ( FreeObject *result = allocateFromBumpPtr() ) { 2149 return result; 2150 } 2151 MALLOC_ASSERT( !bumpPtr, ASSERT_TEXT ); 2152 2153 /* the block is considered full. */ 2154 isFull = true; 2155 return nullptr; 2156 } 2157 2158 size_t Block::findObjectSize(void *object) const 2159 { 2160 size_t blSize = getSize(); 2161 #if MALLOC_CHECK_RECURSION 2162 // Currently, there is no aligned allocations from startup blocks, 2163 // so we can return just StartupBlock::msize(). 2164 // TODO: This must be extended if we add aligned allocation from startup blocks. 
2165 if (!blSize) 2166 return StartupBlock::msize(object); 2167 #endif 2168 // object can be aligned, so real size can be less than block's 2169 size_t size = 2170 blSize - ((uintptr_t)object - (uintptr_t)findObjectToFree(object)); 2171 MALLOC_ASSERT(size>0 && size<minLargeObjectSize, ASSERT_TEXT); 2172 return size; 2173 } 2174 2175 void Bin::moveBlockToFront(Block *block) 2176 { 2177 /* move the block to the front of the bin */ 2178 if (block == activeBlk) return; 2179 outofTLSBin(block); 2180 pushTLSBin(block); 2181 } 2182 2183 void Bin::processEmptyBlock(Block *block, bool poolTheBlock) 2184 { 2185 if (block != activeBlk) { 2186 /* We are not using this block; return it to the pool */ 2187 outofTLSBin(block); 2188 block->getMemPool()->returnEmptyBlock(block, poolTheBlock); 2189 } else { 2190 /* all objects are free - let's restore the bump pointer */ 2191 block->restoreBumpPtr(); 2192 } 2193 } 2194 2195 template<int LOW_MARK, int HIGH_MARK> 2196 bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool) 2197 { 2198 const size_t size = object->unalignedSize; 2199 // not spoil cache with too large object, that can cause its total cleanup 2200 if (size > MAX_TOTAL_SIZE) 2201 return false; 2202 LargeMemoryBlock *localHead = head.exchange(nullptr); 2203 2204 object->prev = nullptr; 2205 object->next = localHead; 2206 if (localHead) 2207 localHead->prev = object; 2208 else { 2209 // those might not be cleaned during local cache stealing, correct them 2210 totalSize = 0; 2211 numOfBlocks = 0; 2212 tail = object; 2213 } 2214 localHead = object; 2215 totalSize += size; 2216 numOfBlocks++; 2217 // must meet both size and number of cached objects constrains 2218 if (totalSize > MAX_TOTAL_SIZE || numOfBlocks >= HIGH_MARK) { 2219 // scanning from tail until meet conditions 2220 while (totalSize > MAX_TOTAL_SIZE || numOfBlocks > LOW_MARK) { 2221 totalSize -= tail->unalignedSize; 2222 numOfBlocks--; 2223 tail = tail->prev; 2224 } 2225 LargeMemoryBlock *headToRelease = tail->next; 2226 tail->next = nullptr; 2227 2228 extMemPool->freeLargeObjectList(headToRelease); 2229 } 2230 2231 head.store(localHead, std::memory_order_release); 2232 return true; 2233 } 2234 2235 template<int LOW_MARK, int HIGH_MARK> 2236 LargeMemoryBlock *LocalLOCImpl<LOW_MARK, HIGH_MARK>::get(size_t size) 2237 { 2238 LargeMemoryBlock *localHead, *res = nullptr; 2239 2240 if (size > MAX_TOTAL_SIZE) 2241 return nullptr; 2242 2243 // TBB_REVAMP_TODO: review this line 2244 if (!head.load(std::memory_order_acquire) || (localHead = head.exchange(nullptr)) == nullptr) { 2245 // do not restore totalSize, numOfBlocks and tail at this point, 2246 // as they are used only in put(), where they must be restored 2247 return nullptr; 2248 } 2249 2250 for (LargeMemoryBlock *curr = localHead; curr; curr=curr->next) { 2251 if (curr->unalignedSize == size) { 2252 res = curr; 2253 if (curr->next) 2254 curr->next->prev = curr->prev; 2255 else 2256 tail = curr->prev; 2257 if (curr != localHead) 2258 curr->prev->next = curr->next; 2259 else 2260 localHead = curr->next; 2261 totalSize -= size; 2262 numOfBlocks--; 2263 break; 2264 } 2265 } 2266 2267 head.store(localHead, std::memory_order_release); 2268 return res; 2269 } 2270 2271 template<int LOW_MARK, int HIGH_MARK> 2272 bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::externalCleanup(ExtMemoryPool *extMemPool) 2273 { 2274 if (LargeMemoryBlock *localHead = head.exchange(nullptr)) { 2275 extMemPool->freeLargeObjectList(localHead); 2276 return true; 2277 } 2278 return false; 
2279 } 2280 2281 void *MemoryPool::getFromLLOCache(TLSData* tls, size_t size, size_t alignment) 2282 { 2283 LargeMemoryBlock *lmb = nullptr; 2284 2285 size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr); 2286 size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+alignment); 2287 if (allocationSize < size) // allocationSize is wrapped around after alignToBin 2288 return nullptr; 2289 MALLOC_ASSERT(allocationSize >= alignment, "Overflow must be checked before."); 2290 2291 if (tls) { 2292 tls->markUsed(); 2293 lmb = tls->lloc.get(allocationSize); 2294 } 2295 if (!lmb) 2296 lmb = extMemPool.mallocLargeObject(this, allocationSize); 2297 2298 if (lmb) { 2299 // doing shuffle we suppose that alignment offset guarantees 2300 // that different cache lines are in use 2301 MALLOC_ASSERT(alignment >= estimatedCacheLineSize, ASSERT_TEXT); 2302 2303 void *alignedArea = (void*)alignUp((uintptr_t)lmb+headersSize, alignment); 2304 uintptr_t alignedRight = 2305 alignDown((uintptr_t)lmb+lmb->unalignedSize - size, alignment); 2306 // Has some room to shuffle object between cache lines? 2307 // Note that alignedRight and alignedArea are aligned at alignment. 2308 unsigned ptrDelta = alignedRight - (uintptr_t)alignedArea; 2309 if (ptrDelta && tls) { // !tls is cold path 2310 // for the hot path of alignment==estimatedCacheLineSize, 2311 // allow compilers to use shift for division 2312 // (since estimatedCacheLineSize is a power-of-2 constant) 2313 unsigned numOfPossibleOffsets = alignment == estimatedCacheLineSize? 2314 ptrDelta / estimatedCacheLineSize : 2315 ptrDelta / alignment; 2316 unsigned myCacheIdx = ++tls->currCacheIdx; 2317 unsigned offset = myCacheIdx % numOfPossibleOffsets; 2318 2319 // Move object to a cache line with an offset that is different from 2320 // previous allocation. This supposedly allows us to use cache 2321 // associativity more efficiently. 2322 alignedArea = (void*)((uintptr_t)alignedArea + offset*alignment); 2323 } 2324 MALLOC_ASSERT((uintptr_t)lmb+lmb->unalignedSize >= 2325 (uintptr_t)alignedArea+size, "Object doesn't fit the block."); 2326 LargeObjectHdr *header = (LargeObjectHdr*)alignedArea-1; 2327 header->memoryBlock = lmb; 2328 header->backRefIdx = lmb->backRefIdx; 2329 setBackRef(header->backRefIdx, header); 2330 2331 lmb->objectSize = size; 2332 2333 MALLOC_ASSERT( isLargeObject<unknownMem>(alignedArea), ASSERT_TEXT ); 2334 MALLOC_ASSERT( isAligned(alignedArea, alignment), ASSERT_TEXT ); 2335 2336 return alignedArea; 2337 } 2338 return nullptr; 2339 } 2340 2341 void MemoryPool::putToLLOCache(TLSData *tls, void *object) 2342 { 2343 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2344 // overwrite backRefIdx to simplify double free detection 2345 header->backRefIdx = BackRefIdx(); 2346 2347 if (tls) { 2348 tls->markUsed(); 2349 if (tls->lloc.put(header->memoryBlock, &extMemPool)) 2350 return; 2351 } 2352 extMemPool.freeLargeObject(header->memoryBlock); 2353 } 2354 2355 /* 2356 * All aligned allocations fall into one of the following categories: 2357 * 1. if both request size and alignment are <= maxSegregatedObjectSize, 2358 * we just align the size up, and request this amount, because for every size 2359 * aligned to some power of 2, the allocated object is at least that aligned. 2360 * 2. for size<minLargeObjectSize, check if already guaranteed fittingAlignment is enough. 2361 * 3. 
if size+alignment<minLargeObjectSize, we take an object of fittingSizeN and align 2362 * its address up; given such pointer, scalable_free could find the real object. 2363 * Wrapping of size+alignment is impossible because maximal allowed 2364 * alignment plus minLargeObjectSize can't lead to wrapping. 2365 * 4. otherwise, aligned large object is allocated. 2366 */ 2367 static void *allocateAligned(MemoryPool *memPool, size_t size, size_t alignment) 2368 { 2369 MALLOC_ASSERT( isPowerOfTwo(alignment), ASSERT_TEXT ); 2370 2371 if (!isMallocInitialized()) 2372 if (!doInitialization()) 2373 return nullptr; 2374 2375 void *result; 2376 if (size<=maxSegregatedObjectSize && alignment<=maxSegregatedObjectSize) 2377 result = internalPoolMalloc(memPool, alignUp(size? size: sizeof(size_t), alignment)); 2378 else if (size<minLargeObjectSize) { 2379 if (alignment<=fittingAlignment) 2380 result = internalPoolMalloc(memPool, size); 2381 else if (size+alignment < minLargeObjectSize) { 2382 void *unaligned = internalPoolMalloc(memPool, size+alignment); 2383 if (!unaligned) return nullptr; 2384 result = alignUp(unaligned, alignment); 2385 } else 2386 goto LargeObjAlloc; 2387 } else { 2388 LargeObjAlloc: 2389 TLSData *tls = memPool->getTLS(/*create=*/true); 2390 // take into account only alignment that are higher then natural 2391 result = 2392 memPool->getFromLLOCache(tls, size, largeObjectAlignment>alignment? 2393 largeObjectAlignment: alignment); 2394 } 2395 2396 MALLOC_ASSERT( isAligned(result, alignment), ASSERT_TEXT ); 2397 return result; 2398 } 2399 2400 static void *reallocAligned(MemoryPool *memPool, void *ptr, 2401 size_t newSize, size_t alignment = 0) 2402 { 2403 void *result; 2404 size_t copySize; 2405 2406 if (isLargeObject<ourMem>(ptr)) { 2407 LargeMemoryBlock* lmb = ((LargeObjectHdr *)ptr - 1)->memoryBlock; 2408 copySize = lmb->unalignedSize-((uintptr_t)ptr-(uintptr_t)lmb); 2409 2410 // Apply different strategies if size decreases 2411 if (newSize <= copySize && (0 == alignment || isAligned(ptr, alignment))) { 2412 2413 // For huge objects (that do not fit in backend cache), keep the same space unless 2414 // the new size is at least twice smaller 2415 bool isMemoryBlockHuge = copySize > memPool->extMemPool.backend.getMaxBinnedSize(); 2416 size_t threshold = isMemoryBlockHuge ? copySize / 2 : 0; 2417 if (newSize > threshold) { 2418 lmb->objectSize = newSize; 2419 return ptr; 2420 } 2421 // TODO: For large objects suitable for the backend cache, 2422 // split out the excessive part and put it to the backend. 2423 } 2424 // Reallocate for real 2425 copySize = lmb->objectSize; 2426 #if BACKEND_HAS_MREMAP 2427 if (void *r = memPool->extMemPool.remap(ptr, copySize, newSize, 2428 alignment < largeObjectAlignment ? largeObjectAlignment : alignment)) 2429 return r; 2430 #endif 2431 result = alignment ? allocateAligned(memPool, newSize, alignment) : 2432 internalPoolMalloc(memPool, newSize); 2433 2434 } else { 2435 Block* block = (Block *)alignDown(ptr, slabSize); 2436 copySize = block->findObjectSize(ptr); 2437 2438 // TODO: Move object to another bin if size decreases and the current bin is "empty enough". 2439 // Currently, in case of size decreasing, old pointer is returned 2440 if (newSize <= copySize && (0==alignment || isAligned(ptr, alignment))) { 2441 return ptr; 2442 } else { 2443 result = alignment ? allocateAligned(memPool, newSize, alignment) : 2444 internalPoolMalloc(memPool, newSize); 2445 } 2446 } 2447 if (result) { 2448 memcpy(result, ptr, copySize < newSize ? 
copySize : newSize); 2449 internalPoolFree(memPool, ptr, 0); 2450 } 2451 return result; 2452 } 2453 2454 #if MALLOC_DEBUG 2455 /* A predicate checks if an object is properly placed inside its block */ 2456 inline bool Block::isProperlyPlaced(const void *object) const 2457 { 2458 return 0 == ((uintptr_t)this + slabSize - (uintptr_t)object) % objectSize; 2459 } 2460 #endif 2461 2462 /* Finds the real object inside the block */ 2463 FreeObject *Block::findAllocatedObject(const void *address) const 2464 { 2465 // calculate offset from the end of the block space 2466 uint16_t offset = (uintptr_t)this + slabSize - (uintptr_t)address; 2467 MALLOC_ASSERT( offset<=slabSize-sizeof(Block), ASSERT_TEXT ); 2468 // find offset difference from a multiple of allocation size 2469 offset %= objectSize; 2470 // and move the address down to where the real object starts. 2471 return (FreeObject*)((uintptr_t)address - (offset? objectSize-offset: 0)); 2472 } 2473 2474 /* 2475 * Bad dereference caused by a foreign pointer is possible only here, not earlier in call chain. 2476 * Separate function isolates SEH code, as it has bad influence on compiler optimization. 2477 */ 2478 static inline BackRefIdx safer_dereference (const BackRefIdx *ptr) 2479 { 2480 BackRefIdx id; 2481 #if _MSC_VER 2482 __try { 2483 #endif 2484 id = dereference(ptr); 2485 #if _MSC_VER 2486 } __except( GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION? 2487 EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ) { 2488 id = BackRefIdx(); 2489 } 2490 #endif 2491 return id; 2492 } 2493 2494 template<MemoryOrigin memOrigin> 2495 bool isLargeObject(void *object) 2496 { 2497 if (!isAligned(object, largeObjectAlignment)) 2498 return false; 2499 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2500 BackRefIdx idx = (memOrigin == unknownMem) ? 2501 safer_dereference(&header->backRefIdx) : dereference(&header->backRefIdx); 2502 2503 return idx.isLargeObject() 2504 // in valid LargeObjectHdr memoryBlock is not nullptr 2505 && header->memoryBlock 2506 // in valid LargeObjectHdr memoryBlock points somewhere before header 2507 // TODO: more strict check 2508 && (uintptr_t)header->memoryBlock < (uintptr_t)header 2509 && getBackRef(idx) == header; 2510 } 2511 2512 static inline bool isSmallObject (void *ptr) 2513 { 2514 Block* expectedBlock = (Block*)alignDown(ptr, slabSize); 2515 const BackRefIdx* idx = expectedBlock->getBackRefIdx(); 2516 2517 bool isSmall = expectedBlock == getBackRef(safer_dereference(idx)); 2518 if (isSmall) 2519 expectedBlock->checkFreePrecond(ptr); 2520 return isSmall; 2521 } 2522 2523 /**** Check if an object was allocated by scalable_malloc ****/ 2524 static inline bool isRecognized (void* ptr) 2525 { 2526 return defaultMemPool->extMemPool.backend.ptrCanBeValid(ptr) && 2527 (isLargeObject<unknownMem>(ptr) || isSmallObject(ptr)); 2528 } 2529 2530 static inline void freeSmallObject(void *object) 2531 { 2532 /* mask low bits to get the block */ 2533 Block *block = (Block *)alignDown(object, slabSize); 2534 block->checkFreePrecond(object); 2535 2536 #if MALLOC_CHECK_RECURSION 2537 if (block->isStartupAllocObject()) { 2538 ((StartupBlock *)block)->free(object); 2539 return; 2540 } 2541 #endif 2542 if (block->isOwnedByCurrentThread()) { 2543 block->freeOwnObject(object); 2544 } else { /* Slower path to add to the shared list, the allocatedCount is updated by the owner thread in malloc. 
*/ 2545 FreeObject *objectToFree = block->findObjectToFree(object); 2546 block->freePublicObject(objectToFree); 2547 } 2548 } 2549 2550 static void *internalPoolMalloc(MemoryPool* memPool, size_t size) 2551 { 2552 Bin* bin; 2553 Block * mallocBlock; 2554 2555 if (!memPool) return nullptr; 2556 2557 if (!size) size = sizeof(size_t); 2558 2559 TLSData *tls = memPool->getTLS(/*create=*/true); 2560 2561 /* Allocate a large object */ 2562 if (size >= minLargeObjectSize) 2563 return memPool->getFromLLOCache(tls, size, largeObjectAlignment); 2564 2565 if (!tls) return nullptr; 2566 2567 tls->markUsed(); 2568 /* 2569 * Get an element in thread-local array corresponding to the given size; 2570 * It keeps ptr to the active block for allocations of this size 2571 */ 2572 bin = tls->getAllocationBin(size); 2573 if ( !bin ) return nullptr; 2574 2575 /* Get a block to try to allocate in. */ 2576 for( mallocBlock = bin->getActiveBlock(); mallocBlock; 2577 mallocBlock = bin->setPreviousBlockActive() ) // the previous block should be empty enough 2578 { 2579 if( FreeObject *result = mallocBlock->allocate() ) 2580 return result; 2581 } 2582 2583 /* 2584 * else privatize publicly freed objects in some block and allocate from it 2585 */ 2586 mallocBlock = bin->getPrivatizedFreeListBlock(); 2587 if (mallocBlock) { 2588 MALLOC_ASSERT( mallocBlock->freeListNonNull(), ASSERT_TEXT ); 2589 if ( FreeObject *result = mallocBlock->allocateFromFreeList() ) 2590 return result; 2591 /* Else something strange happened, need to retry from the beginning; */ 2592 TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in public free list; reentering.\n" )); 2593 return internalPoolMalloc(memPool, size); 2594 } 2595 2596 /* 2597 * no suitable own blocks, try to get a partial block that some other thread has discarded. 2598 */ 2599 mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size); 2600 while (mallocBlock) { 2601 bin->pushTLSBin(mallocBlock); 2602 bin->setActiveBlock(mallocBlock); // TODO: move under the below condition? 2603 if( FreeObject *result = mallocBlock->allocate() ) 2604 return result; 2605 mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size); 2606 } 2607 2608 /* 2609 * else try to get a new empty block 2610 */ 2611 mallocBlock = memPool->getEmptyBlock(size); 2612 if (mallocBlock) { 2613 bin->pushTLSBin(mallocBlock); 2614 bin->setActiveBlock(mallocBlock); 2615 if( FreeObject *result = mallocBlock->allocate() ) 2616 return result; 2617 /* Else something strange happened, need to retry from the beginning; */ 2618 TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in empty block; reentering.\n" )); 2619 return internalPoolMalloc(memPool, size); 2620 } 2621 /* 2622 * else nothing works so return nullptr 2623 */ 2624 TRACEF(( "[ScalableMalloc trace] No memory found, returning nullptr.\n" )); 2625 return nullptr; 2626 } 2627 2628 // When size==0 (i.e. unknown), detect here whether the object is large. 2629 // For size is known and < minLargeObjectSize, we still need to check 2630 // if the actual object is large, because large objects might be used 2631 // for aligned small allocations. 2632 static bool internalPoolFree(MemoryPool *memPool, void *object, size_t size) 2633 { 2634 if (!memPool || !object) return false; 2635 2636 // The library is initialized at allocation call, so releasing while 2637 // not initialized means foreign object is releasing. 
2638 MALLOC_ASSERT(isMallocInitialized(), ASSERT_TEXT); 2639 MALLOC_ASSERT(memPool->extMemPool.userPool() || isRecognized(object), 2640 "Invalid pointer during object releasing is detected."); 2641 2642 if (size >= minLargeObjectSize || isLargeObject<ourMem>(object)) 2643 memPool->putToLLOCache(memPool->getTLS(/*create=*/false), object); 2644 else 2645 freeSmallObject(object); 2646 return true; 2647 } 2648 2649 static void *internalMalloc(size_t size) 2650 { 2651 if (!size) size = sizeof(size_t); 2652 2653 #if MALLOC_CHECK_RECURSION 2654 if (RecursiveMallocCallProtector::sameThreadActive()) 2655 return size<minLargeObjectSize? StartupBlock::allocate(size) : 2656 // nested allocation, so skip tls 2657 (FreeObject*)defaultMemPool->getFromLLOCache(nullptr, size, slabSize); 2658 #endif 2659 2660 if (!isMallocInitialized()) 2661 if (!doInitialization()) 2662 return nullptr; 2663 return internalPoolMalloc(defaultMemPool, size); 2664 } 2665 2666 static void internalFree(void *object) 2667 { 2668 internalPoolFree(defaultMemPool, object, 0); 2669 } 2670 2671 static size_t internalMsize(void* ptr) 2672 { 2673 MALLOC_ASSERT(ptr, "Invalid pointer passed to internalMsize"); 2674 if (isLargeObject<ourMem>(ptr)) { 2675 // TODO: return the maximum memory size, that can be written to this object 2676 LargeMemoryBlock* lmb = ((LargeObjectHdr*)ptr - 1)->memoryBlock; 2677 return lmb->objectSize; 2678 } else { 2679 Block *block = (Block*)alignDown(ptr, slabSize); 2680 return block->findObjectSize(ptr); 2681 } 2682 } 2683 2684 } // namespace internal 2685 2686 using namespace rml::internal; 2687 2688 // legacy entry point saved for compatibility with binaries complied 2689 // with pre-6003 versions of TBB 2690 TBBMALLOC_EXPORT rml::MemoryPool *pool_create(intptr_t pool_id, const MemPoolPolicy *policy) 2691 { 2692 rml::MemoryPool *pool; 2693 MemPoolPolicy pol(policy->pAlloc, policy->pFree, policy->granularity); 2694 2695 pool_create_v1(pool_id, &pol, &pool); 2696 return pool; 2697 } 2698 2699 rml::MemPoolError pool_create_v1(intptr_t pool_id, const MemPoolPolicy *policy, 2700 rml::MemoryPool **pool) 2701 { 2702 if ( !policy->pAlloc || policy->version<MemPoolPolicy::TBBMALLOC_POOL_VERSION 2703 // empty pFree allowed only for fixed pools 2704 || !(policy->fixedPool || policy->pFree)) { 2705 *pool = nullptr; 2706 return INVALID_POLICY; 2707 } 2708 if ( policy->version>MemPoolPolicy::TBBMALLOC_POOL_VERSION // future versions are not supported 2709 // new flags can be added in place of reserved, but default 2710 // behaviour must be supported by this version 2711 || policy->reserved ) { 2712 *pool = nullptr; 2713 return UNSUPPORTED_POLICY; 2714 } 2715 if (!isMallocInitialized()) 2716 if (!doInitialization()) { 2717 *pool = nullptr; 2718 return NO_MEMORY; 2719 } 2720 rml::internal::MemoryPool *memPool = 2721 (rml::internal::MemoryPool*)internalMalloc((sizeof(rml::internal::MemoryPool))); 2722 if (!memPool) { 2723 *pool = nullptr; 2724 return NO_MEMORY; 2725 } 2726 memset(static_cast<void*>(memPool), 0, sizeof(rml::internal::MemoryPool)); 2727 if (!memPool->init(pool_id, policy)) { 2728 internalFree(memPool); 2729 *pool = nullptr; 2730 return NO_MEMORY; 2731 } 2732 2733 *pool = (rml::MemoryPool*)memPool; 2734 return POOL_OK; 2735 } 2736 2737 bool pool_destroy(rml::MemoryPool* memPool) 2738 { 2739 if (!memPool) return false; 2740 bool ret = ((rml::internal::MemoryPool*)memPool)->destroy(); 2741 internalFree(memPool); 2742 2743 return ret; 2744 } 2745 2746 bool pool_reset(rml::MemoryPool* memPool) 2747 { 2748 if 
(!memPool) return false; 2749 2750 return ((rml::internal::MemoryPool*)memPool)->reset(); 2751 } 2752 2753 void *pool_malloc(rml::MemoryPool* mPool, size_t size) 2754 { 2755 return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size); 2756 } 2757 2758 void *pool_realloc(rml::MemoryPool* mPool, void *object, size_t size) 2759 { 2760 if (!object) 2761 return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size); 2762 if (!size) { 2763 internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0); 2764 return nullptr; 2765 } 2766 return reallocAligned((rml::internal::MemoryPool*)mPool, object, size, 0); 2767 } 2768 2769 void *pool_aligned_malloc(rml::MemoryPool* mPool, size_t size, size_t alignment) 2770 { 2771 if (!isPowerOfTwo(alignment) || 0==size) 2772 return nullptr; 2773 2774 return allocateAligned((rml::internal::MemoryPool*)mPool, size, alignment); 2775 } 2776 2777 void *pool_aligned_realloc(rml::MemoryPool* memPool, void *ptr, size_t size, size_t alignment) 2778 { 2779 if (!isPowerOfTwo(alignment)) 2780 return nullptr; 2781 rml::internal::MemoryPool *mPool = (rml::internal::MemoryPool*)memPool; 2782 void *tmp; 2783 2784 if (!ptr) 2785 tmp = allocateAligned(mPool, size, alignment); 2786 else if (!size) { 2787 internalPoolFree(mPool, ptr, 0); 2788 return nullptr; 2789 } else 2790 tmp = reallocAligned(mPool, ptr, size, alignment); 2791 2792 return tmp; 2793 } 2794 2795 bool pool_free(rml::MemoryPool *mPool, void *object) 2796 { 2797 return internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0); 2798 } 2799 2800 rml::MemoryPool *pool_identify(void *object) 2801 { 2802 rml::internal::MemoryPool *pool; 2803 if (isLargeObject<ourMem>(object)) { 2804 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2805 pool = header->memoryBlock->pool; 2806 } else { 2807 Block *block = (Block*)alignDown(object, slabSize); 2808 pool = block->getMemPool(); 2809 } 2810 // do not return defaultMemPool, as it can't be used in pool_free() etc 2811 __TBB_ASSERT_RELEASE(pool!=defaultMemPool, 2812 "rml::pool_identify() can't be used for scalable_malloc() etc results."); 2813 return (rml::MemoryPool*)pool; 2814 } 2815 2816 size_t pool_msize(rml::MemoryPool *mPool, void* object) 2817 { 2818 if (object) { 2819 // No assert for object recognition, cause objects allocated from non-default 2820 // memory pool do not participate in range checking and do not have valid backreferences for 2821 // small objects. Instead, check that an object belong to the certain memory pool. 2822 MALLOC_ASSERT_EX(mPool == pool_identify(object), "Object does not belong to the specified pool"); 2823 return internalMsize(object); 2824 } 2825 errno = EINVAL; 2826 // Unlike _msize, return 0 in case of parameter error. 2827 // Returning size_t(-1) looks more like the way to troubles. 2828 return 0; 2829 } 2830 2831 } // namespace rml 2832 2833 using namespace rml::internal; 2834 2835 #if MALLOC_TRACE 2836 static unsigned int threadGoingDownCount = 0; 2837 #endif 2838 2839 /* 2840 * When a thread is shutting down this routine should be called to remove all the thread ids 2841 * from the malloc blocks and replace them with a nullptr thread id. 2842 * 2843 * For pthreads, the function is set as a callback in pthread_key_create for TLS bin. 2844 * It will be automatically called at thread exit with the key value as the argument, 2845 * unless that value is nullptr. 2846 * For Windows, it is called from DllMain( DLL_THREAD_DETACH ). 
2847 * 2848 * However neither of the above is called for the main process thread, so the routine 2849 * also needs to be called during the process shutdown. 2850 * 2851 */ 2852 // TODO: Consider making this function part of class MemoryPool. 2853 void doThreadShutdownNotification(TLSData* tls, bool main_thread) 2854 { 2855 TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return start %d\n", 2856 getThreadId(), threadGoingDownCount++ )); 2857 2858 #if USE_PTHREAD 2859 if (tls) { 2860 if (!shutdownSync.threadDtorStart()) return; 2861 tls->getMemPool()->onThreadShutdown(tls); 2862 shutdownSync.threadDtorDone(); 2863 } else 2864 #endif 2865 { 2866 suppress_unused_warning(tls); // not used on Windows 2867 // The default pool is safe to use at this point: 2868 // on Linux, only the main thread can go here before destroying defaultMemPool; 2869 // on Windows, shutdown is synchronized via loader lock and isMallocInitialized(). 2870 // See also __TBB_mallocProcessShutdownNotification() 2871 defaultMemPool->onThreadShutdown(defaultMemPool->getTLS(/*create=*/false)); 2872 // Take lock to walk through other pools; but waiting might be dangerous at this point 2873 // (e.g. on Windows the main thread might deadlock) 2874 bool locked = false; 2875 MallocMutex::scoped_lock lock(MemoryPool::memPoolListLock, /*wait=*/!main_thread, &locked); 2876 if (locked) { // the list is safe to process 2877 for (MemoryPool *memPool = defaultMemPool->next; memPool; memPool = memPool->next) 2878 memPool->onThreadShutdown(memPool->getTLS(/*create=*/false)); 2879 } 2880 } 2881 2882 TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return end\n", getThreadId() )); 2883 } 2884 2885 #if USE_PTHREAD 2886 void mallocThreadShutdownNotification(void* arg) 2887 { 2888 // The routine is called for each pool (as TLS dtor) on each thread, except for the main thread 2889 if (!isMallocInitialized()) return; 2890 doThreadShutdownNotification((TLSData*)arg, false); 2891 } 2892 #else 2893 extern "C" void __TBB_mallocThreadShutdownNotification() 2894 { 2895 // The routine is called once per thread on Windows 2896 if (!isMallocInitialized()) return; 2897 doThreadShutdownNotification(nullptr, false); 2898 } 2899 #endif 2900 2901 extern "C" void __TBB_mallocProcessShutdownNotification(bool windows_process_dying) 2902 { 2903 if (!isMallocInitialized()) return; 2904 2905 // Don't clean allocator internals if the entire process is exiting 2906 if (!windows_process_dying) { 2907 doThreadShutdownNotification(nullptr, /*main_thread=*/true); 2908 } 2909 #if __TBB_MALLOC_LOCACHE_STAT 2910 printf("cache hit ratio %f, size hit %f\n", 2911 1.*cacheHits/mallocCalls, 1.*memHitKB/memAllocKB); 2912 defaultMemPool->extMemPool.loc.reportStat(stdout); 2913 #endif 2914 2915 shutdownSync.processExit(); 2916 #if __TBB_SOURCE_DIRECTLY_INCLUDED 2917 /* Pthread keys must be deleted as soon as possible to not call key dtor 2918 on thread termination when then the tbbmalloc code can be already unloaded. 
2919 */ 2920 defaultMemPool->destroy(); 2921 destroyBackRefMain(&defaultMemPool->extMemPool.backend); 2922 ThreadId::destroy(); // Delete key for thread id 2923 hugePages.reset(); 2924 // new total malloc initialization is possible after this point 2925 mallocInitialized.store(0, std::memory_order_release); 2926 #endif // __TBB_SOURCE_DIRECTLY_INCLUDED 2927 2928 #if COLLECT_STATISTICS 2929 unsigned nThreads = ThreadId::getMaxThreadId(); 2930 for( int i=1; i<=nThreads && i<MAX_THREADS; ++i ) 2931 STAT_print(i); 2932 #endif 2933 if (!usedBySrcIncluded) { 2934 MALLOC_ITT_FINI_ITTLIB(); 2935 MALLOC_ITT_RELEASE_RESOURCES(); 2936 } 2937 } 2938 2939 extern "C" void * scalable_malloc(size_t size) 2940 { 2941 void *ptr = internalMalloc(size); 2942 if (!ptr) errno = ENOMEM; 2943 return ptr; 2944 } 2945 2946 extern "C" void scalable_free(void *object) 2947 { 2948 internalFree(object); 2949 } 2950 2951 #if MALLOC_ZONE_OVERLOAD_ENABLED 2952 extern "C" void __TBB_malloc_free_definite_size(void *object, size_t size) 2953 { 2954 internalPoolFree(defaultMemPool, object, size); 2955 } 2956 #endif 2957 2958 /* 2959 * A variant that provides additional memory safety, by checking whether the given address 2960 * was obtained with this allocator, and if not redirecting to the provided alternative call. 2961 */ 2962 extern "C" TBBMALLOC_EXPORT void __TBB_malloc_safer_free(void *object, void (*original_free)(void*)) 2963 { 2964 if (!object) 2965 return; 2966 2967 // tbbmalloc can allocate object only when tbbmalloc has been initialized 2968 if (mallocInitialized.load(std::memory_order_acquire) && defaultMemPool->extMemPool.backend.ptrCanBeValid(object)) { 2969 if (isLargeObject<unknownMem>(object)) { 2970 // must check 1st for large object, because small object check touches 4 pages on left, 2971 // and it can be inaccessible 2972 TLSData *tls = defaultMemPool->getTLS(/*create=*/false); 2973 2974 defaultMemPool->putToLLOCache(tls, object); 2975 return; 2976 } else if (isSmallObject(object)) { 2977 freeSmallObject(object); 2978 return; 2979 } 2980 } 2981 if (original_free) 2982 original_free(object); 2983 } 2984 2985 /********* End the free code *************/ 2986 2987 /********* Code for scalable_realloc ***********/ 2988 2989 /* 2990 * From K&R 2991 * "realloc changes the size of the object pointed to by p to size. The contents will 2992 * be unchanged up to the minimum of the old and the new sizes. If the new size is larger, 2993 * the new space is uninitialized. realloc returns a pointer to the new space, or 2994 * nullptr if the request cannot be satisfied, in which case *p is unchanged." 2995 * 2996 */ 2997 extern "C" void* scalable_realloc(void* ptr, size_t size) 2998 { 2999 void *tmp; 3000 3001 if (!ptr) 3002 tmp = internalMalloc(size); 3003 else if (!size) { 3004 internalFree(ptr); 3005 return nullptr; 3006 } else 3007 tmp = reallocAligned(defaultMemPool, ptr, size, 0); 3008 3009 if (!tmp) errno = ENOMEM; 3010 return tmp; 3011 } 3012 3013 /* 3014 * A variant that provides additional memory safety, by checking whether the given address 3015 * was obtained with this allocator, and if not redirecting to the provided alternative call. 
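 *
 * Hypothetical interposer sketch (illustrative only; POSIX flavor, where the last
 * argument is treated as a realloc-style function pointer; next_realloc is assumed
 * to be the original realloc obtained elsewhere, e.g. via dlsym):
 *
 *   void *my_realloc(void *ptr, size_t size) {
 *       return __TBB_malloc_safer_realloc(ptr, size, (void*)next_realloc);
 *   }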
3016 */ 3017 extern "C" TBBMALLOC_EXPORT void* __TBB_malloc_safer_realloc(void* ptr, size_t sz, void* original_realloc) 3018 { 3019 void *tmp; // TODO: fix warnings about uninitialized use of tmp 3020 3021 if (!ptr) { 3022 tmp = internalMalloc(sz); 3023 } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) { 3024 if (!sz) { 3025 internalFree(ptr); 3026 return nullptr; 3027 } else { 3028 tmp = reallocAligned(defaultMemPool, ptr, sz, 0); 3029 } 3030 } 3031 #if USE_WINTHREAD 3032 else if (original_realloc && sz) { 3033 orig_ptrs *original_ptrs = static_cast<orig_ptrs*>(original_realloc); 3034 if ( original_ptrs->msize ){ 3035 size_t oldSize = original_ptrs->msize(ptr); 3036 tmp = internalMalloc(sz); 3037 if (tmp) { 3038 memcpy(tmp, ptr, sz<oldSize? sz : oldSize); 3039 if ( original_ptrs->free ){ 3040 original_ptrs->free( ptr ); 3041 } 3042 } 3043 } else 3044 tmp = nullptr; 3045 } 3046 #else 3047 else if (original_realloc) { 3048 typedef void* (*realloc_ptr_t)(void*,size_t); 3049 realloc_ptr_t original_realloc_ptr; 3050 (void *&)original_realloc_ptr = original_realloc; 3051 tmp = original_realloc_ptr(ptr,sz); 3052 } 3053 #endif 3054 else tmp = nullptr; 3055 3056 if (!tmp) errno = ENOMEM; 3057 return tmp; 3058 } 3059 3060 /********* End code for scalable_realloc ***********/ 3061 3062 /********* Code for scalable_calloc ***********/ 3063 3064 /* 3065 * From K&R 3066 * calloc returns a pointer to space for an array of nobj objects, 3067 * each of size size, or nullptr if the request cannot be satisfied. 3068 * The space is initialized to zero bytes. 3069 * 3070 */ 3071 3072 extern "C" void * scalable_calloc(size_t nobj, size_t size) 3073 { 3074 // it's square root of maximal size_t value 3075 const size_t mult_not_overflow = size_t(1) << (sizeof(size_t)*CHAR_BIT/2); 3076 const size_t arraySize = nobj * size; 3077 3078 // check for overflow during multiplication: 3079 if (nobj>=mult_not_overflow || size>=mult_not_overflow) // 1) heuristic check 3080 if (nobj && arraySize / nobj != size) { // 2) exact check 3081 errno = ENOMEM; 3082 return nullptr; 3083 } 3084 void* result = internalMalloc(arraySize); 3085 if (result) 3086 memset(result, 0, arraySize); 3087 else 3088 errno = ENOMEM; 3089 return result; 3090 } 3091 3092 /********* End code for scalable_calloc ***********/ 3093 3094 /********* Code for aligned allocation API **********/ 3095 3096 extern "C" int scalable_posix_memalign(void **memptr, size_t alignment, size_t size) 3097 { 3098 if ( !isPowerOfTwoAtLeast(alignment, sizeof(void*)) ) 3099 return EINVAL; 3100 void *result = allocateAligned(defaultMemPool, size, alignment); 3101 if (!result) 3102 return ENOMEM; 3103 *memptr = result; 3104 return 0; 3105 } 3106 3107 extern "C" void * scalable_aligned_malloc(size_t size, size_t alignment) 3108 { 3109 if (!isPowerOfTwo(alignment) || 0==size) { 3110 errno = EINVAL; 3111 return nullptr; 3112 } 3113 void *tmp = allocateAligned(defaultMemPool, size, alignment); 3114 if (!tmp) errno = ENOMEM; 3115 return tmp; 3116 } 3117 3118 extern "C" void * scalable_aligned_realloc(void *ptr, size_t size, size_t alignment) 3119 { 3120 if (!isPowerOfTwo(alignment)) { 3121 errno = EINVAL; 3122 return nullptr; 3123 } 3124 void *tmp; 3125 3126 if (!ptr) 3127 tmp = allocateAligned(defaultMemPool, size, alignment); 3128 else if (!size) { 3129 scalable_free(ptr); 3130 return nullptr; 3131 } else 3132 tmp = reallocAligned(defaultMemPool, ptr, size, alignment); 3133 3134 if (!tmp) errno = ENOMEM; 3135 return tmp; 3136 } 3137 3138 extern "C" 
TBBMALLOC_EXPORT void * __TBB_malloc_safer_aligned_realloc(void *ptr, size_t size, size_t alignment, void* orig_function) 3139 { 3140 /* corner cases left out of reallocAligned to not deal with errno there */ 3141 if (!isPowerOfTwo(alignment)) { 3142 errno = EINVAL; 3143 return nullptr; 3144 } 3145 void *tmp = nullptr; 3146 3147 if (!ptr) { 3148 tmp = allocateAligned(defaultMemPool, size, alignment); 3149 } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) { 3150 if (!size) { 3151 internalFree(ptr); 3152 return nullptr; 3153 } else { 3154 tmp = reallocAligned(defaultMemPool, ptr, size, alignment); 3155 } 3156 } 3157 #if USE_WINTHREAD 3158 else { 3159 orig_aligned_ptrs *original_ptrs = static_cast<orig_aligned_ptrs*>(orig_function); 3160 if (size) { 3161 // Without orig_msize, we can't do anything with this. 3162 // Just keeping old pointer. 3163 if ( original_ptrs->aligned_msize ){ 3164 // set alignment and offset to have possibly correct oldSize 3165 size_t oldSize = original_ptrs->aligned_msize(ptr, sizeof(void*), 0); 3166 tmp = allocateAligned(defaultMemPool, size, alignment); 3167 if (tmp) { 3168 memcpy(tmp, ptr, size<oldSize? size : oldSize); 3169 if ( original_ptrs->aligned_free ){ 3170 original_ptrs->aligned_free( ptr ); 3171 } 3172 } 3173 } 3174 } else { 3175 if ( original_ptrs->aligned_free ){ 3176 original_ptrs->aligned_free( ptr ); 3177 } 3178 return nullptr; 3179 } 3180 } 3181 #else 3182 // As original_realloc can't align result, and there is no way to find 3183 // size of reallocating object, we are giving up. 3184 suppress_unused_warning(orig_function); 3185 #endif 3186 if (!tmp) errno = ENOMEM; 3187 return tmp; 3188 } 3189 3190 extern "C" void scalable_aligned_free(void *ptr) 3191 { 3192 internalFree(ptr); 3193 } 3194 3195 /********* end code for aligned allocation API **********/ 3196 3197 /********* Code for scalable_msize ***********/ 3198 3199 /* 3200 * Returns the size of a memory block allocated in the heap. 3201 */ 3202 extern "C" size_t scalable_msize(void* ptr) 3203 { 3204 if (ptr) { 3205 MALLOC_ASSERT(isRecognized(ptr), "Invalid pointer in scalable_msize detected."); 3206 return internalMsize(ptr); 3207 } 3208 errno = EINVAL; 3209 // Unlike _msize, return 0 in case of parameter error. 3210 // Returning size_t(-1) looks more like the way to troubles. 3211 return 0; 3212 } 3213 3214 /* 3215 * A variant that provides additional memory safety, by checking whether the given address 3216 * was obtained with this allocator, and if not redirecting to the provided alternative call. 
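 *
 * Hypothetical interposer sketch (illustrative only; original_msize is assumed to be
 * the interposed allocator's msize routine, or nullptr to get 0 for foreign pointers):
 *
 *   size_t my_msize(void *ptr) {
 *       return __TBB_malloc_safer_msize(ptr, original_msize);
 *   }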
3217 */ 3218 extern "C" TBBMALLOC_EXPORT size_t __TBB_malloc_safer_msize(void *object, size_t (*original_msize)(void*)) 3219 { 3220 if (object) { 3221 // Check if the memory was allocated by scalable_malloc 3222 if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object)) 3223 return internalMsize(object); 3224 else if (original_msize) 3225 return original_msize(object); 3226 } 3227 // object is nullptr or unknown, or foreign and no original_msize 3228 #if USE_WINTHREAD 3229 errno = EINVAL; // errno expected to be set only on this platform 3230 #endif 3231 return 0; 3232 } 3233 3234 /* 3235 * The same as above but for _aligned_msize case 3236 */ 3237 extern "C" TBBMALLOC_EXPORT size_t __TBB_malloc_safer_aligned_msize(void *object, size_t alignment, size_t offset, size_t (*orig_aligned_msize)(void*,size_t,size_t)) 3238 { 3239 if (object) { 3240 // Check if the memory was allocated by scalable_malloc 3241 if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object)) 3242 return internalMsize(object); 3243 else if (orig_aligned_msize) 3244 return orig_aligned_msize(object,alignment,offset); 3245 } 3246 // object is nullptr or unknown 3247 errno = EINVAL; 3248 return 0; 3249 } 3250 3251 /********* End code for scalable_msize ***********/ 3252 3253 extern "C" int scalable_allocation_mode(int param, intptr_t value) 3254 { 3255 if (param == TBBMALLOC_SET_SOFT_HEAP_LIMIT) { 3256 defaultMemPool->extMemPool.backend.setRecommendedMaxSize((size_t)value); 3257 return TBBMALLOC_OK; 3258 } else if (param == USE_HUGE_PAGES) { 3259 #if __unix__ 3260 switch (value) { 3261 case 0: 3262 case 1: 3263 hugePages.setMode(value); 3264 return TBBMALLOC_OK; 3265 default: 3266 return TBBMALLOC_INVALID_PARAM; 3267 } 3268 #else 3269 return TBBMALLOC_NO_EFFECT; 3270 #endif 3271 #if __TBB_SOURCE_DIRECTLY_INCLUDED 3272 } else if (param == TBBMALLOC_INTERNAL_SOURCE_INCLUDED) { 3273 switch (value) { 3274 case 0: // used by dynamic library 3275 case 1: // used by static library or directly included sources 3276 usedBySrcIncluded = value; 3277 return TBBMALLOC_OK; 3278 default: 3279 return TBBMALLOC_INVALID_PARAM; 3280 } 3281 #endif 3282 } else if (param == TBBMALLOC_SET_HUGE_SIZE_THRESHOLD) { 3283 defaultMemPool->extMemPool.loc.setHugeSizeThreshold((size_t)value); 3284 return TBBMALLOC_OK; 3285 } 3286 return TBBMALLOC_INVALID_PARAM; 3287 } 3288 3289 extern "C" int scalable_allocation_command(int cmd, void *param) 3290 { 3291 if (param) 3292 return TBBMALLOC_INVALID_PARAM; 3293 3294 bool released = false; 3295 switch(cmd) { 3296 case TBBMALLOC_CLEAN_THREAD_BUFFERS: 3297 if (TLSData *tls = defaultMemPool->getTLS(/*create=*/false)) 3298 released = tls->externalCleanup(/*cleanOnlyUnused*/false, /*cleanBins=*/true); 3299 break; 3300 case TBBMALLOC_CLEAN_ALL_BUFFERS: 3301 released = defaultMemPool->extMemPool.hardCachesCleanup(true); 3302 break; 3303 default: 3304 return TBBMALLOC_INVALID_PARAM; 3305 } 3306 return released ? TBBMALLOC_OK : TBBMALLOC_NO_EFFECT; 3307 } 3308
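/*
 * Usage sketch for the tuning entry points above (illustrative only):
 *
 *   // suggest a soft limit, in bytes, on the memory the allocator keeps from the OS
 *   scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 512*1024*1024);
 *
 *   // release memory cached in all internal buffers; the second argument is reserved
 *   scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr);
 */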