/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include <errno.h>
#include <new>        /* for placement new */
#include <string.h>   /* for memset */

#include "oneapi/tbb/version.h"
#include "../tbb/environment.h"
#include "../tbb/itt_notify.h" // for __TBB_load_ittnotify()

#if USE_PTHREAD
    #define TlsSetValue_func pthread_setspecific
    #define TlsGetValue_func pthread_getspecific
    #define GetMyTID() pthread_self()
    #include <sched.h>
    extern "C" { static void mallocThreadShutdownNotification(void*); }
    #if __sun || __SUNPRO_CC
    #define __asm__ asm
    #endif
    #include <unistd.h> // sysconf(_SC_PAGESIZE)
#elif USE_WINTHREAD
    #define GetMyTID() GetCurrentThreadId()
#if __TBB_WIN8UI_SUPPORT
    #include <thread>
    #define TlsSetValue_func FlsSetValue
    #define TlsGetValue_func FlsGetValue
    #define TlsAlloc() FlsAlloc(nullptr)
    #define TLS_ALLOC_FAILURE FLS_OUT_OF_INDEXES
    #define TlsFree FlsFree
#else
    #define TlsSetValue_func TlsSetValue
    #define TlsGetValue_func TlsGetValue
    #define TLS_ALLOC_FAILURE TLS_OUT_OF_INDEXES
#endif
#else
    #error Must define USE_PTHREAD or USE_WINTHREAD
#endif

#define FREELIST_NONBLOCKING 1

namespace rml {
class MemoryPool;
namespace internal {

class Block;
class MemoryPool;

#if MALLOC_CHECK_RECURSION

inline bool isMallocInitialized();

#endif // MALLOC_CHECK_RECURSION

/** Support for handling the special UNUSABLE pointer state **/
const intptr_t UNUSABLE = 0x1;
inline bool isSolidPtr( void* ptr ) {
    return (UNUSABLE|(intptr_t)ptr)!=UNUSABLE;
}
inline bool isNotForUse( void* ptr ) {
    return (intptr_t)ptr==UNUSABLE;
}

/*
 * Block::objectSize value used to mark blocks allocated by startupAlloc
 */
const uint16_t startupAllocObjSizeMark = ~(uint16_t)0;

/*
 * The following constant is used to define the size of struct Block, the block header.
 * The intent is to have the size of a Block be a multiple of the cache line size; this allows us to
 * get good alignment at the cost of some overhead equal to the amount of padding included in the Block.
 */
const int blockHeaderAlignment = estimatedCacheLineSize;

/********* The data structures and global objects **************/

/*
 * The malloc routines themselves need to be able to occasionally malloc some space,
 * in order to set up the structures used by the thread local structures. This
 * routine performs that function.
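 *
 * A minimal usage sketch (illustrative only; the real call sites are
 * TLSKey::createTLS and MemoryPool::onThreadShutdown further below):
 *
 *   TLSData *tls = (TLSData*)memPool->bootStrapBlocks.allocate(memPool, sizeof(TLSData));
 *   // ... the returned chunk is zero-filled and used as per-thread TLS storage ...
 *   memPool->bootStrapBlocks.free(tls);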
96 */ 97 class BootStrapBlocks { 98 MallocMutex bootStrapLock; 99 Block *bootStrapBlock; 100 Block *bootStrapBlockUsed; 101 FreeObject *bootStrapObjectList; 102 public: 103 void *allocate(MemoryPool *memPool, size_t size); 104 void free(void* ptr); 105 void reset(); 106 }; 107 108 #if USE_INTERNAL_TID 109 class ThreadId { 110 static tls_key_t Tid_key; 111 std::atomic<intptr_t> ThreadCount; 112 113 unsigned int id; 114 115 static unsigned int tlsNumber() { 116 unsigned int result = reinterpret_cast<intptr_t>(TlsGetValue_func(Tid_key)); 117 if( !result ) { 118 RecursiveMallocCallProtector scoped; 119 // Thread-local value is zero -> first call from this thread, 120 // need to initialize with next ID value (IDs start from 1) 121 result = ++ThreadCount; // returned new value! 122 TlsSetValue_func( Tid_key, reinterpret_cast<void*>(result) ); 123 } 124 return result; 125 } 126 public: 127 static bool init() { 128 #if USE_WINTHREAD 129 Tid_key = TlsAlloc(); 130 if (Tid_key == TLS_ALLOC_FAILURE) 131 return false; 132 #else 133 int status = pthread_key_create( &Tid_key, nullptr ); 134 if ( status ) { 135 fprintf (stderr, "The memory manager cannot create tls key during initialization\n"); 136 return false; 137 } 138 #endif /* USE_WINTHREAD */ 139 return true; 140 } 141 #if __TBB_SOURCE_DIRECTLY_INCLUDED 142 static void destroy() { 143 if( Tid_key ) { 144 #if USE_WINTHREAD 145 BOOL status = !(TlsFree( Tid_key )); // fail is zero 146 #else 147 int status = pthread_key_delete( Tid_key ); 148 #endif /* USE_WINTHREAD */ 149 if ( status ) 150 fprintf (stderr, "The memory manager cannot delete tls key\n"); 151 Tid_key = 0; 152 } 153 } 154 #endif 155 156 ThreadId() : id(ThreadId::tlsNumber()) {} 157 bool isCurrentThreadId() const { return id == ThreadId::tlsNumber(); } 158 159 #if COLLECT_STATISTICS || MALLOC_TRACE 160 friend unsigned int getThreadId() { return ThreadId::tlsNumber(); } 161 #endif 162 #if COLLECT_STATISTICS 163 static unsigned getMaxThreadId() { return ThreadCount.load(std::memory_order_relaxed); } 164 165 friend int STAT_increment(ThreadId tid, int bin, int ctr); 166 #endif 167 }; 168 169 tls_key_t ThreadId::Tid_key; 170 intptr_t ThreadId::ThreadCount; 171 172 #if COLLECT_STATISTICS 173 int STAT_increment(ThreadId tid, int bin, int ctr) 174 { 175 return ::STAT_increment(tid.id, bin, ctr); 176 } 177 #endif 178 179 #else // USE_INTERNAL_TID 180 181 class ThreadId { 182 #if USE_PTHREAD 183 std::atomic<pthread_t> tid; 184 #else 185 std::atomic<DWORD> tid; 186 #endif 187 public: 188 ThreadId() : tid(GetMyTID()) {} 189 ThreadId(ThreadId &other) { 190 tid.store(other.tid.load(std::memory_order_relaxed), std::memory_order_relaxed); 191 } 192 ~ThreadId() {} 193 194 #if USE_PTHREAD 195 bool isCurrentThreadId() const { return pthread_equal(pthread_self(), tid.load(std::memory_order_relaxed)); } 196 #else 197 bool isCurrentThreadId() const { return GetCurrentThreadId() == tid.load(std::memory_order_relaxed); } 198 #endif 199 ThreadId& operator=(const ThreadId& other) { 200 tid.store(other.tid.load(std::memory_order_relaxed), std::memory_order_relaxed); 201 return *this; 202 } 203 static bool init() { return true; } 204 #if __TBB_SOURCE_DIRECTLY_INCLUDED 205 static void destroy() {} 206 #endif 207 }; 208 209 #endif // USE_INTERNAL_TID 210 211 /*********** Code to provide thread ID and a thread-local void pointer **********/ 212 213 bool TLSKey::init() 214 { 215 #if USE_WINTHREAD 216 TLS_pointer_key = TlsAlloc(); 217 if (TLS_pointer_key == TLS_ALLOC_FAILURE) 218 return false; 219 #else 220 int status = 
        pthread_key_create( &TLS_pointer_key, mallocThreadShutdownNotification );
    if ( status )
        return false;
#endif /* USE_WINTHREAD */
    return true;
}

bool TLSKey::destroy()
{
#if USE_WINTHREAD
    BOOL status1 = !(TlsFree(TLS_pointer_key)); // fail is zero
#else
    int status1 = pthread_key_delete(TLS_pointer_key);
#endif /* USE_WINTHREAD */
    MALLOC_ASSERT(!status1, "The memory manager cannot delete tls key.");
    return status1==0;
}

inline TLSData* TLSKey::getThreadMallocTLS() const
{
    return (TLSData *)TlsGetValue_func( TLS_pointer_key );
}

inline void TLSKey::setThreadMallocTLS( TLSData * newvalue ) {
    RecursiveMallocCallProtector scoped;
    TlsSetValue_func( TLS_pointer_key, newvalue );
}

/* The 'next' field in the block header has to maintain some invariants:
 *   it needs to be on a 16K boundary and the first field in the block.
 *   Any value stored there needs to have the lower 14 bits set to 0
 *   so that various asserts work. This means that if you want to smash this memory
 *   for debugging purposes you will need to obey this invariant.
 * The total size of the header needs to be a power of 2 to simplify
 * the alignment requirements. For now it is a 128 byte structure.
 * To avoid false sharing, the fields changed only locally are separated
 * from the fields changed by foreign threads.
 * Changing the size of the block header would require changing
 * some bin allocation sizes, in particular "fitting" sizes (see above).
 */
class Bin;
class StartupBlock;

class MemoryPool {
    // if no explicit grainsize, expect to see malloc in user's pAlloc
    // and set a reasonably low granularity
    static const size_t defaultGranularity = estimatedCacheLineSize;

    MemoryPool() = delete;                  // deny
public:
    static MallocMutex  memPoolListLock;

    // list of all active pools is used to release
    // all TLS data on thread termination or library unload
    MemoryPool    *next,
                  *prev;
    ExtMemoryPool  extMemPool;
    BootStrapBlocks bootStrapBlocks;

    static void initDefaultPool();

    bool init(intptr_t poolId, const MemPoolPolicy* memPoolPolicy);
    bool reset();
    bool destroy();
    void onThreadShutdown(TLSData *tlsData);

    inline TLSData *getTLS(bool create);
    void clearTLS() { extMemPool.tlsPointerKey.setThreadMallocTLS(nullptr); }

    Block *getEmptyBlock(size_t size);
    void returnEmptyBlock(Block *block, bool poolTheBlock);

    // get/put large object to/from local large object cache
    void *getFromLLOCache(TLSData *tls, size_t size, size_t alignment);
    void putToLLOCache(TLSData *tls, void *object);
};

static intptr_t defaultMemPool_space[sizeof(MemoryPool)/sizeof(intptr_t) +
                                     (sizeof(MemoryPool)%sizeof(intptr_t)? 1 : 0)];
static MemoryPool *defaultMemPool = (MemoryPool*)defaultMemPool_space;
const size_t MemoryPool::defaultGranularity;
// zero-initialized
MallocMutex  MemoryPool::memPoolListLock;
// TODO: move huge page status to the default pool, because that's its state
HugePagesStatus hugePages;
static bool usedBySrcIncluded = false;

// Padding helpers
template<size_t padd>
struct PaddingImpl {
    size_t       __padding[padd];
};

template<>
struct PaddingImpl<0> {};

template<int N>
struct Padding : PaddingImpl<N/sizeof(size_t)> {};

// Slab block is 16KB-aligned. To prevent false sharing, separate the locally-accessed
// fields from the fields commonly accessed by non-owner threads.
class GlobalBlockFields : public BlockI {
protected:
    std::atomic<FreeObject*> publicFreeList;
    std::atomic<Block*> nextPrivatizable;
    MemoryPool  *poolPtr;
};

class LocalBlockFields : public GlobalBlockFields, Padding<blockHeaderAlignment - sizeof(GlobalBlockFields)> {
protected:
    Block       *next;
    Block       *previous;        /* Use double linked list to speed up removal */
    FreeObject  *bumpPtr;         /* Bump pointer moves from the end to the beginning of a block */
    FreeObject  *freeList;
    /* Pointer to local data for the owner thread. Used for fast finding tls
       when releasing object from a block that current thread owned.
       nullptr for orphaned blocks. */
    std::atomic<TLSData*> tlsPtr;
    ThreadId     ownerTid;        /* the ID of the thread that owns or last owned the block */
    BackRefIdx   backRefIdx;
    uint16_t     allocatedCount;  /* Number of objects allocated (obviously by the owning thread) */
    uint16_t     objectSize;
    bool         isFull;

    friend class FreeBlockPool;
    friend class StartupBlock;
    friend class LifoList;
    friend void *BootStrapBlocks::allocate(MemoryPool *, size_t);
    friend bool OrphanedBlocks::cleanup(Backend*);
    friend Block *MemoryPool::getEmptyBlock(size_t);
};

// Use inheritance to guarantee that user data starts on the next cache line.
// Can't use a member for this, because when LocalBlockFields already ends on a cache line,
// there must be no additional memory consumption for any compiler.
class Block : public LocalBlockFields,
              Padding<2*blockHeaderAlignment - sizeof(LocalBlockFields)> {
public:
    bool empty() const {
        if (allocatedCount > 0) return false;
        MALLOC_ASSERT(!isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT);
        return true;
    }
    inline FreeObject* allocate();
    inline FreeObject *allocateFromFreeList();

    inline bool adjustFullness();
    void adjustPositionInBin(Bin* bin = nullptr);
#if MALLOC_DEBUG
    bool freeListNonNull() { return freeList; }
#endif
    void freePublicObject(FreeObject *objectToFree);
    inline void freeOwnObject(void *object);
    void reset();
    void privatizePublicFreeList( bool reset = true );
    void restoreBumpPtr();
    void privatizeOrphaned(TLSData *tls, unsigned index);
    bool readyToShare();
    void shareOrphaned(intptr_t binTag, unsigned index);
    unsigned int getSize() const {
        MALLOC_ASSERT(isStartupAllocObject() || objectSize<minLargeObjectSize,
                      "Invalid object size");
        return isStartupAllocObject()? 0 : objectSize;
    }
    const BackRefIdx *getBackRefIdx() const { return &backRefIdx; }
    inline bool isOwnedByCurrentThread() const;
    bool isStartupAllocObject() const { return objectSize == startupAllocObjSizeMark; }
    inline FreeObject *findObjectToFree(const void *object) const;
    void checkFreePrecond(const void *object) const {
#if MALLOC_DEBUG
        const char *msg = "Possible double free or heap corruption.";
        // small objects are always at least sizeof(size_t) bytes aligned;
        // try to check this before the dereference below, as for invalid objects
        // this memory may be unreadable
        MALLOC_ASSERT(isAligned(object, sizeof(size_t)), "Try to free invalid small object");
#if !__TBB_USE_THREAD_SANITIZER
        // releasing to free slab
        MALLOC_ASSERT(allocatedCount>0, msg);
#endif
        // must not point to slab's header
        MALLOC_ASSERT((uintptr_t)object - (uintptr_t)this >= sizeof(Block), msg);
        if (startupAllocObjSizeMark == objectSize) // startup block
            MALLOC_ASSERT(object<=bumpPtr, msg);
        else {
            // non-startup objects are 8 bytes aligned
            MALLOC_ASSERT(isAligned(object, 8), "Try to free invalid small object");
            FreeObject *toFree = findObjectToFree(object);
#if !__TBB_USE_THREAD_SANITIZER
            MALLOC_ASSERT(allocatedCount <= (slabSize-sizeof(Block))/objectSize
                          && (!bumpPtr || object>bumpPtr), msg);
            // check against head of freeList, as this is mostly
            // expected after double free
            MALLOC_ASSERT(toFree != freeList, msg);
#endif
            // check against head of publicFreeList, to detect double free
            // involving a foreign thread
            MALLOC_ASSERT(toFree != publicFreeList.load(std::memory_order_relaxed), msg);
        }
#else
        suppress_unused_warning(object);
#endif
    }
    void initEmptyBlock(TLSData *tls, size_t size);
    size_t findObjectSize(void *object) const;
    MemoryPool *getMemPool() const { return poolPtr; } // do not use on the hot path!

protected:
    void cleanBlockHeader();

private:
    static const float emptyEnoughRatio; /* Threshold on free space needed to "reactivate" a block */

    inline FreeObject *allocateFromBumpPtr();
    inline FreeObject *findAllocatedObject(const void *address) const;
#if MALLOC_DEBUG
    inline bool isProperlyPlaced(const void *object) const;
#endif
    inline void markOwned(TLSData *tls) {
        MALLOC_ASSERT(!tlsPtr.load(std::memory_order_relaxed), ASSERT_TEXT);
        ownerTid = ThreadId(); /* save the ID of the current thread */
        tlsPtr.store(tls, std::memory_order_relaxed);
    }
    inline void markOrphaned() {
        MALLOC_ASSERT(tlsPtr.load(std::memory_order_relaxed), ASSERT_TEXT);
        tlsPtr.store(nullptr, std::memory_order_relaxed);
    }

    friend class Bin;
    friend class TLSData;
    friend bool MemoryPool::destroy();
};

const float Block::emptyEnoughRatio = 1.0 / 4.0;

static_assert(sizeof(Block) <= 2*estimatedCacheLineSize,
    "The class Block does not fit into 2 cache lines on this platform. "
    "Defining USE_INTERNAL_TID may help to fix it.");

class Bin {
private:
    Block      *activeBlk;
    std::atomic<Block*> mailbox;
    MallocMutex mailLock;

public:
    inline Block* getActiveBlock() const { return activeBlk; }
    void resetActiveBlock() { activeBlk = nullptr; }
    inline void setActiveBlock(Block *block);
    inline Block* setPreviousBlockActive();
    Block* getPrivatizedFreeListBlock();
    void moveBlockToFront(Block *block);
    bool cleanPublicFreeLists();
    void processEmptyBlock(Block *block, bool poolTheBlock);
    void addPublicFreeListBlock(Block* block);

    void outofTLSBin(Block* block);
    void verifyTLSBin(size_t size) const;
    void pushTLSBin(Block* block);

#if MALLOC_DEBUG
    void verifyInitState() const {
        MALLOC_ASSERT( !activeBlk, ASSERT_TEXT );
        MALLOC_ASSERT( !mailbox.load(std::memory_order_relaxed), ASSERT_TEXT );
    }
#endif

    friend void Block::freePublicObject (FreeObject *objectToFree);
};

/********* End of the data structures **************/

/*
 * There are bins for all 8 byte aligned objects less than this segregated size; 8 bins in total
 */
const uint32_t minSmallObjectIndex = 0;
const uint32_t numSmallObjectBins = 8;
const uint32_t maxSmallObjectSize = 64;

/*
 * There are 4 bins between each pair of powers of 2 [64-128-256-...]
 * from maxSmallObjectSize till this size; 16 bins in total
 */
const uint32_t minSegregatedObjectIndex = minSmallObjectIndex+numSmallObjectBins;
const uint32_t numSegregatedObjectBins = 16;
const uint32_t maxSegregatedObjectSize = 1024;

/*
 * And there are 5 bins with allocation sizes that are multiples of estimatedCacheLineSize
 * and selected to fit 9, 6, 4, 3, and 2 allocations in a block.
 */
const uint32_t minFittingIndex = minSegregatedObjectIndex+numSegregatedObjectBins;
const uint32_t numFittingBins = 5;

const uint32_t fittingAlignment = estimatedCacheLineSize;

#define SET_FITTING_SIZE(N) ( (slabSize-sizeof(Block))/N ) & ~(fittingAlignment-1)
// For blockSize=16*1024, sizeof(Block)=2*estimatedCacheLineSize and fittingAlignment=estimatedCacheLineSize,
// the comments show the fitting sizes and the amounts left unused for estimatedCacheLineSize=64/128:
const uint32_t fittingSize1 = SET_FITTING_SIZE(9); // 1792/1792 128/000
const uint32_t fittingSize2 = SET_FITTING_SIZE(6); // 2688/2688 128/000
const uint32_t fittingSize3 = SET_FITTING_SIZE(4); // 4032/3968 128/256
const uint32_t fittingSize4 = SET_FITTING_SIZE(3); // 5376/5376 128/000
const uint32_t fittingSize5 = SET_FITTING_SIZE(2); // 8128/8064 000/000
#undef SET_FITTING_SIZE

/*
 * The total number of thread-specific Block-based bins
 */
const uint32_t numBlockBins = minFittingIndex+numFittingBins;

/*
 * Objects of this size and larger are considered large objects.
 */
const uint32_t minLargeObjectSize = fittingSize5 + 1;

/*
 * Per-thread pool of slab blocks. The idea behind it is not to share with other
 * threads memory that is likely in the local cache(s) of our CPU.
 */
class FreeBlockPool {
private:
    std::atomic<Block*> head;
    int          size;
    Backend     *backend;
public:
    static const int POOL_HIGH_MARK = 32;
    static const int POOL_LOW_MARK  = 8;

    class ResOfGet {
        ResOfGet() = delete;
    public:
        Block* block;
        bool   lastAccMiss;
        ResOfGet(Block *b, bool lastMiss) : block(b), lastAccMiss(lastMiss) {}
    };

    // allocated in zero-initialized memory
    FreeBlockPool(Backend *bknd) : backend(bknd) {}
    ResOfGet getBlock();
    void returnBlock(Block *block);
    bool externalCleanup(); // can be called by another thread
};

template<int LOW_MARK, int HIGH_MARK>
class LocalLOCImpl {
private:
    static const size_t MAX_TOTAL_SIZE = 4*1024*1024;
    // TODO: can a singly-linked list be faster here?
    LargeMemoryBlock *tail; // needed when releasing on overflow
    std::atomic<LargeMemoryBlock*> head;
    size_t            totalSize;
    int               numOfBlocks;
public:
    bool put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool);
    LargeMemoryBlock *get(size_t size);
    bool externalCleanup(ExtMemoryPool *extMemPool);
#if __TBB_MALLOC_WHITEBOX_TEST
    LocalLOCImpl() : tail(nullptr), head(nullptr), totalSize(0), numOfBlocks(0) {}
    static size_t getMaxSize() { return MAX_TOTAL_SIZE; }
    static const int LOC_HIGH_MARK = HIGH_MARK;
#else
    // no ctor, object must be created in zero-initialized memory
#endif
};

typedef LocalLOCImpl<8,32> LocalLOC; // set production code parameters

class TLSData : public TLSRemote {
    MemoryPool   *memPool;
public:
    Bin           bin[numBlockBinLimit];
    FreeBlockPool freeSlabBlocks;
    LocalLOC      lloc;
    unsigned      currCacheIdx;
private:
    std::atomic<bool> unused;
public:
    TLSData(MemoryPool *mPool, Backend *bknd) : memPool(mPool), freeSlabBlocks(bknd), currCacheIdx(0) {}
    MemoryPool *getMemPool() const { return memPool; }
    Bin* getAllocationBin(size_t size);
    void release();
    bool externalCleanup(bool cleanOnlyUnused, bool cleanBins) {
        if (!unused.load(std::memory_order_relaxed) && cleanOnlyUnused) return false;
        // Heavy operation in terms of synchronization complexity,
        // should be called only for the current thread
        bool released = cleanBins ?
cleanupBlockBins() : false; 607 // both cleanups to be called, and the order is not important 608 bool lloc_cleaned = lloc.externalCleanup(&memPool->extMemPool); 609 bool free_slab_blocks_cleaned = freeSlabBlocks.externalCleanup(); 610 return released || lloc_cleaned || free_slab_blocks_cleaned; 611 } 612 bool cleanupBlockBins(); 613 void markUsed() { unused.store(false, std::memory_order_relaxed); } // called by owner when TLS touched 614 void markUnused() { unused.store(true, std::memory_order_relaxed); } // can be called by not owner thread 615 }; 616 617 TLSData *TLSKey::createTLS(MemoryPool *memPool, Backend *backend) 618 { 619 MALLOC_ASSERT( sizeof(TLSData) >= sizeof(Bin) * numBlockBins + sizeof(FreeBlockPool), ASSERT_TEXT ); 620 TLSData* tls = (TLSData*) memPool->bootStrapBlocks.allocate(memPool, sizeof(TLSData)); 621 if ( !tls ) 622 return nullptr; 623 new(tls) TLSData(memPool, backend); 624 /* the block contains zeroes after bootStrapMalloc, so bins are initialized */ 625 #if MALLOC_DEBUG 626 for (uint32_t i = 0; i < numBlockBinLimit; i++) 627 tls->bin[i].verifyInitState(); 628 #endif 629 setThreadMallocTLS(tls); 630 memPool->extMemPool.allLocalCaches.registerThread(tls); 631 return tls; 632 } 633 634 bool TLSData::cleanupBlockBins() 635 { 636 bool released = false; 637 for (uint32_t i = 0; i < numBlockBinLimit; i++) { 638 released |= bin[i].cleanPublicFreeLists(); 639 // After cleaning public free lists, only the active block might be empty. 640 // Do not use processEmptyBlock because it will just restore bumpPtr. 641 Block *block = bin[i].getActiveBlock(); 642 if (block && block->empty()) { 643 bin[i].outofTLSBin(block); 644 memPool->returnEmptyBlock(block, /*poolTheBlock=*/false); 645 released = true; 646 } 647 } 648 return released; 649 } 650 651 bool ExtMemoryPool::releaseAllLocalCaches() 652 { 653 // Iterate all registered TLS data and clean LLOC and Slab pools 654 bool released = allLocalCaches.cleanup(/*cleanOnlyUnused=*/false); 655 656 // Bins privatization is done only for the current thread 657 if (TLSData *tlsData = tlsPointerKey.getThreadMallocTLS()) 658 released |= tlsData->cleanupBlockBins(); 659 660 return released; 661 } 662 663 void AllLocalCaches::registerThread(TLSRemote *tls) 664 { 665 tls->prev = nullptr; 666 MallocMutex::scoped_lock lock(listLock); 667 MALLOC_ASSERT(head!=tls, ASSERT_TEXT); 668 tls->next = head; 669 if (head) 670 head->prev = tls; 671 head = tls; 672 MALLOC_ASSERT(head->next!=head, ASSERT_TEXT); 673 } 674 675 void AllLocalCaches::unregisterThread(TLSRemote *tls) 676 { 677 MallocMutex::scoped_lock lock(listLock); 678 MALLOC_ASSERT(head, "Can't unregister thread: no threads are registered."); 679 if (head == tls) 680 head = tls->next; 681 if (tls->next) 682 tls->next->prev = tls->prev; 683 if (tls->prev) 684 tls->prev->next = tls->next; 685 MALLOC_ASSERT(!tls->next || tls->next->next!=tls->next, ASSERT_TEXT); 686 } 687 688 bool AllLocalCaches::cleanup(bool cleanOnlyUnused) 689 { 690 bool released = false; 691 { 692 MallocMutex::scoped_lock lock(listLock); 693 for (TLSRemote *curr=head; curr; curr=curr->next) 694 released |= static_cast<TLSData*>(curr)->externalCleanup(cleanOnlyUnused, /*cleanBins=*/false); 695 } 696 return released; 697 } 698 699 void AllLocalCaches::markUnused() 700 { 701 bool locked = false; 702 MallocMutex::scoped_lock lock(listLock, /*block=*/false, &locked); 703 if (!locked) // not wait for marking if someone doing something with it 704 return; 705 706 for (TLSRemote *curr=head; curr; curr=curr->next) 707 
        static_cast<TLSData*>(curr)->markUnused();
}

#if MALLOC_CHECK_RECURSION
MallocMutex RecursiveMallocCallProtector::rmc_mutex;
std::atomic<pthread_t> RecursiveMallocCallProtector::owner_thread;
std::atomic<void*> RecursiveMallocCallProtector::autoObjPtr;
bool        RecursiveMallocCallProtector::mallocRecursionDetected;
#if __FreeBSD__
bool        RecursiveMallocCallProtector::canUsePthread;
#endif

#endif

/*********** End code to provide thread ID and a TLS pointer **********/

// Parameter for isLargeObject, keeps our expectations about the memory origin.
// Assertions must use unknownMem to reliably report object invalidity.
enum MemoryOrigin {
    ourMem,    // allocated by TBB allocator
    unknownMem // can be allocated by system allocator or TBB allocator
};

template<MemoryOrigin>
#if __TBB_USE_THREAD_SANITIZER
// We have a real race when accessing the large object header for
// non large objects (e.g. small or foreign objects).
// Therefore, we need to hide this access from the thread sanitizer
__attribute__((no_sanitize("thread")))
#endif
bool isLargeObject(void *object);
static void *internalMalloc(size_t size);
static void internalFree(void *object);
static void *internalPoolMalloc(MemoryPool* mPool, size_t size);
static bool internalPoolFree(MemoryPool *mPool, void *object, size_t size);

#if !MALLOC_DEBUG
#if __INTEL_COMPILER || _MSC_VER
#define NOINLINE(decl) __declspec(noinline) decl
#define ALWAYSINLINE(decl) __forceinline decl
#elif __GNUC__
#define NOINLINE(decl) decl __attribute__ ((noinline))
#define ALWAYSINLINE(decl) decl __attribute__ ((always_inline))
#else
#define NOINLINE(decl) decl
#define ALWAYSINLINE(decl) decl
#endif

static NOINLINE( bool doInitialization() );
ALWAYSINLINE( bool isMallocInitialized() );

#undef ALWAYSINLINE
#undef NOINLINE
#endif /* !MALLOC_DEBUG */


/********* Now some rough utility code to deal with indexing the size bins. **************/

/*
 * Given a number, return the position of its highest set bit. It is intended to work with 32-bit values only.
 * Moreover, on some platforms, for the sake of simplicity and performance, it is narrowed to only serve for 64 to 1023.
 * This is enough for the current algorithm of distribution of sizes among bins.
 * __TBB_Log2 is not used here to minimize dependencies on TBB specific sources.
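 *
 * Worked example: highestBitPos(64) == 6 and highestBitPos(1023) == 9, since 2^6 == 64
 * and 2^9 == 512 <= 1023 < 1024. getIndexOrObjectSize below calls it as
 * highestBitPos(size-1) for sizes in (64, 1024], which is exactly the 64..1023 range.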
770 */ 771 #if _WIN64 && _MSC_VER>=1400 && !__INTEL_COMPILER 772 extern "C" unsigned char _BitScanReverse( unsigned long* i, unsigned long w ); 773 #pragma intrinsic(_BitScanReverse) 774 #endif 775 static inline unsigned int highestBitPos(unsigned int n) 776 { 777 MALLOC_ASSERT( n>=64 && n<1024, ASSERT_TEXT ); // only needed for bsr array lookup, but always true 778 unsigned int pos; 779 #if __ARCH_x86_32||__ARCH_x86_64 780 781 # if __unix__||__APPLE__||__MINGW32__ 782 __asm__ ("bsr %1,%0" : "=r"(pos) : "r"(n)); 783 # elif (_WIN32 && (!_WIN64 || __INTEL_COMPILER)) 784 __asm 785 { 786 bsr eax, n 787 mov pos, eax 788 } 789 # elif _WIN64 && _MSC_VER>=1400 790 _BitScanReverse((unsigned long*)&pos, (unsigned long)n); 791 # else 792 # error highestBitPos() not implemented for this platform 793 # endif 794 #elif __arm__ 795 __asm__ __volatile__ 796 ( 797 "clz %0, %1\n" 798 "rsb %0, %0, %2\n" 799 :"=r" (pos) :"r" (n), "I" (31) 800 ); 801 #else 802 static unsigned int bsr[16] = {0/*N/A*/,6,7,7,8,8,8,8,9,9,9,9,9,9,9,9}; 803 pos = bsr[ n>>6 ]; 804 #endif /* __ARCH_* */ 805 return pos; 806 } 807 808 unsigned int getSmallObjectIndex(unsigned int size) 809 { 810 unsigned int result = (size-1)>>3; 811 constexpr bool is_64bit = (8 == sizeof(void*)); 812 if (is_64bit) { 813 // For 64-bit malloc, 16 byte alignment is needed except for bin 0. 814 if (result) result |= 1; // 0,1,3,5,7; bins 2,4,6 are not aligned to 16 bytes 815 } 816 return result; 817 } 818 819 /* 820 * Depending on indexRequest, for a given size return either the index into the bin 821 * for objects of this size, or the actual size of objects in this bin. 822 */ 823 template<bool indexRequest> 824 static unsigned int getIndexOrObjectSize (unsigned int size) 825 { 826 if (size <= maxSmallObjectSize) { // selection from 8/16/24/32/40/48/56/64 827 unsigned int index = getSmallObjectIndex( size ); 828 /* Bin 0 is for 8 bytes, bin 1 is for 16, and so forth */ 829 return indexRequest ? index : (index+1)<<3; 830 } 831 else if (size <= maxSegregatedObjectSize ) { // 80/96/112/128 / 160/192/224/256 / 320/384/448/512 / 640/768/896/1024 832 unsigned int order = highestBitPos(size-1); // which group of bin sizes? 833 MALLOC_ASSERT( 6<=order && order<=9, ASSERT_TEXT ); 834 if (indexRequest) 835 return minSegregatedObjectIndex - (4*6) - 4 + (4*order) + ((size-1)>>(order-2)); 836 else { 837 unsigned int alignment = 128 >> (9-order); // alignment in the group 838 MALLOC_ASSERT( alignment==16 || alignment==32 || alignment==64 || alignment==128, ASSERT_TEXT ); 839 return alignUp(size,alignment); 840 } 841 } 842 else { 843 if( size <= fittingSize3 ) { 844 if( size <= fittingSize2 ) { 845 if( size <= fittingSize1 ) 846 return indexRequest ? minFittingIndex : fittingSize1; 847 else 848 return indexRequest ? minFittingIndex+1 : fittingSize2; 849 } else 850 return indexRequest ? minFittingIndex+2 : fittingSize3; 851 } else { 852 if( size <= fittingSize5 ) { 853 if( size <= fittingSize4 ) 854 return indexRequest ? minFittingIndex+3 : fittingSize4; 855 else 856 return indexRequest ? 
minFittingIndex+4 : fittingSize5; 857 } else { 858 MALLOC_ASSERT( 0,ASSERT_TEXT ); // this should not happen 859 return ~0U; 860 } 861 } 862 } 863 } 864 865 static unsigned int getIndex (unsigned int size) 866 { 867 return getIndexOrObjectSize</*indexRequest=*/true>(size); 868 } 869 870 static unsigned int getObjectSize (unsigned int size) 871 { 872 return getIndexOrObjectSize</*indexRequest=*/false>(size); 873 } 874 875 876 void *BootStrapBlocks::allocate(MemoryPool *memPool, size_t size) 877 { 878 FreeObject *result; 879 880 MALLOC_ASSERT( size == sizeof(TLSData), ASSERT_TEXT ); 881 882 { // Lock with acquire 883 MallocMutex::scoped_lock scoped_cs(bootStrapLock); 884 885 if( bootStrapObjectList) { 886 result = bootStrapObjectList; 887 bootStrapObjectList = bootStrapObjectList->next; 888 } else { 889 if (!bootStrapBlock) { 890 bootStrapBlock = memPool->getEmptyBlock(size); 891 if (!bootStrapBlock) return nullptr; 892 } 893 result = bootStrapBlock->bumpPtr; 894 bootStrapBlock->bumpPtr = (FreeObject *)((uintptr_t)bootStrapBlock->bumpPtr - bootStrapBlock->objectSize); 895 if ((uintptr_t)bootStrapBlock->bumpPtr < (uintptr_t)bootStrapBlock+sizeof(Block)) { 896 bootStrapBlock->bumpPtr = nullptr; 897 bootStrapBlock->next = bootStrapBlockUsed; 898 bootStrapBlockUsed = bootStrapBlock; 899 bootStrapBlock = nullptr; 900 } 901 } 902 } // Unlock with release 903 memset (result, 0, size); 904 return (void*)result; 905 } 906 907 void BootStrapBlocks::free(void* ptr) 908 { 909 MALLOC_ASSERT( ptr, ASSERT_TEXT ); 910 { // Lock with acquire 911 MallocMutex::scoped_lock scoped_cs(bootStrapLock); 912 ((FreeObject*)ptr)->next = bootStrapObjectList; 913 bootStrapObjectList = (FreeObject*)ptr; 914 } // Unlock with release 915 } 916 917 void BootStrapBlocks::reset() 918 { 919 bootStrapBlock = bootStrapBlockUsed = nullptr; 920 bootStrapObjectList = nullptr; 921 } 922 923 #if !(FREELIST_NONBLOCKING) 924 static MallocMutex publicFreeListLock; // lock for changes of publicFreeList 925 #endif 926 927 /********* End rough utility code **************/ 928 929 /* LifoList assumes zero initialization so a vector of it can be created 930 * by just allocating some space with no call to constructor. 931 * On Linux, it seems to be necessary to avoid linking with C++ libraries. 932 * 933 * By usage convention there is no race on the initialization. 
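 *
 * Illustrative sketch of the convention (not code from this file; pool_calloc is a
 * hypothetical zeroed allocation):
 *
 *   void *raw = pool_calloc(n, sizeof(LifoList));    // all-zero storage
 *   LifoList *lists = static_cast<LifoList*>(raw);   // usable as n empty lists, no ctors run
 *
 * OrphanedBlocks::reset() below re-establishes the same empty state by re-running
 * placement new over its bins.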
*/ 934 LifoList::LifoList( ) : top(nullptr) 935 { 936 // MallocMutex assumes zero initialization 937 memset(static_cast<void*>(&lock), 0, sizeof(MallocMutex)); 938 } 939 940 void LifoList::push(Block *block) 941 { 942 MallocMutex::scoped_lock scoped_cs(lock); 943 block->next = top.load(std::memory_order_relaxed); 944 top.store(block, std::memory_order_relaxed); 945 } 946 947 Block *LifoList::pop() 948 { 949 Block* block = nullptr; 950 if (top.load(std::memory_order_relaxed)) { 951 MallocMutex::scoped_lock scoped_cs(lock); 952 block = top.load(std::memory_order_relaxed); 953 if (block) { 954 top.store(block->next, std::memory_order_relaxed); 955 } 956 } 957 return block; 958 } 959 960 Block *LifoList::grab() 961 { 962 Block *block = nullptr; 963 if (top.load(std::memory_order_relaxed)) { 964 MallocMutex::scoped_lock scoped_cs(lock); 965 block = top.load(std::memory_order_relaxed); 966 top.store(nullptr, std::memory_order_relaxed); 967 } 968 return block; 969 } 970 971 /********* Thread and block related code *************/ 972 973 template<bool poolDestroy> void AllLargeBlocksList::releaseAll(Backend *backend) { 974 LargeMemoryBlock *next, *lmb = loHead; 975 loHead = nullptr; 976 977 for (; lmb; lmb = next) { 978 next = lmb->gNext; 979 if (poolDestroy) { 980 // as it's pool destruction, no need to return object to backend, 981 // only remove backrefs, as they are global 982 removeBackRef(lmb->backRefIdx); 983 } else { 984 // clean g(Next|Prev) to prevent removing lmb 985 // from AllLargeBlocksList inside returnLargeObject 986 lmb->gNext = lmb->gPrev = nullptr; 987 backend->returnLargeObject(lmb); 988 } 989 } 990 } 991 992 TLSData* MemoryPool::getTLS(bool create) 993 { 994 TLSData* tls = extMemPool.tlsPointerKey.getThreadMallocTLS(); 995 if (create && !tls) 996 tls = extMemPool.tlsPointerKey.createTLS(this, &extMemPool.backend); 997 return tls; 998 } 999 1000 /* 1001 * Return the bin for the given size. 1002 */ 1003 inline Bin* TLSData::getAllocationBin(size_t size) 1004 { 1005 return bin + getIndex(size); 1006 } 1007 1008 /* Return an empty uninitialized block in a non-blocking fashion. */ 1009 Block *MemoryPool::getEmptyBlock(size_t size) 1010 { 1011 TLSData* tls = getTLS(/*create=*/false); 1012 // try to use per-thread cache, if TLS available 1013 FreeBlockPool::ResOfGet resOfGet = tls? 1014 tls->freeSlabBlocks.getBlock() : FreeBlockPool::ResOfGet(nullptr, false); 1015 Block *result = resOfGet.block; 1016 1017 if (!result) { // not found in local cache, asks backend for slabs 1018 int num = resOfGet.lastAccMiss? 
Backend::numOfSlabAllocOnMiss : 1; 1019 BackRefIdx backRefIdx[Backend::numOfSlabAllocOnMiss]; 1020 1021 result = static_cast<Block*>(extMemPool.backend.getSlabBlock(num)); 1022 if (!result) return nullptr; 1023 1024 if (!extMemPool.userPool()) 1025 for (int i=0; i<num; i++) { 1026 backRefIdx[i] = BackRefIdx::newBackRef(/*largeObj=*/false); 1027 if (backRefIdx[i].isInvalid()) { 1028 // roll back resource allocation 1029 for (int j=0; j<i; j++) 1030 removeBackRef(backRefIdx[j]); 1031 Block *b = result; 1032 for (int j=0; j<num; b=(Block*)((uintptr_t)b+slabSize), j++) 1033 extMemPool.backend.putSlabBlock(b); 1034 return nullptr; 1035 } 1036 } 1037 // resources were allocated, register blocks 1038 Block *b = result; 1039 for (int i=0; i<num; b=(Block*)((uintptr_t)b+slabSize), i++) { 1040 // slab block in user's pool must have invalid backRefIdx 1041 if (extMemPool.userPool()) { 1042 new (&b->backRefIdx) BackRefIdx(); 1043 } else { 1044 setBackRef(backRefIdx[i], b); 1045 b->backRefIdx = backRefIdx[i]; 1046 } 1047 b->tlsPtr.store(tls, std::memory_order_relaxed); 1048 b->poolPtr = this; 1049 // all but first one go to per-thread pool 1050 if (i > 0) { 1051 MALLOC_ASSERT(tls, ASSERT_TEXT); 1052 tls->freeSlabBlocks.returnBlock(b); 1053 } 1054 } 1055 } 1056 MALLOC_ASSERT(result, ASSERT_TEXT); 1057 result->initEmptyBlock(tls, size); 1058 STAT_increment(getThreadId(), getIndex(result->objectSize), allocBlockNew); 1059 return result; 1060 } 1061 1062 void MemoryPool::returnEmptyBlock(Block *block, bool poolTheBlock) 1063 { 1064 block->reset(); 1065 if (poolTheBlock) { 1066 getTLS(/*create=*/false)->freeSlabBlocks.returnBlock(block); 1067 } else { 1068 // slab blocks in user's pools do not have valid backRefIdx 1069 if (!extMemPool.userPool()) 1070 removeBackRef(*(block->getBackRefIdx())); 1071 extMemPool.backend.putSlabBlock(block); 1072 } 1073 } 1074 1075 bool ExtMemoryPool::init(intptr_t poolId, rawAllocType rawAlloc, 1076 rawFreeType rawFree, size_t granularity, 1077 bool keepAllMemory, bool fixedPool) 1078 { 1079 this->poolId = poolId; 1080 this->rawAlloc = rawAlloc; 1081 this->rawFree = rawFree; 1082 this->granularity = granularity; 1083 this->keepAllMemory = keepAllMemory; 1084 this->fixedPool = fixedPool; 1085 this->delayRegsReleasing = false; 1086 if (!initTLS()) 1087 return false; 1088 loc.init(this); 1089 backend.init(this); 1090 MALLOC_ASSERT(isPoolValid(), nullptr); 1091 return true; 1092 } 1093 1094 bool ExtMemoryPool::initTLS() { return tlsPointerKey.init(); } 1095 1096 bool MemoryPool::init(intptr_t poolId, const MemPoolPolicy *policy) 1097 { 1098 if (!extMemPool.init(poolId, policy->pAlloc, policy->pFree, 1099 policy->granularity? 
                         policy->granularity : defaultGranularity,
                         policy->keepAllMemory, policy->fixedPool))
        return false;
    {
        MallocMutex::scoped_lock lock(memPoolListLock);
        next = defaultMemPool->next;
        defaultMemPool->next = this;
        prev = defaultMemPool;
        if (next)
            next->prev = this;
    }
    return true;
}

bool MemoryPool::reset()
{
    MALLOC_ASSERT(extMemPool.userPool(), "No reset for the system pool.");
    // memory is not released during pool reset
    // TODO: mark unused regions to release them on the next reset()
    extMemPool.delayRegionsReleasing(true);

    bootStrapBlocks.reset();
    extMemPool.lmbList.releaseAll</*poolDestroy=*/false>(&extMemPool.backend);
    if (!extMemPool.reset())
        return false;

    if (!extMemPool.initTLS())
        return false;
    extMemPool.delayRegionsReleasing(false);
    return true;
}

bool MemoryPool::destroy()
{
#if __TBB_MALLOC_LOCACHE_STAT
    extMemPool.loc.reportStat(stdout);
#endif
#if __TBB_MALLOC_BACKEND_STAT
    extMemPool.backend.reportStat(stdout);
#endif
    {
        MallocMutex::scoped_lock lock(memPoolListLock);
        // remove itself from the global pool list
        if (prev)
            prev->next = next;
        if (next)
            next->prev = prev;
    }
    // slab blocks in a non-default pool do not have backreferences,
    // only large objects do
    if (extMemPool.userPool())
        extMemPool.lmbList.releaseAll</*poolDestroy=*/true>(&extMemPool.backend);
    else {
        // only one non-userPool() is supported now
        MALLOC_ASSERT(this==defaultMemPool, nullptr);
        // Here and below in extMemPool.destroy(), the initial state is not restored for
        // a user pool, because it is just about to be released. The system pool, however,
        // is restored, so that a subsequent reload does not depend on zeroing it again.
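        // (bootStrapBlocks.reset() below drops the bootstrap free lists, and
        // orphanedBlocks.reset() re-creates the per-bin LIFO lists, so a reloaded
        // library starts from a clean default-pool state.)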
1157 bootStrapBlocks.reset(); 1158 extMemPool.orphanedBlocks.reset(); 1159 } 1160 return extMemPool.destroy(); 1161 } 1162 1163 void MemoryPool::onThreadShutdown(TLSData *tlsData) 1164 { 1165 if (tlsData) { // might be called for "empty" TLS 1166 tlsData->release(); 1167 bootStrapBlocks.free(tlsData); 1168 clearTLS(); 1169 } 1170 } 1171 1172 #if MALLOC_DEBUG 1173 void Bin::verifyTLSBin (size_t size) const 1174 { 1175 /* The debug version verifies the TLSBin as needed */ 1176 uint32_t objSize = getObjectSize(size); 1177 1178 if (activeBlk) { 1179 MALLOC_ASSERT( activeBlk->isOwnedByCurrentThread(), ASSERT_TEXT ); 1180 MALLOC_ASSERT( activeBlk->objectSize == objSize, ASSERT_TEXT ); 1181 #if MALLOC_DEBUG>1 1182 for (Block* temp = activeBlk->next; temp; temp=temp->next) { 1183 MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT ); 1184 MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT ); 1185 MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT ); 1186 MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT ); 1187 if (temp->next) { 1188 MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT ); 1189 } 1190 } 1191 for (Block* temp = activeBlk->previous; temp; temp=temp->previous) { 1192 MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT ); 1193 MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT ); 1194 MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT ); 1195 MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT ); 1196 if (temp->previous) { 1197 MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT ); 1198 } 1199 } 1200 #endif /* MALLOC_DEBUG>1 */ 1201 } 1202 } 1203 #else /* MALLOC_DEBUG */ 1204 inline void Bin::verifyTLSBin (size_t) const { } 1205 #endif /* MALLOC_DEBUG */ 1206 1207 /* 1208 * Add a block to the start of this tls bin list. 1209 */ 1210 void Bin::pushTLSBin(Block* block) 1211 { 1212 /* The objectSize should be defined and not a parameter 1213 because the function is applied to partially filled blocks as well */ 1214 unsigned int size = block->objectSize; 1215 1216 MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT ); 1217 MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT ); 1218 MALLOC_ASSERT( block->next == nullptr, ASSERT_TEXT ); 1219 MALLOC_ASSERT( block->previous == nullptr, ASSERT_TEXT ); 1220 1221 MALLOC_ASSERT( this, ASSERT_TEXT ); 1222 verifyTLSBin(size); 1223 1224 block->next = activeBlk; 1225 if( activeBlk ) { 1226 block->previous = activeBlk->previous; 1227 activeBlk->previous = block; 1228 if( block->previous ) 1229 block->previous->next = block; 1230 } else { 1231 activeBlk = block; 1232 } 1233 1234 verifyTLSBin(size); 1235 } 1236 1237 /* 1238 * Take a block out of its tls bin (e.g. before removal). 1239 */ 1240 void Bin::outofTLSBin(Block* block) 1241 { 1242 unsigned int size = block->objectSize; 1243 1244 MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT ); 1245 MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT ); 1246 1247 MALLOC_ASSERT( this, ASSERT_TEXT ); 1248 verifyTLSBin(size); 1249 1250 if (block == activeBlk) { 1251 activeBlk = block->previous? 
block->previous : block->next; 1252 } 1253 /* Unlink the block */ 1254 if (block->previous) { 1255 MALLOC_ASSERT( block->previous->next == block, ASSERT_TEXT ); 1256 block->previous->next = block->next; 1257 } 1258 if (block->next) { 1259 MALLOC_ASSERT( block->next->previous == block, ASSERT_TEXT ); 1260 block->next->previous = block->previous; 1261 } 1262 block->next = nullptr; 1263 block->previous = nullptr; 1264 1265 verifyTLSBin(size); 1266 } 1267 1268 Block* Bin::getPrivatizedFreeListBlock() 1269 { 1270 Block* block; 1271 MALLOC_ASSERT( this, ASSERT_TEXT ); 1272 // if this method is called, active block usage must be unsuccessful 1273 MALLOC_ASSERT( (!activeBlk && !mailbox.load(std::memory_order_relaxed)) || (activeBlk && activeBlk->isFull), ASSERT_TEXT ); 1274 1275 // the counter should be changed STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList); 1276 if (!mailbox.load(std::memory_order_acquire)) // hotpath is empty mailbox 1277 return nullptr; 1278 else { // mailbox is not empty, take lock and inspect it 1279 MallocMutex::scoped_lock scoped_cs(mailLock); 1280 block = mailbox.load(std::memory_order_relaxed); 1281 if( block ) { 1282 MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT ); 1283 MALLOC_ASSERT( !isNotForUse(block->nextPrivatizable.load(std::memory_order_relaxed)), ASSERT_TEXT ); 1284 mailbox.store(block->nextPrivatizable.load(std::memory_order_relaxed), std::memory_order_relaxed); 1285 block->nextPrivatizable.store((Block*)this, std::memory_order_relaxed); 1286 } 1287 } 1288 if( block ) { 1289 MALLOC_ASSERT( isSolidPtr(block->publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT ); 1290 block->privatizePublicFreeList(); 1291 block->adjustPositionInBin(this); 1292 } 1293 return block; 1294 } 1295 1296 void Bin::addPublicFreeListBlock(Block* block) 1297 { 1298 MallocMutex::scoped_lock scoped_cs(mailLock); 1299 block->nextPrivatizable.store(mailbox.load(std::memory_order_relaxed), std::memory_order_relaxed); 1300 mailbox.store(block, std::memory_order_relaxed); 1301 } 1302 1303 // Process publicly freed objects in all blocks and return empty blocks 1304 // to the backend in order to reduce overall footprint. 1305 bool Bin::cleanPublicFreeLists() 1306 { 1307 Block* block; 1308 if (!mailbox.load(std::memory_order_acquire)) 1309 return false; 1310 else { 1311 // Grab all the blocks in the mailbox 1312 MallocMutex::scoped_lock scoped_cs(mailLock); 1313 block = mailbox.load(std::memory_order_relaxed); 1314 mailbox.store(nullptr, std::memory_order_relaxed); 1315 } 1316 bool released = false; 1317 while (block) { 1318 MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT ); 1319 Block* tmp = block->nextPrivatizable.load(std::memory_order_relaxed); 1320 block->nextPrivatizable.store((Block*)this, std::memory_order_relaxed); 1321 block->privatizePublicFreeList(); 1322 if (block->empty()) { 1323 processEmptyBlock(block, /*poolTheBlock=*/false); 1324 released = true; 1325 } else 1326 block->adjustPositionInBin(this); 1327 block = tmp; 1328 } 1329 return released; 1330 } 1331 1332 bool Block::adjustFullness() 1333 { 1334 if (bumpPtr) { 1335 /* If we are still using a bump ptr for this block it is empty enough to use. 
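           The heuristic: while the bump pointer is live, part of the block has never been
           carved into objects yet, so the block is treated as usable without computing the
           exact free ratio.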
         */
        STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
        isFull = false;
    } else {
        const float threshold = (slabSize - sizeof(Block)) * (1 - emptyEnoughRatio);
        /* allocatedCount shows how many objects in the block are in use; however it still counts
         * objects freed by other threads; so a prior call to privatizePublicFreeList() is recommended */
        isFull = (allocatedCount*objectSize > threshold) ? true : false;
#if COLLECT_STATISTICS
        if (isFull)
            STAT_increment(getThreadId(), getIndex(objectSize), examineNotEmpty);
        else
            STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
#endif
    }
    return isFull;
}

// This method resides in class Block, and not in class Bin, in order to avoid
// calling getAllocationBin on a reasonably hot path in Block::freeOwnObject
void Block::adjustPositionInBin(Bin* bin/*=nullptr*/)
{
    // If the block was full, but became empty enough to use,
    // move it to the front of the list
    if (isFull && !adjustFullness()) {
        if (!bin)
            bin = tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize);
        bin->moveBlockToFront(this);
    }
}

/* Restore the bump pointer for an empty block that is planned to be used */
void Block::restoreBumpPtr()
{
    MALLOC_ASSERT( allocatedCount == 0, ASSERT_TEXT );
    MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
    STAT_increment(getThreadId(), getIndex(objectSize), freeRestoreBumpPtr);
    bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);
    freeList = nullptr;
    isFull = false;
}

void Block::freeOwnObject(void *object)
{
    tlsPtr.load(std::memory_order_relaxed)->markUsed();
    allocatedCount--;
    MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
#if COLLECT_STATISTICS
    // Note that getAllocationBin is not called on the hottest path with statistics off.
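    // The branch below only classifies the free for statistics: it checks whether this
    // block is still its bin's active block and counts freeToActiveBlock or
    // freeToInactiveBlock accordingly.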
1384 if (tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize)->getActiveBlock() != this) 1385 STAT_increment(getThreadId(), getIndex(objectSize), freeToInactiveBlock); 1386 else 1387 STAT_increment(getThreadId(), getIndex(objectSize), freeToActiveBlock); 1388 #endif 1389 if (empty()) { 1390 // If the last object of a slab is freed, the slab cannot be marked full 1391 MALLOC_ASSERT(!isFull, ASSERT_TEXT); 1392 tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize)->processEmptyBlock(this, /*poolTheBlock=*/true); 1393 } else { // hot path 1394 FreeObject *objectToFree = findObjectToFree(object); 1395 objectToFree->next = freeList; 1396 freeList = objectToFree; 1397 adjustPositionInBin(); 1398 } 1399 } 1400 1401 void Block::freePublicObject (FreeObject *objectToFree) 1402 { 1403 FreeObject* localPublicFreeList{}; 1404 1405 MALLOC_ITT_SYNC_RELEASING(&publicFreeList); 1406 #if FREELIST_NONBLOCKING 1407 // TBB_REVAMP_TODO: make it non atomic in non-blocking scenario 1408 localPublicFreeList = publicFreeList.load(std::memory_order_relaxed); 1409 do { 1410 objectToFree->next = localPublicFreeList; 1411 // no backoff necessary because trying to make change, not waiting for a change 1412 } while( !publicFreeList.compare_exchange_strong(localPublicFreeList, objectToFree) ); 1413 #else 1414 STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList); 1415 { 1416 MallocMutex::scoped_lock scoped_cs(publicFreeListLock); 1417 localPublicFreeList = objectToFree->next = publicFreeList; 1418 publicFreeList = objectToFree; 1419 } 1420 #endif 1421 1422 if( localPublicFreeList==nullptr ) { 1423 // if the block is abandoned, its nextPrivatizable pointer should be UNUSABLE 1424 // otherwise, it should point to the bin the block belongs to. 1425 // reading nextPrivatizable is thread-safe below, because: 1426 // 1) the executing thread atomically got publicFreeList==nullptr and changed it to non-nullptr; 1427 // 2) only owning thread can change it back to nullptr, 1428 // 3) but it can not be done until the block is put to the mailbox 1429 // So the executing thread is now the only one that can change nextPrivatizable 1430 Block* next = nextPrivatizable.load(std::memory_order_acquire); 1431 if( !isNotForUse(next) ) { 1432 MALLOC_ASSERT( next!=nullptr, ASSERT_TEXT ); 1433 Bin* theBin = (Bin*) next; 1434 #if MALLOC_DEBUG && TBB_REVAMP_TODO 1435 // FIXME: The thread that returns the block is not the block's owner. 1436 // The below assertion compares 'theBin' against the caller's local bin, thus, it always fails. 1437 // Need to find a way to get the correct remote bin for comparison. 1438 { // check that nextPrivatizable points to the bin the block belongs to 1439 uint32_t index = getIndex( objectSize ); 1440 TLSData* tls = getThreadMallocTLS(); 1441 MALLOC_ASSERT( theBin==tls->bin+index, ASSERT_TEXT ); 1442 } 1443 #endif // MALLOC_DEBUG 1444 theBin->addPublicFreeListBlock(this); 1445 } 1446 } 1447 STAT_increment(getThreadId(), ThreadCommonCounters, freeToOtherThread); 1448 STAT_increment(ownerTid.load(std::memory_order_relaxed), getIndex(objectSize), freeByOtherThread); 1449 } 1450 1451 // Make objects freed by other threads available for use again 1452 void Block::privatizePublicFreeList( bool reset ) 1453 { 1454 FreeObject *localPublicFreeList; 1455 // If reset is false, publicFreeList should not be zeroed but set to UNUSABLE 1456 // to properly synchronize with other threads freeing objects to this slab. 1457 const intptr_t endMarker = reset ? 
0 : UNUSABLE; 1458 1459 // Only the owner thread may reset the pointer to nullptr 1460 MALLOC_ASSERT( isOwnedByCurrentThread() || !reset, ASSERT_TEXT ); 1461 #if FREELIST_NONBLOCKING 1462 localPublicFreeList = publicFreeList.exchange((FreeObject*)endMarker); 1463 #else 1464 STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList); 1465 { 1466 MallocMutex::scoped_lock scoped_cs(publicFreeListLock); 1467 localPublicFreeList = publicFreeList; 1468 publicFreeList = endMarker; 1469 } 1470 #endif 1471 MALLOC_ITT_SYNC_ACQUIRED(&publicFreeList); 1472 MALLOC_ASSERT( !(reset && isNotForUse(publicFreeList)), ASSERT_TEXT ); 1473 1474 // publicFreeList must have been UNUSABLE or valid, but not nullptr 1475 MALLOC_ASSERT( localPublicFreeList!=nullptr, ASSERT_TEXT ); 1476 if( isSolidPtr(localPublicFreeList) ) { 1477 MALLOC_ASSERT( allocatedCount <= (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 1478 /* other threads did not change the counter freeing our blocks */ 1479 allocatedCount--; 1480 FreeObject *temp = localPublicFreeList; 1481 while( isSolidPtr(temp->next) ){ // the list will end with either nullptr or UNUSABLE 1482 temp = temp->next; 1483 allocatedCount--; 1484 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 1485 } 1486 /* merge with local freeList */ 1487 temp->next = freeList; 1488 freeList = localPublicFreeList; 1489 STAT_increment(getThreadId(), getIndex(objectSize), allocPrivatized); 1490 } 1491 } 1492 1493 void Block::privatizeOrphaned(TLSData *tls, unsigned index) 1494 { 1495 Bin* bin = tls->bin + index; 1496 STAT_increment(getThreadId(), index, allocBlockPublic); 1497 next = nullptr; 1498 previous = nullptr; 1499 MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) != nullptr, ASSERT_TEXT ); 1500 /* There is not a race here since no other thread owns this block */ 1501 markOwned(tls); 1502 // It is safe to change nextPrivatizable, as publicFreeList is not null 1503 MALLOC_ASSERT( isNotForUse(nextPrivatizable.load(std::memory_order_relaxed)), ASSERT_TEXT ); 1504 nextPrivatizable.store((Block*)bin, std::memory_order_relaxed); 1505 // the next call is required to change publicFreeList to 0 1506 privatizePublicFreeList(); 1507 if( empty() ) { 1508 restoreBumpPtr(); 1509 } else { 1510 adjustFullness(); // check the block fullness and set isFull 1511 } 1512 MALLOC_ASSERT( !isNotForUse(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT ); 1513 } 1514 1515 1516 bool Block::readyToShare() 1517 { 1518 FreeObject* oldVal = nullptr; 1519 #if FREELIST_NONBLOCKING 1520 publicFreeList.compare_exchange_strong(oldVal, (FreeObject*)UNUSABLE); 1521 #else 1522 STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList); 1523 { 1524 MallocMutex::scoped_lock scoped_cs(publicFreeListLock); 1525 if ( (oldVal=publicFreeList)==nullptr ) 1526 (intptr_t&)(publicFreeList) = UNUSABLE; 1527 } 1528 #endif 1529 return oldVal==nullptr; 1530 } 1531 1532 void Block::shareOrphaned(intptr_t binTag, unsigned index) 1533 { 1534 MALLOC_ASSERT( binTag, ASSERT_TEXT ); 1535 // unreferenced formal parameter warning 1536 tbb::detail::suppress_unused_warning(index); 1537 STAT_increment(getThreadId(), index, freeBlockPublic); 1538 markOrphaned(); 1539 if ((intptr_t)nextPrivatizable.load(std::memory_order_relaxed) == binTag) { 1540 // First check passed: the block is not in mailbox yet. 1541 // Need to set publicFreeList to non-zero, so other threads 1542 // will not change nextPrivatizable and it can be zeroed. 
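        // readyToShare() tries to move publicFreeList from nullptr to UNUSABLE; if that
        // fails, a foreign thread has just pushed an object and may still be about to
        // update nextPrivatizable via addPublicFreeListBlock, so spin below until it is done.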
        if ( !readyToShare() ) {
            // another thread freed an object; we need to wait until it finishes.
            // There is no need for exponential backoff, as the wait here is not for a lock;
            // but need to yield, so the thread we wait for has a chance to run.
            // TODO: add a pause to also be friendly to hyperthreads
            int count = 256;
            while ((intptr_t)nextPrivatizable.load(std::memory_order_relaxed) == binTag) {
                if (--count==0) {
                    do_yield();
                    count = 256;
                }
            }
        }
    }
    MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) !=nullptr, ASSERT_TEXT );
    // now it is safe to change our data
    previous = nullptr;
    // it is the caller's responsibility to ensure that the list of blocks
    // formed by nextPrivatizable pointers is kept consistent if required.
    // if only called from thread shutdown code, it does not matter.
    nextPrivatizable.store((Block*)UNUSABLE, std::memory_order_relaxed);
}

void Block::cleanBlockHeader()
{
    next = nullptr;
    previous = nullptr;
    freeList = nullptr;
    allocatedCount = 0;
    isFull = false;
    tlsPtr.store(nullptr, std::memory_order_relaxed);

    publicFreeList.store(nullptr, std::memory_order_relaxed);
}

void Block::initEmptyBlock(TLSData *tls, size_t size)
{
    // Having getIndex and getObjectSize called next to each other
    // allows better compiler optimization as they basically share the code.
    unsigned int index = getIndex(size);
    unsigned int objSz = getObjectSize(size);

    cleanBlockHeader();
    objectSize = objSz;
    markOwned(tls);
    // the bump pointer should be prepared for the first allocation - thus move it down by objectSize
    bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);

    // each block should have the address where the head of the list of "privatizable" blocks is kept
    // the only exception is a bootstrap block, which is initialized while TLS is still nullptr
    nextPrivatizable.store( tls? (Block*)(tls->bin + index) : nullptr, std::memory_order_relaxed);
    TRACEF(( "[ScalableMalloc trace] Empty block %p is initialized, owner is %ld, objectSize is %d, bumpPtr is %p\n",
             this, tlsPtr.load(std::memory_order_relaxed) ?
getThreadId() : -1, objectSize, bumpPtr )); 1596 } 1597 1598 Block *OrphanedBlocks::get(TLSData *tls, unsigned int size) 1599 { 1600 // TODO: try to use index from getAllocationBin 1601 unsigned int index = getIndex(size); 1602 Block *block = bins[index].pop(); 1603 if (block) { 1604 MALLOC_ITT_SYNC_ACQUIRED(bins+index); 1605 block->privatizeOrphaned(tls, index); 1606 } 1607 return block; 1608 } 1609 1610 void OrphanedBlocks::put(intptr_t binTag, Block *block) 1611 { 1612 unsigned int index = getIndex(block->getSize()); 1613 block->shareOrphaned(binTag, index); 1614 MALLOC_ITT_SYNC_RELEASING(bins+index); 1615 bins[index].push(block); 1616 } 1617 1618 void OrphanedBlocks::reset() 1619 { 1620 for (uint32_t i=0; i<numBlockBinLimit; i++) 1621 new (bins+i) LifoList(); 1622 } 1623 1624 bool OrphanedBlocks::cleanup(Backend* backend) 1625 { 1626 bool released = false; 1627 for (uint32_t i=0; i<numBlockBinLimit; i++) { 1628 Block* block = bins[i].grab(); 1629 MALLOC_ITT_SYNC_ACQUIRED(bins+i); 1630 while (block) { 1631 Block* next = block->next; 1632 block->privatizePublicFreeList( /*reset=*/false ); // do not set publicFreeList to nullptr 1633 if (block->empty()) { 1634 block->reset(); 1635 // slab blocks in user's pools do not have valid backRefIdx 1636 if (!backend->inUserPool()) 1637 removeBackRef(*(block->getBackRefIdx())); 1638 backend->putSlabBlock(block); 1639 released = true; 1640 } else { 1641 MALLOC_ITT_SYNC_RELEASING(bins+i); 1642 bins[i].push(block); 1643 } 1644 block = next; 1645 } 1646 } 1647 return released; 1648 } 1649 1650 FreeBlockPool::ResOfGet FreeBlockPool::getBlock() 1651 { 1652 Block *b = head.exchange(nullptr); 1653 bool lastAccessMiss; 1654 1655 if (b) { 1656 size--; 1657 Block *newHead = b->next; 1658 lastAccessMiss = false; 1659 head.store(newHead, std::memory_order_release); 1660 } else { 1661 lastAccessMiss = true; 1662 } 1663 return ResOfGet(b, lastAccessMiss); 1664 } 1665 1666 void FreeBlockPool::returnBlock(Block *block) 1667 { 1668 MALLOC_ASSERT( size <= POOL_HIGH_MARK, ASSERT_TEXT ); 1669 Block *localHead = head.exchange(nullptr); 1670 1671 if (!localHead) { 1672 size = 0; // head was stolen by externalClean, correct size accordingly 1673 } else if (size == POOL_HIGH_MARK) { 1674 // release cold blocks and add hot one, 1675 // so keep POOL_LOW_MARK-1 blocks and add new block to head 1676 Block *headToFree = localHead, *helper; 1677 for (int i=0; i<POOL_LOW_MARK-2; i++) 1678 headToFree = headToFree->next; 1679 Block *last = headToFree; 1680 headToFree = headToFree->next; 1681 last->next = nullptr; 1682 size = POOL_LOW_MARK-1; 1683 for (Block *currBl = headToFree; currBl; currBl = helper) { 1684 helper = currBl->next; 1685 // slab blocks in user's pools do not have valid backRefIdx 1686 if (!backend->inUserPool()) 1687 removeBackRef(currBl->backRefIdx); 1688 backend->putSlabBlock(currBl); 1689 } 1690 } 1691 size++; 1692 block->next = localHead; 1693 head.store(block, std::memory_order_release); 1694 } 1695 1696 bool FreeBlockPool::externalCleanup() 1697 { 1698 Block *helper; 1699 bool released = false; 1700 1701 for (Block *currBl=head.exchange(nullptr); currBl; currBl=helper) { 1702 helper = currBl->next; 1703 // slab blocks in user's pools do not have valid backRefIdx 1704 if (!backend->inUserPool()) 1705 removeBackRef(currBl->backRefIdx); 1706 backend->putSlabBlock(currBl); 1707 released = true; 1708 } 1709 return released; 1710 } 1711 1712 /* Prepare the block for returning to FreeBlockPool */ 1713 void Block::reset() 1714 { 1715 // it is caller's responsibility 
to ensure no data is lost before calling this 1716 MALLOC_ASSERT( allocatedCount==0, ASSERT_TEXT ); 1717 MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT ); 1718 if (!isStartupAllocObject()) 1719 STAT_increment(getThreadId(), getIndex(objectSize), freeBlockBack); 1720 1721 cleanBlockHeader(); 1722 1723 nextPrivatizable.store(nullptr, std::memory_order_relaxed); 1724 1725 objectSize = 0; 1726 // for an empty block, bump pointer should point right after the end of the block 1727 bumpPtr = (FreeObject *)((uintptr_t)this + slabSize); 1728 } 1729 1730 inline void Bin::setActiveBlock (Block *block) 1731 { 1732 // MALLOC_ASSERT( bin, ASSERT_TEXT ); 1733 MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT ); 1734 // it is the caller's responsibility to keep the bin consistent (i.e. ensure this block is in the bin list) 1735 activeBlk = block; 1736 } 1737 1738 inline Block* Bin::setPreviousBlockActive() 1739 { 1740 MALLOC_ASSERT( activeBlk, ASSERT_TEXT ); 1741 Block* temp = activeBlk->previous; 1742 if( temp ) { 1743 MALLOC_ASSERT( !(temp->isFull), ASSERT_TEXT ); 1744 activeBlk = temp; 1745 } 1746 return temp; 1747 } 1748 1749 inline bool Block::isOwnedByCurrentThread() const { 1750 return tlsPtr.load(std::memory_order_relaxed) && ownerTid.isCurrentThreadId(); 1751 } 1752 1753 FreeObject *Block::findObjectToFree(const void *object) const 1754 { 1755 FreeObject *objectToFree; 1756 // Due to aligned allocations, a pointer passed to scalable_free 1757 // might differ from the address of the internally allocated object. 1758 // Small objects, however, should always be fine. 1759 if (objectSize <= maxSegregatedObjectSize) 1760 objectToFree = (FreeObject*)object; 1761 // "Fitting size" allocations are suspicious if aligned higher than naturally 1762 else { 1763 if ( ! isAligned(object,2*fittingAlignment) ) 1764 // TODO: the above check is questionable - it gives false negatives in ~50% of cases, 1765 // so it might even be slower on average than unconditional use of findAllocatedObject.
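// (Illustration of the false negative: an object that really does start at the given
// address may nevertheless be 2*fittingAlignment-aligned, and then takes the slower
// findAllocatedObject() path even though the fast path would have been correct.)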
1766 // here it should be a "real" object 1767 objectToFree = (FreeObject*)object; 1768 else 1769 // here object can be an aligned address, so applying additional checks 1770 objectToFree = findAllocatedObject(object); 1771 MALLOC_ASSERT( isAligned(objectToFree,fittingAlignment), ASSERT_TEXT ); 1772 } 1773 MALLOC_ASSERT( isProperlyPlaced(objectToFree), ASSERT_TEXT ); 1774 1775 return objectToFree; 1776 } 1777 1778 void TLSData::release() 1779 { 1780 memPool->extMemPool.allLocalCaches.unregisterThread(this); 1781 externalCleanup(/*cleanOnlyUnused=*/false, /*cleanBins=*/false); 1782 1783 for (unsigned index = 0; index < numBlockBins; index++) { 1784 Block *activeBlk = bin[index].getActiveBlock(); 1785 if (!activeBlk) 1786 continue; 1787 Block *threadlessBlock = activeBlk->previous; 1788 bool syncOnMailbox = false; 1789 while (threadlessBlock) { 1790 Block *threadBlock = threadlessBlock->previous; 1791 if (threadlessBlock->empty()) { 1792 /* we destroy the thread, so not use its block pool */ 1793 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false); 1794 } else { 1795 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock); 1796 syncOnMailbox = true; 1797 } 1798 threadlessBlock = threadBlock; 1799 } 1800 threadlessBlock = activeBlk; 1801 while (threadlessBlock) { 1802 Block *threadBlock = threadlessBlock->next; 1803 if (threadlessBlock->empty()) { 1804 /* we destroy the thread, so not use its block pool */ 1805 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false); 1806 } else { 1807 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock); 1808 syncOnMailbox = true; 1809 } 1810 threadlessBlock = threadBlock; 1811 } 1812 bin[index].resetActiveBlock(); 1813 1814 if (syncOnMailbox) { 1815 // Although, we synchronized on nextPrivatizable inside a block, we still need to 1816 // synchronize on the bin lifetime because the thread releasing an object into the public 1817 // free list is touching the bin (mailbox and mailLock) 1818 MallocMutex::scoped_lock scoped_cs(bin[index].mailLock); 1819 } 1820 } 1821 } 1822 1823 1824 #if MALLOC_CHECK_RECURSION 1825 // TODO: Use dedicated heap for this 1826 1827 /* 1828 * It's a special kind of allocation that can be used when malloc is 1829 * not available (either during startup or when malloc was already called and 1830 * we are, say, inside pthread_setspecific's call). 1831 * Block can contain objects of different sizes, 1832 * allocations are performed by moving bump pointer and increasing of object counter, 1833 * releasing is done via counter of objects allocated in the block 1834 * or moving bump pointer if releasing object is on a bound. 1835 * TODO: make bump pointer to grow to the same backward direction as all the others. 
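 *
 * Layout sketch of a single startup allocation (derived from allocate() and msize()
 * below; shown here only for illustration):
 *
 *     | size_t size | user data, size bytes, size_t-aligned |
 *                   ^-- pointer returned to the caller; msize(ptr) reads the size_t
 *                       stored immediately before it.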
1836 */ 1837 1838 class StartupBlock : public Block { 1839 size_t availableSize() const { 1840 return slabSize - ((uintptr_t)bumpPtr - (uintptr_t)this); 1841 } 1842 static StartupBlock *getBlock(); 1843 public: 1844 static FreeObject *allocate(size_t size); 1845 static size_t msize(void *ptr) { return *((size_t*)ptr - 1); } 1846 void free(void *ptr); 1847 }; 1848 1849 static MallocMutex startupMallocLock; 1850 static StartupBlock *firstStartupBlock; 1851 1852 StartupBlock *StartupBlock::getBlock() 1853 { 1854 BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/false); 1855 if (backRefIdx.isInvalid()) return nullptr; 1856 1857 StartupBlock *block = static_cast<StartupBlock*>( 1858 defaultMemPool->extMemPool.backend.getSlabBlock(1)); 1859 if (!block) return nullptr; 1860 1861 block->cleanBlockHeader(); 1862 setBackRef(backRefIdx, block); 1863 block->backRefIdx = backRefIdx; 1864 // use startupAllocObjSizeMark to mark objects from startup block marker 1865 block->objectSize = startupAllocObjSizeMark; 1866 block->bumpPtr = (FreeObject *)((uintptr_t)block + sizeof(StartupBlock)); 1867 return block; 1868 } 1869 1870 FreeObject *StartupBlock::allocate(size_t size) 1871 { 1872 FreeObject *result; 1873 StartupBlock *newBlock = nullptr; 1874 1875 /* Objects must be aligned on their natural bounds, 1876 and objects bigger than word on word's bound. */ 1877 size = alignUp(size, sizeof(size_t)); 1878 // We need size of an object to implement msize. 1879 size_t reqSize = size + sizeof(size_t); 1880 { 1881 MallocMutex::scoped_lock scoped_cs(startupMallocLock); 1882 // Re-check whether we need a new block (conditions might have changed) 1883 if (!firstStartupBlock || firstStartupBlock->availableSize() < reqSize) { 1884 if (!newBlock) { 1885 newBlock = StartupBlock::getBlock(); 1886 if (!newBlock) return nullptr; 1887 } 1888 newBlock->next = (Block*)firstStartupBlock; 1889 if (firstStartupBlock) 1890 firstStartupBlock->previous = (Block*)newBlock; 1891 firstStartupBlock = newBlock; 1892 } 1893 result = firstStartupBlock->bumpPtr; 1894 firstStartupBlock->allocatedCount++; 1895 firstStartupBlock->bumpPtr = 1896 (FreeObject *)((uintptr_t)firstStartupBlock->bumpPtr + reqSize); 1897 } 1898 1899 // keep object size at the negative offset 1900 *((size_t*)result) = size; 1901 return (FreeObject*)((size_t*)result+1); 1902 } 1903 1904 void StartupBlock::free(void *ptr) 1905 { 1906 Block* blockToRelease = nullptr; 1907 { 1908 MallocMutex::scoped_lock scoped_cs(startupMallocLock); 1909 1910 MALLOC_ASSERT(firstStartupBlock, ASSERT_TEXT); 1911 MALLOC_ASSERT(startupAllocObjSizeMark==objectSize 1912 && allocatedCount>0, ASSERT_TEXT); 1913 MALLOC_ASSERT((uintptr_t)ptr>=(uintptr_t)this+sizeof(StartupBlock) 1914 && (uintptr_t)ptr+StartupBlock::msize(ptr)<=(uintptr_t)this+slabSize, 1915 ASSERT_TEXT); 1916 if (0 == --allocatedCount) { 1917 if (this == firstStartupBlock) 1918 firstStartupBlock = (StartupBlock*)firstStartupBlock->next; 1919 if (previous) 1920 previous->next = next; 1921 if (next) 1922 next->previous = previous; 1923 blockToRelease = this; 1924 } else if ((uintptr_t)ptr + StartupBlock::msize(ptr) == (uintptr_t)bumpPtr) { 1925 // last object in the block released 1926 FreeObject *newBump = (FreeObject*)((size_t*)ptr - 1); 1927 MALLOC_ASSERT((uintptr_t)newBump>(uintptr_t)this+sizeof(StartupBlock), 1928 ASSERT_TEXT); 1929 bumpPtr = newBump; 1930 } 1931 } 1932 if (blockToRelease) { 1933 blockToRelease->previous = blockToRelease->next = nullptr; 1934 defaultMemPool->returnEmptyBlock(blockToRelease, 
/*poolTheBlock=*/false); 1935 } 1936 } 1937 1938 #endif /* MALLOC_CHECK_RECURSION */ 1939 1940 /********* End thread related code *************/ 1941 1942 /********* Library initialization *************/ 1943 1944 //! Value indicating the state of initialization. 1945 /* 0 = initialization not started. 1946 * 1 = initialization started but not finished. 1947 * 2 = initialization finished. 1948 * In theory, we only need values 0 and 2. But value 1 is nonetheless 1949 * useful for detecting errors in the double-check pattern. 1950 */ 1951 static std::atomic<intptr_t> mallocInitialized{0}; // implicitly initialized to 0 1952 static MallocMutex initMutex; 1953 1954 /** The leading "\0" is here so that applying "strings" to the binary 1955 delivers a clean result. */ 1956 static char VersionString[] = "\0" TBBMALLOC_VERSION_STRINGS; 1957 1958 #if USE_PTHREAD && __TBB_SOURCE_DIRECTLY_INCLUDED 1959 1960 /* Decrease race interval between dynamic library unloading and pthread key 1961 destructor. Protect only Pthreads with supported unloading. */ 1962 class ShutdownSync { 1963 /* flag is the number of threads in pthread key dtor body 1964 (i.e., between threadDtorStart() and threadDtorDone()) 1965 or the signal to skip dtor, if flag < 0 */ 1966 std::atomic<intptr_t> flag; 1967 static const intptr_t skipDtor = INTPTR_MIN/2; 1968 public: 1969 void init() { flag.store(0, std::memory_order_release); } 1970 /* Suppose that 2*abs(skipDtor) or more threads never call threadDtorStart() 1971 simultaneously, so flag never becomes negative because of that. */ 1972 bool threadDtorStart() { 1973 if (flag.load(std::memory_order_acquire) < 0) 1974 return false; 1975 if (++flag <= 0) { // note that new value returned 1976 flag.fetch_sub(1); // flag is spoiled by us, restore it 1977 return false; 1978 } 1979 return true; 1980 } 1981 void threadDtorDone() { 1982 flag.fetch_sub(1); 1983 } 1984 void processExit() { 1985 if (flag.fetch_add(skipDtor) != 0) { 1986 SpinWaitUntilEq(flag, skipDtor); 1987 } 1988 } 1989 }; 1990 1991 #else 1992 1993 class ShutdownSync { 1994 public: 1995 void init() { } 1996 bool threadDtorStart() { return true; } 1997 void threadDtorDone() { } 1998 void processExit() { } 1999 }; 2000 2001 #endif // USE_PTHREAD && __TBB_SOURCE_DIRECTLY_INCLUDED 2002 2003 static ShutdownSync shutdownSync; 2004 2005 inline bool isMallocInitialized() { 2006 // Load must have acquire fence; otherwise thread taking "initialized" path 2007 // might perform textually later loads *before* mallocInitialized becomes 2. 2008 return 2 == mallocInitialized.load(std::memory_order_acquire); 2009 } 2010 2011 /* Caller is responsible for ensuring this routine is called exactly once. */ 2012 extern "C" void MallocInitializeITT() { 2013 #if __TBB_USE_ITT_NOTIFY 2014 if (!usedBySrcIncluded) 2015 tbb::detail::r1::__TBB_load_ittnotify(); 2016 #endif 2017 } 2018 2019 void MemoryPool::initDefaultPool() { 2020 hugePages.init(); 2021 } 2022 2023 /* 2024 * Allocator initialization routine; 2025 * it is called lazily on the very first scalable_malloc call. 
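 *
 * A sketch of the expected call flow, using the names defined below in this file:
 *     scalable_malloc(size)
 *       -> internalMalloc(size)
 *            -> isMallocInitialized() ? proceed : doInitialization()
 *                 -> initMemoryManager()  // backend, back references, ThreadId key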
2026 */ 2027 static bool initMemoryManager() 2028 { 2029 TRACEF(( "[ScalableMalloc trace] sizeof(Block) is %d (expected 128); sizeof(uintptr_t) is %d\n", 2030 sizeof(Block), sizeof(uintptr_t) )); 2031 MALLOC_ASSERT( 2*blockHeaderAlignment == sizeof(Block), ASSERT_TEXT ); 2032 MALLOC_ASSERT( sizeof(FreeObject) == sizeof(void*), ASSERT_TEXT ); 2033 MALLOC_ASSERT( isAligned(defaultMemPool, sizeof(intptr_t)), 2034 "Memory pool must be void*-aligned for atomic to work over aligned arguments."); 2035 2036 #if USE_WINTHREAD 2037 const size_t granularity = 64*1024; // granulatity of VirtualAlloc 2038 #else 2039 // POSIX.1-2001-compliant way to get page size 2040 const size_t granularity = sysconf(_SC_PAGESIZE); 2041 #endif 2042 if (!defaultMemPool) { 2043 // Do not rely on static constructors and do the assignment in case 2044 // of library static section not initialized at this call yet. 2045 defaultMemPool = (MemoryPool*)defaultMemPool_space; 2046 } 2047 bool initOk = defaultMemPool-> 2048 extMemPool.init(0, nullptr, nullptr, granularity, 2049 /*keepAllMemory=*/false, /*fixedPool=*/false); 2050 // TODO: extMemPool.init() to not allocate memory 2051 if (!initOk || !initBackRefMain(&defaultMemPool->extMemPool.backend) || !ThreadId::init()) 2052 return false; 2053 MemoryPool::initDefaultPool(); 2054 // init() is required iff initMemoryManager() is called 2055 // after mallocProcessShutdownNotification() 2056 shutdownSync.init(); 2057 #if COLLECT_STATISTICS 2058 initStatisticsCollection(); 2059 #endif 2060 return true; 2061 } 2062 2063 static bool GetBoolEnvironmentVariable(const char* name) { 2064 return tbb::detail::r1::GetBoolEnvironmentVariable(name); 2065 } 2066 2067 //! Ensures that initMemoryManager() is called once and only once. 2068 /** Does not return until initMemoryManager() has been completed by a thread. 2069 There is no need to call this routine if mallocInitialized==2 . */ 2070 static bool doInitialization() 2071 { 2072 MallocMutex::scoped_lock lock( initMutex ); 2073 if (mallocInitialized.load(std::memory_order_relaxed)!=2) { 2074 MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==0, ASSERT_TEXT ); 2075 mallocInitialized.store(1, std::memory_order_relaxed); 2076 RecursiveMallocCallProtector scoped; 2077 if (!initMemoryManager()) { 2078 mallocInitialized.store(0, std::memory_order_relaxed); // restore and out 2079 return false; 2080 } 2081 #ifdef MALLOC_EXTRA_INITIALIZATION 2082 MALLOC_EXTRA_INITIALIZATION; 2083 #endif 2084 #if MALLOC_CHECK_RECURSION 2085 RecursiveMallocCallProtector::detectNaiveOverload(); 2086 #endif 2087 MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==1, ASSERT_TEXT ); 2088 // Store must have release fence, otherwise mallocInitialized==2 2089 // might become remotely visible before side effects of 2090 // initMemoryManager() become remotely visible. 
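// A sketch of the pairing (illustration only, no extra synchronization is added):
//     this thread : <initMemoryManager() side effects>; mallocInitialized.store(2, release);
//     other thread: if (mallocInitialized.load(acquire) == 2) -> may use the allocator
// i.e. any thread that observes the value 2 via isMallocInitialized() also observes
// the completed initialization.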
2091 mallocInitialized.store(2, std::memory_order_release); 2092 if( GetBoolEnvironmentVariable("TBB_VERSION") ) { 2093 fputs(VersionString+1,stderr); 2094 hugePages.printStatus(); 2095 } 2096 } 2097 /* It can't be 0 or I would have initialized it */ 2098 MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==2, ASSERT_TEXT ); 2099 return true; 2100 } 2101 2102 /********* End library initialization *************/ 2103 2104 /********* The malloc show begins *************/ 2105 2106 2107 FreeObject *Block::allocateFromFreeList() 2108 { 2109 FreeObject *result; 2110 2111 if (!freeList) return nullptr; 2112 2113 result = freeList; 2114 MALLOC_ASSERT( result, ASSERT_TEXT ); 2115 2116 freeList = result->next; 2117 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 2118 allocatedCount++; 2119 STAT_increment(getThreadId(), getIndex(objectSize), allocFreeListUsed); 2120 2121 return result; 2122 } 2123 2124 FreeObject *Block::allocateFromBumpPtr() 2125 { 2126 FreeObject *result = bumpPtr; 2127 if (result) { 2128 bumpPtr = (FreeObject *) ((uintptr_t) bumpPtr - objectSize); 2129 if ( (uintptr_t)bumpPtr < (uintptr_t)this+sizeof(Block) ) { 2130 bumpPtr = nullptr; 2131 } 2132 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 2133 allocatedCount++; 2134 STAT_increment(getThreadId(), getIndex(objectSize), allocBumpPtrUsed); 2135 } 2136 return result; 2137 } 2138 2139 inline FreeObject* Block::allocate() 2140 { 2141 MALLOC_ASSERT( isOwnedByCurrentThread(), ASSERT_TEXT ); 2142 2143 /* for better cache locality, first looking in the free list. */ 2144 if ( FreeObject *result = allocateFromFreeList() ) { 2145 return result; 2146 } 2147 MALLOC_ASSERT( !freeList, ASSERT_TEXT ); 2148 2149 /* if free list is empty, try thread local bump pointer allocation. */ 2150 if ( FreeObject *result = allocateFromBumpPtr() ) { 2151 return result; 2152 } 2153 MALLOC_ASSERT( !bumpPtr, ASSERT_TEXT ); 2154 2155 /* the block is considered full. */ 2156 isFull = true; 2157 return nullptr; 2158 } 2159 2160 size_t Block::findObjectSize(void *object) const 2161 { 2162 size_t blSize = getSize(); 2163 #if MALLOC_CHECK_RECURSION 2164 // Currently, there is no aligned allocations from startup blocks, 2165 // so we can return just StartupBlock::msize(). 2166 // TODO: This must be extended if we add aligned allocation from startup blocks. 
2167 if (!blSize) 2168 return StartupBlock::msize(object); 2169 #endif 2170 // object can be aligned, so real size can be less than block's 2171 size_t size = 2172 blSize - ((uintptr_t)object - (uintptr_t)findObjectToFree(object)); 2173 MALLOC_ASSERT(size>0 && size<minLargeObjectSize, ASSERT_TEXT); 2174 return size; 2175 } 2176 2177 void Bin::moveBlockToFront(Block *block) 2178 { 2179 /* move the block to the front of the bin */ 2180 if (block == activeBlk) return; 2181 outofTLSBin(block); 2182 pushTLSBin(block); 2183 } 2184 2185 void Bin::processEmptyBlock(Block *block, bool poolTheBlock) 2186 { 2187 if (block != activeBlk) { 2188 /* We are not using this block; return it to the pool */ 2189 outofTLSBin(block); 2190 block->getMemPool()->returnEmptyBlock(block, poolTheBlock); 2191 } else { 2192 /* all objects are free - let's restore the bump pointer */ 2193 block->restoreBumpPtr(); 2194 } 2195 } 2196 2197 template<int LOW_MARK, int HIGH_MARK> 2198 bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool) 2199 { 2200 const size_t size = object->unalignedSize; 2201 // not spoil cache with too large object, that can cause its total cleanup 2202 if (size > MAX_TOTAL_SIZE) 2203 return false; 2204 LargeMemoryBlock *localHead = head.exchange(nullptr); 2205 2206 object->prev = nullptr; 2207 object->next = localHead; 2208 if (localHead) 2209 localHead->prev = object; 2210 else { 2211 // those might not be cleaned during local cache stealing, correct them 2212 totalSize = 0; 2213 numOfBlocks = 0; 2214 tail = object; 2215 } 2216 localHead = object; 2217 totalSize += size; 2218 numOfBlocks++; 2219 // must meet both size and number of cached objects constrains 2220 if (totalSize > MAX_TOTAL_SIZE || numOfBlocks >= HIGH_MARK) { 2221 // scanning from tail until meet conditions 2222 while (totalSize > MAX_TOTAL_SIZE || numOfBlocks > LOW_MARK) { 2223 totalSize -= tail->unalignedSize; 2224 numOfBlocks--; 2225 tail = tail->prev; 2226 } 2227 LargeMemoryBlock *headToRelease = tail->next; 2228 tail->next = nullptr; 2229 2230 extMemPool->freeLargeObjectList(headToRelease); 2231 } 2232 2233 head.store(localHead, std::memory_order_release); 2234 return true; 2235 } 2236 2237 template<int LOW_MARK, int HIGH_MARK> 2238 LargeMemoryBlock *LocalLOCImpl<LOW_MARK, HIGH_MARK>::get(size_t size) 2239 { 2240 LargeMemoryBlock *localHead, *res = nullptr; 2241 2242 if (size > MAX_TOTAL_SIZE) 2243 return nullptr; 2244 2245 // TBB_REVAMP_TODO: review this line 2246 if (!head.load(std::memory_order_acquire) || (localHead = head.exchange(nullptr)) == nullptr) { 2247 // do not restore totalSize, numOfBlocks and tail at this point, 2248 // as they are used only in put(), where they must be restored 2249 return nullptr; 2250 } 2251 2252 for (LargeMemoryBlock *curr = localHead; curr; curr=curr->next) { 2253 if (curr->unalignedSize == size) { 2254 res = curr; 2255 if (curr->next) 2256 curr->next->prev = curr->prev; 2257 else 2258 tail = curr->prev; 2259 if (curr != localHead) 2260 curr->prev->next = curr->next; 2261 else 2262 localHead = curr->next; 2263 totalSize -= size; 2264 numOfBlocks--; 2265 break; 2266 } 2267 } 2268 2269 head.store(localHead, std::memory_order_release); 2270 return res; 2271 } 2272 2273 template<int LOW_MARK, int HIGH_MARK> 2274 bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::externalCleanup(ExtMemoryPool *extMemPool) 2275 { 2276 if (LargeMemoryBlock *localHead = head.exchange(nullptr)) { 2277 extMemPool->freeLargeObjectList(localHead); 2278 return true; 2279 } 2280 return false; 
2281 } 2282 2283 void *MemoryPool::getFromLLOCache(TLSData* tls, size_t size, size_t alignment) 2284 { 2285 LargeMemoryBlock *lmb = nullptr; 2286 2287 size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr); 2288 size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+alignment); 2289 if (allocationSize < size) // allocationSize is wrapped around after alignToBin 2290 return nullptr; 2291 MALLOC_ASSERT(allocationSize >= alignment, "Overflow must be checked before."); 2292 2293 if (tls) { 2294 tls->markUsed(); 2295 lmb = tls->lloc.get(allocationSize); 2296 } 2297 if (!lmb) 2298 lmb = extMemPool.mallocLargeObject(this, allocationSize); 2299 2300 if (lmb) { 2301 // doing shuffle we suppose that alignment offset guarantees 2302 // that different cache lines are in use 2303 MALLOC_ASSERT(alignment >= estimatedCacheLineSize, ASSERT_TEXT); 2304 2305 void *alignedArea = (void*)alignUp((uintptr_t)lmb+headersSize, alignment); 2306 uintptr_t alignedRight = 2307 alignDown((uintptr_t)lmb+lmb->unalignedSize - size, alignment); 2308 // Has some room to shuffle object between cache lines? 2309 // Note that alignedRight and alignedArea are aligned at alignment. 2310 unsigned ptrDelta = alignedRight - (uintptr_t)alignedArea; 2311 if (ptrDelta && tls) { // !tls is cold path 2312 // for the hot path of alignment==estimatedCacheLineSize, 2313 // allow compilers to use shift for division 2314 // (since estimatedCacheLineSize is a power-of-2 constant) 2315 unsigned numOfPossibleOffsets = alignment == estimatedCacheLineSize? 2316 ptrDelta / estimatedCacheLineSize : 2317 ptrDelta / alignment; 2318 unsigned myCacheIdx = ++tls->currCacheIdx; 2319 unsigned offset = myCacheIdx % numOfPossibleOffsets; 2320 2321 // Move object to a cache line with an offset that is different from 2322 // previous allocation. This supposedly allows us to use cache 2323 // associativity more efficiently. 2324 alignedArea = (void*)((uintptr_t)alignedArea + offset*alignment); 2325 } 2326 MALLOC_ASSERT((uintptr_t)lmb+lmb->unalignedSize >= 2327 (uintptr_t)alignedArea+size, "Object doesn't fit the block."); 2328 LargeObjectHdr *header = (LargeObjectHdr*)alignedArea-1; 2329 header->memoryBlock = lmb; 2330 header->backRefIdx = lmb->backRefIdx; 2331 setBackRef(header->backRefIdx, header); 2332 2333 lmb->objectSize = size; 2334 2335 MALLOC_ASSERT( isLargeObject<unknownMem>(alignedArea), ASSERT_TEXT ); 2336 MALLOC_ASSERT( isAligned(alignedArea, alignment), ASSERT_TEXT ); 2337 2338 return alignedArea; 2339 } 2340 return nullptr; 2341 } 2342 2343 void MemoryPool::putToLLOCache(TLSData *tls, void *object) 2344 { 2345 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2346 // overwrite backRefIdx to simplify double free detection 2347 header->backRefIdx = BackRefIdx(); 2348 2349 if (tls) { 2350 tls->markUsed(); 2351 if (tls->lloc.put(header->memoryBlock, &extMemPool)) 2352 return; 2353 } 2354 extMemPool.freeLargeObject(header->memoryBlock); 2355 } 2356 2357 /* 2358 * All aligned allocations fall into one of the following categories: 2359 * 1. if both request size and alignment are <= maxSegregatedObjectSize, 2360 * we just align the size up, and request this amount, because for every size 2361 * aligned to some power of 2, the allocated object is at least that aligned. 2362 * 2. for size<minLargeObjectSize, check if already guaranteed fittingAlignment is enough. 2363 * 3. 
if size+alignment<minLargeObjectSize, we take an object of fittingSizeN and align 2364 * its address up; given such pointer, scalable_free could find the real object. 2365 * Wrapping of size+alignment is impossible because maximal allowed 2366 * alignment plus minLargeObjectSize can't lead to wrapping. 2367 * 4. otherwise, aligned large object is allocated. 2368 */ 2369 static void *allocateAligned(MemoryPool *memPool, size_t size, size_t alignment) 2370 { 2371 MALLOC_ASSERT( isPowerOfTwo(alignment), ASSERT_TEXT ); 2372 2373 if (!isMallocInitialized()) 2374 if (!doInitialization()) 2375 return nullptr; 2376 2377 void *result; 2378 if (size<=maxSegregatedObjectSize && alignment<=maxSegregatedObjectSize) 2379 result = internalPoolMalloc(memPool, alignUp(size? size: sizeof(size_t), alignment)); 2380 else if (size<minLargeObjectSize) { 2381 if (alignment<=fittingAlignment) 2382 result = internalPoolMalloc(memPool, size); 2383 else if (size+alignment < minLargeObjectSize) { 2384 void *unaligned = internalPoolMalloc(memPool, size+alignment); 2385 if (!unaligned) return nullptr; 2386 result = alignUp(unaligned, alignment); 2387 } else 2388 goto LargeObjAlloc; 2389 } else { 2390 LargeObjAlloc: 2391 TLSData *tls = memPool->getTLS(/*create=*/true); 2392 // take into account only alignment that are higher then natural 2393 result = 2394 memPool->getFromLLOCache(tls, size, largeObjectAlignment>alignment? 2395 largeObjectAlignment: alignment); 2396 } 2397 2398 MALLOC_ASSERT( isAligned(result, alignment), ASSERT_TEXT ); 2399 return result; 2400 } 2401 2402 static void *reallocAligned(MemoryPool *memPool, void *ptr, 2403 size_t newSize, size_t alignment = 0) 2404 { 2405 void *result; 2406 size_t copySize; 2407 2408 if (isLargeObject<ourMem>(ptr)) { 2409 LargeMemoryBlock* lmb = ((LargeObjectHdr *)ptr - 1)->memoryBlock; 2410 copySize = lmb->unalignedSize-((uintptr_t)ptr-(uintptr_t)lmb); 2411 2412 // Apply different strategies if size decreases 2413 if (newSize <= copySize && (0 == alignment || isAligned(ptr, alignment))) { 2414 2415 // For huge objects (that do not fit in backend cache), keep the same space unless 2416 // the new size is at least twice smaller 2417 bool isMemoryBlockHuge = copySize > memPool->extMemPool.backend.getMaxBinnedSize(); 2418 size_t threshold = isMemoryBlockHuge ? copySize / 2 : 0; 2419 if (newSize > threshold) { 2420 lmb->objectSize = newSize; 2421 return ptr; 2422 } 2423 // TODO: For large objects suitable for the backend cache, 2424 // split out the excessive part and put it to the backend. 2425 } 2426 // Reallocate for real 2427 copySize = lmb->objectSize; 2428 #if BACKEND_HAS_MREMAP 2429 if (void *r = memPool->extMemPool.remap(ptr, copySize, newSize, 2430 alignment < largeObjectAlignment ? largeObjectAlignment : alignment)) 2431 return r; 2432 #endif 2433 result = alignment ? allocateAligned(memPool, newSize, alignment) : 2434 internalPoolMalloc(memPool, newSize); 2435 2436 } else { 2437 Block* block = (Block *)alignDown(ptr, slabSize); 2438 copySize = block->findObjectSize(ptr); 2439 2440 // TODO: Move object to another bin if size decreases and the current bin is "empty enough". 2441 // Currently, in case of size decreasing, old pointer is returned 2442 if (newSize <= copySize && (0==alignment || isAligned(ptr, alignment))) { 2443 return ptr; 2444 } else { 2445 result = alignment ? allocateAligned(memPool, newSize, alignment) : 2446 internalPoolMalloc(memPool, newSize); 2447 } 2448 } 2449 if (result) { 2450 memcpy(result, ptr, copySize < newSize ? 
copySize : newSize); 2451 internalPoolFree(memPool, ptr, 0); 2452 } 2453 return result; 2454 } 2455 2456 #if MALLOC_DEBUG 2457 /* A predicate checks if an object is properly placed inside its block */ 2458 inline bool Block::isProperlyPlaced(const void *object) const 2459 { 2460 return 0 == ((uintptr_t)this + slabSize - (uintptr_t)object) % objectSize; 2461 } 2462 #endif 2463 2464 /* Finds the real object inside the block */ 2465 FreeObject *Block::findAllocatedObject(const void *address) const 2466 { 2467 // calculate offset from the end of the block space 2468 uint16_t offset = (uintptr_t)this + slabSize - (uintptr_t)address; 2469 MALLOC_ASSERT( offset<=slabSize-sizeof(Block), ASSERT_TEXT ); 2470 // find offset difference from a multiple of allocation size 2471 offset %= objectSize; 2472 // and move the address down to where the real object starts. 2473 return (FreeObject*)((uintptr_t)address - (offset? objectSize-offset: 0)); 2474 } 2475 2476 /* 2477 * Bad dereference caused by a foreign pointer is possible only here, not earlier in call chain. 2478 * Separate function isolates SEH code, as it has bad influence on compiler optimization. 2479 */ 2480 static inline BackRefIdx safer_dereference (const BackRefIdx *ptr) 2481 { 2482 BackRefIdx id; 2483 #if _MSC_VER 2484 __try { 2485 #endif 2486 id = dereference(ptr); 2487 #if _MSC_VER 2488 } __except( GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION? 2489 EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ) { 2490 id = BackRefIdx(); 2491 } 2492 #endif 2493 return id; 2494 } 2495 2496 template<MemoryOrigin memOrigin> 2497 bool isLargeObject(void *object) 2498 { 2499 if (!isAligned(object, largeObjectAlignment)) 2500 return false; 2501 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2502 BackRefIdx idx = (memOrigin == unknownMem) ? 2503 safer_dereference(&header->backRefIdx) : dereference(&header->backRefIdx); 2504 2505 return idx.isLargeObject() 2506 // in valid LargeObjectHdr memoryBlock is not nullptr 2507 && header->memoryBlock 2508 // in valid LargeObjectHdr memoryBlock points somewhere before header 2509 // TODO: more strict check 2510 && (uintptr_t)header->memoryBlock < (uintptr_t)header 2511 && getBackRef(idx) == header; 2512 } 2513 2514 static inline bool isSmallObject (void *ptr) 2515 { 2516 Block* expectedBlock = (Block*)alignDown(ptr, slabSize); 2517 const BackRefIdx* idx = expectedBlock->getBackRefIdx(); 2518 2519 bool isSmall = expectedBlock == getBackRef(safer_dereference(idx)); 2520 if (isSmall) 2521 expectedBlock->checkFreePrecond(ptr); 2522 return isSmall; 2523 } 2524 2525 /**** Check if an object was allocated by scalable_malloc ****/ 2526 static inline bool isRecognized (void* ptr) 2527 { 2528 return defaultMemPool->extMemPool.backend.ptrCanBeValid(ptr) && 2529 (isLargeObject<unknownMem>(ptr) || isSmallObject(ptr)); 2530 } 2531 2532 static inline void freeSmallObject(void *object) 2533 { 2534 /* mask low bits to get the block */ 2535 Block *block = (Block *)alignDown(object, slabSize); 2536 block->checkFreePrecond(object); 2537 2538 #if MALLOC_CHECK_RECURSION 2539 if (block->isStartupAllocObject()) { 2540 ((StartupBlock *)block)->free(object); 2541 return; 2542 } 2543 #endif 2544 if (block->isOwnedByCurrentThread()) { 2545 block->freeOwnObject(object); 2546 } else { /* Slower path to add to the shared list, the allocatedCount is updated by the owner thread in malloc. 
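(The object is pushed onto the block's publicFreeList by freePublicObject(); the owning thread later reclaims it in privatizePublicFreeList(), which is where allocatedCount is adjusted.)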
*/ 2547 FreeObject *objectToFree = block->findObjectToFree(object); 2548 block->freePublicObject(objectToFree); 2549 } 2550 } 2551 2552 static void *internalPoolMalloc(MemoryPool* memPool, size_t size) 2553 { 2554 Bin* bin; 2555 Block * mallocBlock; 2556 2557 if (!memPool) return nullptr; 2558 2559 if (!size) size = sizeof(size_t); 2560 2561 TLSData *tls = memPool->getTLS(/*create=*/true); 2562 2563 /* Allocate a large object */ 2564 if (size >= minLargeObjectSize) 2565 return memPool->getFromLLOCache(tls, size, largeObjectAlignment); 2566 2567 if (!tls) return nullptr; 2568 2569 tls->markUsed(); 2570 /* 2571 * Get an element in thread-local array corresponding to the given size; 2572 * It keeps ptr to the active block for allocations of this size 2573 */ 2574 bin = tls->getAllocationBin(size); 2575 if ( !bin ) return nullptr; 2576 2577 /* Get a block to try to allocate in. */ 2578 for( mallocBlock = bin->getActiveBlock(); mallocBlock; 2579 mallocBlock = bin->setPreviousBlockActive() ) // the previous block should be empty enough 2580 { 2581 if( FreeObject *result = mallocBlock->allocate() ) 2582 return result; 2583 } 2584 2585 /* 2586 * else privatize publicly freed objects in some block and allocate from it 2587 */ 2588 mallocBlock = bin->getPrivatizedFreeListBlock(); 2589 if (mallocBlock) { 2590 MALLOC_ASSERT( mallocBlock->freeListNonNull(), ASSERT_TEXT ); 2591 if ( FreeObject *result = mallocBlock->allocateFromFreeList() ) 2592 return result; 2593 /* Else something strange happened, need to retry from the beginning; */ 2594 TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in public free list; reentering.\n" )); 2595 return internalPoolMalloc(memPool, size); 2596 } 2597 2598 /* 2599 * no suitable own blocks, try to get a partial block that some other thread has discarded. 2600 */ 2601 mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size); 2602 while (mallocBlock) { 2603 bin->pushTLSBin(mallocBlock); 2604 bin->setActiveBlock(mallocBlock); // TODO: move under the below condition? 2605 if( FreeObject *result = mallocBlock->allocate() ) 2606 return result; 2607 mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size); 2608 } 2609 2610 /* 2611 * else try to get a new empty block 2612 */ 2613 mallocBlock = memPool->getEmptyBlock(size); 2614 if (mallocBlock) { 2615 bin->pushTLSBin(mallocBlock); 2616 bin->setActiveBlock(mallocBlock); 2617 if( FreeObject *result = mallocBlock->allocate() ) 2618 return result; 2619 /* Else something strange happened, need to retry from the beginning; */ 2620 TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in empty block; reentering.\n" )); 2621 return internalPoolMalloc(memPool, size); 2622 } 2623 /* 2624 * else nothing works so return nullptr 2625 */ 2626 TRACEF(( "[ScalableMalloc trace] No memory found, returning nullptr.\n" )); 2627 return nullptr; 2628 } 2629 2630 // When size==0 (i.e. unknown), detect here whether the object is large. 2631 // For size is known and < minLargeObjectSize, we still need to check 2632 // if the actual object is large, because large objects might be used 2633 // for aligned small allocations. 2634 static bool internalPoolFree(MemoryPool *memPool, void *object, size_t size) 2635 { 2636 if (!memPool || !object) return false; 2637 2638 // The library is initialized at allocation call, so releasing while 2639 // not initialized means foreign object is releasing. 
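// (isRecognized(), defined above, combines Backend::ptrCanBeValid() with the
// large/small object probes used by the assertion below.)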
2640 MALLOC_ASSERT(isMallocInitialized(), ASSERT_TEXT); 2641 MALLOC_ASSERT(memPool->extMemPool.userPool() || isRecognized(object), 2642 "Invalid pointer during object releasing is detected."); 2643 2644 if (size >= minLargeObjectSize || isLargeObject<ourMem>(object)) 2645 memPool->putToLLOCache(memPool->getTLS(/*create=*/false), object); 2646 else 2647 freeSmallObject(object); 2648 return true; 2649 } 2650 2651 static void *internalMalloc(size_t size) 2652 { 2653 if (!size) size = sizeof(size_t); 2654 2655 #if MALLOC_CHECK_RECURSION 2656 if (RecursiveMallocCallProtector::sameThreadActive()) 2657 return size<minLargeObjectSize? StartupBlock::allocate(size) : 2658 // nested allocation, so skip tls 2659 (FreeObject*)defaultMemPool->getFromLLOCache(nullptr, size, slabSize); 2660 #endif 2661 2662 if (!isMallocInitialized()) 2663 if (!doInitialization()) 2664 return nullptr; 2665 return internalPoolMalloc(defaultMemPool, size); 2666 } 2667 2668 static void internalFree(void *object) 2669 { 2670 internalPoolFree(defaultMemPool, object, 0); 2671 } 2672 2673 static size_t internalMsize(void* ptr) 2674 { 2675 MALLOC_ASSERT(ptr, "Invalid pointer passed to internalMsize"); 2676 if (isLargeObject<ourMem>(ptr)) { 2677 // TODO: return the maximum memory size, that can be written to this object 2678 LargeMemoryBlock* lmb = ((LargeObjectHdr*)ptr - 1)->memoryBlock; 2679 return lmb->objectSize; 2680 } else { 2681 Block *block = (Block*)alignDown(ptr, slabSize); 2682 return block->findObjectSize(ptr); 2683 } 2684 } 2685 2686 } // namespace internal 2687 2688 using namespace rml::internal; 2689 2690 // legacy entry point saved for compatibility with binaries complied 2691 // with pre-6003 versions of TBB 2692 TBBMALLOC_EXPORT rml::MemoryPool *pool_create(intptr_t pool_id, const MemPoolPolicy *policy) 2693 { 2694 rml::MemoryPool *pool; 2695 MemPoolPolicy pol(policy->pAlloc, policy->pFree, policy->granularity); 2696 2697 pool_create_v1(pool_id, &pol, &pool); 2698 return pool; 2699 } 2700 2701 rml::MemPoolError pool_create_v1(intptr_t pool_id, const MemPoolPolicy *policy, 2702 rml::MemoryPool **pool) 2703 { 2704 if ( !policy->pAlloc || policy->version<MemPoolPolicy::TBBMALLOC_POOL_VERSION 2705 // empty pFree allowed only for fixed pools 2706 || !(policy->fixedPool || policy->pFree)) { 2707 *pool = nullptr; 2708 return INVALID_POLICY; 2709 } 2710 if ( policy->version>MemPoolPolicy::TBBMALLOC_POOL_VERSION // future versions are not supported 2711 // new flags can be added in place of reserved, but default 2712 // behaviour must be supported by this version 2713 || policy->reserved ) { 2714 *pool = nullptr; 2715 return UNSUPPORTED_POLICY; 2716 } 2717 if (!isMallocInitialized()) 2718 if (!doInitialization()) { 2719 *pool = nullptr; 2720 return NO_MEMORY; 2721 } 2722 rml::internal::MemoryPool *memPool = 2723 (rml::internal::MemoryPool*)internalMalloc((sizeof(rml::internal::MemoryPool))); 2724 if (!memPool) { 2725 *pool = nullptr; 2726 return NO_MEMORY; 2727 } 2728 memset(static_cast<void*>(memPool), 0, sizeof(rml::internal::MemoryPool)); 2729 if (!memPool->init(pool_id, policy)) { 2730 internalFree(memPool); 2731 *pool = nullptr; 2732 return NO_MEMORY; 2733 } 2734 2735 *pool = (rml::MemoryPool*)memPool; 2736 return POOL_OK; 2737 } 2738 2739 bool pool_destroy(rml::MemoryPool* memPool) 2740 { 2741 if (!memPool) return false; 2742 bool ret = ((rml::internal::MemoryPool*)memPool)->destroy(); 2743 internalFree(memPool); 2744 2745 return ret; 2746 } 2747 2748 bool pool_reset(rml::MemoryPool* memPool) 2749 { 2750 if 
(!memPool) return false; 2751 2752 return ((rml::internal::MemoryPool*)memPool)->reset(); 2753 } 2754 2755 void *pool_malloc(rml::MemoryPool* mPool, size_t size) 2756 { 2757 return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size); 2758 } 2759 2760 void *pool_realloc(rml::MemoryPool* mPool, void *object, size_t size) 2761 { 2762 if (!object) 2763 return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size); 2764 if (!size) { 2765 internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0); 2766 return nullptr; 2767 } 2768 return reallocAligned((rml::internal::MemoryPool*)mPool, object, size, 0); 2769 } 2770 2771 void *pool_aligned_malloc(rml::MemoryPool* mPool, size_t size, size_t alignment) 2772 { 2773 if (!isPowerOfTwo(alignment) || 0==size) 2774 return nullptr; 2775 2776 return allocateAligned((rml::internal::MemoryPool*)mPool, size, alignment); 2777 } 2778 2779 void *pool_aligned_realloc(rml::MemoryPool* memPool, void *ptr, size_t size, size_t alignment) 2780 { 2781 if (!isPowerOfTwo(alignment)) 2782 return nullptr; 2783 rml::internal::MemoryPool *mPool = (rml::internal::MemoryPool*)memPool; 2784 void *tmp; 2785 2786 if (!ptr) 2787 tmp = allocateAligned(mPool, size, alignment); 2788 else if (!size) { 2789 internalPoolFree(mPool, ptr, 0); 2790 return nullptr; 2791 } else 2792 tmp = reallocAligned(mPool, ptr, size, alignment); 2793 2794 return tmp; 2795 } 2796 2797 bool pool_free(rml::MemoryPool *mPool, void *object) 2798 { 2799 return internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0); 2800 } 2801 2802 rml::MemoryPool *pool_identify(void *object) 2803 { 2804 rml::internal::MemoryPool *pool; 2805 if (isLargeObject<ourMem>(object)) { 2806 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2807 pool = header->memoryBlock->pool; 2808 } else { 2809 Block *block = (Block*)alignDown(object, slabSize); 2810 pool = block->getMemPool(); 2811 } 2812 // do not return defaultMemPool, as it can't be used in pool_free() etc 2813 __TBB_ASSERT_RELEASE(pool!=defaultMemPool, 2814 "rml::pool_identify() can't be used for scalable_malloc() etc results."); 2815 return (rml::MemoryPool*)pool; 2816 } 2817 2818 size_t pool_msize(rml::MemoryPool *mPool, void* object) 2819 { 2820 if (object) { 2821 // No assert for object recognition, cause objects allocated from non-default 2822 // memory pool do not participate in range checking and do not have valid backreferences for 2823 // small objects. Instead, check that an object belong to the certain memory pool. 2824 MALLOC_ASSERT_EX(mPool == pool_identify(object), "Object does not belong to the specified pool"); 2825 return internalMsize(object); 2826 } 2827 errno = EINVAL; 2828 // Unlike _msize, return 0 in case of parameter error. 2829 // Returning size_t(-1) looks more like the way to troubles. 2830 return 0; 2831 } 2832 2833 } // namespace rml 2834 2835 using namespace rml::internal; 2836 2837 #if MALLOC_TRACE 2838 static unsigned int threadGoingDownCount = 0; 2839 #endif 2840 2841 /* 2842 * When a thread is shutting down this routine should be called to remove all the thread ids 2843 * from the malloc blocks and replace them with a nullptr thread id. 2844 * 2845 * For pthreads, the function is set as a callback in pthread_key_create for TLS bin. 2846 * It will be automatically called at thread exit with the key value as the argument, 2847 * unless that value is nullptr. 2848 * For Windows, it is called from DllMain( DLL_THREAD_DETACH ). 
2849 * 2850 * However neither of the above is called for the main process thread, so the routine 2851 * also needs to be called during the process shutdown. 2852 * 2853 */ 2854 // TODO: Consider making this function part of class MemoryPool. 2855 void doThreadShutdownNotification(TLSData* tls, bool main_thread) 2856 { 2857 TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return start %d\n", 2858 getThreadId(), threadGoingDownCount++ )); 2859 2860 #if USE_PTHREAD 2861 if (tls) { 2862 if (!shutdownSync.threadDtorStart()) return; 2863 tls->getMemPool()->onThreadShutdown(tls); 2864 shutdownSync.threadDtorDone(); 2865 } else 2866 #endif 2867 { 2868 suppress_unused_warning(tls); // not used on Windows 2869 // The default pool is safe to use at this point: 2870 // on Linux, only the main thread can go here before destroying defaultMemPool; 2871 // on Windows, shutdown is synchronized via loader lock and isMallocInitialized(). 2872 // See also __TBB_mallocProcessShutdownNotification() 2873 defaultMemPool->onThreadShutdown(defaultMemPool->getTLS(/*create=*/false)); 2874 // Take lock to walk through other pools; but waiting might be dangerous at this point 2875 // (e.g. on Windows the main thread might deadlock) 2876 bool locked = false; 2877 MallocMutex::scoped_lock lock(MemoryPool::memPoolListLock, /*wait=*/!main_thread, &locked); 2878 if (locked) { // the list is safe to process 2879 for (MemoryPool *memPool = defaultMemPool->next; memPool; memPool = memPool->next) 2880 memPool->onThreadShutdown(memPool->getTLS(/*create=*/false)); 2881 } 2882 } 2883 2884 TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return end\n", getThreadId() )); 2885 } 2886 2887 #if USE_PTHREAD 2888 void mallocThreadShutdownNotification(void* arg) 2889 { 2890 // The routine is called for each pool (as TLS dtor) on each thread, except for the main thread 2891 if (!isMallocInitialized()) return; 2892 doThreadShutdownNotification((TLSData*)arg, false); 2893 } 2894 #else 2895 extern "C" void __TBB_mallocThreadShutdownNotification() 2896 { 2897 // The routine is called once per thread on Windows 2898 if (!isMallocInitialized()) return; 2899 doThreadShutdownNotification(nullptr, false); 2900 } 2901 #endif 2902 2903 extern "C" void __TBB_mallocProcessShutdownNotification(bool windows_process_dying) 2904 { 2905 if (!isMallocInitialized()) return; 2906 2907 // Don't clean allocator internals if the entire process is exiting 2908 if (!windows_process_dying) { 2909 doThreadShutdownNotification(nullptr, /*main_thread=*/true); 2910 } 2911 #if __TBB_MALLOC_LOCACHE_STAT 2912 printf("cache hit ratio %f, size hit %f\n", 2913 1.*cacheHits/mallocCalls, 1.*memHitKB/memAllocKB); 2914 defaultMemPool->extMemPool.loc.reportStat(stdout); 2915 #endif 2916 2917 shutdownSync.processExit(); 2918 #if __TBB_SOURCE_DIRECTLY_INCLUDED 2919 /* Pthread keys must be deleted as soon as possible to not call key dtor 2920 on thread termination when then the tbbmalloc code can be already unloaded. 
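(ShutdownSync::processExit(), called just above, has already waited for any thread still executing the key destructor body, so deleting the keys here is safe.)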
2921 */ 2922 defaultMemPool->destroy(); 2923 destroyBackRefMain(&defaultMemPool->extMemPool.backend); 2924 ThreadId::destroy(); // Delete key for thread id 2925 hugePages.reset(); 2926 // new total malloc initialization is possible after this point 2927 mallocInitialized.store(0, std::memory_order_release); 2928 #endif // __TBB_SOURCE_DIRECTLY_INCLUDED 2929 2930 #if COLLECT_STATISTICS 2931 unsigned nThreads = ThreadId::getMaxThreadId(); 2932 for( int i=1; i<=nThreads && i<MAX_THREADS; ++i ) 2933 STAT_print(i); 2934 #endif 2935 if (!usedBySrcIncluded) { 2936 MALLOC_ITT_FINI_ITTLIB(); 2937 MALLOC_ITT_RELEASE_RESOURCES(); 2938 } 2939 } 2940 2941 extern "C" void * scalable_malloc(size_t size) 2942 { 2943 void *ptr = internalMalloc(size); 2944 if (!ptr) errno = ENOMEM; 2945 return ptr; 2946 } 2947 2948 extern "C" void scalable_free(void *object) 2949 { 2950 internalFree(object); 2951 } 2952 2953 #if MALLOC_ZONE_OVERLOAD_ENABLED 2954 extern "C" void __TBB_malloc_free_definite_size(void *object, size_t size) 2955 { 2956 internalPoolFree(defaultMemPool, object, size); 2957 } 2958 #endif 2959 2960 /* 2961 * A variant that provides additional memory safety, by checking whether the given address 2962 * was obtained with this allocator, and if not redirecting to the provided alternative call. 2963 */ 2964 extern "C" TBBMALLOC_EXPORT void __TBB_malloc_safer_free(void *object, void (*original_free)(void*)) 2965 { 2966 if (!object) 2967 return; 2968 2969 // tbbmalloc can allocate object only when tbbmalloc has been initialized 2970 if (mallocInitialized.load(std::memory_order_acquire) && defaultMemPool->extMemPool.backend.ptrCanBeValid(object)) { 2971 if (isLargeObject<unknownMem>(object)) { 2972 // must check 1st for large object, because small object check touches 4 pages on left, 2973 // and it can be inaccessible 2974 TLSData *tls = defaultMemPool->getTLS(/*create=*/false); 2975 2976 defaultMemPool->putToLLOCache(tls, object); 2977 return; 2978 } else if (isSmallObject(object)) { 2979 freeSmallObject(object); 2980 return; 2981 } 2982 } 2983 if (original_free) 2984 original_free(object); 2985 } 2986 2987 /********* End the free code *************/ 2988 2989 /********* Code for scalable_realloc ***********/ 2990 2991 /* 2992 * From K&R 2993 * "realloc changes the size of the object pointed to by p to size. The contents will 2994 * be unchanged up to the minimum of the old and the new sizes. If the new size is larger, 2995 * the new space is uninitialized. realloc returns a pointer to the new space, or 2996 * nullptr if the request cannot be satisfied, in which case *p is unchanged." 2997 * 2998 */ 2999 extern "C" void* scalable_realloc(void* ptr, size_t size) 3000 { 3001 void *tmp; 3002 3003 if (!ptr) 3004 tmp = internalMalloc(size); 3005 else if (!size) { 3006 internalFree(ptr); 3007 return nullptr; 3008 } else 3009 tmp = reallocAligned(defaultMemPool, ptr, size, 0); 3010 3011 if (!tmp) errno = ENOMEM; 3012 return tmp; 3013 } 3014 3015 /* 3016 * A variant that provides additional memory safety, by checking whether the given address 3017 * was obtained with this allocator, and if not redirecting to the provided alternative call. 
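 *
 * Hedged usage sketch (the wrapper name is hypothetical; the dispatch matches the body
 * below): a malloc-replacement layer can forward as
 *     void *my_realloc(void *p, size_t n) {
 *         return __TBB_malloc_safer_realloc(p, n, original_realloc_info);
 *     }
 * where original_realloc_info is the original realloc function pointer on POSIX, or a
 * pointer to an orig_ptrs structure (msize/free) on Windows.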
3018 */ 3019 extern "C" TBBMALLOC_EXPORT void* __TBB_malloc_safer_realloc(void* ptr, size_t sz, void* original_realloc) 3020 { 3021 void *tmp; // TODO: fix warnings about uninitialized use of tmp 3022 3023 if (!ptr) { 3024 tmp = internalMalloc(sz); 3025 } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) { 3026 if (!sz) { 3027 internalFree(ptr); 3028 return nullptr; 3029 } else { 3030 tmp = reallocAligned(defaultMemPool, ptr, sz, 0); 3031 } 3032 } 3033 #if USE_WINTHREAD 3034 else if (original_realloc && sz) { 3035 orig_ptrs *original_ptrs = static_cast<orig_ptrs*>(original_realloc); 3036 if ( original_ptrs->msize ){ 3037 size_t oldSize = original_ptrs->msize(ptr); 3038 tmp = internalMalloc(sz); 3039 if (tmp) { 3040 memcpy(tmp, ptr, sz<oldSize? sz : oldSize); 3041 if ( original_ptrs->free ){ 3042 original_ptrs->free( ptr ); 3043 } 3044 } 3045 } else 3046 tmp = nullptr; 3047 } 3048 #else 3049 else if (original_realloc) { 3050 typedef void* (*realloc_ptr_t)(void*,size_t); 3051 realloc_ptr_t original_realloc_ptr; 3052 (void *&)original_realloc_ptr = original_realloc; 3053 tmp = original_realloc_ptr(ptr,sz); 3054 } 3055 #endif 3056 else tmp = nullptr; 3057 3058 if (!tmp) errno = ENOMEM; 3059 return tmp; 3060 } 3061 3062 /********* End code for scalable_realloc ***********/ 3063 3064 /********* Code for scalable_calloc ***********/ 3065 3066 /* 3067 * From K&R 3068 * calloc returns a pointer to space for an array of nobj objects, 3069 * each of size size, or nullptr if the request cannot be satisfied. 3070 * The space is initialized to zero bytes. 3071 * 3072 */ 3073 3074 extern "C" void * scalable_calloc(size_t nobj, size_t size) 3075 { 3076 // it's square root of maximal size_t value 3077 const size_t mult_not_overflow = size_t(1) << (sizeof(size_t)*CHAR_BIT/2); 3078 const size_t arraySize = nobj * size; 3079 3080 // check for overflow during multiplication: 3081 if (nobj>=mult_not_overflow || size>=mult_not_overflow) // 1) heuristic check 3082 if (nobj && arraySize / nobj != size) { // 2) exact check 3083 errno = ENOMEM; 3084 return nullptr; 3085 } 3086 void* result = internalMalloc(arraySize); 3087 if (result) 3088 memset(result, 0, arraySize); 3089 else 3090 errno = ENOMEM; 3091 return result; 3092 } 3093 3094 /********* End code for scalable_calloc ***********/ 3095 3096 /********* Code for aligned allocation API **********/ 3097 3098 extern "C" int scalable_posix_memalign(void **memptr, size_t alignment, size_t size) 3099 { 3100 if ( !isPowerOfTwoAtLeast(alignment, sizeof(void*)) ) 3101 return EINVAL; 3102 void *result = allocateAligned(defaultMemPool, size, alignment); 3103 if (!result) 3104 return ENOMEM; 3105 *memptr = result; 3106 return 0; 3107 } 3108 3109 extern "C" void * scalable_aligned_malloc(size_t size, size_t alignment) 3110 { 3111 if (!isPowerOfTwo(alignment) || 0==size) { 3112 errno = EINVAL; 3113 return nullptr; 3114 } 3115 void *tmp = allocateAligned(defaultMemPool, size, alignment); 3116 if (!tmp) errno = ENOMEM; 3117 return tmp; 3118 } 3119 3120 extern "C" void * scalable_aligned_realloc(void *ptr, size_t size, size_t alignment) 3121 { 3122 if (!isPowerOfTwo(alignment)) { 3123 errno = EINVAL; 3124 return nullptr; 3125 } 3126 void *tmp; 3127 3128 if (!ptr) 3129 tmp = allocateAligned(defaultMemPool, size, alignment); 3130 else if (!size) { 3131 scalable_free(ptr); 3132 return nullptr; 3133 } else 3134 tmp = reallocAligned(defaultMemPool, ptr, size, alignment); 3135 3136 if (!tmp) errno = ENOMEM; 3137 return tmp; 3138 } 3139 3140 extern "C" 
TBBMALLOC_EXPORT void * __TBB_malloc_safer_aligned_realloc(void *ptr, size_t size, size_t alignment, void* orig_function) 3141 { 3142 /* corner cases left out of reallocAligned to not deal with errno there */ 3143 if (!isPowerOfTwo(alignment)) { 3144 errno = EINVAL; 3145 return nullptr; 3146 } 3147 void *tmp = nullptr; 3148 3149 if (!ptr) { 3150 tmp = allocateAligned(defaultMemPool, size, alignment); 3151 } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) { 3152 if (!size) { 3153 internalFree(ptr); 3154 return nullptr; 3155 } else { 3156 tmp = reallocAligned(defaultMemPool, ptr, size, alignment); 3157 } 3158 } 3159 #if USE_WINTHREAD 3160 else { 3161 orig_aligned_ptrs *original_ptrs = static_cast<orig_aligned_ptrs*>(orig_function); 3162 if (size) { 3163 // Without orig_msize, we can't do anything with this. 3164 // Just keeping old pointer. 3165 if ( original_ptrs->aligned_msize ){ 3166 // set alignment and offset to have possibly correct oldSize 3167 size_t oldSize = original_ptrs->aligned_msize(ptr, sizeof(void*), 0); 3168 tmp = allocateAligned(defaultMemPool, size, alignment); 3169 if (tmp) { 3170 memcpy(tmp, ptr, size<oldSize? size : oldSize); 3171 if ( original_ptrs->aligned_free ){ 3172 original_ptrs->aligned_free( ptr ); 3173 } 3174 } 3175 } 3176 } else { 3177 if ( original_ptrs->aligned_free ){ 3178 original_ptrs->aligned_free( ptr ); 3179 } 3180 return nullptr; 3181 } 3182 } 3183 #else 3184 // As original_realloc can't align result, and there is no way to find 3185 // size of reallocating object, we are giving up. 3186 suppress_unused_warning(orig_function); 3187 #endif 3188 if (!tmp) errno = ENOMEM; 3189 return tmp; 3190 } 3191 3192 extern "C" void scalable_aligned_free(void *ptr) 3193 { 3194 internalFree(ptr); 3195 } 3196 3197 /********* end code for aligned allocation API **********/ 3198 3199 /********* Code for scalable_msize ***********/ 3200 3201 /* 3202 * Returns the size of a memory block allocated in the heap. 3203 */ 3204 extern "C" size_t scalable_msize(void* ptr) 3205 { 3206 if (ptr) { 3207 MALLOC_ASSERT(isRecognized(ptr), "Invalid pointer in scalable_msize detected."); 3208 return internalMsize(ptr); 3209 } 3210 errno = EINVAL; 3211 // Unlike _msize, return 0 in case of parameter error. 3212 // Returning size_t(-1) looks more like the way to troubles. 3213 return 0; 3214 } 3215 3216 /* 3217 * A variant that provides additional memory safety, by checking whether the given address 3218 * was obtained with this allocator, and if not redirecting to the provided alternative call. 
3219 */ 3220 extern "C" TBBMALLOC_EXPORT size_t __TBB_malloc_safer_msize(void *object, size_t (*original_msize)(void*)) 3221 { 3222 if (object) { 3223 // Check if the memory was allocated by scalable_malloc 3224 if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object)) 3225 return internalMsize(object); 3226 else if (original_msize) 3227 return original_msize(object); 3228 } 3229 // object is nullptr or unknown, or foreign and no original_msize 3230 #if USE_WINTHREAD 3231 errno = EINVAL; // errno expected to be set only on this platform 3232 #endif 3233 return 0; 3234 } 3235 3236 /* 3237 * The same as above but for _aligned_msize case 3238 */ 3239 extern "C" TBBMALLOC_EXPORT size_t __TBB_malloc_safer_aligned_msize(void *object, size_t alignment, size_t offset, size_t (*orig_aligned_msize)(void*,size_t,size_t)) 3240 { 3241 if (object) { 3242 // Check if the memory was allocated by scalable_malloc 3243 if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object)) 3244 return internalMsize(object); 3245 else if (orig_aligned_msize) 3246 return orig_aligned_msize(object,alignment,offset); 3247 } 3248 // object is nullptr or unknown 3249 errno = EINVAL; 3250 return 0; 3251 } 3252 3253 /********* End code for scalable_msize ***********/ 3254 3255 extern "C" int scalable_allocation_mode(int param, intptr_t value) 3256 { 3257 if (param == TBBMALLOC_SET_SOFT_HEAP_LIMIT) { 3258 defaultMemPool->extMemPool.backend.setRecommendedMaxSize((size_t)value); 3259 return TBBMALLOC_OK; 3260 } else if (param == USE_HUGE_PAGES) { 3261 #if __unix__ 3262 switch (value) { 3263 case 0: 3264 case 1: 3265 hugePages.setMode(value); 3266 return TBBMALLOC_OK; 3267 default: 3268 return TBBMALLOC_INVALID_PARAM; 3269 } 3270 #else 3271 return TBBMALLOC_NO_EFFECT; 3272 #endif 3273 #if __TBB_SOURCE_DIRECTLY_INCLUDED 3274 } else if (param == TBBMALLOC_INTERNAL_SOURCE_INCLUDED) { 3275 switch (value) { 3276 case 0: // used by dynamic library 3277 case 1: // used by static library or directly included sources 3278 usedBySrcIncluded = value; 3279 return TBBMALLOC_OK; 3280 default: 3281 return TBBMALLOC_INVALID_PARAM; 3282 } 3283 #endif 3284 } else if (param == TBBMALLOC_SET_HUGE_SIZE_THRESHOLD) { 3285 defaultMemPool->extMemPool.loc.setHugeSizeThreshold((size_t)value); 3286 return TBBMALLOC_OK; 3287 } 3288 return TBBMALLOC_INVALID_PARAM; 3289 } 3290 3291 extern "C" int scalable_allocation_command(int cmd, void *param) 3292 { 3293 if (param) 3294 return TBBMALLOC_INVALID_PARAM; 3295 3296 bool released = false; 3297 switch(cmd) { 3298 case TBBMALLOC_CLEAN_THREAD_BUFFERS: 3299 if (TLSData *tls = defaultMemPool->getTLS(/*create=*/false)) 3300 released = tls->externalCleanup(/*cleanOnlyUnused*/false, /*cleanBins=*/true); 3301 break; 3302 case TBBMALLOC_CLEAN_ALL_BUFFERS: 3303 released = defaultMemPool->extMemPool.hardCachesCleanup(true); 3304 break; 3305 default: 3306 return TBBMALLOC_INVALID_PARAM; 3307 } 3308 return released ? TBBMALLOC_OK : TBBMALLOC_NO_EFFECT; 3309 } 3310
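
/*
 * Usage sketch (illustration only, not part of the library): how the exported entry
 * points defined above are typically combined by a client. It assumes the public
 * header that declares them is available under the path shown.
 *
 *     #include "oneapi/tbb/scalable_allocator.h"
 *     #include <cstdio>
 *
 *     int main() {
 *         void *p = scalable_aligned_malloc(100, 64);      // aligned small object
 *         std::printf("usable size: %zu\n", scalable_msize(p));
 *         scalable_aligned_free(p);
 *         // ask the allocator to drop cached memory where possible
 *         scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr);
 *         return 0;
 *     }
 */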