/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include <errno.h>
#include <new>        /* for placement new */
#include <string.h>   /* for memset */

#include "oneapi/tbb/version.h"
#include "../tbb/environment.h"
#include "../tbb/itt_notify.h" // for __TBB_load_ittnotify()

#if USE_PTHREAD
#define TlsSetValue_func pthread_setspecific
#define TlsGetValue_func pthread_getspecific
#define GetMyTID() pthread_self()
#include <sched.h>
extern "C" { static void mallocThreadShutdownNotification(void*); }
#if __sun || __SUNPRO_CC
#define __asm__ asm
#endif
#include <unistd.h> // sysconf(_SC_PAGESIZE)
#elif USE_WINTHREAD
#define GetMyTID() GetCurrentThreadId()
#if __TBB_WIN8UI_SUPPORT
#include <thread>
#define TlsSetValue_func FlsSetValue
#define TlsGetValue_func FlsGetValue
#define TlsAlloc() FlsAlloc(nullptr)
#define TLS_ALLOC_FAILURE FLS_OUT_OF_INDEXES
#define TlsFree FlsFree
#else
#define TlsSetValue_func TlsSetValue
#define TlsGetValue_func TlsGetValue
#define TLS_ALLOC_FAILURE TLS_OUT_OF_INDEXES
#endif
#else
#error Must define USE_PTHREAD or USE_WINTHREAD
#endif

#define FREELIST_NONBLOCKING 1

namespace rml {
class MemoryPool;
namespace internal {

class Block;
class MemoryPool;

#if MALLOC_CHECK_RECURSION

inline bool isMallocInitialized();

#endif // MALLOC_CHECK_RECURSION

/** Support for handling the special UNUSABLE pointer state **/
const intptr_t UNUSABLE = 0x1;
inline bool isSolidPtr( void* ptr ) {
    return (UNUSABLE|(intptr_t)ptr)!=UNUSABLE;
}
inline bool isNotForUse( void* ptr ) {
    return (intptr_t)ptr==UNUSABLE;
}

/*
 * Block::objectSize value used to mark blocks allocated by startupAlloc
 */
const uint16_t startupAllocObjSizeMark = ~(uint16_t)0;

/*
 * The following constant is used to define the size of struct Block, the block header.
 * The intent is to have the size of a Block be a multiple of the cache line size; this allows us to
 * get good alignment at the cost of some overhead equal to the amount of padding included in the Block.
 */
const int blockHeaderAlignment = estimatedCacheLineSize;

/********* The data structures and global objects **************/

/*
 * The malloc routines themselves need to be able to occasionally malloc some space,
 * in order to set up the structures used by the thread local structures. This
 * routine performs that function.
 */
class BootStrapBlocks {
    MallocMutex bootStrapLock;
    Block      *bootStrapBlock;
    Block      *bootStrapBlockUsed;
    FreeObject *bootStrapObjectList;
public:
    void *allocate(MemoryPool *memPool, size_t size);
    void free(void* ptr);
    void reset();
};

#if USE_INTERNAL_TID
class ThreadId {
    static tls_key_t Tid_key;
    static std::atomic<intptr_t> ThreadCount;

    unsigned int id;

    static unsigned int tlsNumber() {
        unsigned int result = reinterpret_cast<intptr_t>(TlsGetValue_func(Tid_key));
        if( !result ) {
            RecursiveMallocCallProtector scoped;
            // Thread-local value is zero -> first call from this thread,
            // need to initialize with next ID value (IDs start from 1)
            result = ++ThreadCount; // returned new value!
            TlsSetValue_func( Tid_key, reinterpret_cast<void*>(result) );
        }
        return result;
    }
public:
    static bool init() {
#if USE_WINTHREAD
        Tid_key = TlsAlloc();
        if (Tid_key == TLS_ALLOC_FAILURE)
            return false;
#else
        int status = pthread_key_create( &Tid_key, nullptr );
        if ( status ) {
            fprintf (stderr, "The memory manager cannot create tls key during initialization\n");
            return false;
        }
#endif /* USE_WINTHREAD */
        return true;
    }
#if __TBB_SOURCE_DIRECTLY_INCLUDED
    static void destroy() {
        if( Tid_key ) {
#if USE_WINTHREAD
            BOOL status = !(TlsFree( Tid_key )); // fail is zero
#else
            int status = pthread_key_delete( Tid_key );
#endif /* USE_WINTHREAD */
            if ( status )
                fprintf (stderr, "The memory manager cannot delete tls key\n");
            Tid_key = 0;
        }
    }
#endif

    ThreadId() : id(ThreadId::tlsNumber()) {}
    bool isCurrentThreadId() const { return id == ThreadId::tlsNumber(); }

#if COLLECT_STATISTICS || MALLOC_TRACE
    friend unsigned int getThreadId() { return ThreadId::tlsNumber(); }
#endif
#if COLLECT_STATISTICS
    static unsigned getMaxThreadId() { return ThreadCount.load(std::memory_order_relaxed); }

    friend int STAT_increment(ThreadId tid, int bin, int ctr);
#endif
};

tls_key_t ThreadId::Tid_key;
std::atomic<intptr_t> ThreadId::ThreadCount;

#if COLLECT_STATISTICS
int STAT_increment(ThreadId tid, int bin, int ctr)
{
    return ::STAT_increment(tid.id, bin, ctr);
}
#endif

#else // USE_INTERNAL_TID

class ThreadId {
#if USE_PTHREAD
    std::atomic<pthread_t> tid;
#else
    std::atomic<DWORD>     tid;
#endif
public:
    ThreadId() : tid(GetMyTID()) {}
#if USE_PTHREAD
    bool isCurrentThreadId() const { return pthread_equal(pthread_self(), tid.load(std::memory_order_relaxed)); }
#else
    bool isCurrentThreadId() const { return GetCurrentThreadId() == tid.load(std::memory_order_relaxed); }
#endif
    ThreadId& operator=(const ThreadId& other) {
        tid.store(other.tid.load(std::memory_order_relaxed), std::memory_order_relaxed);
        return *this;
    }
    static bool init() { return true; }
#if __TBB_SOURCE_DIRECTLY_INCLUDED
    static void destroy() {}
#endif
};

#endif // USE_INTERNAL_TID

/*********** Code to provide thread ID and a thread-local void pointer **********/

bool TLSKey::init()
{
#if USE_WINTHREAD
    TLS_pointer_key = TlsAlloc();
    if (TLS_pointer_key == TLS_ALLOC_FAILURE)
        return false;
#else
    int status = pthread_key_create( &TLS_pointer_key, mallocThreadShutdownNotification );
    if ( status )
        return false;
#endif /* USE_WINTHREAD */
    return true;
}

bool TLSKey::destroy()
{
#if USE_WINTHREAD
    BOOL status1 = !(TlsFree(TLS_pointer_key)); // fail is zero
#else
    int status1 = pthread_key_delete(TLS_pointer_key);
#endif /* USE_WINTHREAD */
    MALLOC_ASSERT(!status1, "The memory manager cannot delete tls key.");
    return status1==0;
}

inline TLSData* TLSKey::getThreadMallocTLS() const
{
    return (TLSData *)TlsGetValue_func( TLS_pointer_key );
}

inline void TLSKey::setThreadMallocTLS( TLSData * newvalue ) {
    RecursiveMallocCallProtector scoped;
    TlsSetValue_func( TLS_pointer_key, newvalue );
}

/* The 'next' field in the block header has to maintain some invariants:
 *   it needs to be on a 16K boundary and the first field in the block.
 *   Any value stored there needs to have the lower 14 bits set to 0
 *   so that various asserts work. This means that if you want to smash this memory
 *   for debugging purposes you will need to obey this invariant.
 * The total size of the header needs to be a power of 2 to simplify
 * the alignment requirements. For now it is a 128 byte structure.
 * To avoid false sharing, the fields changed only locally are separated
 * from the fields changed by foreign threads.
 * Changing the size of the block header would require changing
 * some bin allocation sizes, in particular the "fitting" sizes (see below).
 */
class Bin;
class StartupBlock;

class MemoryPool {
    // if no explicit grainsize, expect to see malloc in user's pAlloc
    // and set reasonably low granularity
    static const size_t defaultGranularity = estimatedCacheLineSize;

    MemoryPool() = delete; // deny
public:
    static MallocMutex memPoolListLock;

    // list of all active pools is used to release
    // all TLS data on thread termination or library unload
    MemoryPool      *next,
                    *prev;
    ExtMemoryPool    extMemPool;
    BootStrapBlocks  bootStrapBlocks;

    static void initDefaultPool();

    bool init(intptr_t poolId, const MemPoolPolicy* memPoolPolicy);
    bool reset();
    bool destroy();
    void onThreadShutdown(TLSData *tlsData);

    inline TLSData *getTLS(bool create);
    void clearTLS() { extMemPool.tlsPointerKey.setThreadMallocTLS(nullptr); }

    Block *getEmptyBlock(size_t size);
    void returnEmptyBlock(Block *block, bool poolTheBlock);

    // get/put large object to/from local large object cache
    void *getFromLLOCache(TLSData *tls, size_t size, size_t alignment);
    void putToLLOCache(TLSData *tls, void *object);
};

static intptr_t defaultMemPool_space[sizeof(MemoryPool)/sizeof(intptr_t) +
                                     (sizeof(MemoryPool)%sizeof(intptr_t)? 1 : 0)];
static MemoryPool *defaultMemPool = (MemoryPool*)defaultMemPool_space;
const size_t MemoryPool::defaultGranularity;
// zero-initialized
MallocMutex MemoryPool::memPoolListLock;
// TODO: move huge page status to the default pool, because that's its state
HugePagesStatus hugePages;
static bool usedBySrcIncluded = false;

// Padding helpers
template<size_t padd>
struct PaddingImpl {
    size_t __padding[padd];
};

template<>
struct PaddingImpl<0> {};

template<int N>
struct Padding : PaddingImpl<N/sizeof(size_t)> {};

// Slab block is 16KB-aligned. To prevent false sharing, separate locally-accessed
// fields and fields commonly accessed by non-owner threads.
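// publicFreeList (below) is effectively a three-state pointer:
//  - nullptr: no objects have been freed to this block by foreign threads since the last privatization;
//  - UNUSABLE (0x1): end marker used while the block is orphaned or being shared, so that a foreign
//    free does not try to link the block into a bin's mailbox (see isNotForUse());
//  - a "solid" pointer: head of the list of foreign-freed objects, later reclaimed by the owner
//    via privatizePublicFreeList() (see isSolidPtr()).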
class GlobalBlockFields : public BlockI {
protected:
    std::atomic<FreeObject*> publicFreeList;
    std::atomic<Block*> nextPrivatizable;
    MemoryPool *poolPtr;
};

class LocalBlockFields : public GlobalBlockFields, Padding<blockHeaderAlignment - sizeof(GlobalBlockFields)> {
protected:
    Block       *next;
    Block       *previous;       /* Use double linked list to speed up removal */
    FreeObject  *bumpPtr;        /* Bump pointer moves from the end to the beginning of a block */
    FreeObject  *freeList;
    /* Pointer to local data for the owner thread. Used for fast finding of the TLS
       when releasing an object from a block that the current thread owned.
       nullptr for orphaned blocks. */
    std::atomic<TLSData*> tlsPtr;
    ThreadId     ownerTid;       /* the ID of the thread that owns or last owned the block */
    BackRefIdx   backRefIdx;
    uint16_t     allocatedCount; /* Number of objects allocated (obviously by the owning thread) */
    uint16_t     objectSize;
    bool         isFull;

    friend class FreeBlockPool;
    friend class StartupBlock;
    friend class LifoList;
    friend void *BootStrapBlocks::allocate(MemoryPool *, size_t);
    friend bool OrphanedBlocks::cleanup(Backend*);
    friend Block *MemoryPool::getEmptyBlock(size_t);
};

// Use inheritance to guarantee that user data starts on the next cache line.
// Can't use a member for that, because when LocalBlockFields already occupies a whole
// number of cache lines, there must be no additional memory consumption for any compiler.
class Block : public LocalBlockFields,
              Padding<2*blockHeaderAlignment - sizeof(LocalBlockFields)> {
public:
    bool empty() const {
        if (allocatedCount > 0) return false;
        MALLOC_ASSERT(!isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT);
        return true;
    }
    inline FreeObject* allocate();
    inline FreeObject *allocateFromFreeList();

    inline bool adjustFullness();
    void adjustPositionInBin(Bin* bin = nullptr);
#if MALLOC_DEBUG
    bool freeListNonNull() { return freeList; }
#endif
    void freePublicObject(FreeObject *objectToFree);
    inline void freeOwnObject(void *object);
    void reset();
    void privatizePublicFreeList( bool reset = true );
    void restoreBumpPtr();
    void privatizeOrphaned(TLSData *tls, unsigned index);
    bool readyToShare();
    void shareOrphaned(intptr_t binTag, unsigned index);
    unsigned int getSize() const {
        MALLOC_ASSERT(isStartupAllocObject() || objectSize<minLargeObjectSize,
                      "Invalid object size");
        return isStartupAllocObject()? 0 : objectSize;
    }
    const BackRefIdx *getBackRefIdx() const { return &backRefIdx; }
    inline bool isOwnedByCurrentThread() const;
    bool isStartupAllocObject() const { return objectSize == startupAllocObjSizeMark; }
    inline FreeObject *findObjectToFree(const void *object) const;
    void checkFreePrecond(const void *object) const {
#if MALLOC_DEBUG
        const char *msg = "Possible double free or heap corruption.";
        // small objects are always at least sizeof(size_t) bytes aligned;
        // try to check this before the dereference below, as for invalid objects
        // the memory may be unreadable
        MALLOC_ASSERT(isAligned(object, sizeof(size_t)), "Try to free invalid small object");
#if !__TBB_USE_THREAD_SANITIZER
        // releasing to free slab
        MALLOC_ASSERT(allocatedCount>0, msg);
#endif
        // must not point to slab's header
        MALLOC_ASSERT((uintptr_t)object - (uintptr_t)this >= sizeof(Block), msg);
        if (startupAllocObjSizeMark == objectSize) // startup block
            MALLOC_ASSERT(object<=bumpPtr, msg);
        else {
            // non-startup objects are 8 bytes aligned
            MALLOC_ASSERT(isAligned(object, 8), "Try to free invalid small object");
            FreeObject *toFree = findObjectToFree(object);
#if !__TBB_USE_THREAD_SANITIZER
            MALLOC_ASSERT(allocatedCount <= (slabSize-sizeof(Block))/objectSize
                          && (!bumpPtr || object>bumpPtr), msg);
            // check against the head of freeList, as this is mostly
            // expected after a double free
            MALLOC_ASSERT(toFree != freeList, msg);
#endif
            // check against the head of publicFreeList, to detect a double free
            // involving a foreign thread
            MALLOC_ASSERT(toFree != publicFreeList.load(std::memory_order_relaxed), msg);
        }
#else
        suppress_unused_warning(object);
#endif
    }
    void initEmptyBlock(TLSData *tls, size_t size);
    size_t findObjectSize(void *object) const;
    MemoryPool *getMemPool() const { return poolPtr; } // do not use on the hot path!

protected:
    void cleanBlockHeader();

private:
    static const float emptyEnoughRatio; /* Threshold on free space needed to "reactivate" a block */

    inline FreeObject *allocateFromBumpPtr();
    inline FreeObject *findAllocatedObject(const void *address) const;
#if MALLOC_DEBUG
    inline bool isProperlyPlaced(const void *object) const;
#endif
    inline void markOwned(TLSData *tls) {
        MALLOC_ASSERT(!tlsPtr.load(std::memory_order_relaxed), ASSERT_TEXT);
        ownerTid = ThreadId(); /* save the ID of the current thread */
        tlsPtr.store(tls, std::memory_order_relaxed);
    }
    inline void markOrphaned() {
        MALLOC_ASSERT(tlsPtr.load(std::memory_order_relaxed), ASSERT_TEXT);
        tlsPtr.store(nullptr, std::memory_order_relaxed);
    }

    friend class Bin;
    friend class TLSData;
    friend bool MemoryPool::destroy();
};

const float Block::emptyEnoughRatio = 1.0 / 4.0;

static_assert(sizeof(Block) <= 2*estimatedCacheLineSize,
    "The class Block does not fit into 2 cache lines on this platform. "
    "Defining USE_INTERNAL_TID may help to fix it.");

class Bin {
private:
public:
    Block      *activeBlk;
    std::atomic<Block*> mailbox;
    MallocMutex mailLock;

public:
    inline Block* getActiveBlock() const { return activeBlk; }
    void resetActiveBlock() { activeBlk = nullptr; }
    inline void setActiveBlock(Block *block);
    inline Block* setPreviousBlockActive();
    Block* getPrivatizedFreeListBlock();
    void moveBlockToFront(Block *block);
    bool cleanPublicFreeLists();
    void processEmptyBlock(Block *block, bool poolTheBlock);
    void addPublicFreeListBlock(Block* block);

    void outofTLSBin(Block* block);
    void verifyTLSBin(size_t size) const;
    void pushTLSBin(Block* block);

#if MALLOC_DEBUG
    void verifyInitState() const {
        MALLOC_ASSERT( !activeBlk, ASSERT_TEXT );
        MALLOC_ASSERT( !mailbox.load(std::memory_order_relaxed), ASSERT_TEXT );
    }
#endif

    friend void Block::freePublicObject (FreeObject *objectToFree);
};

/********* End of the data structures **************/

/*
 * There are bins for all 8 byte aligned objects less than this segregated size; 8 bins in total
 */
const uint32_t minSmallObjectIndex = 0;
const uint32_t numSmallObjectBins = 8;
const uint32_t maxSmallObjectSize = 64;

/*
 * There are 4 bins between each couple of powers of 2 [64-128-256-...]
 * from maxSmallObjectSize till this size; 16 bins in total
 */
const uint32_t minSegregatedObjectIndex = minSmallObjectIndex+numSmallObjectBins;
const uint32_t numSegregatedObjectBins = 16;
const uint32_t maxSegregatedObjectSize = 1024;

/*
 * And there are 5 bins with allocation sizes that are multiples of estimatedCacheLineSize
 * and selected to fit 9, 6, 4, 3, and 2 allocations in a block.
 */
const uint32_t minFittingIndex = minSegregatedObjectIndex+numSegregatedObjectBins;
const uint32_t numFittingBins = 5;

const uint32_t fittingAlignment = estimatedCacheLineSize;

#define SET_FITTING_SIZE(N) ( (slabSize-sizeof(Block))/N ) & ~(fittingAlignment-1)
// For blockSize=16*1024, sizeof(Block)=2*estimatedCacheLineSize and fittingAlignment=estimatedCacheLineSize,
// the comments show the fitting sizes and the amounts left unused for estimatedCacheLineSize=64/128:
const uint32_t fittingSize1 = SET_FITTING_SIZE(9); // 1792/1792 128/000
const uint32_t fittingSize2 = SET_FITTING_SIZE(6); // 2688/2688 128/000
const uint32_t fittingSize3 = SET_FITTING_SIZE(4); // 4032/3968 128/256
const uint32_t fittingSize4 = SET_FITTING_SIZE(3); // 5376/5376 128/000
const uint32_t fittingSize5 = SET_FITTING_SIZE(2); // 8128/8064 000/000
#undef SET_FITTING_SIZE

/*
 * The total number of thread-specific Block-based bins
 */
const uint32_t numBlockBins = minFittingIndex+numFittingBins;

/*
 * Objects of this size and larger are considered large objects.
 */
const uint32_t minLargeObjectSize = fittingSize5 + 1;

/*
 * Per-thread pool of slab blocks. The idea behind it is to not share with other
 * threads memory that is likely in the local cache(s) of our CPU.
 */
class FreeBlockPool {
private:
    std::atomic<Block*> head;
    int         size;
    Backend    *backend;
public:
    static const int POOL_HIGH_MARK = 32;
    static const int POOL_LOW_MARK  = 8;

    class ResOfGet {
        ResOfGet() = delete;
    public:
        Block* block;
        bool   lastAccMiss;
        ResOfGet(Block *b, bool lastMiss) : block(b), lastAccMiss(lastMiss) {}
    };

    // allocated in zero-initialized memory
    FreeBlockPool(Backend *bknd) : backend(bknd) {}
    ResOfGet getBlock();
    void returnBlock(Block *block);
    bool externalCleanup(); // can be called by another thread
};

template<int LOW_MARK, int HIGH_MARK>
class LocalLOCImpl {
private:
    static const size_t MAX_TOTAL_SIZE = 4*1024*1024;
    // TODO: can a singly-linked list be faster here?
    LargeMemoryBlock *tail; // needed when releasing on overflow
    std::atomic<LargeMemoryBlock*> head;
    size_t            totalSize;
    int               numOfBlocks;
public:
    bool put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool);
    LargeMemoryBlock *get(size_t size);
    bool externalCleanup(ExtMemoryPool *extMemPool);
#if __TBB_MALLOC_WHITEBOX_TEST
    LocalLOCImpl() : tail(nullptr), head(nullptr), totalSize(0), numOfBlocks(0) {}
    static size_t getMaxSize() { return MAX_TOTAL_SIZE; }
    static const int LOC_HIGH_MARK = HIGH_MARK;
#else
    // no ctor, object must be created in zero-initialized memory
#endif
};

typedef LocalLOCImpl<8,32> LocalLOC; // set production code parameters

class TLSData : public TLSRemote {
    MemoryPool   *memPool;
public:
    Bin           bin[numBlockBinLimit];
    FreeBlockPool freeSlabBlocks;
    LocalLOC      lloc;
    unsigned      currCacheIdx;
private:
    std::atomic<bool> unused;
public:
    TLSData(MemoryPool *mPool, Backend *bknd) : memPool(mPool), freeSlabBlocks(bknd), currCacheIdx(0) {}
    MemoryPool *getMemPool() const { return memPool; }
    Bin* getAllocationBin(size_t size);
    void release();
    bool externalCleanup(bool cleanOnlyUnused, bool cleanBins) {
        if (!unused.load(std::memory_order_relaxed) && cleanOnlyUnused) return false;
        // Heavy operation in terms of synchronization complexity,
        // should be called only for the current thread
        bool released = cleanBins ? cleanupBlockBins() : false;
        // both cleanups are to be called, and the order is not important
        bool lloc_cleaned = lloc.externalCleanup(&memPool->extMemPool);
        bool free_slab_blocks_cleaned = freeSlabBlocks.externalCleanup();
        return released || lloc_cleaned || free_slab_blocks_cleaned;
    }
    bool cleanupBlockBins();
    void markUsed() { unused.store(false, std::memory_order_relaxed); }  // called by owner when TLS touched
    void markUnused() { unused.store(true, std::memory_order_relaxed); } // can be called by a non-owner thread
};

TLSData *TLSKey::createTLS(MemoryPool *memPool, Backend *backend)
{
    MALLOC_ASSERT( sizeof(TLSData) >= sizeof(Bin) * numBlockBins + sizeof(FreeBlockPool), ASSERT_TEXT );
    TLSData* tls = (TLSData*) memPool->bootStrapBlocks.allocate(memPool, sizeof(TLSData));
    if ( !tls )
        return nullptr;
    new(tls) TLSData(memPool, backend);
    /* the block contains zeroes after bootStrapMalloc, so bins are initialized */
#if MALLOC_DEBUG
    for (uint32_t i = 0; i < numBlockBinLimit; i++)
        tls->bin[i].verifyInitState();
#endif
    setThreadMallocTLS(tls);
    memPool->extMemPool.allLocalCaches.registerThread(tls);
    return tls;
}

bool TLSData::cleanupBlockBins()
{
    bool released = false;
    for (uint32_t i = 0; i < numBlockBinLimit; i++) {
        released |= bin[i].cleanPublicFreeLists();
        // After cleaning public free lists, only the active block might be empty.
        // Do not use processEmptyBlock because it will just restore bumpPtr.
        Block *block = bin[i].getActiveBlock();
        if (block && block->empty()) {
            bin[i].outofTLSBin(block);
            memPool->returnEmptyBlock(block, /*poolTheBlock=*/false);
            released = true;
        }
    }
    return released;
}

bool ExtMemoryPool::releaseAllLocalCaches()
{
    // Iterate all registered TLS data and clean LLOC and Slab pools
    bool released = allLocalCaches.cleanup(/*cleanOnlyUnused=*/false);

    // Bins privatization is done only for the current thread
    if (TLSData *tlsData = tlsPointerKey.getThreadMallocTLS())
        released |= tlsData->cleanupBlockBins();

    return released;
}

void AllLocalCaches::registerThread(TLSRemote *tls)
{
    tls->prev = nullptr;
    MallocMutex::scoped_lock lock(listLock);
    MALLOC_ASSERT(head!=tls, ASSERT_TEXT);
    tls->next = head;
    if (head)
        head->prev = tls;
    head = tls;
    MALLOC_ASSERT(head->next!=head, ASSERT_TEXT);
}

void AllLocalCaches::unregisterThread(TLSRemote *tls)
{
    MallocMutex::scoped_lock lock(listLock);
    MALLOC_ASSERT(head, "Can't unregister thread: no threads are registered.");
    if (head == tls)
        head = tls->next;
    if (tls->next)
        tls->next->prev = tls->prev;
    if (tls->prev)
        tls->prev->next = tls->next;
    MALLOC_ASSERT(!tls->next || tls->next->next!=tls->next, ASSERT_TEXT);
}

bool AllLocalCaches::cleanup(bool cleanOnlyUnused)
{
    bool released = false;
    {
        MallocMutex::scoped_lock lock(listLock);
        for (TLSRemote *curr=head; curr; curr=curr->next)
            released |= static_cast<TLSData*>(curr)->externalCleanup(cleanOnlyUnused, /*cleanBins=*/false);
    }
    return released;
}

void AllLocalCaches::markUnused()
{
    bool locked = false;
    MallocMutex::scoped_lock lock(listLock, /*block=*/false, &locked);
    if (!locked) // do not wait for marking if someone is doing something with the list
        return;

    for (TLSRemote *curr=head; curr; curr=curr->next)
        static_cast<TLSData*>(curr)->markUnused();
}

#if MALLOC_CHECK_RECURSION
MallocMutex RecursiveMallocCallProtector::rmc_mutex;
std::atomic<pthread_t> RecursiveMallocCallProtector::owner_thread;
std::atomic<void*> RecursiveMallocCallProtector::autoObjPtr;
bool        RecursiveMallocCallProtector::mallocRecursionDetected;
#if __FreeBSD__
bool        RecursiveMallocCallProtector::canUsePthread;
#endif

#endif

/*********** End code to provide thread ID and a TLS pointer **********/

// Parameter for isLargeObject, keeps our expectations on memory origin.
// Assertions must use unknownMem to reliably report object invalidity.
enum MemoryOrigin {
    ourMem,    // allocated by TBB allocator
    unknownMem // can be allocated by system allocator or TBB allocator
};

template<MemoryOrigin>
#if __TBB_USE_THREAD_SANITIZER
// We have a real race when accessing the large object header for
// non large objects (e.g. small or foreign objects).
// Therefore, we need to hide this access from the thread sanitizer
__attribute__((no_sanitize("thread")))
#endif
bool isLargeObject(void *object);
static void *internalMalloc(size_t size);
static void internalFree(void *object);
static void *internalPoolMalloc(MemoryPool* mPool, size_t size);
static bool internalPoolFree(MemoryPool *mPool, void *object, size_t size);

#if !MALLOC_DEBUG
#if __INTEL_COMPILER || _MSC_VER
#define NOINLINE(decl) __declspec(noinline) decl
#define ALWAYSINLINE(decl) __forceinline decl
#elif __GNUC__
#define NOINLINE(decl) decl __attribute__ ((noinline))
#define ALWAYSINLINE(decl) decl __attribute__ ((always_inline))
#else
#define NOINLINE(decl) decl
#define ALWAYSINLINE(decl) decl
#endif

static NOINLINE( bool doInitialization() );
ALWAYSINLINE( bool isMallocInitialized() );

#undef ALWAYSINLINE
#undef NOINLINE
#endif /* !MALLOC_DEBUG */


/********* Now some rough utility code to deal with indexing the size bins. **************/

/*
 * Given a number, return the position of its highest non-zero bit. It is intended to work with 32-bit values only.
 * Moreover, on some platforms, for the sake of simplicity and performance, it is narrowed to only serve 64 to 1023.
 * This is enough for the current algorithm of distribution of sizes among bins.
 * __TBB_Log2 is not used here to minimize dependencies on TBB-specific sources.
 */
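// Illustrative reference values for the size-to-bin mapping below (assuming a 64-bit build,
// estimatedCacheLineSize == 64, sizeof(Block) == 128, and slabSize == 16*1024):
//   highestBitPos(64) == 6, highestBitPos(127) == 6, highestBitPos(1023) == 9;
//   a 24-byte request   -> bin 3,  object size 32 (kept 16-byte aligned);
//   a 100-byte request  -> bin 10, object size 112 (segregated group 65..128);
//   a 2000-byte request -> bin 25 (minFittingIndex+1), object size fittingSize2 == 2688;
//   requests of minLargeObjectSize (8129) bytes and more bypass these bins as large objects.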
#if _WIN64 && _MSC_VER>=1400 && !__INTEL_COMPILER
extern "C" unsigned char _BitScanReverse( unsigned long* i, unsigned long w );
#pragma intrinsic(_BitScanReverse)
#endif
static inline unsigned int highestBitPos(unsigned int n)
{
    MALLOC_ASSERT( n>=64 && n<1024, ASSERT_TEXT ); // only needed for bsr array lookup, but always true
    unsigned int pos;
#if __ARCH_x86_32||__ARCH_x86_64

# if __unix__||__APPLE__||__MINGW32__
    __asm__ ("bsr %1,%0" : "=r"(pos) : "r"(n));
# elif (_WIN32 && (!_WIN64 || __INTEL_COMPILER))
    __asm
    {
        bsr eax, n
        mov pos, eax
    }
# elif _WIN64 && _MSC_VER>=1400
    _BitScanReverse((unsigned long*)&pos, (unsigned long)n);
# else
#   error highestBitPos() not implemented for this platform
# endif
#elif __arm__
    __asm__ __volatile__
    (
        "clz %0, %1\n"
        "rsb %0, %0, %2\n"
        :"=r" (pos) :"r" (n), "I" (31)
    );
#else
    static unsigned int bsr[16] = {0/*N/A*/,6,7,7,8,8,8,8,9,9,9,9,9,9,9,9};
    pos = bsr[ n>>6 ];
#endif /* __ARCH_* */
    return pos;
}

unsigned int getSmallObjectIndex(unsigned int size)
{
    unsigned int result = (size-1)>>3;
    constexpr bool is_64bit = (8 == sizeof(void*));
    if (is_64bit) {
        // For 64-bit malloc, 16 byte alignment is needed except for bin 0.
        if (result) result |= 1; // 0,1,3,5,7; bins 2,4,6 are not aligned to 16 bytes
    }
    return result;
}

/*
 * Depending on indexRequest, for a given size return either the index into the bin
 * for objects of this size, or the actual size of objects in this bin.
 */
template<bool indexRequest>
static unsigned int getIndexOrObjectSize (unsigned int size)
{
    if (size <= maxSmallObjectSize) { // selection from 8/16/24/32/40/48/56/64
        unsigned int index = getSmallObjectIndex( size );
        /* Bin 0 is for 8 bytes, bin 1 is for 16, and so forth */
        return indexRequest ? index : (index+1)<<3;
    }
    else if (size <= maxSegregatedObjectSize ) { // 80/96/112/128 / 160/192/224/256 / 320/384/448/512 / 640/768/896/1024
        unsigned int order = highestBitPos(size-1); // which group of bin sizes?
        MALLOC_ASSERT( 6<=order && order<=9, ASSERT_TEXT );
        if (indexRequest)
            return minSegregatedObjectIndex - (4*6) - 4 + (4*order) + ((size-1)>>(order-2));
        else {
            unsigned int alignment = 128 >> (9-order); // alignment in the group
            MALLOC_ASSERT( alignment==16 || alignment==32 || alignment==64 || alignment==128, ASSERT_TEXT );
            return alignUp(size,alignment);
        }
    }
    else {
        if( size <= fittingSize3 ) {
            if( size <= fittingSize2 ) {
                if( size <= fittingSize1 )
                    return indexRequest ? minFittingIndex : fittingSize1;
                else
                    return indexRequest ? minFittingIndex+1 : fittingSize2;
            } else
                return indexRequest ? minFittingIndex+2 : fittingSize3;
        } else {
            if( size <= fittingSize5 ) {
                if( size <= fittingSize4 )
                    return indexRequest ? minFittingIndex+3 : fittingSize4;
                else
                    return indexRequest ? minFittingIndex+4 : fittingSize5;
            } else {
                MALLOC_ASSERT( 0,ASSERT_TEXT ); // this should not happen
                return ~0U;
            }
        }
    }
}

static unsigned int getIndex (unsigned int size)
{
    return getIndexOrObjectSize</*indexRequest=*/true>(size);
}

static unsigned int getObjectSize (unsigned int size)
{
    return getIndexOrObjectSize</*indexRequest=*/false>(size);
}


void *BootStrapBlocks::allocate(MemoryPool *memPool, size_t size)
{
    FreeObject *result;

    MALLOC_ASSERT( size == sizeof(TLSData), ASSERT_TEXT );

    { // Lock with acquire
        MallocMutex::scoped_lock scoped_cs(bootStrapLock);

        if( bootStrapObjectList) {
            result = bootStrapObjectList;
            bootStrapObjectList = bootStrapObjectList->next;
        } else {
            if (!bootStrapBlock) {
                bootStrapBlock = memPool->getEmptyBlock(size);
                if (!bootStrapBlock) return nullptr;
            }
            result = bootStrapBlock->bumpPtr;
            bootStrapBlock->bumpPtr = (FreeObject *)((uintptr_t)bootStrapBlock->bumpPtr - bootStrapBlock->objectSize);
            if ((uintptr_t)bootStrapBlock->bumpPtr < (uintptr_t)bootStrapBlock+sizeof(Block)) {
                bootStrapBlock->bumpPtr = nullptr;
                bootStrapBlock->next = bootStrapBlockUsed;
                bootStrapBlockUsed = bootStrapBlock;
                bootStrapBlock = nullptr;
            }
        }
    } // Unlock with release
    memset (result, 0, size);
    return (void*)result;
}

void BootStrapBlocks::free(void* ptr)
{
    MALLOC_ASSERT( ptr, ASSERT_TEXT );
    { // Lock with acquire
        MallocMutex::scoped_lock scoped_cs(bootStrapLock);
        ((FreeObject*)ptr)->next = bootStrapObjectList;
        bootStrapObjectList = (FreeObject*)ptr;
    } // Unlock with release
}

void BootStrapBlocks::reset()
{
    bootStrapBlock = bootStrapBlockUsed = nullptr;
    bootStrapObjectList = nullptr;
}

#if !(FREELIST_NONBLOCKING)
static MallocMutex publicFreeListLock; // lock for changes of publicFreeList
#endif

/********* End rough utility code **************/

/* LifoList assumes zero initialization so a vector of it can be created
 * by just allocating some space with no call to constructor.
 * On Linux, it seems to be necessary to avoid linking with C++ libraries.
 *
 * By usage convention there is no race on the initialization.
 */
LifoList::LifoList( ) : top(nullptr)
{
    // MallocMutex assumes zero initialization
    memset(static_cast<void*>(&lock), 0, sizeof(MallocMutex));
}

void LifoList::push(Block *block)
{
    MallocMutex::scoped_lock scoped_cs(lock);
    block->next = top.load(std::memory_order_relaxed);
    top.store(block, std::memory_order_relaxed);
}

Block *LifoList::pop()
{
    Block* block = nullptr;
    if (top.load(std::memory_order_relaxed)) {
        MallocMutex::scoped_lock scoped_cs(lock);
        block = top.load(std::memory_order_relaxed);
        if (block) {
            top.store(block->next, std::memory_order_relaxed);
        }
    }
    return block;
}

Block *LifoList::grab()
{
    Block *block = nullptr;
    if (top.load(std::memory_order_relaxed)) {
        MallocMutex::scoped_lock scoped_cs(lock);
        block = top.load(std::memory_order_relaxed);
        top.store(nullptr, std::memory_order_relaxed);
    }
    return block;
}

/********* Thread and block related code *************/

template<bool poolDestroy> void AllLargeBlocksList::releaseAll(Backend *backend) {
    LargeMemoryBlock *next, *lmb = loHead;
    loHead = nullptr;

    for (; lmb; lmb = next) {
        next = lmb->gNext;
        if (poolDestroy) {
            // as it's pool destruction, no need to return the object to the backend,
            // only remove backrefs, as they are global
            removeBackRef(lmb->backRefIdx);
        } else {
            // clean g(Next|Prev) to prevent removing lmb
            // from AllLargeBlocksList inside returnLargeObject
            lmb->gNext = lmb->gPrev = nullptr;
            backend->returnLargeObject(lmb);
        }
    }
}

TLSData* MemoryPool::getTLS(bool create)
{
    TLSData* tls = extMemPool.tlsPointerKey.getThreadMallocTLS();
    if (create && !tls)
        tls = extMemPool.tlsPointerKey.createTLS(this, &extMemPool.backend);
    return tls;
}

/*
 * Return the bin for the given size.
 */
inline Bin* TLSData::getAllocationBin(size_t size)
{
    return bin + getIndex(size);
}

/* Return an empty uninitialized block in a non-blocking fashion. */
Block *MemoryPool::getEmptyBlock(size_t size)
{
    TLSData* tls = getTLS(/*create=*/false);
    // try to use the per-thread cache, if TLS is available
    FreeBlockPool::ResOfGet resOfGet = tls?
        tls->freeSlabBlocks.getBlock() : FreeBlockPool::ResOfGet(nullptr, false);
    Block *result = resOfGet.block;

    if (!result) { // not found in the local cache, ask the backend for slabs
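        // On a miss in the per-thread pool, request several slabs at once
        // (Backend::numOfSlabAllocOnMiss) to amortize the trip to the backend;
        // the extra slabs are parked in this thread's freeSlabBlocks pool below.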
        int num = resOfGet.lastAccMiss? Backend::numOfSlabAllocOnMiss : 1;
        BackRefIdx backRefIdx[Backend::numOfSlabAllocOnMiss];

        result = static_cast<Block*>(extMemPool.backend.getSlabBlock(num));
        if (!result) return nullptr;

        if (!extMemPool.userPool())
            for (int i=0; i<num; i++) {
                backRefIdx[i] = BackRefIdx::newBackRef(/*largeObj=*/false);
                if (backRefIdx[i].isInvalid()) {
                    // roll back resource allocation
                    for (int j=0; j<i; j++)
                        removeBackRef(backRefIdx[j]);
                    Block *b = result;
                    for (int j=0; j<num; b=(Block*)((uintptr_t)b+slabSize), j++)
                        extMemPool.backend.putSlabBlock(b);
                    return nullptr;
                }
            }
        // resources were allocated, register blocks
        Block *b = result;
        for (int i=0; i<num; b=(Block*)((uintptr_t)b+slabSize), i++) {
            // slab block in user's pool must have invalid backRefIdx
            if (extMemPool.userPool()) {
                new (&b->backRefIdx) BackRefIdx();
            } else {
                setBackRef(backRefIdx[i], b);
                b->backRefIdx = backRefIdx[i];
            }
            b->tlsPtr.store(tls, std::memory_order_relaxed);
            b->poolPtr = this;
            // all but first one go to per-thread pool
            if (i > 0) {
                MALLOC_ASSERT(tls, ASSERT_TEXT);
                tls->freeSlabBlocks.returnBlock(b);
            }
        }
    }
    MALLOC_ASSERT(result, ASSERT_TEXT);
    result->initEmptyBlock(tls, size);
    STAT_increment(getThreadId(), getIndex(result->objectSize), allocBlockNew);
    return result;
}

void MemoryPool::returnEmptyBlock(Block *block, bool poolTheBlock)
{
    block->reset();
    if (poolTheBlock) {
        getTLS(/*create=*/false)->freeSlabBlocks.returnBlock(block);
    } else {
        // slab blocks in user's pools do not have valid backRefIdx
        if (!extMemPool.userPool())
            removeBackRef(*(block->getBackRefIdx()));
        extMemPool.backend.putSlabBlock(block);
    }
}

bool ExtMemoryPool::init(intptr_t poolId, rawAllocType rawAlloc,
                         rawFreeType rawFree, size_t granularity,
                         bool keepAllMemory, bool fixedPool)
{
    this->poolId = poolId;
    this->rawAlloc = rawAlloc;
    this->rawFree = rawFree;
    this->granularity = granularity;
    this->keepAllMemory = keepAllMemory;
    this->fixedPool = fixedPool;
    this->delayRegsReleasing = false;
    if (!initTLS())
        return false;
    loc.init(this);
    backend.init(this);
    MALLOC_ASSERT(isPoolValid(), nullptr);
    return true;
}

bool ExtMemoryPool::initTLS() { return tlsPointerKey.init(); }

bool MemoryPool::init(intptr_t poolId, const MemPoolPolicy *policy)
{
    if (!extMemPool.init(poolId, policy->pAlloc, policy->pFree,
                         policy->granularity? policy->granularity : defaultGranularity,
                         policy->keepAllMemory, policy->fixedPool))
        return false;
    {
        MallocMutex::scoped_lock lock(memPoolListLock);
        next = defaultMemPool->next;
        defaultMemPool->next = this;
        prev = defaultMemPool;
        if (next)
            next->prev = this;
    }
    return true;
}

bool MemoryPool::reset()
{
    MALLOC_ASSERT(extMemPool.userPool(), "No reset for the system pool.");
    // memory is not released during pool reset
    // TODO: mark regions to release unused on next reset()
    extMemPool.delayRegionsReleasing(true);

    bootStrapBlocks.reset();
    extMemPool.lmbList.releaseAll</*poolDestroy=*/false>(&extMemPool.backend);
    if (!extMemPool.reset())
        return false;

    if (!extMemPool.initTLS())
        return false;
    extMemPool.delayRegionsReleasing(false);
    return true;
}

bool MemoryPool::destroy()
{
#if __TBB_MALLOC_LOCACHE_STAT
    extMemPool.loc.reportStat(stdout);
#endif
#if __TBB_MALLOC_BACKEND_STAT
    extMemPool.backend.reportStat(stdout);
#endif
    {
        MallocMutex::scoped_lock lock(memPoolListLock);
        // remove itself from the global pool list
        if (prev)
            prev->next = next;
        if (next)
            next->prev = prev;
    }
    // slab blocks in non-default pool do not have backreferences,
    // only large objects do
    if (extMemPool.userPool())
        extMemPool.lmbList.releaseAll</*poolDestroy=*/true>(&extMemPool.backend);
    else {
        // only one non-userPool() is supported now
        MALLOC_ASSERT(this==defaultMemPool, nullptr);
        // Here and below in extMemPool.destroy(), do not restore the initial state
        // for a user pool, because it is just about to be released; but do restore it
        // for the system pool, because it will not be zeroed again on a subsequent reload.
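        // The default pool lives in statically allocated (zero-initialized) storage,
        // so resetting its bootstrap allocator and orphaned-block bins here gives the
        // same clean state if the allocator is initialized again.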
        bootStrapBlocks.reset();
        extMemPool.orphanedBlocks.reset();
    }
    return extMemPool.destroy();
}

void MemoryPool::onThreadShutdown(TLSData *tlsData)
{
    if (tlsData) { // might be called for "empty" TLS
        tlsData->release();
        bootStrapBlocks.free(tlsData);
        clearTLS();
    }
}

#if MALLOC_DEBUG
void Bin::verifyTLSBin (size_t size) const
{
    /* The debug version verifies the TLSBin as needed */
    uint32_t objSize = getObjectSize(size);

    if (activeBlk) {
        MALLOC_ASSERT( activeBlk->isOwnedByCurrentThread(), ASSERT_TEXT );
        MALLOC_ASSERT( activeBlk->objectSize == objSize, ASSERT_TEXT );
#if MALLOC_DEBUG>1
        for (Block* temp = activeBlk->next; temp; temp=temp->next) {
            MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT );
            MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT );
            MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
            MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
            if (temp->next) {
                MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
            }
        }
        for (Block* temp = activeBlk->previous; temp; temp=temp->previous) {
            MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT );
            MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT );
            MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
            MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
            if (temp->previous) {
                MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
            }
        }
#endif /* MALLOC_DEBUG>1 */
    }
}
#else /* MALLOC_DEBUG */
inline void Bin::verifyTLSBin (size_t) const { }
#endif /* MALLOC_DEBUG */

/*
 * Add a block to the start of this tls bin list.
 */
void Bin::pushTLSBin(Block* block)
{
    /* The objectSize is taken from the block rather than passed as a parameter
       because the function is applied to partially filled blocks as well */
    unsigned int size = block->objectSize;

    MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
    MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );
    MALLOC_ASSERT( block->next == nullptr, ASSERT_TEXT );
    MALLOC_ASSERT( block->previous == nullptr, ASSERT_TEXT );

    MALLOC_ASSERT( this, ASSERT_TEXT );
    verifyTLSBin(size);

    block->next = activeBlk;
    if( activeBlk ) {
        block->previous = activeBlk->previous;
        activeBlk->previous = block;
        if( block->previous )
            block->previous->next = block;
    } else {
        activeBlk = block;
    }

    verifyTLSBin(size);
}

/*
 * Take a block out of its tls bin (e.g. before removal).
 */
void Bin::outofTLSBin(Block* block)
{
    unsigned int size = block->objectSize;

    MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
    MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );

    MALLOC_ASSERT( this, ASSERT_TEXT );
    verifyTLSBin(size);

    if (block == activeBlk) {
        activeBlk = block->previous? block->previous : block->next;
    }
    /* Unlink the block */
    if (block->previous) {
        MALLOC_ASSERT( block->previous->next == block, ASSERT_TEXT );
        block->previous->next = block->next;
    }
    if (block->next) {
        MALLOC_ASSERT( block->next->previous == block, ASSERT_TEXT );
        block->next->previous = block->previous;
    }
    block->next = nullptr;
    block->previous = nullptr;

    verifyTLSBin(size);
}

Block* Bin::getPrivatizedFreeListBlock()
{
    Block* block;
    MALLOC_ASSERT( this, ASSERT_TEXT );
    // if this method is called, active block usage must be unsuccessful
    MALLOC_ASSERT( (!activeBlk && !mailbox.load(std::memory_order_relaxed)) || (activeBlk && activeBlk->isFull), ASSERT_TEXT );

    // the counter should be changed: STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    if (!mailbox.load(std::memory_order_acquire)) // hotpath is empty mailbox
        return nullptr;
    else { // mailbox is not empty, take lock and inspect it
        MallocMutex::scoped_lock scoped_cs(mailLock);
        block = mailbox.load(std::memory_order_relaxed);
        if( block ) {
            MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
            MALLOC_ASSERT( !isNotForUse(block->nextPrivatizable.load(std::memory_order_relaxed)), ASSERT_TEXT );
            mailbox.store(block->nextPrivatizable.load(std::memory_order_relaxed), std::memory_order_relaxed);
            block->nextPrivatizable.store((Block*)this, std::memory_order_relaxed);
        }
    }
    if( block ) {
        MALLOC_ASSERT( isSolidPtr(block->publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
        block->privatizePublicFreeList();
        block->adjustPositionInBin(this);
    }
    return block;
}

void Bin::addPublicFreeListBlock(Block* block)
{
    MallocMutex::scoped_lock scoped_cs(mailLock);
    block->nextPrivatizable.store(mailbox.load(std::memory_order_relaxed), std::memory_order_relaxed);
    mailbox.store(block, std::memory_order_relaxed);
}

// Process publicly freed objects in all blocks and return empty blocks
// to the backend in order to reduce overall footprint.
bool Bin::cleanPublicFreeLists()
{
    Block* block;
    if (!mailbox.load(std::memory_order_acquire))
        return false;
    else {
        // Grab all the blocks in the mailbox
        MallocMutex::scoped_lock scoped_cs(mailLock);
        block = mailbox.load(std::memory_order_relaxed);
        mailbox.store(nullptr, std::memory_order_relaxed);
    }
    bool released = false;
    while (block) {
        MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
        Block* tmp = block->nextPrivatizable.load(std::memory_order_relaxed);
        block->nextPrivatizable.store((Block*)this, std::memory_order_relaxed);
        block->privatizePublicFreeList();
        if (block->empty()) {
            processEmptyBlock(block, /*poolTheBlock=*/false);
            released = true;
        } else
            block->adjustPositionInBin(this);
        block = tmp;
    }
    return released;
}

bool Block::adjustFullness()
{
    if (bumpPtr) {
        /* If we are still using a bump ptr for this block it is empty enough to use. */
        STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
        isFull = false;
    } else {
        const float threshold = (slabSize - sizeof(Block)) * (1 - emptyEnoughRatio);
        /* allocatedCount shows how many objects in the block are in use; however it still counts
         * objects freed by other threads; a prior call to privatizePublicFreeList() is therefore recommended */
        isFull = (allocatedCount*objectSize > threshold) ? true : false;
#if COLLECT_STATISTICS
        if (isFull)
            STAT_increment(getThreadId(), getIndex(objectSize), examineNotEmpty);
        else
            STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
#endif
    }
    return isFull;
}

// This method resides in class Block, and not in class Bin, in order to avoid
// calling getAllocationBin on a reasonably hot path in Block::freeOwnObject
void Block::adjustPositionInBin(Bin* bin/*=nullptr*/)
{
    // If the block was full, but became empty enough to use,
    // move it to the front of the list
    if (isFull && !adjustFullness()) {
        if (!bin)
            bin = tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize);
        bin->moveBlockToFront(this);
    }
}

/* Restore the bump pointer for an empty block that is planned to be used */
void Block::restoreBumpPtr()
{
    MALLOC_ASSERT( allocatedCount == 0, ASSERT_TEXT );
    MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
    STAT_increment(getThreadId(), getIndex(objectSize), freeRestoreBumpPtr);
    bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);
    freeList = nullptr;
    isFull = false;
}

void Block::freeOwnObject(void *object)
{
    tlsPtr.load(std::memory_order_relaxed)->markUsed();
    allocatedCount--;
    MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
#if COLLECT_STATISTICS
    // Note that getAllocationBin is not called on the hottest path with statistics off.
    if (tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize)->getActiveBlock() != this)
        STAT_increment(getThreadId(), getIndex(objectSize), freeToInactiveBlock);
    else
        STAT_increment(getThreadId(), getIndex(objectSize), freeToActiveBlock);
#endif
    if (empty()) {
        // If the last object of a slab is freed, the slab cannot be marked full
        MALLOC_ASSERT(!isFull, ASSERT_TEXT);
        tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize)->processEmptyBlock(this, /*poolTheBlock=*/true);
    } else { // hot path
        FreeObject *objectToFree = findObjectToFree(object);
        objectToFree->next = freeList;
        freeList = objectToFree;
        adjustPositionInBin();
    }
}

void Block::freePublicObject (FreeObject *objectToFree)
{
    FreeObject* localPublicFreeList{};

    MALLOC_ITT_SYNC_RELEASING(&publicFreeList);
#if FREELIST_NONBLOCKING
    // TBB_REVAMP_TODO: make it non-atomic in the non-blocking scenario
    localPublicFreeList = publicFreeList.load(std::memory_order_relaxed);
    do {
        objectToFree->next = localPublicFreeList;
        // no backoff necessary because trying to make a change, not waiting for a change
    } while( !publicFreeList.compare_exchange_strong(localPublicFreeList, objectToFree) );
#else
    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    {
        MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
        localPublicFreeList = objectToFree->next = publicFreeList;
        publicFreeList = objectToFree;
    }
#endif

    if( localPublicFreeList==nullptr ) {
        // if the block is abandoned, its nextPrivatizable pointer should be UNUSABLE;
        // otherwise, it should point to the bin the block belongs to.
        // reading nextPrivatizable is thread-safe below, because:
        // 1) the executing thread atomically got publicFreeList==nullptr and changed it to non-nullptr;
        // 2) only the owning thread can change it back to nullptr,
        // 3) but that cannot be done until the block is put to the mailbox.
        // So the executing thread is now the only one that can change nextPrivatizable
        Block* next = nextPrivatizable.load(std::memory_order_acquire);
        if( !isNotForUse(next) ) {
            MALLOC_ASSERT( next!=nullptr, ASSERT_TEXT );
            Bin* theBin = (Bin*) next;
#if MALLOC_DEBUG && TBB_REVAMP_TODO
            // FIXME: The thread that returns the block is not the block's owner.
            // The below assertion compares 'theBin' against the caller's local bin, thus, it always fails.
            // Need to find a way to get the correct remote bin for comparison.
            { // check that nextPrivatizable points to the bin the block belongs to
                uint32_t index = getIndex( objectSize );
                TLSData* tls = getThreadMallocTLS();
                MALLOC_ASSERT( theBin==tls->bin+index, ASSERT_TEXT );
            }
#endif // MALLOC_DEBUG
            theBin->addPublicFreeListBlock(this);
        }
    }
    STAT_increment(getThreadId(), ThreadCommonCounters, freeToOtherThread);
    STAT_increment(ownerTid.load(std::memory_order_relaxed), getIndex(objectSize), freeByOtherThread);
}

// Make objects freed by other threads available for use again
void Block::privatizePublicFreeList( bool reset )
{
    FreeObject *localPublicFreeList;
    // If reset is false, publicFreeList should not be zeroed but set to UNUSABLE
    // to properly synchronize with other threads freeing objects to this slab.
    const intptr_t endMarker = reset ? 0 : UNUSABLE;

    // Only the owner thread may reset the pointer to nullptr
    MALLOC_ASSERT( isOwnedByCurrentThread() || !reset, ASSERT_TEXT );
#if FREELIST_NONBLOCKING
    localPublicFreeList = publicFreeList.exchange((FreeObject*)endMarker);
#else
    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    {
        MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
        localPublicFreeList = publicFreeList;
        publicFreeList = endMarker;
    }
#endif
    MALLOC_ITT_SYNC_ACQUIRED(&publicFreeList);
    MALLOC_ASSERT( !(reset && isNotForUse(publicFreeList)), ASSERT_TEXT );

    // publicFreeList must have been UNUSABLE or valid, but not nullptr
    MALLOC_ASSERT( localPublicFreeList!=nullptr, ASSERT_TEXT );
    if( isSolidPtr(localPublicFreeList) ) {
        MALLOC_ASSERT( allocatedCount <= (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
        /* other threads did not change the counter freeing our blocks */
        allocatedCount--;
        FreeObject *temp = localPublicFreeList;
        while( isSolidPtr(temp->next) ){ // the list will end with either nullptr or UNUSABLE
            temp = temp->next;
            allocatedCount--;
            MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
        }
        /* merge with local freeList */
        temp->next = freeList;
        freeList = localPublicFreeList;
        STAT_increment(getThreadId(), getIndex(objectSize), allocPrivatized);
    }
}

void Block::privatizeOrphaned(TLSData *tls, unsigned index)
{
    Bin* bin = tls->bin + index;
    STAT_increment(getThreadId(), index, allocBlockPublic);
    next = nullptr;
    previous = nullptr;
    MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) != nullptr, ASSERT_TEXT );
    /* There is not a race here since no other thread owns this block */
    markOwned(tls);
    // It is safe to change nextPrivatizable, as publicFreeList is not null
    MALLOC_ASSERT( isNotForUse(nextPrivatizable.load(std::memory_order_relaxed)), ASSERT_TEXT );
    nextPrivatizable.store((Block*)bin, std::memory_order_relaxed);
    // the next call is required to change publicFreeList to 0
    privatizePublicFreeList();
    if( empty() ) {
        restoreBumpPtr();
    } else {
        adjustFullness(); // check the block fullness and set isFull
    }
    MALLOC_ASSERT( !isNotForUse(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
}


bool Block::readyToShare()
{
    FreeObject* oldVal = nullptr;
#if FREELIST_NONBLOCKING
    publicFreeList.compare_exchange_strong(oldVal, (FreeObject*)UNUSABLE);
#else
    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    {
        MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
        if ( (oldVal=publicFreeList)==nullptr )
            (intptr_t&)(publicFreeList) = UNUSABLE;
    }
#endif
    return oldVal==nullptr;
}

void Block::shareOrphaned(intptr_t binTag, unsigned index)
{
    MALLOC_ASSERT( binTag, ASSERT_TEXT );
    // unreferenced formal parameter warning
    tbb::detail::suppress_unused_warning(index);
    STAT_increment(getThreadId(), index, freeBlockPublic);
    markOrphaned();
    if ((intptr_t)nextPrivatizable.load(std::memory_order_relaxed) == binTag) {
        // First check passed: the block is not in mailbox yet.
        // Need to set publicFreeList to non-zero, so other threads
        // will not change nextPrivatizable and it can be zeroed.
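        // readyToShare() below CAS-es publicFreeList from nullptr to UNUSABLE; if that fails,
        // a foreign thread has just freed an object here, and we spin until its
        // addPublicFreeListBlock() call re-links nextPrivatizable away from this bin tag.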
        if ( !readyToShare() ) {
            // another thread freed an object; we need to wait until it finishes.
            // There is no need for exponential backoff, as the wait here is not for a lock;
            // but we need to yield, so the thread we wait for has a chance to run.
            // TODO: add a pause to also be friendly to hyperthreads
            int count = 256;
            while ((intptr_t)nextPrivatizable.load(std::memory_order_relaxed) == binTag) {
                if (--count==0) {
                    do_yield();
                    count = 256;
                }
            }
        }
    }
    MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) !=nullptr, ASSERT_TEXT );
    // now it is safe to change our data
    previous = nullptr;
    // it is the caller's responsibility to ensure that the list of blocks
    // formed by nextPrivatizable pointers is kept consistent if required.
    // if only called from thread shutdown code, it does not matter.
    nextPrivatizable.store((Block*)UNUSABLE, std::memory_order_relaxed);
}

void Block::cleanBlockHeader()
{
    next = nullptr;
    previous = nullptr;
    freeList = nullptr;
    allocatedCount = 0;
    isFull = false;
    tlsPtr.store(nullptr, std::memory_order_relaxed);

    publicFreeList.store(nullptr, std::memory_order_relaxed);
}

void Block::initEmptyBlock(TLSData *tls, size_t size)
{
    // Having getIndex and getObjectSize called next to each other
    // allows better compiler optimization as they basically share the code.
    unsigned int index = getIndex(size);
    unsigned int objSz = getObjectSize(size);

    cleanBlockHeader();
    objectSize = objSz;
    markOwned(tls);
    // the bump pointer should be prepared for the first allocation - thus move it down by objectSize
    bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);

    // each block should have the address where the head of the list of "privatizable" blocks is kept;
    // the only exception is the bootstrap block, which is initialized while TLS is still nullptr
    nextPrivatizable.store( tls? (Block*)(tls->bin + index) : nullptr, std::memory_order_relaxed);
    TRACEF(( "[ScalableMalloc trace] Empty block %p is initialized, owner is %ld, objectSize is %d, bumpPtr is %p\n",
getThreadId() : -1, objectSize, bumpPtr )); 1591 } 1592 1593 Block *OrphanedBlocks::get(TLSData *tls, unsigned int size) 1594 { 1595 // TODO: try to use index from getAllocationBin 1596 unsigned int index = getIndex(size); 1597 Block *block = bins[index].pop(); 1598 if (block) { 1599 MALLOC_ITT_SYNC_ACQUIRED(bins+index); 1600 block->privatizeOrphaned(tls, index); 1601 } 1602 return block; 1603 } 1604 1605 void OrphanedBlocks::put(intptr_t binTag, Block *block) 1606 { 1607 unsigned int index = getIndex(block->getSize()); 1608 block->shareOrphaned(binTag, index); 1609 MALLOC_ITT_SYNC_RELEASING(bins+index); 1610 bins[index].push(block); 1611 } 1612 1613 void OrphanedBlocks::reset() 1614 { 1615 for (uint32_t i=0; i<numBlockBinLimit; i++) 1616 new (bins+i) LifoList(); 1617 } 1618 1619 bool OrphanedBlocks::cleanup(Backend* backend) 1620 { 1621 bool released = false; 1622 for (uint32_t i=0; i<numBlockBinLimit; i++) { 1623 Block* block = bins[i].grab(); 1624 MALLOC_ITT_SYNC_ACQUIRED(bins+i); 1625 while (block) { 1626 Block* next = block->next; 1627 block->privatizePublicFreeList( /*reset=*/false ); // do not set publicFreeList to nullptr 1628 if (block->empty()) { 1629 block->reset(); 1630 // slab blocks in user's pools do not have valid backRefIdx 1631 if (!backend->inUserPool()) 1632 removeBackRef(*(block->getBackRefIdx())); 1633 backend->putSlabBlock(block); 1634 released = true; 1635 } else { 1636 MALLOC_ITT_SYNC_RELEASING(bins+i); 1637 bins[i].push(block); 1638 } 1639 block = next; 1640 } 1641 } 1642 return released; 1643 } 1644 1645 FreeBlockPool::ResOfGet FreeBlockPool::getBlock() 1646 { 1647 Block *b = head.exchange(nullptr); 1648 bool lastAccessMiss; 1649 1650 if (b) { 1651 size--; 1652 Block *newHead = b->next; 1653 lastAccessMiss = false; 1654 head.store(newHead, std::memory_order_release); 1655 } else { 1656 lastAccessMiss = true; 1657 } 1658 return ResOfGet(b, lastAccessMiss); 1659 } 1660 1661 void FreeBlockPool::returnBlock(Block *block) 1662 { 1663 MALLOC_ASSERT( size <= POOL_HIGH_MARK, ASSERT_TEXT ); 1664 Block *localHead = head.exchange(nullptr); 1665 1666 if (!localHead) { 1667 size = 0; // head was stolen by externalClean, correct size accordingly 1668 } else if (size == POOL_HIGH_MARK) { 1669 // release cold blocks and add hot one, 1670 // so keep POOL_LOW_MARK-1 blocks and add new block to head 1671 Block *headToFree = localHead, *helper; 1672 for (int i=0; i<POOL_LOW_MARK-2; i++) 1673 headToFree = headToFree->next; 1674 Block *last = headToFree; 1675 headToFree = headToFree->next; 1676 last->next = nullptr; 1677 size = POOL_LOW_MARK-1; 1678 for (Block *currBl = headToFree; currBl; currBl = helper) { 1679 helper = currBl->next; 1680 // slab blocks in user's pools do not have valid backRefIdx 1681 if (!backend->inUserPool()) 1682 removeBackRef(currBl->backRefIdx); 1683 backend->putSlabBlock(currBl); 1684 } 1685 } 1686 size++; 1687 block->next = localHead; 1688 head.store(block, std::memory_order_release); 1689 } 1690 1691 bool FreeBlockPool::externalCleanup() 1692 { 1693 Block *helper; 1694 bool released = false; 1695 1696 for (Block *currBl=head.exchange(nullptr); currBl; currBl=helper) { 1697 helper = currBl->next; 1698 // slab blocks in user's pools do not have valid backRefIdx 1699 if (!backend->inUserPool()) 1700 removeBackRef(currBl->backRefIdx); 1701 backend->putSlabBlock(currBl); 1702 released = true; 1703 } 1704 return released; 1705 } 1706 1707 /* Prepare the block for returning to FreeBlockPool */ 1708 void Block::reset() 1709 { 1710 // it is caller's responsibility 
to ensure no data is lost before calling this 1711 MALLOC_ASSERT( allocatedCount==0, ASSERT_TEXT ); 1712 MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT ); 1713 if (!isStartupAllocObject()) 1714 STAT_increment(getThreadId(), getIndex(objectSize), freeBlockBack); 1715 1716 cleanBlockHeader(); 1717 1718 nextPrivatizable.store(nullptr, std::memory_order_relaxed); 1719 1720 objectSize = 0; 1721 // for an empty block, bump pointer should point right after the end of the block 1722 bumpPtr = (FreeObject *)((uintptr_t)this + slabSize); 1723 } 1724 1725 inline void Bin::setActiveBlock (Block *block) 1726 { 1727 // MALLOC_ASSERT( bin, ASSERT_TEXT ); 1728 MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT ); 1729 // it is the caller responsibility to keep bin consistence (i.e. ensure this block is in the bin list) 1730 activeBlk = block; 1731 } 1732 1733 inline Block* Bin::setPreviousBlockActive() 1734 { 1735 MALLOC_ASSERT( activeBlk, ASSERT_TEXT ); 1736 Block* temp = activeBlk->previous; 1737 if( temp ) { 1738 MALLOC_ASSERT( !(temp->isFull), ASSERT_TEXT ); 1739 activeBlk = temp; 1740 } 1741 return temp; 1742 } 1743 1744 inline bool Block::isOwnedByCurrentThread() const { 1745 return tlsPtr.load(std::memory_order_relaxed) && ownerTid.isCurrentThreadId(); 1746 } 1747 1748 FreeObject *Block::findObjectToFree(const void *object) const 1749 { 1750 FreeObject *objectToFree; 1751 // Due to aligned allocations, a pointer passed to scalable_free 1752 // might differ from the address of internally allocated object. 1753 // Small objects however should always be fine. 1754 if (objectSize <= maxSegregatedObjectSize) 1755 objectToFree = (FreeObject*)object; 1756 // "Fitting size" allocations are suspicious if aligned higher than naturally 1757 else { 1758 if ( ! isAligned(object,2*fittingAlignment) ) 1759 // TODO: the above check is questionable - it gives false negatives in ~50% cases, 1760 // so might even be slower in average than unconditional use of findAllocatedObject. 
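// Note: pointers produced by the align-up path of allocateAligned() for such sizes are
// aligned to more than fittingAlignment, so an address that is not even aligned to
// 2*fittingAlignment cannot be such an adjusted pointer and must be the real object start.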
1761 // here it should be a "real" object 1762 objectToFree = (FreeObject*)object; 1763 else 1764 // here object can be an aligned address, so applying additional checks 1765 objectToFree = findAllocatedObject(object); 1766 MALLOC_ASSERT( isAligned(objectToFree,fittingAlignment), ASSERT_TEXT ); 1767 } 1768 MALLOC_ASSERT( isProperlyPlaced(objectToFree), ASSERT_TEXT ); 1769 1770 return objectToFree; 1771 } 1772 1773 void TLSData::release() 1774 { 1775 memPool->extMemPool.allLocalCaches.unregisterThread(this); 1776 externalCleanup(/*cleanOnlyUnused=*/false, /*cleanBins=*/false); 1777 1778 for (unsigned index = 0; index < numBlockBins; index++) { 1779 Block *activeBlk = bin[index].getActiveBlock(); 1780 if (!activeBlk) 1781 continue; 1782 Block *threadlessBlock = activeBlk->previous; 1783 bool syncOnMailbox = false; 1784 while (threadlessBlock) { 1785 Block *threadBlock = threadlessBlock->previous; 1786 if (threadlessBlock->empty()) { 1787 /* we destroy the thread, so not use its block pool */ 1788 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false); 1789 } else { 1790 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock); 1791 syncOnMailbox = true; 1792 } 1793 threadlessBlock = threadBlock; 1794 } 1795 threadlessBlock = activeBlk; 1796 while (threadlessBlock) { 1797 Block *threadBlock = threadlessBlock->next; 1798 if (threadlessBlock->empty()) { 1799 /* we destroy the thread, so not use its block pool */ 1800 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false); 1801 } else { 1802 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock); 1803 syncOnMailbox = true; 1804 } 1805 threadlessBlock = threadBlock; 1806 } 1807 bin[index].resetActiveBlock(); 1808 1809 if (syncOnMailbox) { 1810 // Although, we synchronized on nextPrivatizable inside a block, we still need to 1811 // synchronize on the bin lifetime because the thread releasing an object into the public 1812 // free list is touching the bin (mailbox and mailLock) 1813 MallocMutex::scoped_lock scoped_cs(bin[index].mailLock); 1814 } 1815 } 1816 } 1817 1818 1819 #if MALLOC_CHECK_RECURSION 1820 // TODO: Use dedicated heap for this 1821 1822 /* 1823 * It's a special kind of allocation that can be used when malloc is 1824 * not available (either during startup or when malloc was already called and 1825 * we are, say, inside pthread_setspecific's call). 1826 * Block can contain objects of different sizes, 1827 * allocations are performed by moving bump pointer and increasing of object counter, 1828 * releasing is done via counter of objects allocated in the block 1829 * or moving bump pointer if releasing object is on a bound. 1830 * TODO: make bump pointer to grow to the same backward direction as all the others. 
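 *
 * Layout sketch (illustrative): each startup allocation is preceded by a size_t holding
 * the requested size, so StartupBlock::msize(ptr) simply reads ((size_t*)ptr)[-1]:
 *   [StartupBlock header][size0][obj0][size1][obj1]...   <- bumpPtr advances upward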
1831 */ 1832 1833 class StartupBlock : public Block { 1834 size_t availableSize() const { 1835 return slabSize - ((uintptr_t)bumpPtr - (uintptr_t)this); 1836 } 1837 static StartupBlock *getBlock(); 1838 public: 1839 static FreeObject *allocate(size_t size); 1840 static size_t msize(void *ptr) { return *((size_t*)ptr - 1); } 1841 void free(void *ptr); 1842 }; 1843 1844 static MallocMutex startupMallocLock; 1845 static StartupBlock *firstStartupBlock; 1846 1847 StartupBlock *StartupBlock::getBlock() 1848 { 1849 BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/false); 1850 if (backRefIdx.isInvalid()) return nullptr; 1851 1852 StartupBlock *block = static_cast<StartupBlock*>( 1853 defaultMemPool->extMemPool.backend.getSlabBlock(1)); 1854 if (!block) return nullptr; 1855 1856 block->cleanBlockHeader(); 1857 setBackRef(backRefIdx, block); 1858 block->backRefIdx = backRefIdx; 1859 // use startupAllocObjSizeMark to mark objects from startup block marker 1860 block->objectSize = startupAllocObjSizeMark; 1861 block->bumpPtr = (FreeObject *)((uintptr_t)block + sizeof(StartupBlock)); 1862 return block; 1863 } 1864 1865 FreeObject *StartupBlock::allocate(size_t size) 1866 { 1867 FreeObject *result; 1868 StartupBlock *newBlock = nullptr; 1869 1870 /* Objects must be aligned on their natural bounds, 1871 and objects bigger than word on word's bound. */ 1872 size = alignUp(size, sizeof(size_t)); 1873 // We need size of an object to implement msize. 1874 size_t reqSize = size + sizeof(size_t); 1875 { 1876 MallocMutex::scoped_lock scoped_cs(startupMallocLock); 1877 // Re-check whether we need a new block (conditions might have changed) 1878 if (!firstStartupBlock || firstStartupBlock->availableSize() < reqSize) { 1879 if (!newBlock) { 1880 newBlock = StartupBlock::getBlock(); 1881 if (!newBlock) return nullptr; 1882 } 1883 newBlock->next = (Block*)firstStartupBlock; 1884 if (firstStartupBlock) 1885 firstStartupBlock->previous = (Block*)newBlock; 1886 firstStartupBlock = newBlock; 1887 } 1888 result = firstStartupBlock->bumpPtr; 1889 firstStartupBlock->allocatedCount++; 1890 firstStartupBlock->bumpPtr = 1891 (FreeObject *)((uintptr_t)firstStartupBlock->bumpPtr + reqSize); 1892 } 1893 1894 // keep object size at the negative offset 1895 *((size_t*)result) = size; 1896 return (FreeObject*)((size_t*)result+1); 1897 } 1898 1899 void StartupBlock::free(void *ptr) 1900 { 1901 Block* blockToRelease = nullptr; 1902 { 1903 MallocMutex::scoped_lock scoped_cs(startupMallocLock); 1904 1905 MALLOC_ASSERT(firstStartupBlock, ASSERT_TEXT); 1906 MALLOC_ASSERT(startupAllocObjSizeMark==objectSize 1907 && allocatedCount>0, ASSERT_TEXT); 1908 MALLOC_ASSERT((uintptr_t)ptr>=(uintptr_t)this+sizeof(StartupBlock) 1909 && (uintptr_t)ptr+StartupBlock::msize(ptr)<=(uintptr_t)this+slabSize, 1910 ASSERT_TEXT); 1911 if (0 == --allocatedCount) { 1912 if (this == firstStartupBlock) 1913 firstStartupBlock = (StartupBlock*)firstStartupBlock->next; 1914 if (previous) 1915 previous->next = next; 1916 if (next) 1917 next->previous = previous; 1918 blockToRelease = this; 1919 } else if ((uintptr_t)ptr + StartupBlock::msize(ptr) == (uintptr_t)bumpPtr) { 1920 // last object in the block released 1921 FreeObject *newBump = (FreeObject*)((size_t*)ptr - 1); 1922 MALLOC_ASSERT((uintptr_t)newBump>(uintptr_t)this+sizeof(StartupBlock), 1923 ASSERT_TEXT); 1924 bumpPtr = newBump; 1925 } 1926 } 1927 if (blockToRelease) { 1928 blockToRelease->previous = blockToRelease->next = nullptr; 1929 defaultMemPool->returnEmptyBlock(blockToRelease, 
/*poolTheBlock=*/false); 1930 } 1931 } 1932 1933 #endif /* MALLOC_CHECK_RECURSION */ 1934 1935 /********* End thread related code *************/ 1936 1937 /********* Library initialization *************/ 1938 1939 //! Value indicating the state of initialization. 1940 /* 0 = initialization not started. 1941 * 1 = initialization started but not finished. 1942 * 2 = initialization finished. 1943 * In theory, we only need values 0 and 2. But value 1 is nonetheless 1944 * useful for detecting errors in the double-check pattern. 1945 */ 1946 static std::atomic<intptr_t> mallocInitialized{0}; // implicitly initialized to 0 1947 static MallocMutex initMutex; 1948 1949 /** The leading "\0" is here so that applying "strings" to the binary 1950 delivers a clean result. */ 1951 static char VersionString[] = "\0" TBBMALLOC_VERSION_STRINGS; 1952 1953 #if USE_PTHREAD && __TBB_SOURCE_DIRECTLY_INCLUDED 1954 1955 /* Decrease race interval between dynamic library unloading and pthread key 1956 destructor. Protect only Pthreads with supported unloading. */ 1957 class ShutdownSync { 1958 /* flag is the number of threads in pthread key dtor body 1959 (i.e., between threadDtorStart() and threadDtorDone()) 1960 or the signal to skip dtor, if flag < 0 */ 1961 std::atomic<intptr_t> flag; 1962 static const intptr_t skipDtor = INTPTR_MIN/2; 1963 public: 1964 void init() { flag.store(0, std::memory_order_release); } 1965 /* Suppose that 2*abs(skipDtor) or more threads never call threadDtorStart() 1966 simultaneously, so flag never becomes negative because of that. */ 1967 bool threadDtorStart() { 1968 if (flag.load(std::memory_order_acquire) < 0) 1969 return false; 1970 if (++flag <= 0) { // note that new value returned 1971 flag.fetch_sub(1); // flag is spoiled by us, restore it 1972 return false; 1973 } 1974 return true; 1975 } 1976 void threadDtorDone() { 1977 flag.fetch_sub(1); 1978 } 1979 void processExit() { 1980 if (flag.fetch_add(skipDtor) != 0) { 1981 SpinWaitUntilEq(flag, skipDtor); 1982 } 1983 } 1984 }; 1985 1986 #else 1987 1988 class ShutdownSync { 1989 public: 1990 void init() { } 1991 bool threadDtorStart() { return true; } 1992 void threadDtorDone() { } 1993 void processExit() { } 1994 }; 1995 1996 #endif // USE_PTHREAD && __TBB_SOURCE_DIRECTLY_INCLUDED 1997 1998 static ShutdownSync shutdownSync; 1999 2000 inline bool isMallocInitialized() { 2001 // Load must have acquire fence; otherwise thread taking "initialized" path 2002 // might perform textually later loads *before* mallocInitialized becomes 2. 2003 return 2 == mallocInitialized.load(std::memory_order_acquire); 2004 } 2005 2006 /* Caller is responsible for ensuring this routine is called exactly once. */ 2007 extern "C" void MallocInitializeITT() { 2008 #if __TBB_USE_ITT_NOTIFY 2009 if (!usedBySrcIncluded) 2010 tbb::detail::r1::__TBB_load_ittnotify(); 2011 #endif 2012 } 2013 2014 void MemoryPool::initDefaultPool() { 2015 hugePages.init(); 2016 } 2017 2018 /* 2019 * Allocator initialization routine; 2020 * it is called lazily on the very first scalable_malloc call. 
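 *
 * Initialization follows a double-checked pattern: the fast path is isMallocInitialized(),
 * an acquire load of mallocInitialized, while the slow path, doInitialization(), serializes
 * on initMutex and publishes the fully constructed state with a release store of 2.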
2021 */ 2022 static bool initMemoryManager() 2023 { 2024 TRACEF(( "[ScalableMalloc trace] sizeof(Block) is %d (expected 128); sizeof(uintptr_t) is %d\n", 2025 sizeof(Block), sizeof(uintptr_t) )); 2026 MALLOC_ASSERT( 2*blockHeaderAlignment == sizeof(Block), ASSERT_TEXT ); 2027 MALLOC_ASSERT( sizeof(FreeObject) == sizeof(void*), ASSERT_TEXT ); 2028 MALLOC_ASSERT( isAligned(defaultMemPool, sizeof(intptr_t)), 2029 "Memory pool must be void*-aligned for atomic to work over aligned arguments."); 2030 2031 #if USE_WINTHREAD 2032 const size_t granularity = 64*1024; // granulatity of VirtualAlloc 2033 #else 2034 // POSIX.1-2001-compliant way to get page size 2035 const size_t granularity = sysconf(_SC_PAGESIZE); 2036 #endif 2037 if (!defaultMemPool) { 2038 // Do not rely on static constructors and do the assignment in case 2039 // of library static section not initialized at this call yet. 2040 defaultMemPool = (MemoryPool*)defaultMemPool_space; 2041 } 2042 bool initOk = defaultMemPool-> 2043 extMemPool.init(0, nullptr, nullptr, granularity, 2044 /*keepAllMemory=*/false, /*fixedPool=*/false); 2045 // TODO: extMemPool.init() to not allocate memory 2046 if (!initOk || !initBackRefMain(&defaultMemPool->extMemPool.backend) || !ThreadId::init()) 2047 return false; 2048 MemoryPool::initDefaultPool(); 2049 // init() is required iff initMemoryManager() is called 2050 // after mallocProcessShutdownNotification() 2051 shutdownSync.init(); 2052 #if COLLECT_STATISTICS 2053 initStatisticsCollection(); 2054 #endif 2055 return true; 2056 } 2057 2058 static bool GetBoolEnvironmentVariable(const char* name) { 2059 return tbb::detail::r1::GetBoolEnvironmentVariable(name); 2060 } 2061 2062 //! Ensures that initMemoryManager() is called once and only once. 2063 /** Does not return until initMemoryManager() has been completed by a thread. 2064 There is no need to call this routine if mallocInitialized==2 . */ 2065 static bool doInitialization() 2066 { 2067 MallocMutex::scoped_lock lock( initMutex ); 2068 if (mallocInitialized.load(std::memory_order_relaxed)!=2) { 2069 MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==0, ASSERT_TEXT ); 2070 mallocInitialized.store(1, std::memory_order_relaxed); 2071 RecursiveMallocCallProtector scoped; 2072 if (!initMemoryManager()) { 2073 mallocInitialized.store(0, std::memory_order_relaxed); // restore and out 2074 return false; 2075 } 2076 #ifdef MALLOC_EXTRA_INITIALIZATION 2077 MALLOC_EXTRA_INITIALIZATION; 2078 #endif 2079 #if MALLOC_CHECK_RECURSION 2080 RecursiveMallocCallProtector::detectNaiveOverload(); 2081 #endif 2082 MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==1, ASSERT_TEXT ); 2083 // Store must have release fence, otherwise mallocInitialized==2 2084 // might become remotely visible before side effects of 2085 // initMemoryManager() become remotely visible. 
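// (the matching acquire loads are in isMallocInitialized() and the __TBB_malloc_safer_* entry points)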
2086 mallocInitialized.store(2, std::memory_order_release); 2087 if( GetBoolEnvironmentVariable("TBB_VERSION") ) { 2088 fputs(VersionString+1,stderr); 2089 hugePages.printStatus(); 2090 } 2091 } 2092 /* It can't be 0 or I would have initialized it */ 2093 MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==2, ASSERT_TEXT ); 2094 return true; 2095 } 2096 2097 /********* End library initialization *************/ 2098 2099 /********* The malloc show begins *************/ 2100 2101 2102 FreeObject *Block::allocateFromFreeList() 2103 { 2104 FreeObject *result; 2105 2106 if (!freeList) return nullptr; 2107 2108 result = freeList; 2109 MALLOC_ASSERT( result, ASSERT_TEXT ); 2110 2111 freeList = result->next; 2112 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 2113 allocatedCount++; 2114 STAT_increment(getThreadId(), getIndex(objectSize), allocFreeListUsed); 2115 2116 return result; 2117 } 2118 2119 FreeObject *Block::allocateFromBumpPtr() 2120 { 2121 FreeObject *result = bumpPtr; 2122 if (result) { 2123 bumpPtr = (FreeObject *) ((uintptr_t) bumpPtr - objectSize); 2124 if ( (uintptr_t)bumpPtr < (uintptr_t)this+sizeof(Block) ) { 2125 bumpPtr = nullptr; 2126 } 2127 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 2128 allocatedCount++; 2129 STAT_increment(getThreadId(), getIndex(objectSize), allocBumpPtrUsed); 2130 } 2131 return result; 2132 } 2133 2134 inline FreeObject* Block::allocate() 2135 { 2136 MALLOC_ASSERT( isOwnedByCurrentThread(), ASSERT_TEXT ); 2137 2138 /* for better cache locality, first looking in the free list. */ 2139 if ( FreeObject *result = allocateFromFreeList() ) { 2140 return result; 2141 } 2142 MALLOC_ASSERT( !freeList, ASSERT_TEXT ); 2143 2144 /* if free list is empty, try thread local bump pointer allocation. */ 2145 if ( FreeObject *result = allocateFromBumpPtr() ) { 2146 return result; 2147 } 2148 MALLOC_ASSERT( !bumpPtr, ASSERT_TEXT ); 2149 2150 /* the block is considered full. */ 2151 isFull = true; 2152 return nullptr; 2153 } 2154 2155 size_t Block::findObjectSize(void *object) const 2156 { 2157 size_t blSize = getSize(); 2158 #if MALLOC_CHECK_RECURSION 2159 // Currently, there is no aligned allocations from startup blocks, 2160 // so we can return just StartupBlock::msize(). 2161 // TODO: This must be extended if we add aligned allocation from startup blocks. 
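// A zero blSize here identifies an object from a startup block, whose per-object sizes are kept separately.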
2162 if (!blSize) 2163 return StartupBlock::msize(object); 2164 #endif 2165 // object can be aligned, so real size can be less than block's 2166 size_t size = 2167 blSize - ((uintptr_t)object - (uintptr_t)findObjectToFree(object)); 2168 MALLOC_ASSERT(size>0 && size<minLargeObjectSize, ASSERT_TEXT); 2169 return size; 2170 } 2171 2172 void Bin::moveBlockToFront(Block *block) 2173 { 2174 /* move the block to the front of the bin */ 2175 if (block == activeBlk) return; 2176 outofTLSBin(block); 2177 pushTLSBin(block); 2178 } 2179 2180 void Bin::processEmptyBlock(Block *block, bool poolTheBlock) 2181 { 2182 if (block != activeBlk) { 2183 /* We are not using this block; return it to the pool */ 2184 outofTLSBin(block); 2185 block->getMemPool()->returnEmptyBlock(block, poolTheBlock); 2186 } else { 2187 /* all objects are free - let's restore the bump pointer */ 2188 block->restoreBumpPtr(); 2189 } 2190 } 2191 2192 template<int LOW_MARK, int HIGH_MARK> 2193 bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool) 2194 { 2195 const size_t size = object->unalignedSize; 2196 // not spoil cache with too large object, that can cause its total cleanup 2197 if (size > MAX_TOTAL_SIZE) 2198 return false; 2199 LargeMemoryBlock *localHead = head.exchange(nullptr); 2200 2201 object->prev = nullptr; 2202 object->next = localHead; 2203 if (localHead) 2204 localHead->prev = object; 2205 else { 2206 // those might not be cleaned during local cache stealing, correct them 2207 totalSize = 0; 2208 numOfBlocks = 0; 2209 tail = object; 2210 } 2211 localHead = object; 2212 totalSize += size; 2213 numOfBlocks++; 2214 // must meet both size and number of cached objects constrains 2215 if (totalSize > MAX_TOTAL_SIZE || numOfBlocks >= HIGH_MARK) { 2216 // scanning from tail until meet conditions 2217 while (totalSize > MAX_TOTAL_SIZE || numOfBlocks > LOW_MARK) { 2218 totalSize -= tail->unalignedSize; 2219 numOfBlocks--; 2220 tail = tail->prev; 2221 } 2222 LargeMemoryBlock *headToRelease = tail->next; 2223 tail->next = nullptr; 2224 2225 extMemPool->freeLargeObjectList(headToRelease); 2226 } 2227 2228 head.store(localHead, std::memory_order_release); 2229 return true; 2230 } 2231 2232 template<int LOW_MARK, int HIGH_MARK> 2233 LargeMemoryBlock *LocalLOCImpl<LOW_MARK, HIGH_MARK>::get(size_t size) 2234 { 2235 LargeMemoryBlock *localHead, *res = nullptr; 2236 2237 if (size > MAX_TOTAL_SIZE) 2238 return nullptr; 2239 2240 // TBB_REVAMP_TODO: review this line 2241 if (!head.load(std::memory_order_acquire) || (localHead = head.exchange(nullptr)) == nullptr) { 2242 // do not restore totalSize, numOfBlocks and tail at this point, 2243 // as they are used only in put(), where they must be restored 2244 return nullptr; 2245 } 2246 2247 for (LargeMemoryBlock *curr = localHead; curr; curr=curr->next) { 2248 if (curr->unalignedSize == size) { 2249 res = curr; 2250 if (curr->next) 2251 curr->next->prev = curr->prev; 2252 else 2253 tail = curr->prev; 2254 if (curr != localHead) 2255 curr->prev->next = curr->next; 2256 else 2257 localHead = curr->next; 2258 totalSize -= size; 2259 numOfBlocks--; 2260 break; 2261 } 2262 } 2263 2264 head.store(localHead, std::memory_order_release); 2265 return res; 2266 } 2267 2268 template<int LOW_MARK, int HIGH_MARK> 2269 bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::externalCleanup(ExtMemoryPool *extMemPool) 2270 { 2271 if (LargeMemoryBlock *localHead = head.exchange(nullptr)) { 2272 extMemPool->freeLargeObjectList(localHead); 2273 return true; 2274 } 2275 return false; 
2276 } 2277 2278 void *MemoryPool::getFromLLOCache(TLSData* tls, size_t size, size_t alignment) 2279 { 2280 LargeMemoryBlock *lmb = nullptr; 2281 2282 size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr); 2283 size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+alignment); 2284 if (allocationSize < size) // allocationSize is wrapped around after alignToBin 2285 return nullptr; 2286 MALLOC_ASSERT(allocationSize >= alignment, "Overflow must be checked before."); 2287 2288 if (tls) { 2289 tls->markUsed(); 2290 lmb = tls->lloc.get(allocationSize); 2291 } 2292 if (!lmb) 2293 lmb = extMemPool.mallocLargeObject(this, allocationSize); 2294 2295 if (lmb) { 2296 // doing shuffle we suppose that alignment offset guarantees 2297 // that different cache lines are in use 2298 MALLOC_ASSERT(alignment >= estimatedCacheLineSize, ASSERT_TEXT); 2299 2300 void *alignedArea = (void*)alignUp((uintptr_t)lmb+headersSize, alignment); 2301 uintptr_t alignedRight = 2302 alignDown((uintptr_t)lmb+lmb->unalignedSize - size, alignment); 2303 // Has some room to shuffle object between cache lines? 2304 // Note that alignedRight and alignedArea are aligned at alignment. 2305 unsigned ptrDelta = alignedRight - (uintptr_t)alignedArea; 2306 if (ptrDelta && tls) { // !tls is cold path 2307 // for the hot path of alignment==estimatedCacheLineSize, 2308 // allow compilers to use shift for division 2309 // (since estimatedCacheLineSize is a power-of-2 constant) 2310 unsigned numOfPossibleOffsets = alignment == estimatedCacheLineSize? 2311 ptrDelta / estimatedCacheLineSize : 2312 ptrDelta / alignment; 2313 unsigned myCacheIdx = ++tls->currCacheIdx; 2314 unsigned offset = myCacheIdx % numOfPossibleOffsets; 2315 2316 // Move object to a cache line with an offset that is different from 2317 // previous allocation. This supposedly allows us to use cache 2318 // associativity more efficiently. 2319 alignedArea = (void*)((uintptr_t)alignedArea + offset*alignment); 2320 } 2321 MALLOC_ASSERT((uintptr_t)lmb+lmb->unalignedSize >= 2322 (uintptr_t)alignedArea+size, "Object doesn't fit the block."); 2323 LargeObjectHdr *header = (LargeObjectHdr*)alignedArea-1; 2324 header->memoryBlock = lmb; 2325 header->backRefIdx = lmb->backRefIdx; 2326 setBackRef(header->backRefIdx, header); 2327 2328 lmb->objectSize = size; 2329 2330 MALLOC_ASSERT( isLargeObject<unknownMem>(alignedArea), ASSERT_TEXT ); 2331 MALLOC_ASSERT( isAligned(alignedArea, alignment), ASSERT_TEXT ); 2332 2333 return alignedArea; 2334 } 2335 return nullptr; 2336 } 2337 2338 void MemoryPool::putToLLOCache(TLSData *tls, void *object) 2339 { 2340 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2341 // overwrite backRefIdx to simplify double free detection 2342 header->backRefIdx = BackRefIdx(); 2343 2344 if (tls) { 2345 tls->markUsed(); 2346 if (tls->lloc.put(header->memoryBlock, &extMemPool)) 2347 return; 2348 } 2349 extMemPool.freeLargeObject(header->memoryBlock); 2350 } 2351 2352 /* 2353 * All aligned allocations fall into one of the following categories: 2354 * 1. if both request size and alignment are <= maxSegregatedObjectSize, 2355 * we just align the size up, and request this amount, because for every size 2356 * aligned to some power of 2, the allocated object is at least that aligned. 2357 * 2. for size<minLargeObjectSize, check if already guaranteed fittingAlignment is enough. 2358 * 3. 
if size+alignment<minLargeObjectSize, we take an object of fittingSizeN and align 2359 * its address up; given such pointer, scalable_free could find the real object. 2360 * Wrapping of size+alignment is impossible because maximal allowed 2361 * alignment plus minLargeObjectSize can't lead to wrapping. 2362 * 4. otherwise, aligned large object is allocated. 2363 */ 2364 static void *allocateAligned(MemoryPool *memPool, size_t size, size_t alignment) 2365 { 2366 MALLOC_ASSERT( isPowerOfTwo(alignment), ASSERT_TEXT ); 2367 2368 if (!isMallocInitialized()) 2369 if (!doInitialization()) 2370 return nullptr; 2371 2372 void *result; 2373 if (size<=maxSegregatedObjectSize && alignment<=maxSegregatedObjectSize) 2374 result = internalPoolMalloc(memPool, alignUp(size? size: sizeof(size_t), alignment)); 2375 else if (size<minLargeObjectSize) { 2376 if (alignment<=fittingAlignment) 2377 result = internalPoolMalloc(memPool, size); 2378 else if (size+alignment < minLargeObjectSize) { 2379 void *unaligned = internalPoolMalloc(memPool, size+alignment); 2380 if (!unaligned) return nullptr; 2381 result = alignUp(unaligned, alignment); 2382 } else 2383 goto LargeObjAlloc; 2384 } else { 2385 LargeObjAlloc: 2386 TLSData *tls = memPool->getTLS(/*create=*/true); 2387 // take into account only alignment that are higher then natural 2388 result = 2389 memPool->getFromLLOCache(tls, size, largeObjectAlignment>alignment? 2390 largeObjectAlignment: alignment); 2391 } 2392 2393 MALLOC_ASSERT( isAligned(result, alignment), ASSERT_TEXT ); 2394 return result; 2395 } 2396 2397 static void *reallocAligned(MemoryPool *memPool, void *ptr, 2398 size_t newSize, size_t alignment = 0) 2399 { 2400 void *result; 2401 size_t copySize; 2402 2403 if (isLargeObject<ourMem>(ptr)) { 2404 LargeMemoryBlock* lmb = ((LargeObjectHdr *)ptr - 1)->memoryBlock; 2405 copySize = lmb->unalignedSize-((uintptr_t)ptr-(uintptr_t)lmb); 2406 2407 // Apply different strategies if size decreases 2408 if (newSize <= copySize && (0 == alignment || isAligned(ptr, alignment))) { 2409 2410 // For huge objects (that do not fit in backend cache), keep the same space unless 2411 // the new size is at least twice smaller 2412 bool isMemoryBlockHuge = copySize > memPool->extMemPool.backend.getMaxBinnedSize(); 2413 size_t threshold = isMemoryBlockHuge ? copySize / 2 : 0; 2414 if (newSize > threshold) { 2415 lmb->objectSize = newSize; 2416 return ptr; 2417 } 2418 // TODO: For large objects suitable for the backend cache, 2419 // split out the excessive part and put it to the backend. 2420 } 2421 // Reallocate for real 2422 copySize = lmb->objectSize; 2423 #if BACKEND_HAS_MREMAP 2424 if (void *r = memPool->extMemPool.remap(ptr, copySize, newSize, 2425 alignment < largeObjectAlignment ? largeObjectAlignment : alignment)) 2426 return r; 2427 #endif 2428 result = alignment ? allocateAligned(memPool, newSize, alignment) : 2429 internalPoolMalloc(memPool, newSize); 2430 2431 } else { 2432 Block* block = (Block *)alignDown(ptr, slabSize); 2433 copySize = block->findObjectSize(ptr); 2434 2435 // TODO: Move object to another bin if size decreases and the current bin is "empty enough". 2436 // Currently, in case of size decreasing, old pointer is returned 2437 if (newSize <= copySize && (0==alignment || isAligned(ptr, alignment))) { 2438 return ptr; 2439 } else { 2440 result = alignment ? allocateAligned(memPool, newSize, alignment) : 2441 internalPoolMalloc(memPool, newSize); 2442 } 2443 } 2444 if (result) { 2445 memcpy(result, ptr, copySize < newSize ? 
copySize : newSize); 2446 internalPoolFree(memPool, ptr, 0); 2447 } 2448 return result; 2449 } 2450 2451 #if MALLOC_DEBUG 2452 /* A predicate checks if an object is properly placed inside its block */ 2453 inline bool Block::isProperlyPlaced(const void *object) const 2454 { 2455 return 0 == ((uintptr_t)this + slabSize - (uintptr_t)object) % objectSize; 2456 } 2457 #endif 2458 2459 /* Finds the real object inside the block */ 2460 FreeObject *Block::findAllocatedObject(const void *address) const 2461 { 2462 // calculate offset from the end of the block space 2463 uint16_t offset = (uintptr_t)this + slabSize - (uintptr_t)address; 2464 MALLOC_ASSERT( offset<=slabSize-sizeof(Block), ASSERT_TEXT ); 2465 // find offset difference from a multiple of allocation size 2466 offset %= objectSize; 2467 // and move the address down to where the real object starts. 2468 return (FreeObject*)((uintptr_t)address - (offset? objectSize-offset: 0)); 2469 } 2470 2471 /* 2472 * Bad dereference caused by a foreign pointer is possible only here, not earlier in call chain. 2473 * Separate function isolates SEH code, as it has bad influence on compiler optimization. 2474 */ 2475 static inline BackRefIdx safer_dereference (const BackRefIdx *ptr) 2476 { 2477 BackRefIdx id; 2478 #if _MSC_VER 2479 __try { 2480 #endif 2481 id = dereference(ptr); 2482 #if _MSC_VER 2483 } __except( GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION? 2484 EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ) { 2485 id = BackRefIdx(); 2486 } 2487 #endif 2488 return id; 2489 } 2490 2491 template<MemoryOrigin memOrigin> 2492 bool isLargeObject(void *object) 2493 { 2494 if (!isAligned(object, largeObjectAlignment)) 2495 return false; 2496 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2497 BackRefIdx idx = (memOrigin == unknownMem) ? 2498 safer_dereference(&header->backRefIdx) : dereference(&header->backRefIdx); 2499 2500 return idx.isLargeObject() 2501 // in valid LargeObjectHdr memoryBlock is not nullptr 2502 && header->memoryBlock 2503 // in valid LargeObjectHdr memoryBlock points somewhere before header 2504 // TODO: more strict check 2505 && (uintptr_t)header->memoryBlock < (uintptr_t)header 2506 && getBackRef(idx) == header; 2507 } 2508 2509 static inline bool isSmallObject (void *ptr) 2510 { 2511 Block* expectedBlock = (Block*)alignDown(ptr, slabSize); 2512 const BackRefIdx* idx = expectedBlock->getBackRefIdx(); 2513 2514 bool isSmall = expectedBlock == getBackRef(safer_dereference(idx)); 2515 if (isSmall) 2516 expectedBlock->checkFreePrecond(ptr); 2517 return isSmall; 2518 } 2519 2520 /**** Check if an object was allocated by scalable_malloc ****/ 2521 static inline bool isRecognized (void* ptr) 2522 { 2523 return defaultMemPool->extMemPool.backend.ptrCanBeValid(ptr) && 2524 (isLargeObject<unknownMem>(ptr) || isSmallObject(ptr)); 2525 } 2526 2527 static inline void freeSmallObject(void *object) 2528 { 2529 /* mask low bits to get the block */ 2530 Block *block = (Block *)alignDown(object, slabSize); 2531 block->checkFreePrecond(object); 2532 2533 #if MALLOC_CHECK_RECURSION 2534 if (block->isStartupAllocObject()) { 2535 ((StartupBlock *)block)->free(object); 2536 return; 2537 } 2538 #endif 2539 if (block->isOwnedByCurrentThread()) { 2540 block->freeOwnObject(object); 2541 } else { /* Slower path to add to the shared list, the allocatedCount is updated by the owner thread in malloc. 
*/ 2542 FreeObject *objectToFree = block->findObjectToFree(object); 2543 block->freePublicObject(objectToFree); 2544 } 2545 } 2546 2547 static void *internalPoolMalloc(MemoryPool* memPool, size_t size) 2548 { 2549 Bin* bin; 2550 Block * mallocBlock; 2551 2552 if (!memPool) return nullptr; 2553 2554 if (!size) size = sizeof(size_t); 2555 2556 TLSData *tls = memPool->getTLS(/*create=*/true); 2557 2558 /* Allocate a large object */ 2559 if (size >= minLargeObjectSize) 2560 return memPool->getFromLLOCache(tls, size, largeObjectAlignment); 2561 2562 if (!tls) return nullptr; 2563 2564 tls->markUsed(); 2565 /* 2566 * Get an element in thread-local array corresponding to the given size; 2567 * It keeps ptr to the active block for allocations of this size 2568 */ 2569 bin = tls->getAllocationBin(size); 2570 if ( !bin ) return nullptr; 2571 2572 /* Get a block to try to allocate in. */ 2573 for( mallocBlock = bin->getActiveBlock(); mallocBlock; 2574 mallocBlock = bin->setPreviousBlockActive() ) // the previous block should be empty enough 2575 { 2576 if( FreeObject *result = mallocBlock->allocate() ) 2577 return result; 2578 } 2579 2580 /* 2581 * else privatize publicly freed objects in some block and allocate from it 2582 */ 2583 mallocBlock = bin->getPrivatizedFreeListBlock(); 2584 if (mallocBlock) { 2585 MALLOC_ASSERT( mallocBlock->freeListNonNull(), ASSERT_TEXT ); 2586 if ( FreeObject *result = mallocBlock->allocateFromFreeList() ) 2587 return result; 2588 /* Else something strange happened, need to retry from the beginning; */ 2589 TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in public free list; reentering.\n" )); 2590 return internalPoolMalloc(memPool, size); 2591 } 2592 2593 /* 2594 * no suitable own blocks, try to get a partial block that some other thread has discarded. 2595 */ 2596 mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size); 2597 while (mallocBlock) { 2598 bin->pushTLSBin(mallocBlock); 2599 bin->setActiveBlock(mallocBlock); // TODO: move under the below condition? 2600 if( FreeObject *result = mallocBlock->allocate() ) 2601 return result; 2602 mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size); 2603 } 2604 2605 /* 2606 * else try to get a new empty block 2607 */ 2608 mallocBlock = memPool->getEmptyBlock(size); 2609 if (mallocBlock) { 2610 bin->pushTLSBin(mallocBlock); 2611 bin->setActiveBlock(mallocBlock); 2612 if( FreeObject *result = mallocBlock->allocate() ) 2613 return result; 2614 /* Else something strange happened, need to retry from the beginning; */ 2615 TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in empty block; reentering.\n" )); 2616 return internalPoolMalloc(memPool, size); 2617 } 2618 /* 2619 * else nothing works so return nullptr 2620 */ 2621 TRACEF(( "[ScalableMalloc trace] No memory found, returning nullptr.\n" )); 2622 return nullptr; 2623 } 2624 2625 // When size==0 (i.e. unknown), detect here whether the object is large. 2626 // For size is known and < minLargeObjectSize, we still need to check 2627 // if the actual object is large, because large objects might be used 2628 // for aligned small allocations. 2629 static bool internalPoolFree(MemoryPool *memPool, void *object, size_t size) 2630 { 2631 if (!memPool || !object) return false; 2632 2633 // The library is initialized at allocation call, so releasing while 2634 // not initialized means foreign object is releasing. 
2635 MALLOC_ASSERT(isMallocInitialized(), ASSERT_TEXT); 2636 MALLOC_ASSERT(memPool->extMemPool.userPool() || isRecognized(object), 2637 "Invalid pointer during object releasing is detected."); 2638 2639 if (size >= minLargeObjectSize || isLargeObject<ourMem>(object)) 2640 memPool->putToLLOCache(memPool->getTLS(/*create=*/false), object); 2641 else 2642 freeSmallObject(object); 2643 return true; 2644 } 2645 2646 static void *internalMalloc(size_t size) 2647 { 2648 if (!size) size = sizeof(size_t); 2649 2650 #if MALLOC_CHECK_RECURSION 2651 if (RecursiveMallocCallProtector::sameThreadActive()) 2652 return size<minLargeObjectSize? StartupBlock::allocate(size) : 2653 // nested allocation, so skip tls 2654 (FreeObject*)defaultMemPool->getFromLLOCache(nullptr, size, slabSize); 2655 #endif 2656 2657 if (!isMallocInitialized()) 2658 if (!doInitialization()) 2659 return nullptr; 2660 return internalPoolMalloc(defaultMemPool, size); 2661 } 2662 2663 static void internalFree(void *object) 2664 { 2665 internalPoolFree(defaultMemPool, object, 0); 2666 } 2667 2668 static size_t internalMsize(void* ptr) 2669 { 2670 MALLOC_ASSERT(ptr, "Invalid pointer passed to internalMsize"); 2671 if (isLargeObject<ourMem>(ptr)) { 2672 // TODO: return the maximum memory size, that can be written to this object 2673 LargeMemoryBlock* lmb = ((LargeObjectHdr*)ptr - 1)->memoryBlock; 2674 return lmb->objectSize; 2675 } else { 2676 Block *block = (Block*)alignDown(ptr, slabSize); 2677 return block->findObjectSize(ptr); 2678 } 2679 } 2680 2681 } // namespace internal 2682 2683 using namespace rml::internal; 2684 2685 // legacy entry point saved for compatibility with binaries complied 2686 // with pre-6003 versions of TBB 2687 TBBMALLOC_EXPORT rml::MemoryPool *pool_create(intptr_t pool_id, const MemPoolPolicy *policy) 2688 { 2689 rml::MemoryPool *pool; 2690 MemPoolPolicy pol(policy->pAlloc, policy->pFree, policy->granularity); 2691 2692 pool_create_v1(pool_id, &pol, &pool); 2693 return pool; 2694 } 2695 2696 rml::MemPoolError pool_create_v1(intptr_t pool_id, const MemPoolPolicy *policy, 2697 rml::MemoryPool **pool) 2698 { 2699 if ( !policy->pAlloc || policy->version<MemPoolPolicy::TBBMALLOC_POOL_VERSION 2700 // empty pFree allowed only for fixed pools 2701 || !(policy->fixedPool || policy->pFree)) { 2702 *pool = nullptr; 2703 return INVALID_POLICY; 2704 } 2705 if ( policy->version>MemPoolPolicy::TBBMALLOC_POOL_VERSION // future versions are not supported 2706 // new flags can be added in place of reserved, but default 2707 // behaviour must be supported by this version 2708 || policy->reserved ) { 2709 *pool = nullptr; 2710 return UNSUPPORTED_POLICY; 2711 } 2712 if (!isMallocInitialized()) 2713 if (!doInitialization()) { 2714 *pool = nullptr; 2715 return NO_MEMORY; 2716 } 2717 rml::internal::MemoryPool *memPool = 2718 (rml::internal::MemoryPool*)internalMalloc((sizeof(rml::internal::MemoryPool))); 2719 if (!memPool) { 2720 *pool = nullptr; 2721 return NO_MEMORY; 2722 } 2723 memset(static_cast<void*>(memPool), 0, sizeof(rml::internal::MemoryPool)); 2724 if (!memPool->init(pool_id, policy)) { 2725 internalFree(memPool); 2726 *pool = nullptr; 2727 return NO_MEMORY; 2728 } 2729 2730 *pool = (rml::MemoryPool*)memPool; 2731 return POOL_OK; 2732 } 2733 2734 bool pool_destroy(rml::MemoryPool* memPool) 2735 { 2736 if (!memPool) return false; 2737 bool ret = ((rml::internal::MemoryPool*)memPool)->destroy(); 2738 internalFree(memPool); 2739 2740 return ret; 2741 } 2742 2743 bool pool_reset(rml::MemoryPool* memPool) 2744 { 2745 if 
(!memPool) return false; 2746 2747 return ((rml::internal::MemoryPool*)memPool)->reset(); 2748 } 2749 2750 void *pool_malloc(rml::MemoryPool* mPool, size_t size) 2751 { 2752 return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size); 2753 } 2754 2755 void *pool_realloc(rml::MemoryPool* mPool, void *object, size_t size) 2756 { 2757 if (!object) 2758 return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size); 2759 if (!size) { 2760 internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0); 2761 return nullptr; 2762 } 2763 return reallocAligned((rml::internal::MemoryPool*)mPool, object, size, 0); 2764 } 2765 2766 void *pool_aligned_malloc(rml::MemoryPool* mPool, size_t size, size_t alignment) 2767 { 2768 if (!isPowerOfTwo(alignment) || 0==size) 2769 return nullptr; 2770 2771 return allocateAligned((rml::internal::MemoryPool*)mPool, size, alignment); 2772 } 2773 2774 void *pool_aligned_realloc(rml::MemoryPool* memPool, void *ptr, size_t size, size_t alignment) 2775 { 2776 if (!isPowerOfTwo(alignment)) 2777 return nullptr; 2778 rml::internal::MemoryPool *mPool = (rml::internal::MemoryPool*)memPool; 2779 void *tmp; 2780 2781 if (!ptr) 2782 tmp = allocateAligned(mPool, size, alignment); 2783 else if (!size) { 2784 internalPoolFree(mPool, ptr, 0); 2785 return nullptr; 2786 } else 2787 tmp = reallocAligned(mPool, ptr, size, alignment); 2788 2789 return tmp; 2790 } 2791 2792 bool pool_free(rml::MemoryPool *mPool, void *object) 2793 { 2794 return internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0); 2795 } 2796 2797 rml::MemoryPool *pool_identify(void *object) 2798 { 2799 rml::internal::MemoryPool *pool; 2800 if (isLargeObject<ourMem>(object)) { 2801 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2802 pool = header->memoryBlock->pool; 2803 } else { 2804 Block *block = (Block*)alignDown(object, slabSize); 2805 pool = block->getMemPool(); 2806 } 2807 // do not return defaultMemPool, as it can't be used in pool_free() etc 2808 __TBB_ASSERT_RELEASE(pool!=defaultMemPool, 2809 "rml::pool_identify() can't be used for scalable_malloc() etc results."); 2810 return (rml::MemoryPool*)pool; 2811 } 2812 2813 size_t pool_msize(rml::MemoryPool *mPool, void* object) 2814 { 2815 if (object) { 2816 // No assert for object recognition, cause objects allocated from non-default 2817 // memory pool do not participate in range checking and do not have valid backreferences for 2818 // small objects. Instead, check that an object belong to the certain memory pool. 2819 MALLOC_ASSERT_EX(mPool == pool_identify(object), "Object does not belong to the specified pool"); 2820 return internalMsize(object); 2821 } 2822 errno = EINVAL; 2823 // Unlike _msize, return 0 in case of parameter error. 2824 // Returning size_t(-1) looks more like the way to troubles. 2825 return 0; 2826 } 2827 2828 } // namespace rml 2829 2830 using namespace rml::internal; 2831 2832 #if MALLOC_TRACE 2833 static unsigned int threadGoingDownCount = 0; 2834 #endif 2835 2836 /* 2837 * When a thread is shutting down this routine should be called to remove all the thread ids 2838 * from the malloc blocks and replace them with a nullptr thread id. 2839 * 2840 * For pthreads, the function is set as a callback in pthread_key_create for TLS bin. 2841 * It will be automatically called at thread exit with the key value as the argument, 2842 * unless that value is nullptr. 2843 * For Windows, it is called from DllMain( DLL_THREAD_DETACH ). 
2844 * 2845 * However neither of the above is called for the main process thread, so the routine 2846 * also needs to be called during the process shutdown. 2847 * 2848 */ 2849 // TODO: Consider making this function part of class MemoryPool. 2850 void doThreadShutdownNotification(TLSData* tls, bool main_thread) 2851 { 2852 TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return start %d\n", 2853 getThreadId(), threadGoingDownCount++ )); 2854 2855 #if USE_PTHREAD 2856 if (tls) { 2857 if (!shutdownSync.threadDtorStart()) return; 2858 tls->getMemPool()->onThreadShutdown(tls); 2859 shutdownSync.threadDtorDone(); 2860 } else 2861 #endif 2862 { 2863 suppress_unused_warning(tls); // not used on Windows 2864 // The default pool is safe to use at this point: 2865 // on Linux, only the main thread can go here before destroying defaultMemPool; 2866 // on Windows, shutdown is synchronized via loader lock and isMallocInitialized(). 2867 // See also __TBB_mallocProcessShutdownNotification() 2868 defaultMemPool->onThreadShutdown(defaultMemPool->getTLS(/*create=*/false)); 2869 // Take lock to walk through other pools; but waiting might be dangerous at this point 2870 // (e.g. on Windows the main thread might deadlock) 2871 bool locked = false; 2872 MallocMutex::scoped_lock lock(MemoryPool::memPoolListLock, /*wait=*/!main_thread, &locked); 2873 if (locked) { // the list is safe to process 2874 for (MemoryPool *memPool = defaultMemPool->next; memPool; memPool = memPool->next) 2875 memPool->onThreadShutdown(memPool->getTLS(/*create=*/false)); 2876 } 2877 } 2878 2879 TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return end\n", getThreadId() )); 2880 } 2881 2882 #if USE_PTHREAD 2883 void mallocThreadShutdownNotification(void* arg) 2884 { 2885 // The routine is called for each pool (as TLS dtor) on each thread, except for the main thread 2886 if (!isMallocInitialized()) return; 2887 doThreadShutdownNotification((TLSData*)arg, false); 2888 } 2889 #else 2890 extern "C" void __TBB_mallocThreadShutdownNotification() 2891 { 2892 // The routine is called once per thread on Windows 2893 if (!isMallocInitialized()) return; 2894 doThreadShutdownNotification(nullptr, false); 2895 } 2896 #endif 2897 2898 extern "C" void __TBB_mallocProcessShutdownNotification(bool windows_process_dying) 2899 { 2900 if (!isMallocInitialized()) return; 2901 2902 // Don't clean allocator internals if the entire process is exiting 2903 if (!windows_process_dying) { 2904 doThreadShutdownNotification(nullptr, /*main_thread=*/true); 2905 } 2906 #if __TBB_MALLOC_LOCACHE_STAT 2907 printf("cache hit ratio %f, size hit %f\n", 2908 1.*cacheHits/mallocCalls, 1.*memHitKB/memAllocKB); 2909 defaultMemPool->extMemPool.loc.reportStat(stdout); 2910 #endif 2911 2912 shutdownSync.processExit(); 2913 #if __TBB_SOURCE_DIRECTLY_INCLUDED 2914 /* Pthread keys must be deleted as soon as possible to not call key dtor 2915 on thread termination when then the tbbmalloc code can be already unloaded. 
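   For the same reason the rest of the allocator state is torn down below as well, and
   mallocInitialized is reset to 0 so that a later full re-initialization remains possible.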
2916 */ 2917 defaultMemPool->destroy(); 2918 destroyBackRefMain(&defaultMemPool->extMemPool.backend); 2919 ThreadId::destroy(); // Delete key for thread id 2920 hugePages.reset(); 2921 // new total malloc initialization is possible after this point 2922 mallocInitialized.store(0, std::memory_order_release); 2923 #endif // __TBB_SOURCE_DIRECTLY_INCLUDED 2924 2925 #if COLLECT_STATISTICS 2926 unsigned nThreads = ThreadId::getMaxThreadId(); 2927 for( int i=1; i<=nThreads && i<MAX_THREADS; ++i ) 2928 STAT_print(i); 2929 #endif 2930 if (!usedBySrcIncluded) { 2931 MALLOC_ITT_FINI_ITTLIB(); 2932 MALLOC_ITT_RELEASE_RESOURCES(); 2933 } 2934 } 2935 2936 extern "C" void * scalable_malloc(size_t size) 2937 { 2938 void *ptr = internalMalloc(size); 2939 if (!ptr) errno = ENOMEM; 2940 return ptr; 2941 } 2942 2943 extern "C" void scalable_free(void *object) 2944 { 2945 internalFree(object); 2946 } 2947 2948 #if MALLOC_ZONE_OVERLOAD_ENABLED 2949 extern "C" void __TBB_malloc_free_definite_size(void *object, size_t size) 2950 { 2951 internalPoolFree(defaultMemPool, object, size); 2952 } 2953 #endif 2954 2955 /* 2956 * A variant that provides additional memory safety, by checking whether the given address 2957 * was obtained with this allocator, and if not redirecting to the provided alternative call. 2958 */ 2959 extern "C" TBBMALLOC_EXPORT void __TBB_malloc_safer_free(void *object, void (*original_free)(void*)) 2960 { 2961 if (!object) 2962 return; 2963 2964 // tbbmalloc can allocate object only when tbbmalloc has been initialized 2965 if (mallocInitialized.load(std::memory_order_acquire) && defaultMemPool->extMemPool.backend.ptrCanBeValid(object)) { 2966 if (isLargeObject<unknownMem>(object)) { 2967 // must check 1st for large object, because small object check touches 4 pages on left, 2968 // and it can be inaccessible 2969 TLSData *tls = defaultMemPool->getTLS(/*create=*/false); 2970 2971 defaultMemPool->putToLLOCache(tls, object); 2972 return; 2973 } else if (isSmallObject(object)) { 2974 freeSmallObject(object); 2975 return; 2976 } 2977 } 2978 if (original_free) 2979 original_free(object); 2980 } 2981 2982 /********* End the free code *************/ 2983 2984 /********* Code for scalable_realloc ***********/ 2985 2986 /* 2987 * From K&R 2988 * "realloc changes the size of the object pointed to by p to size. The contents will 2989 * be unchanged up to the minimum of the old and the new sizes. If the new size is larger, 2990 * the new space is uninitialized. realloc returns a pointer to the new space, or 2991 * nullptr if the request cannot be satisfied, in which case *p is unchanged." 2992 * 2993 */ 2994 extern "C" void* scalable_realloc(void* ptr, size_t size) 2995 { 2996 void *tmp; 2997 2998 if (!ptr) 2999 tmp = internalMalloc(size); 3000 else if (!size) { 3001 internalFree(ptr); 3002 return nullptr; 3003 } else 3004 tmp = reallocAligned(defaultMemPool, ptr, size, 0); 3005 3006 if (!tmp) errno = ENOMEM; 3007 return tmp; 3008 } 3009 3010 /* 3011 * A variant that provides additional memory safety, by checking whether the given address 3012 * was obtained with this allocator, and if not redirecting to the provided alternative call. 
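 *
 * Note: on Windows original_realloc is interpreted as a pointer to an orig_ptrs structure
 * carrying the foreign msize/free entry points used to emulate realloc; on other platforms
 * it is treated as a realloc-compatible function pointer.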
3013 */ 3014 extern "C" TBBMALLOC_EXPORT void* __TBB_malloc_safer_realloc(void* ptr, size_t sz, void* original_realloc) 3015 { 3016 void *tmp; // TODO: fix warnings about uninitialized use of tmp 3017 3018 if (!ptr) { 3019 tmp = internalMalloc(sz); 3020 } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) { 3021 if (!sz) { 3022 internalFree(ptr); 3023 return nullptr; 3024 } else { 3025 tmp = reallocAligned(defaultMemPool, ptr, sz, 0); 3026 } 3027 } 3028 #if USE_WINTHREAD 3029 else if (original_realloc && sz) { 3030 orig_ptrs *original_ptrs = static_cast<orig_ptrs*>(original_realloc); 3031 if ( original_ptrs->msize ){ 3032 size_t oldSize = original_ptrs->msize(ptr); 3033 tmp = internalMalloc(sz); 3034 if (tmp) { 3035 memcpy(tmp, ptr, sz<oldSize? sz : oldSize); 3036 if ( original_ptrs->free ){ 3037 original_ptrs->free( ptr ); 3038 } 3039 } 3040 } else 3041 tmp = nullptr; 3042 } 3043 #else 3044 else if (original_realloc) { 3045 typedef void* (*realloc_ptr_t)(void*,size_t); 3046 realloc_ptr_t original_realloc_ptr; 3047 (void *&)original_realloc_ptr = original_realloc; 3048 tmp = original_realloc_ptr(ptr,sz); 3049 } 3050 #endif 3051 else tmp = nullptr; 3052 3053 if (!tmp) errno = ENOMEM; 3054 return tmp; 3055 } 3056 3057 /********* End code for scalable_realloc ***********/ 3058 3059 /********* Code for scalable_calloc ***********/ 3060 3061 /* 3062 * From K&R 3063 * calloc returns a pointer to space for an array of nobj objects, 3064 * each of size size, or nullptr if the request cannot be satisfied. 3065 * The space is initialized to zero bytes. 3066 * 3067 */ 3068 3069 extern "C" void * scalable_calloc(size_t nobj, size_t size) 3070 { 3071 // it's square root of maximal size_t value 3072 const size_t mult_not_overflow = size_t(1) << (sizeof(size_t)*CHAR_BIT/2); 3073 const size_t arraySize = nobj * size; 3074 3075 // check for overflow during multiplication: 3076 if (nobj>=mult_not_overflow || size>=mult_not_overflow) // 1) heuristic check 3077 if (nobj && arraySize / nobj != size) { // 2) exact check 3078 errno = ENOMEM; 3079 return nullptr; 3080 } 3081 void* result = internalMalloc(arraySize); 3082 if (result) 3083 memset(result, 0, arraySize); 3084 else 3085 errno = ENOMEM; 3086 return result; 3087 } 3088 3089 /********* End code for scalable_calloc ***********/ 3090 3091 /********* Code for aligned allocation API **********/ 3092 3093 extern "C" int scalable_posix_memalign(void **memptr, size_t alignment, size_t size) 3094 { 3095 if ( !isPowerOfTwoAtLeast(alignment, sizeof(void*)) ) 3096 return EINVAL; 3097 void *result = allocateAligned(defaultMemPool, size, alignment); 3098 if (!result) 3099 return ENOMEM; 3100 *memptr = result; 3101 return 0; 3102 } 3103 3104 extern "C" void * scalable_aligned_malloc(size_t size, size_t alignment) 3105 { 3106 if (!isPowerOfTwo(alignment) || 0==size) { 3107 errno = EINVAL; 3108 return nullptr; 3109 } 3110 void *tmp = allocateAligned(defaultMemPool, size, alignment); 3111 if (!tmp) errno = ENOMEM; 3112 return tmp; 3113 } 3114 3115 extern "C" void * scalable_aligned_realloc(void *ptr, size_t size, size_t alignment) 3116 { 3117 if (!isPowerOfTwo(alignment)) { 3118 errno = EINVAL; 3119 return nullptr; 3120 } 3121 void *tmp; 3122 3123 if (!ptr) 3124 tmp = allocateAligned(defaultMemPool, size, alignment); 3125 else if (!size) { 3126 scalable_free(ptr); 3127 return nullptr; 3128 } else 3129 tmp = reallocAligned(defaultMemPool, ptr, size, alignment); 3130 3131 if (!tmp) errno = ENOMEM; 3132 return tmp; 3133 } 3134 3135 extern "C" 
TBBMALLOC_EXPORT void * __TBB_malloc_safer_aligned_realloc(void *ptr, size_t size, size_t alignment, void* orig_function) 3136 { 3137 /* corner cases left out of reallocAligned to not deal with errno there */ 3138 if (!isPowerOfTwo(alignment)) { 3139 errno = EINVAL; 3140 return nullptr; 3141 } 3142 void *tmp = nullptr; 3143 3144 if (!ptr) { 3145 tmp = allocateAligned(defaultMemPool, size, alignment); 3146 } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) { 3147 if (!size) { 3148 internalFree(ptr); 3149 return nullptr; 3150 } else { 3151 tmp = reallocAligned(defaultMemPool, ptr, size, alignment); 3152 } 3153 } 3154 #if USE_WINTHREAD 3155 else { 3156 orig_aligned_ptrs *original_ptrs = static_cast<orig_aligned_ptrs*>(orig_function); 3157 if (size) { 3158 // Without orig_msize, we can't do anything with this. 3159 // Just keeping old pointer. 3160 if ( original_ptrs->aligned_msize ){ 3161 // set alignment and offset to have possibly correct oldSize 3162 size_t oldSize = original_ptrs->aligned_msize(ptr, sizeof(void*), 0); 3163 tmp = allocateAligned(defaultMemPool, size, alignment); 3164 if (tmp) { 3165 memcpy(tmp, ptr, size<oldSize? size : oldSize); 3166 if ( original_ptrs->aligned_free ){ 3167 original_ptrs->aligned_free( ptr ); 3168 } 3169 } 3170 } 3171 } else { 3172 if ( original_ptrs->aligned_free ){ 3173 original_ptrs->aligned_free( ptr ); 3174 } 3175 return nullptr; 3176 } 3177 } 3178 #else 3179 // As original_realloc can't align result, and there is no way to find 3180 // size of reallocating object, we are giving up. 3181 suppress_unused_warning(orig_function); 3182 #endif 3183 if (!tmp) errno = ENOMEM; 3184 return tmp; 3185 } 3186 3187 extern "C" void scalable_aligned_free(void *ptr) 3188 { 3189 internalFree(ptr); 3190 } 3191 3192 /********* end code for aligned allocation API **********/ 3193 3194 /********* Code for scalable_msize ***********/ 3195 3196 /* 3197 * Returns the size of a memory block allocated in the heap. 3198 */ 3199 extern "C" size_t scalable_msize(void* ptr) 3200 { 3201 if (ptr) { 3202 MALLOC_ASSERT(isRecognized(ptr), "Invalid pointer in scalable_msize detected."); 3203 return internalMsize(ptr); 3204 } 3205 errno = EINVAL; 3206 // Unlike _msize, return 0 in case of parameter error. 3207 // Returning size_t(-1) looks more like the way to troubles. 3208 return 0; 3209 } 3210 3211 /* 3212 * A variant that provides additional memory safety, by checking whether the given address 3213 * was obtained with this allocator, and if not redirecting to the provided alternative call. 
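 *
 * Returns 0 when the object is nullptr, or when it is foreign and no original_msize
 * fallback is provided (on Windows errno is additionally set to EINVAL in that case).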
3214 */ 3215 extern "C" TBBMALLOC_EXPORT size_t __TBB_malloc_safer_msize(void *object, size_t (*original_msize)(void*)) 3216 { 3217 if (object) { 3218 // Check if the memory was allocated by scalable_malloc 3219 if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object)) 3220 return internalMsize(object); 3221 else if (original_msize) 3222 return original_msize(object); 3223 } 3224 // object is nullptr or unknown, or foreign and no original_msize 3225 #if USE_WINTHREAD 3226 errno = EINVAL; // errno expected to be set only on this platform 3227 #endif 3228 return 0; 3229 } 3230 3231 /* 3232 * The same as above but for _aligned_msize case 3233 */ 3234 extern "C" TBBMALLOC_EXPORT size_t __TBB_malloc_safer_aligned_msize(void *object, size_t alignment, size_t offset, size_t (*orig_aligned_msize)(void*,size_t,size_t)) 3235 { 3236 if (object) { 3237 // Check if the memory was allocated by scalable_malloc 3238 if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object)) 3239 return internalMsize(object); 3240 else if (orig_aligned_msize) 3241 return orig_aligned_msize(object,alignment,offset); 3242 } 3243 // object is nullptr or unknown 3244 errno = EINVAL; 3245 return 0; 3246 } 3247 3248 /********* End code for scalable_msize ***********/ 3249 3250 extern "C" int scalable_allocation_mode(int param, intptr_t value) 3251 { 3252 if (param == TBBMALLOC_SET_SOFT_HEAP_LIMIT) { 3253 defaultMemPool->extMemPool.backend.setRecommendedMaxSize((size_t)value); 3254 return TBBMALLOC_OK; 3255 } else if (param == USE_HUGE_PAGES) { 3256 #if __unix__ 3257 switch (value) { 3258 case 0: 3259 case 1: 3260 hugePages.setMode(value); 3261 return TBBMALLOC_OK; 3262 default: 3263 return TBBMALLOC_INVALID_PARAM; 3264 } 3265 #else 3266 return TBBMALLOC_NO_EFFECT; 3267 #endif 3268 #if __TBB_SOURCE_DIRECTLY_INCLUDED 3269 } else if (param == TBBMALLOC_INTERNAL_SOURCE_INCLUDED) { 3270 switch (value) { 3271 case 0: // used by dynamic library 3272 case 1: // used by static library or directly included sources 3273 usedBySrcIncluded = value; 3274 return TBBMALLOC_OK; 3275 default: 3276 return TBBMALLOC_INVALID_PARAM; 3277 } 3278 #endif 3279 } else if (param == TBBMALLOC_SET_HUGE_SIZE_THRESHOLD) { 3280 defaultMemPool->extMemPool.loc.setHugeSizeThreshold((size_t)value); 3281 return TBBMALLOC_OK; 3282 } 3283 return TBBMALLOC_INVALID_PARAM; 3284 } 3285 3286 extern "C" int scalable_allocation_command(int cmd, void *param) 3287 { 3288 if (param) 3289 return TBBMALLOC_INVALID_PARAM; 3290 3291 bool released = false; 3292 switch(cmd) { 3293 case TBBMALLOC_CLEAN_THREAD_BUFFERS: 3294 if (TLSData *tls = defaultMemPool->getTLS(/*create=*/false)) 3295 released = tls->externalCleanup(/*cleanOnlyUnused*/false, /*cleanBins=*/true); 3296 break; 3297 case TBBMALLOC_CLEAN_ALL_BUFFERS: 3298 released = defaultMemPool->extMemPool.hardCachesCleanup(true); 3299 break; 3300 default: 3301 return TBBMALLOC_INVALID_PARAM; 3302 } 3303 return released ? TBBMALLOC_OK : TBBMALLOC_NO_EFFECT; 3304 } 3305
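/*
 * Illustrative usage sketch (not part of the build, hence guarded by #if 0): a minimal
 * client of the exported C entry points defined above. It assumes the public header
 * oneapi/tbb/scalable_allocator.h declares these functions and the TBBMALLOC_* constants,
 * and that the program links against the tbbmalloc library.
 */
#if 0
#include <oneapi/tbb/scalable_allocator.h>

int example_client()
{
    // plain allocation, resizing, and release
    void *p = scalable_malloc(100);
    if (void *q = scalable_realloc(p, 200))   // contents preserved up to min(old, new) size
        p = q;
    scalable_free(p);

    // zero-initialized array allocation with overflow checking
    void *arr = scalable_calloc(10, 32);
    scalable_free(arr);

    // aligned allocations (alignment must be a power of two)
    void *a = scalable_aligned_malloc(64, 128);
    scalable_aligned_free(a);
    void *m = nullptr;
    if (scalable_posix_memalign(&m, 64, 256) == 0)
        scalable_free(m);

    // optional tuning and cleanup hooks
    scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 64*1024*1024);
    scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr);
    return 0;
}
#endif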