/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include <errno.h>
#include <new>        /* for placement new */
#include <string.h>   /* for memset */

#include "oneapi/tbb/version.h"
#include "../tbb/environment.h"
#include "../tbb/itt_notify.h" // for __TBB_load_ittnotify()

#if USE_PTHREAD
    #define TlsSetValue_func pthread_setspecific
    #define TlsGetValue_func pthread_getspecific
    #define GetMyTID() pthread_self()
    #include <sched.h>
    extern "C" { static void mallocThreadShutdownNotification(void*); }
    #if __sun || __SUNPRO_CC
    #define __asm__ asm
    #endif
    #include <unistd.h> // sysconf(_SC_PAGESIZE)
#elif USE_WINTHREAD
    #define GetMyTID() GetCurrentThreadId()
#if __TBB_WIN8UI_SUPPORT
    #include <thread>
    #define TlsSetValue_func FlsSetValue
    #define TlsGetValue_func FlsGetValue
    #define TlsAlloc() FlsAlloc(nullptr)
    #define TLS_ALLOC_FAILURE FLS_OUT_OF_INDEXES
    #define TlsFree FlsFree
#else
    #define TlsSetValue_func TlsSetValue
    #define TlsGetValue_func TlsGetValue
    #define TLS_ALLOC_FAILURE TLS_OUT_OF_INDEXES
#endif
#else
    #error Must define USE_PTHREAD or USE_WINTHREAD
#endif

#define FREELIST_NONBLOCKING 1

namespace rml {
class MemoryPool;
namespace internal {

class Block;
class MemoryPool;

#if MALLOC_CHECK_RECURSION

inline bool isMallocInitialized();

#endif // MALLOC_CHECK_RECURSION

/** Support for handling the special UNUSABLE pointer state **/
const intptr_t UNUSABLE = 0x1;
inline bool isSolidPtr( void* ptr ) {
    return (UNUSABLE|(intptr_t)ptr)!=UNUSABLE;
}
inline bool isNotForUse( void* ptr ) {
    return (intptr_t)ptr==UNUSABLE;
}

/*
 * Block::objectSize value used to mark blocks allocated by startupAlloc
 */
const uint16_t startupAllocObjSizeMark = ~(uint16_t)0;

/*
 * The following constant is used to define the size of struct Block, the block header.
 * The intent is to have the size of a Block be a multiple of the cache line size; this allows us to
 * get good alignment at the cost of some overhead equal to the amount of padding included in the Block.
 */
const int blockHeaderAlignment = estimatedCacheLineSize;

/********* The data structures and global objects **************/

/*
 * The malloc routines themselves need to be able to occasionally malloc some space,
 * in order to set up the structures used by the thread local structures. This
 * routine performs that function.
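 *
 * (For example, TLSKey::createTLS below obtains the storage for its per-thread TLSData
 * via BootStrapBlocks::allocate, and MemoryPool::onThreadShutdown returns that storage
 * with BootStrapBlocks::free.)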
 */
class BootStrapBlocks {
    MallocMutex bootStrapLock;
    Block      *bootStrapBlock;
    Block      *bootStrapBlockUsed;
    FreeObject *bootStrapObjectList;
public:
    void *allocate(MemoryPool *memPool, size_t size);
    void free(void* ptr);
    void reset();
};

#if USE_INTERNAL_TID
class ThreadId {
    static tls_key_t Tid_key;
    static std::atomic<intptr_t> ThreadCount;

    unsigned int id;

    static unsigned int tlsNumber() {
        unsigned int result = reinterpret_cast<intptr_t>(TlsGetValue_func(Tid_key));
        if( !result ) {
            RecursiveMallocCallProtector scoped;
            // Thread-local value is zero -> first call from this thread,
            // need to initialize with next ID value (IDs start from 1)
            result = ++ThreadCount; // returned new value!
            TlsSetValue_func( Tid_key, reinterpret_cast<void*>(result) );
        }
        return result;
    }
public:
    static bool init() {
#if USE_WINTHREAD
        Tid_key = TlsAlloc();
        if (Tid_key == TLS_ALLOC_FAILURE)
            return false;
#else
        int status = pthread_key_create( &Tid_key, nullptr );
        if ( status ) {
            fprintf (stderr, "The memory manager cannot create tls key during initialization\n");
            return false;
        }
#endif /* USE_WINTHREAD */
        return true;
    }
#if __TBB_SOURCE_DIRECTLY_INCLUDED
    static void destroy() {
        if( Tid_key ) {
#if USE_WINTHREAD
            BOOL status = !(TlsFree( Tid_key ));  // fail is zero
#else
            int status = pthread_key_delete( Tid_key );
#endif /* USE_WINTHREAD */
            if ( status )
                fprintf (stderr, "The memory manager cannot delete tls key\n");
            Tid_key = 0;
        }
    }
#endif

    ThreadId() : id(ThreadId::tlsNumber()) {}
    bool isCurrentThreadId() const { return id == ThreadId::tlsNumber(); }

#if COLLECT_STATISTICS || MALLOC_TRACE
    friend unsigned int getThreadId() { return ThreadId::tlsNumber(); }
#endif
#if COLLECT_STATISTICS
    static unsigned getMaxThreadId() { return ThreadCount.load(std::memory_order_relaxed); }

    friend int STAT_increment(ThreadId tid, int bin, int ctr);
#endif
};

tls_key_t ThreadId::Tid_key;
std::atomic<intptr_t> ThreadId::ThreadCount;

#if COLLECT_STATISTICS
int STAT_increment(ThreadId tid, int bin, int ctr)
{
    return ::STAT_increment(tid.id, bin, ctr);
}
#endif

#else // USE_INTERNAL_TID

class ThreadId {
#if USE_PTHREAD
    std::atomic<pthread_t> tid;
#else
    std::atomic<DWORD>     tid;
#endif
public:
    ThreadId() : tid(GetMyTID()) {}
#if USE_PTHREAD
    bool isCurrentThreadId() const { return pthread_equal(pthread_self(), tid.load(std::memory_order_relaxed)); }
#else
    bool isCurrentThreadId() const { return GetCurrentThreadId() == tid.load(std::memory_order_relaxed); }
#endif
    ThreadId& operator=(const ThreadId& other) {
        tid.store(other.tid.load(std::memory_order_relaxed), std::memory_order_relaxed);
        return *this;
    }
    static bool init() { return true; }
#if __TBB_SOURCE_DIRECTLY_INCLUDED
    static void destroy() {}
#endif
};

#endif // USE_INTERNAL_TID

/*********** Code to provide thread ID and a thread-local void pointer **********/

bool TLSKey::init()
{
#if USE_WINTHREAD
    TLS_pointer_key = TlsAlloc();
    if (TLS_pointer_key == TLS_ALLOC_FAILURE)
        return false;
#else
    int status = pthread_key_create( &TLS_pointer_key, mallocThreadShutdownNotification );
    if ( status )
        return false;
#endif /* USE_WINTHREAD */
    return true;
}

bool TLSKey::destroy()
{
#if USE_WINTHREAD
    BOOL status1 = !(TlsFree(TLS_pointer_key)); // fail is zero
#else
    int status1 = pthread_key_delete(TLS_pointer_key);
#endif /* USE_WINTHREAD */
    MALLOC_ASSERT(!status1, "The memory manager cannot delete tls key.");
    return status1==0;
}

inline TLSData* TLSKey::getThreadMallocTLS() const
{
    return (TLSData *)TlsGetValue_func( TLS_pointer_key );
}

inline void TLSKey::setThreadMallocTLS( TLSData * newvalue ) {
    RecursiveMallocCallProtector scoped;
    TlsSetValue_func( TLS_pointer_key, newvalue );
}

/* The 'next' field in the block header has to maintain some invariants:
 *   it needs to be on a 16K boundary and the first field in the block.
 *   Any value stored there needs to have the lower 14 bits set to 0
 *   so that various asserts work. This means that if you want to smash this memory
 *   for debugging purposes you will need to obey this invariant.
 * The total size of the header needs to be a power of 2 to simplify
 * the alignment requirements. For now it is a 128 byte structure.
 * To avoid false sharing, the fields changed only locally are separated
 * from the fields changed by foreign threads.
 * Changing the size of the block header would require changing
 * some bin allocation sizes, in particular "fitting" sizes (see above).
 */
class Bin;
class StartupBlock;

class MemoryPool {
    // if no explicit grainsize, expect to see malloc in user's pAlloc
    // and set reasonably low granularity
    static const size_t defaultGranularity = estimatedCacheLineSize;

    MemoryPool() = delete;                  // deny
public:
    static MallocMutex memPoolListLock;

    // list of all active pools is used to release
    // all TLS data on thread termination or library unload
    MemoryPool     *next,
                   *prev;
    ExtMemoryPool   extMemPool;
    BootStrapBlocks bootStrapBlocks;

    static void initDefaultPool();

    bool init(intptr_t poolId, const MemPoolPolicy* memPoolPolicy);
    bool reset();
    bool destroy();
    void onThreadShutdown(TLSData *tlsData);

    inline TLSData *getTLS(bool create);
    void clearTLS() { extMemPool.tlsPointerKey.setThreadMallocTLS(nullptr); }

    Block *getEmptyBlock(size_t size);
    void returnEmptyBlock(Block *block, bool poolTheBlock);

    // get/put large object to/from local large object cache
    void *getFromLLOCache(TLSData *tls, size_t size, size_t alignment);
    void putToLLOCache(TLSData *tls, void *object);
};

static intptr_t defaultMemPool_space[sizeof(MemoryPool)/sizeof(intptr_t) +
                                     (sizeof(MemoryPool)%sizeof(intptr_t)? 1 : 0)];
static MemoryPool *defaultMemPool = (MemoryPool*)defaultMemPool_space;
const size_t MemoryPool::defaultGranularity;
// zero-initialized
MallocMutex MemoryPool::memPoolListLock;
// TODO: move huge page status to default pool, because that's its state
HugePagesStatus hugePages;
static bool usedBySrcIncluded = false;

// Padding helpers
template<size_t padd>
struct PaddingImpl {
    size_t __padding[padd];
};

template<>
struct PaddingImpl<0> {};

template<int N>
struct Padding : PaddingImpl<N/sizeof(size_t)> {};

// Slab block is 16KB-aligned. To prevent false sharing, separate locally-accessed
// fields and fields commonly accessed by non-owner threads.
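// (Illustrative layout sketch, assuming estimatedCacheLineSize == 64: GlobalBlockFields plus
// the padding added in LocalBlockFields fills the first cache line of the header, the
// locally-accessed fields fill the second, and class Block below pads the whole header to
// 2*blockHeaderAlignment == 128 bytes; the static_assert after class Block checks this.)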
class GlobalBlockFields : public BlockI {
protected:
    std::atomic<FreeObject*> publicFreeList;
    std::atomic<Block*> nextPrivatizable;
    MemoryPool  *poolPtr;
};

class LocalBlockFields : public GlobalBlockFields, Padding<blockHeaderAlignment - sizeof(GlobalBlockFields)> {
protected:
    Block       *next;
    Block       *previous;        /* Use double linked list to speed up removal */
    FreeObject  *bumpPtr;         /* Bump pointer moves from the end to the beginning of a block */
    FreeObject  *freeList;
    /* Pointer to local data for the owner thread. Used for fast finding tls
       when releasing object from a block that current thread owned.
       nullptr for orphaned blocks. */
    std::atomic<TLSData*> tlsPtr;
    ThreadId     ownerTid;        /* the ID of the thread that owns or last owned the block */
    BackRefIdx   backRefIdx;
    uint16_t     allocatedCount;  /* Number of objects allocated (obviously by the owning thread) */
    uint16_t     objectSize;
    bool         isFull;

    friend class FreeBlockPool;
    friend class StartupBlock;
    friend class LifoList;
    friend void *BootStrapBlocks::allocate(MemoryPool *, size_t);
    friend bool OrphanedBlocks::cleanup(Backend*);
    friend Block *MemoryPool::getEmptyBlock(size_t);
};

// Use inheritance to guarantee that user data starts on the next cache line.
// Can't use a member for it, because when LocalBlockFields already ends on a cache line,
// there must be no additional memory consumption for any compiler.
class Block : public LocalBlockFields,
              Padding<2*blockHeaderAlignment - sizeof(LocalBlockFields)> {
public:
    bool empty() const {
        if (allocatedCount > 0) return false;
        MALLOC_ASSERT(!isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT);
        return true;
    }
    inline FreeObject* allocate();
    inline FreeObject *allocateFromFreeList();

    inline bool adjustFullness();
    void adjustPositionInBin(Bin* bin = nullptr);
#if MALLOC_DEBUG
    bool freeListNonNull() { return freeList; }
#endif
    void freePublicObject(FreeObject *objectToFree);
    inline void freeOwnObject(void *object);
    void reset();
    void privatizePublicFreeList( bool reset = true );
    void restoreBumpPtr();
    void privatizeOrphaned(TLSData *tls, unsigned index);
    bool readyToShare();
    void shareOrphaned(intptr_t binTag, unsigned index);
    unsigned int getSize() const {
        MALLOC_ASSERT(isStartupAllocObject() || objectSize<minLargeObjectSize,
                      "Invalid object size");
        return isStartupAllocObject()? 0 : objectSize;
    }
    const BackRefIdx *getBackRefIdx() const { return &backRefIdx; }
    inline bool isOwnedByCurrentThread() const;
    bool isStartupAllocObject() const { return objectSize == startupAllocObjSizeMark; }
    inline FreeObject *findObjectToFree(const void *object) const;
    void checkFreePrecond(const void *object) const {
#if MALLOC_DEBUG
        const char *msg = "Possible double free or heap corruption.";
        // small objects are always at least sizeof(size_t) bytes aligned,
        // try to check this before the dereference, as for invalid objects
        // this may be unreadable
        MALLOC_ASSERT(isAligned(object, sizeof(size_t)), "Try to free invalid small object");
#if !__TBB_USE_THREAD_SANITIZER
        // releasing to free slab
        MALLOC_ASSERT(allocatedCount>0, msg);
#endif
        // must not point to slab's header
        MALLOC_ASSERT((uintptr_t)object - (uintptr_t)this >= sizeof(Block), msg);
        if (startupAllocObjSizeMark == objectSize) // startup block
            MALLOC_ASSERT(object<=bumpPtr, msg);
        else {
            // non-startup objects are 8 byte aligned
            MALLOC_ASSERT(isAligned(object, 8), "Try to free invalid small object");
            FreeObject *toFree = findObjectToFree(object);
#if !__TBB_USE_THREAD_SANITIZER
            MALLOC_ASSERT(allocatedCount <= (slabSize-sizeof(Block))/objectSize
                          && (!bumpPtr || object>bumpPtr), msg);
            // check against head of freeList, as this is mostly
            // expected after double free
            MALLOC_ASSERT(toFree != freeList, msg);
#endif
            // check against head of publicFreeList, to detect double free
            // involving a foreign thread
            MALLOC_ASSERT(toFree != publicFreeList.load(std::memory_order_relaxed), msg);
        }
#else
        suppress_unused_warning(object);
#endif
    }
    void initEmptyBlock(TLSData *tls, size_t size);
    size_t findObjectSize(void *object) const;
    MemoryPool *getMemPool() const { return poolPtr; } // do not use on the hot path!

protected:
    void cleanBlockHeader();

private:
    static const float emptyEnoughRatio; /* Threshold on free space needed to "reactivate" a block */

    inline FreeObject *allocateFromBumpPtr();
    inline FreeObject *findAllocatedObject(const void *address) const;
#if MALLOC_DEBUG
    inline bool isProperlyPlaced(const void *object) const;
#endif
    inline void markOwned(TLSData *tls) {
        MALLOC_ASSERT(!tlsPtr.load(std::memory_order_relaxed), ASSERT_TEXT);
        ownerTid = ThreadId(); /* save the ID of the current thread */
        tlsPtr.store(tls, std::memory_order_relaxed);
    }
    inline void markOrphaned() {
        MALLOC_ASSERT(tlsPtr.load(std::memory_order_relaxed), ASSERT_TEXT);
        tlsPtr.store(nullptr, std::memory_order_relaxed);
    }

    friend class Bin;
    friend class TLSData;
    friend bool MemoryPool::destroy();
};

const float Block::emptyEnoughRatio = 1.0 / 4.0;

static_assert(sizeof(Block) <= 2*estimatedCacheLineSize,
    "The class Block does not fit into 2 cache lines on this platform. "
    "Defining USE_INTERNAL_TID may help to fix it.");

class Bin {
private:
public:
    Block      *activeBlk;
    std::atomic<Block*> mailbox;
    MallocMutex mailLock;

public:
    inline Block* getActiveBlock() const { return activeBlk; }
    void resetActiveBlock() { activeBlk = nullptr; }
    inline void setActiveBlock(Block *block);
    inline Block* setPreviousBlockActive();
    Block* getPrivatizedFreeListBlock();
    void moveBlockToFront(Block *block);
    bool cleanPublicFreeLists();
    void processEmptyBlock(Block *block, bool poolTheBlock);
    void addPublicFreeListBlock(Block* block);

    void outofTLSBin(Block* block);
    void verifyTLSBin(size_t size) const;
    void pushTLSBin(Block* block);

#if MALLOC_DEBUG
    void verifyInitState() const {
        MALLOC_ASSERT( !activeBlk, ASSERT_TEXT );
        MALLOC_ASSERT( !mailbox.load(std::memory_order_relaxed), ASSERT_TEXT );
    }
#endif

    friend void Block::freePublicObject (FreeObject *objectToFree);
};

/********* End of the data structures **************/

/*
 * There are bins for all 8 byte aligned objects less than this segregated size; 8 bins in total
 */
const uint32_t minSmallObjectIndex = 0;
const uint32_t numSmallObjectBins = 8;
const uint32_t maxSmallObjectSize = 64;

/*
 * There are 4 bins between each couple of powers of 2 [64-128-256-...]
 * from maxSmallObjectSize till this size; 16 bins in total
 */
const uint32_t minSegregatedObjectIndex = minSmallObjectIndex+numSmallObjectBins;
const uint32_t numSegregatedObjectBins = 16;
const uint32_t maxSegregatedObjectSize = 1024;

/*
 * And there are 5 bins with allocation sizes that are multiples of estimatedCacheLineSize
 * and selected to fit 9, 6, 4, 3, and 2 allocations in a block.
 */
const uint32_t minFittingIndex = minSegregatedObjectIndex+numSegregatedObjectBins;
const uint32_t numFittingBins = 5;

const uint32_t fittingAlignment = estimatedCacheLineSize;

#define SET_FITTING_SIZE(N) ( (slabSize-sizeof(Block))/N ) & ~(fittingAlignment-1)
// For blockSize=16*1024, sizeof(Block)=2*estimatedCacheLineSize and fittingAlignment=estimatedCacheLineSize,
// the comments show the fitting sizes and the amounts left unused for estimatedCacheLineSize=64/128:
const uint32_t fittingSize1 = SET_FITTING_SIZE(9); // 1792/1792 128/000
const uint32_t fittingSize2 = SET_FITTING_SIZE(6); // 2688/2688 128/000
const uint32_t fittingSize3 = SET_FITTING_SIZE(4); // 4032/3968 128/256
const uint32_t fittingSize4 = SET_FITTING_SIZE(3); // 5376/5376 128/000
const uint32_t fittingSize5 = SET_FITTING_SIZE(2); // 8128/8064 000/000
#undef SET_FITTING_SIZE

/*
 * The total number of thread-specific Block-based bins
 */
const uint32_t numBlockBins = minFittingIndex+numFittingBins;

/*
 * Objects of this size and larger are considered large objects.
 */
const uint32_t minLargeObjectSize = fittingSize5 + 1;

/*
 * Per-thread pool of slab blocks. The idea behind it is to not share with other
 * threads memory that is likely in the local cache(s) of our CPU.
*/
class FreeBlockPool {
private:
    std::atomic<Block*> head;
    int                 size;
    Backend            *backend;
    bool                lastAccessMiss;
public:
    static const int POOL_HIGH_MARK = 32;
    static const int POOL_LOW_MARK  = 8;

    class ResOfGet {
        ResOfGet() = delete;
    public:
        Block* block;
        bool   lastAccMiss;
        ResOfGet(Block *b, bool lastMiss) : block(b), lastAccMiss(lastMiss) {}
    };

    // allocated in zero-initialized memory
    FreeBlockPool(Backend *bknd) : backend(bknd) {}
    ResOfGet getBlock();
    void returnBlock(Block *block);
    bool externalCleanup(); // can be called by another thread
};

template<int LOW_MARK, int HIGH_MARK>
class LocalLOCImpl {
private:
    static const size_t MAX_TOTAL_SIZE = 4*1024*1024;
    // TODO: can a singly-linked list be faster here?
    LargeMemoryBlock *tail; // needed when releasing on overflow
    std::atomic<LargeMemoryBlock*> head;
    size_t            totalSize;
    int               numOfBlocks;
public:
    bool put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool);
    LargeMemoryBlock *get(size_t size);
    bool externalCleanup(ExtMemoryPool *extMemPool);
#if __TBB_MALLOC_WHITEBOX_TEST
    LocalLOCImpl() : tail(nullptr), head(nullptr), totalSize(0), numOfBlocks(0) {}
    static size_t getMaxSize() { return MAX_TOTAL_SIZE; }
    static const int LOC_HIGH_MARK = HIGH_MARK;
#else
    // no ctor, object must be created in zero-initialized memory
#endif
};

typedef LocalLOCImpl<8,32> LocalLOC; // set production code parameters

class TLSData : public TLSRemote {
    MemoryPool   *memPool;
public:
    Bin           bin[numBlockBinLimit];
    FreeBlockPool freeSlabBlocks;
    LocalLOC      lloc;
    unsigned      currCacheIdx;
private:
    std::atomic<bool> unused;
public:
    TLSData(MemoryPool *mPool, Backend *bknd) : memPool(mPool), freeSlabBlocks(bknd) {}
    MemoryPool *getMemPool() const { return memPool; }
    Bin* getAllocationBin(size_t size);
    void release();
    bool externalCleanup(bool cleanOnlyUnused, bool cleanBins) {
        if (!unused.load(std::memory_order_relaxed) && cleanOnlyUnused) return false;
        // Heavy operation in terms of synchronization complexity,
        // should be called only for the current thread
        bool released = cleanBins ? cleanupBlockBins() : false;
        // both cleanups are to be called, and the order is not important
        return released | lloc.externalCleanup(&memPool->extMemPool) | freeSlabBlocks.externalCleanup();
    }
    bool cleanupBlockBins();
    void markUsed() { unused.store(false, std::memory_order_relaxed); }  // called by owner when TLS touched
    void markUnused() { unused.store(true, std::memory_order_relaxed); } // can be called by a non-owner thread
};

TLSData *TLSKey::createTLS(MemoryPool *memPool, Backend *backend)
{
    MALLOC_ASSERT( sizeof(TLSData) >= sizeof(Bin) * numBlockBins + sizeof(FreeBlockPool), ASSERT_TEXT );
    TLSData* tls = (TLSData*) memPool->bootStrapBlocks.allocate(memPool, sizeof(TLSData));
    if ( !tls )
        return nullptr;
    new(tls) TLSData(memPool, backend);
    /* the block contains zeroes after bootStrapMalloc, so bins are initialized */
#if MALLOC_DEBUG
    for (uint32_t i = 0; i < numBlockBinLimit; i++)
        tls->bin[i].verifyInitState();
#endif
    setThreadMallocTLS(tls);
    memPool->extMemPool.allLocalCaches.registerThread(tls);
    return tls;
}

bool TLSData::cleanupBlockBins()
{
    bool released = false;
    for (uint32_t i = 0; i < numBlockBinLimit; i++) {
        released |= bin[i].cleanPublicFreeLists();
        // After cleaning public free lists, only the active block might be empty.
        // Do not use processEmptyBlock because it will just restore bumpPtr.
        Block *block = bin[i].getActiveBlock();
        if (block && block->empty()) {
            bin[i].outofTLSBin(block);
            memPool->returnEmptyBlock(block, /*poolTheBlock=*/false);
            released = true;
        }
    }
    return released;
}

bool ExtMemoryPool::releaseAllLocalCaches()
{
    // Iterate all registered TLS data and clean LLOC and Slab pools
    bool released = allLocalCaches.cleanup(/*cleanOnlyUnused=*/false);

    // Bins privatization is done only for the current thread
    if (TLSData *tlsData = tlsPointerKey.getThreadMallocTLS())
        released |= tlsData->cleanupBlockBins();

    return released;
}

void AllLocalCaches::registerThread(TLSRemote *tls)
{
    tls->prev = nullptr;
    MallocMutex::scoped_lock lock(listLock);
    MALLOC_ASSERT(head!=tls, ASSERT_TEXT);
    tls->next = head;
    if (head)
        head->prev = tls;
    head = tls;
    MALLOC_ASSERT(head->next!=head, ASSERT_TEXT);
}

void AllLocalCaches::unregisterThread(TLSRemote *tls)
{
    MallocMutex::scoped_lock lock(listLock);
    MALLOC_ASSERT(head, "Can't unregister thread: no threads are registered.");
    if (head == tls)
        head = tls->next;
    if (tls->next)
        tls->next->prev = tls->prev;
    if (tls->prev)
        tls->prev->next = tls->next;
    MALLOC_ASSERT(!tls->next || tls->next->next!=tls->next, ASSERT_TEXT);
}

bool AllLocalCaches::cleanup(bool cleanOnlyUnused)
{
    bool released = false;
    {
        MallocMutex::scoped_lock lock(listLock);
        for (TLSRemote *curr=head; curr; curr=curr->next)
            released |= static_cast<TLSData*>(curr)->externalCleanup(cleanOnlyUnused, /*cleanBins=*/false);
    }
    return released;
}

void AllLocalCaches::markUnused()
{
    bool locked;
    MallocMutex::scoped_lock lock(listLock, /*block=*/false, &locked);
    if (!locked) // do not wait for the lock; skip marking if someone is working with the list
        return;

    for (TLSRemote *curr=head; curr; curr=curr->next)
        static_cast<TLSData*>(curr)->markUnused();
}

#if MALLOC_CHECK_RECURSION
MallocMutex RecursiveMallocCallProtector::rmc_mutex;
std::atomic<pthread_t> RecursiveMallocCallProtector::owner_thread;
std::atomic<void*> RecursiveMallocCallProtector::autoObjPtr;
bool        RecursiveMallocCallProtector::mallocRecursionDetected;
#if __FreeBSD__
bool        RecursiveMallocCallProtector::canUsePthread;
#endif

#endif

/*********** End code to provide thread ID and a TLS pointer **********/

// Parameter for isLargeObject, keeps our expectations on memory origin.
// Assertions must use unknownMem to reliably report object invalidity.
enum MemoryOrigin {
    ourMem,    // allocated by TBB allocator
    unknownMem // can be allocated by system allocator or TBB allocator
};

template<MemoryOrigin>
#if __TBB_USE_THREAD_SANITIZER
// We have a real race when accessing the large object header for
// non large objects (e.g. small or foreign objects).
// Therefore, we need to hide this access from the thread sanitizer
__attribute__((no_sanitize("thread")))
#endif
bool isLargeObject(void *object);
static void *internalMalloc(size_t size);
static void internalFree(void *object);
static void *internalPoolMalloc(MemoryPool* mPool, size_t size);
static bool internalPoolFree(MemoryPool *mPool, void *object, size_t size);

#if !MALLOC_DEBUG
#if __INTEL_COMPILER || _MSC_VER
#define NOINLINE(decl) __declspec(noinline) decl
#define ALWAYSINLINE(decl) __forceinline decl
#elif __GNUC__
#define NOINLINE(decl) decl __attribute__ ((noinline))
#define ALWAYSINLINE(decl) decl __attribute__ ((always_inline))
#else
#define NOINLINE(decl) decl
#define ALWAYSINLINE(decl) decl
#endif

static NOINLINE( bool doInitialization() );
ALWAYSINLINE( bool isMallocInitialized() );

#undef ALWAYSINLINE
#undef NOINLINE
#endif /* !MALLOC_DEBUG */


/********* Now some rough utility code to deal with indexing the size bins. **************/

/*
 * Given a number, return the position of its highest set bit. It is intended to work with 32-bit values only.
 * Moreover, on some platforms, for the sake of simplicity and performance, it is narrowed to only serve for 64 to 1023.
 * This is enough for the current algorithm of distribution of sizes among bins.
 * __TBB_Log2 is not used here to minimize dependencies on TBB specific sources.
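 * (For example, within the narrowed 64..1023 range: highestBitPos(64) == 6,
 * highestBitPos(255) == 7, highestBitPos(1023) == 9 -- which matches the bsr lookup
 * table used in the generic fallback branch below.)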
 */
#if _WIN64 && _MSC_VER>=1400 && !__INTEL_COMPILER
extern "C" unsigned char _BitScanReverse( unsigned long* i, unsigned long w );
#pragma intrinsic(_BitScanReverse)
#endif
static inline unsigned int highestBitPos(unsigned int n)
{
    MALLOC_ASSERT( n>=64 && n<1024, ASSERT_TEXT ); // only needed for bsr array lookup, but always true
    unsigned int pos;
#if __ARCH_x86_32||__ARCH_x86_64

# if __unix__||__APPLE__||__MINGW32__
    __asm__ ("bsr %1,%0" : "=r"(pos) : "r"(n));
# elif (_WIN32 && (!_WIN64 || __INTEL_COMPILER))
    __asm
    {
        bsr eax, n
        mov pos, eax
    }
# elif _WIN64 && _MSC_VER>=1400
    _BitScanReverse((unsigned long*)&pos, (unsigned long)n);
# else
#   error highestBitPos() not implemented for this platform
# endif
#elif __arm__
    __asm__ __volatile__
    (
       "clz %0, %1\n"
       "rsb %0, %0, %2\n"
       :"=r" (pos) :"r" (n), "I" (31)
    );
#else
    static unsigned int bsr[16] = {0/*N/A*/,6,7,7,8,8,8,8,9,9,9,9,9,9,9,9};
    pos = bsr[ n>>6 ];
#endif /* __ARCH_* */
    return pos;
}

unsigned int getSmallObjectIndex(unsigned int size)
{
    unsigned int result = (size-1)>>3;
    constexpr bool is_64bit = (8 == sizeof(void*));
    if (is_64bit) {
        // For 64-bit malloc, 16 byte alignment is needed except for bin 0.
        if (result) result |= 1; // 0,1,3,5,7; bins 2,4,6 are not aligned to 16 bytes
    }
    return result;
}
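// (Worked example, for illustration only: on a 64-bit build a request of 24 bytes gives
//  (24-1)>>3 == 2, which is rounded to index 3, i.e. the 32-byte bin; so only bins
//  0,1,3,5,7 -- object sizes 8,16,32,48,64 -- are actually used for small objects.)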
/*
 * Depending on indexRequest, for a given size return either the index into the bin
 * for objects of this size, or the actual size of objects in this bin.
 */
template<bool indexRequest>
static unsigned int getIndexOrObjectSize (unsigned int size)
{
    if (size <= maxSmallObjectSize) { // selection from 8/16/24/32/40/48/56/64
        unsigned int index = getSmallObjectIndex( size );
        /* Bin 0 is for 8 bytes, bin 1 is for 16, and so forth */
        return indexRequest ? index : (index+1)<<3;
    }
    else if (size <= maxSegregatedObjectSize ) { // 80/96/112/128 / 160/192/224/256 / 320/384/448/512 / 640/768/896/1024
        unsigned int order = highestBitPos(size-1); // which group of bin sizes?
        MALLOC_ASSERT( 6<=order && order<=9, ASSERT_TEXT );
        if (indexRequest)
            return minSegregatedObjectIndex - (4*6) - 4 + (4*order) + ((size-1)>>(order-2));
        else {
            unsigned int alignment = 128 >> (9-order); // alignment in the group
            MALLOC_ASSERT( alignment==16 || alignment==32 || alignment==64 || alignment==128, ASSERT_TEXT );
            return alignUp(size,alignment);
        }
    }
    else {
        if( size <= fittingSize3 ) {
            if( size <= fittingSize2 ) {
                if( size <= fittingSize1 )
                    return indexRequest ? minFittingIndex : fittingSize1;
                else
                    return indexRequest ? minFittingIndex+1 : fittingSize2;
            } else
                return indexRequest ? minFittingIndex+2 : fittingSize3;
        } else {
            if( size <= fittingSize5 ) {
                if( size <= fittingSize4 )
                    return indexRequest ? minFittingIndex+3 : fittingSize4;
                else
                    return indexRequest ? minFittingIndex+4 : fittingSize5;
            } else {
                MALLOC_ASSERT( 0,ASSERT_TEXT ); // this should not happen
                return ~0U;
            }
        }
    }
}

static unsigned int getIndex (unsigned int size)
{
    return getIndexOrObjectSize</*indexRequest=*/true>(size);
}

static unsigned int getObjectSize (unsigned int size)
{
    return getIndexOrObjectSize</*indexRequest=*/false>(size);
}


void *BootStrapBlocks::allocate(MemoryPool *memPool, size_t size)
{
    FreeObject *result;

    MALLOC_ASSERT( size == sizeof(TLSData), ASSERT_TEXT );

    { // Lock with acquire
        MallocMutex::scoped_lock scoped_cs(bootStrapLock);

        if( bootStrapObjectList) {
            result = bootStrapObjectList;
            bootStrapObjectList = bootStrapObjectList->next;
        } else {
            if (!bootStrapBlock) {
                bootStrapBlock = memPool->getEmptyBlock(size);
                if (!bootStrapBlock) return nullptr;
            }
            result = bootStrapBlock->bumpPtr;
            bootStrapBlock->bumpPtr = (FreeObject *)((uintptr_t)bootStrapBlock->bumpPtr - bootStrapBlock->objectSize);
            if ((uintptr_t)bootStrapBlock->bumpPtr < (uintptr_t)bootStrapBlock+sizeof(Block)) {
                bootStrapBlock->bumpPtr = nullptr;
                bootStrapBlock->next = bootStrapBlockUsed;
                bootStrapBlockUsed = bootStrapBlock;
                bootStrapBlock = nullptr;
            }
        }
    } // Unlock with release
    memset (result, 0, size);
    return (void*)result;
}

void BootStrapBlocks::free(void* ptr)
{
    MALLOC_ASSERT( ptr, ASSERT_TEXT );
    { // Lock with acquire
        MallocMutex::scoped_lock scoped_cs(bootStrapLock);
        ((FreeObject*)ptr)->next = bootStrapObjectList;
        bootStrapObjectList = (FreeObject*)ptr;
    } // Unlock with release
}

void BootStrapBlocks::reset()
{
    bootStrapBlock = bootStrapBlockUsed = nullptr;
    bootStrapObjectList = nullptr;
}

#if !(FREELIST_NONBLOCKING)
static MallocMutex publicFreeListLock; // lock for changes of publicFreeList
#endif

/********* End rough utility code **************/

/* LifoList assumes zero initialization so a vector of it can be created
 * by just allocating some space with no call to constructor.
 * On Linux, it seems to be necessary to avoid linking with C++ libraries.
 *
 * By usage convention there is no race on the initialization. */
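// Note: OrphanedBlocks (see below) keeps one LifoList per bin; grab() lets
// OrphanedBlocks::cleanup() take an entire list at once.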
LifoList::LifoList( ) : top(nullptr)
{
    // MallocMutex assumes zero initialization
    memset(static_cast<void*>(&lock), 0, sizeof(MallocMutex));
}

void LifoList::push(Block *block)
{
    MallocMutex::scoped_lock scoped_cs(lock);
    block->next = top.load(std::memory_order_relaxed);
    top.store(block, std::memory_order_relaxed);
}

Block *LifoList::pop()
{
    Block* block = nullptr;
    if (top.load(std::memory_order_relaxed)) {
        MallocMutex::scoped_lock scoped_cs(lock);
        block = top.load(std::memory_order_relaxed);
        if (block) {
            top.store(block->next, std::memory_order_relaxed);
        }
    }
    return block;
}

Block *LifoList::grab()
{
    Block *block = nullptr;
    if (top.load(std::memory_order_relaxed)) {
        MallocMutex::scoped_lock scoped_cs(lock);
        block = top.load(std::memory_order_relaxed);
        top.store(nullptr, std::memory_order_relaxed);
    }
    return block;
}

/********* Thread and block related code *************/

template<bool poolDestroy> void AllLargeBlocksList::releaseAll(Backend *backend) {
    LargeMemoryBlock *next, *lmb = loHead;
    loHead = nullptr;

    for (; lmb; lmb = next) {
        next = lmb->gNext;
        if (poolDestroy) {
            // as it's pool destruction, no need to return the object to the backend,
            // only remove backrefs, as they are global
            removeBackRef(lmb->backRefIdx);
        } else {
            // clean g(Next|Prev) to prevent removing lmb
            // from AllLargeBlocksList inside returnLargeObject
            lmb->gNext = lmb->gPrev = nullptr;
            backend->returnLargeObject(lmb);
        }
    }
}

TLSData* MemoryPool::getTLS(bool create)
{
    TLSData* tls = extMemPool.tlsPointerKey.getThreadMallocTLS();
    if (create && !tls)
        tls = extMemPool.tlsPointerKey.createTLS(this, &extMemPool.backend);
    return tls;
}

/*
 * Return the bin for the given size.
 */
inline Bin* TLSData::getAllocationBin(size_t size)
{
    return bin + getIndex(size);
}

/* Return an empty uninitialized block in a non-blocking fashion. */
Block *MemoryPool::getEmptyBlock(size_t size)
{
    TLSData* tls = getTLS(/*create=*/false);
    // try to use the per-thread cache, if TLS is available
    FreeBlockPool::ResOfGet resOfGet = tls?
        tls->freeSlabBlocks.getBlock() : FreeBlockPool::ResOfGet(nullptr, false);
    Block *result = resOfGet.block;

    if (!result) { // not found in the local cache, ask the backend for slabs
        int num = resOfGet.lastAccMiss? Backend::numOfSlabAllocOnMiss : 1;
        BackRefIdx backRefIdx[Backend::numOfSlabAllocOnMiss];

        result = static_cast<Block*>(extMemPool.backend.getSlabBlock(num));
        if (!result) return nullptr;

        if (!extMemPool.userPool())
            for (int i=0; i<num; i++) {
                backRefIdx[i] = BackRefIdx::newBackRef(/*largeObj=*/false);
                if (backRefIdx[i].isInvalid()) {
                    // roll back resource allocation
                    for (int j=0; j<i; j++)
                        removeBackRef(backRefIdx[j]);
                    Block *b = result;
                    for (int j=0; j<num; b=(Block*)((uintptr_t)b+slabSize), j++)
                        extMemPool.backend.putSlabBlock(b);
                    return nullptr;
                }
            }
        // resources were allocated, register blocks
        Block *b = result;
        for (int i=0; i<num; b=(Block*)((uintptr_t)b+slabSize), i++) {
            // a slab block in a user's pool must have an invalid backRefIdx
            if (extMemPool.userPool()) {
                new (&b->backRefIdx) BackRefIdx();
            } else {
                setBackRef(backRefIdx[i], b);
                b->backRefIdx = backRefIdx[i];
            }
            b->tlsPtr.store(tls, std::memory_order_relaxed);
            b->poolPtr = this;
            // all but the first one go to the per-thread pool
            if (i > 0) {
                MALLOC_ASSERT(tls, ASSERT_TEXT);
                tls->freeSlabBlocks.returnBlock(b);
            }
        }
    }
    MALLOC_ASSERT(result, ASSERT_TEXT);
    result->initEmptyBlock(tls, size);
    STAT_increment(getThreadId(), getIndex(result->objectSize), allocBlockNew);
    return result;
}

void MemoryPool::returnEmptyBlock(Block *block, bool poolTheBlock)
{
    block->reset();
    if (poolTheBlock) {
        getTLS(/*create=*/false)->freeSlabBlocks.returnBlock(block);
    } else {
        // slab blocks in user's pools do not have valid backRefIdx
        if (!extMemPool.userPool())
            removeBackRef(*(block->getBackRefIdx()));
        extMemPool.backend.putSlabBlock(block);
    }
}

bool ExtMemoryPool::init(intptr_t poolId, rawAllocType rawAlloc,
                         rawFreeType rawFree, size_t granularity,
                         bool keepAllMemory, bool fixedPool)
{
    this->poolId = poolId;
    this->rawAlloc = rawAlloc;
    this->rawFree = rawFree;
    this->granularity = granularity;
    this->keepAllMemory = keepAllMemory;
    this->fixedPool = fixedPool;
    this->delayRegsReleasing = false;
    if (!initTLS())
        return false;
    loc.init(this);
    backend.init(this);
    MALLOC_ASSERT(isPoolValid(), nullptr);
    return true;
}

bool ExtMemoryPool::initTLS() { return tlsPointerKey.init(); }

bool MemoryPool::init(intptr_t poolId, const MemPoolPolicy *policy)
{
    if (!extMemPool.init(poolId, policy->pAlloc, policy->pFree,
               policy->granularity? policy->granularity : defaultGranularity,
               policy->keepAllMemory, policy->fixedPool))
        return false;
    {
        MallocMutex::scoped_lock lock(memPoolListLock);
        next = defaultMemPool->next;
        defaultMemPool->next = this;
        prev = defaultMemPool;
        if (next)
            next->prev = this;
    }
    return true;
}

bool MemoryPool::reset()
{
    MALLOC_ASSERT(extMemPool.userPool(), "No reset for the system pool.");
    // memory is not released during pool reset
    // TODO: mark regions to release unused ones on the next reset()
    extMemPool.delayRegionsReleasing(true);

    bootStrapBlocks.reset();
    extMemPool.lmbList.releaseAll</*poolDestroy=*/false>(&extMemPool.backend);
    if (!extMemPool.reset())
        return false;

    if (!extMemPool.initTLS())
        return false;
    extMemPool.delayRegionsReleasing(false);
    return true;
}

bool MemoryPool::destroy()
{
#if __TBB_MALLOC_LOCACHE_STAT
    extMemPool.loc.reportStat(stdout);
#endif
#if __TBB_MALLOC_BACKEND_STAT
    extMemPool.backend.reportStat(stdout);
#endif
    {
        MallocMutex::scoped_lock lock(memPoolListLock);
        // remove itself from the global pool list
        if (prev)
            prev->next = next;
        if (next)
            next->prev = prev;
    }
    // slab blocks in a non-default pool do not have backreferences,
    // only large objects do
    if (extMemPool.userPool())
        extMemPool.lmbList.releaseAll</*poolDestroy=*/true>(&extMemPool.backend);
    else {
        // only one non-userPool() is supported now
        MALLOC_ASSERT(this==defaultMemPool, nullptr);
        // Here and below in extMemPool.destroy(), the initial state is not restored
        // for a user pool, because it is just about to be released. But for the system
        // pool it is restored, because we do not want to rely on zeroing of it on a subsequent reload.
        bootStrapBlocks.reset();
        extMemPool.orphanedBlocks.reset();
    }
    return extMemPool.destroy();
}

void MemoryPool::onThreadShutdown(TLSData *tlsData)
{
    if (tlsData) { // might be called for "empty" TLS
        tlsData->release();
        bootStrapBlocks.free(tlsData);
        clearTLS();
    }
}

#if MALLOC_DEBUG
void Bin::verifyTLSBin (size_t size) const
{
    /* The debug version verifies the TLSBin as needed */
    uint32_t objSize = getObjectSize(size);

    if (activeBlk) {
        MALLOC_ASSERT( activeBlk->isOwnedByCurrentThread(), ASSERT_TEXT );
        MALLOC_ASSERT( activeBlk->objectSize == objSize, ASSERT_TEXT );
#if MALLOC_DEBUG>1
        for (Block* temp = activeBlk->next; temp; temp=temp->next) {
            MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT );
            MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT );
            MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
            MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
            if (temp->next) {
                MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
            }
        }
        for (Block* temp = activeBlk->previous; temp; temp=temp->previous) {
            MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT );
            MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT );
            MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
            MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
            if (temp->previous) {
                MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
            }
        }
#endif /* MALLOC_DEBUG>1 */
    }
}
#else /* MALLOC_DEBUG */
inline void Bin::verifyTLSBin (size_t) const { }
#endif /* MALLOC_DEBUG */

/*
 * Add a block to the start of this tls bin list.
 */
void Bin::pushTLSBin(Block* block)
{
    /* The objectSize should be defined and not a parameter
       because the function is applied to partially filled blocks as well */
    unsigned int size = block->objectSize;

    MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
    MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );
    MALLOC_ASSERT( block->next == nullptr, ASSERT_TEXT );
    MALLOC_ASSERT( block->previous == nullptr, ASSERT_TEXT );

    MALLOC_ASSERT( this, ASSERT_TEXT );
    verifyTLSBin(size);

    block->next = activeBlk;
    if( activeBlk ) {
        block->previous = activeBlk->previous;
        activeBlk->previous = block;
        if( block->previous )
            block->previous->next = block;
    } else {
        activeBlk = block;
    }

    verifyTLSBin(size);
}

/*
 * Take a block out of its tls bin (e.g. before removal).
 */
void Bin::outofTLSBin(Block* block)
{
    unsigned int size = block->objectSize;

    MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
    MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );

    MALLOC_ASSERT( this, ASSERT_TEXT );
    verifyTLSBin(size);

    if (block == activeBlk) {
        activeBlk = block->previous? block->previous : block->next;
    }
    /* Unlink the block */
    if (block->previous) {
        MALLOC_ASSERT( block->previous->next == block, ASSERT_TEXT );
        block->previous->next = block->next;
    }
    if (block->next) {
        MALLOC_ASSERT( block->next->previous == block, ASSERT_TEXT );
        block->next->previous = block->previous;
    }
    block->next = nullptr;
    block->previous = nullptr;

    verifyTLSBin(size);
}

Block* Bin::getPrivatizedFreeListBlock()
{
    Block* block;
    MALLOC_ASSERT( this, ASSERT_TEXT );
    // if this method is called, active block usage must be unsuccessful
    MALLOC_ASSERT( (!activeBlk && !mailbox.load(std::memory_order_relaxed)) || (activeBlk && activeBlk->isFull), ASSERT_TEXT );

// the counter should be changed    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    if (!mailbox.load(std::memory_order_acquire)) // hot path is an empty mailbox
        return nullptr;
    else { // the mailbox is not empty, take the lock and inspect it
        MallocMutex::scoped_lock scoped_cs(mailLock);
        block = mailbox.load(std::memory_order_relaxed);
        if( block ) {
            MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
            MALLOC_ASSERT( !isNotForUse(block->nextPrivatizable.load(std::memory_order_relaxed)), ASSERT_TEXT );
            mailbox.store(block->nextPrivatizable.load(std::memory_order_relaxed), std::memory_order_relaxed);
            block->nextPrivatizable.store((Block*)this, std::memory_order_relaxed);
        }
    }
    if( block ) {
        MALLOC_ASSERT( isSolidPtr(block->publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
        block->privatizePublicFreeList();
        block->adjustPositionInBin(this);
    }
    return block;
}

void Bin::addPublicFreeListBlock(Block* block)
{
    MallocMutex::scoped_lock scoped_cs(mailLock);
    block->nextPrivatizable.store(mailbox.load(std::memory_order_relaxed), std::memory_order_relaxed);
    mailbox.store(block, std::memory_order_relaxed);
}

// Process publicly freed objects in all blocks and return empty blocks
// to the backend in order to reduce overall footprint.
bool Bin::cleanPublicFreeLists()
{
    Block* block;
    if (!mailbox.load(std::memory_order_acquire))
        return false;
    else {
        // Grab all the blocks in the mailbox
        MallocMutex::scoped_lock scoped_cs(mailLock);
        block = mailbox.load(std::memory_order_relaxed);
        mailbox.store(nullptr, std::memory_order_relaxed);
    }
    bool released = false;
    while (block) {
        MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
        Block* tmp = block->nextPrivatizable.load(std::memory_order_relaxed);
        block->nextPrivatizable.store((Block*)this, std::memory_order_relaxed);
        block->privatizePublicFreeList();
        if (block->empty()) {
            processEmptyBlock(block, /*poolTheBlock=*/false);
            released = true;
        } else
            block->adjustPositionInBin(this);
        block = tmp;
    }
    return released;
}

bool Block::adjustFullness()
{
    if (bumpPtr) {
        /* If we are still using a bump ptr for this block it is empty enough to use. */
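        /* (Illustration only: with a 16KB slab, a 128-byte Block header and
           emptyEnoughRatio == 1/4, the else-branch below marks the block full once
           more than about three quarters of its payload is allocated.) */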
        STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
        isFull = false;
    } else {
        const float threshold = (slabSize - sizeof(Block)) * (1 - emptyEnoughRatio);
        /* allocatedCount shows how many objects in the block are in use; however it still counts
           objects freed by other threads; so a prior call to privatizePublicFreeList() is recommended */
        isFull = (allocatedCount*objectSize > threshold) ? true : false;
#if COLLECT_STATISTICS
        if (isFull)
            STAT_increment(getThreadId(), getIndex(objectSize), examineNotEmpty);
        else
            STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
#endif
    }
    return isFull;
}

// This method resides in class Block, and not in class Bin, in order to avoid
// calling getAllocationBin on a reasonably hot path in Block::freeOwnObject
void Block::adjustPositionInBin(Bin* bin/*=nullptr*/)
{
    // If the block was full, but became empty enough to use,
    // move it to the front of the list
    if (isFull && !adjustFullness()) {
        if (!bin)
            bin = tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize);
        bin->moveBlockToFront(this);
    }
}

/* Restore the bump pointer for an empty block that is planned for reuse */
void Block::restoreBumpPtr()
{
    MALLOC_ASSERT( allocatedCount == 0, ASSERT_TEXT );
    MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
    STAT_increment(getThreadId(), getIndex(objectSize), freeRestoreBumpPtr);
    bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);
    freeList = nullptr;
    isFull = false;
}

void Block::freeOwnObject(void *object)
{
    tlsPtr.load(std::memory_order_relaxed)->markUsed();
    allocatedCount--;
    MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
#if COLLECT_STATISTICS
    // Note that getAllocationBin is not called on the hottest path with statistics off.
    if (tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize)->getActiveBlock() != this)
        STAT_increment(getThreadId(), getIndex(objectSize), freeToInactiveBlock);
    else
        STAT_increment(getThreadId(), getIndex(objectSize), freeToActiveBlock);
#endif
    if (empty()) {
        // If the last object of a slab is freed, the slab cannot be marked full
        MALLOC_ASSERT(!isFull, ASSERT_TEXT);
        tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize)->processEmptyBlock(this, /*poolTheBlock=*/true);
    } else { // hot path
        FreeObject *objectToFree = findObjectToFree(object);
        objectToFree->next = freeList;
        freeList = objectToFree;
        adjustPositionInBin();
    }
}

void Block::freePublicObject (FreeObject *objectToFree)
{
    FreeObject* localPublicFreeList{};

    MALLOC_ITT_SYNC_RELEASING(&publicFreeList);
#if FREELIST_NONBLOCKING
    // TBB_REVAMP_TODO: make it non-atomic in the non-blocking scenario
    localPublicFreeList = publicFreeList.load(std::memory_order_relaxed);
    do {
        objectToFree->next = localPublicFreeList;
        // no backoff necessary because we are trying to make a change, not waiting for a change
    } while( !publicFreeList.compare_exchange_strong(localPublicFreeList, objectToFree) );
#else
    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    {
        MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
        localPublicFreeList = objectToFree->next = publicFreeList;
        publicFreeList = objectToFree;
    }
#endif

    if( localPublicFreeList==nullptr ) {
        // if the block is abandoned, its nextPrivatizable pointer should be UNUSABLE
        // otherwise, it should point to the bin the block belongs to.
        // reading nextPrivatizable is thread-safe below, because:
        // 1) the executing thread atomically got publicFreeList==nullptr and changed it to non-nullptr;
        // 2) only the owning thread can change it back to nullptr,
        // 3) but that cannot be done until the block is put to the mailbox
        // So the executing thread is now the only one that can change nextPrivatizable
        Block* next = nextPrivatizable.load(std::memory_order_acquire);
        if( !isNotForUse(next) ) {
            MALLOC_ASSERT( next!=nullptr, ASSERT_TEXT );
            Bin* theBin = (Bin*) next;
#if MALLOC_DEBUG && TBB_REVAMP_TODO
            // FIXME: The thread that returns the block is not the block's owner.
            // The below assertion compares 'theBin' against the caller's local bin, thus, it always fails.
            // Need to find a way to get the correct remote bin for comparison.
            { // check that nextPrivatizable points to the bin the block belongs to
                uint32_t index = getIndex( objectSize );
                TLSData* tls = getThreadMallocTLS();
                MALLOC_ASSERT( theBin==tls->bin+index, ASSERT_TEXT );
            }
#endif // MALLOC_DEBUG
            theBin->addPublicFreeListBlock(this);
        }
    }
    STAT_increment(getThreadId(), ThreadCommonCounters, freeToOtherThread);
    STAT_increment(ownerTid.load(std::memory_order_relaxed), getIndex(objectSize), freeByOtherThread);
}

// Make objects freed by other threads available for use again
void Block::privatizePublicFreeList( bool reset )
{
    FreeObject *localPublicFreeList;
    // If reset is false, publicFreeList should not be zeroed but set to UNUSABLE
    // to properly synchronize with other threads freeing objects to this slab.
    const intptr_t endMarker = reset ? 0 : UNUSABLE;

    // Only the owner thread may reset the pointer to nullptr
    MALLOC_ASSERT( isOwnedByCurrentThread() || !reset, ASSERT_TEXT );
#if FREELIST_NONBLOCKING
    localPublicFreeList = publicFreeList.exchange((FreeObject*)endMarker);
#else
    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    {
        MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
        localPublicFreeList = publicFreeList;
        publicFreeList = endMarker;
    }
#endif
    MALLOC_ITT_SYNC_ACQUIRED(&publicFreeList);
    MALLOC_ASSERT( !(reset && isNotForUse(publicFreeList)), ASSERT_TEXT );

    // publicFreeList must have been UNUSABLE or valid, but not nullptr
    MALLOC_ASSERT( localPublicFreeList!=nullptr, ASSERT_TEXT );
    if( isSolidPtr(localPublicFreeList) ) {
        MALLOC_ASSERT( allocatedCount <= (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
        /* other threads did not change the counter freeing our blocks */
        allocatedCount--;
        FreeObject *temp = localPublicFreeList;
        while( isSolidPtr(temp->next) ){ // the list will end with either nullptr or UNUSABLE
            temp = temp->next;
            allocatedCount--;
            MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
        }
        /* merge with local freeList */
        temp->next = freeList;
        freeList = localPublicFreeList;
        STAT_increment(getThreadId(), getIndex(objectSize), allocPrivatized);
    }
}

void Block::privatizeOrphaned(TLSData *tls, unsigned index)
{
    Bin* bin = tls->bin + index;
    STAT_increment(getThreadId(), index, allocBlockPublic);
    next = nullptr;
    previous = nullptr;
    MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) != nullptr, ASSERT_TEXT );
    /* There is not a race here since no other thread owns this block */
    markOwned(tls);
    // It is safe to change nextPrivatizable, as publicFreeList is not null
    MALLOC_ASSERT( isNotForUse(nextPrivatizable.load(std::memory_order_relaxed)), ASSERT_TEXT );
    nextPrivatizable.store((Block*)bin, std::memory_order_relaxed);
    // the next call is required to change publicFreeList to 0
    privatizePublicFreeList();
    if( empty() ) {
        restoreBumpPtr();
    } else {
        adjustFullness(); // check the block fullness and set isFull
    }
    MALLOC_ASSERT( !isNotForUse(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
}


bool Block::readyToShare()
{
    FreeObject* oldVal = nullptr;
#if FREELIST_NONBLOCKING
    publicFreeList.compare_exchange_strong(oldVal, (FreeObject*)UNUSABLE);
#else
    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    {
        MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
        if ( (oldVal=publicFreeList)==nullptr )
            (intptr_t&)(publicFreeList) = UNUSABLE;
    }
#endif
    return oldVal==nullptr;
}

void Block::shareOrphaned(intptr_t binTag, unsigned index)
{
    MALLOC_ASSERT( binTag, ASSERT_TEXT );
    // unreferenced formal parameter warning
    tbb::detail::suppress_unused_warning(index);
    STAT_increment(getThreadId(), index, freeBlockPublic);
    markOrphaned();
    if ((intptr_t)nextPrivatizable.load(std::memory_order_relaxed) == binTag) {
        // First check passed: the block is not in mailbox yet.
        // Need to set publicFreeList to non-zero, so other threads
        // will not change nextPrivatizable and it can be zeroed.
        if ( !readyToShare() ) {
            // another thread freed an object; we need to wait until it finishes.
            // There is no need for exponential backoff, as the wait here is not for a lock;
            // but we need to yield, so the thread we wait for has a chance to run.
            // TODO: add a pause to also be friendly to hyperthreads
            int count = 256;
            while ((intptr_t)nextPrivatizable.load(std::memory_order_relaxed) == binTag) {
                if (--count==0) {
                    do_yield();
                    count = 256;
                }
            }
        }
    }
    MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) !=nullptr, ASSERT_TEXT );
    // now it is safe to change our data
    previous = nullptr;
    // it is the caller's responsibility to ensure that the list of blocks
    // formed by nextPrivatizable pointers is kept consistent if required.
    // if only called from thread shutdown code, it does not matter.
    nextPrivatizable.store((Block*)UNUSABLE, std::memory_order_relaxed);
}

void Block::cleanBlockHeader()
{
    next = nullptr;
    previous = nullptr;
    freeList = nullptr;
    allocatedCount = 0;
    isFull = false;
    tlsPtr.store(nullptr, std::memory_order_relaxed);

    publicFreeList.store(nullptr, std::memory_order_relaxed);
}

void Block::initEmptyBlock(TLSData *tls, size_t size)
{
    // Having getIndex and getObjectSize called next to each other
    // allows better compiler optimization as they basically share the code.
    unsigned int index = getIndex(size);
    unsigned int objSz = getObjectSize(size);

    cleanBlockHeader();
    objectSize = objSz;
    markOwned(tls);
    // the bump pointer should be prepared for the first allocation - thus move it down by objectSize
    bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);

    // each block should have the address where the head of the list of "privatizable" blocks is kept
    // the only exception is a block for bootstrap which is initialized when TLS is yet nullptr
    nextPrivatizable.store( tls? (Block*)(tls->bin + index) : nullptr, std::memory_order_relaxed);
    TRACEF(( "[ScalableMalloc trace] Empty block %p is initialized, owner is %ld, objectSize is %d, bumpPtr is %p\n",
             this, tlsPtr.load(std::memory_order_relaxed) ? getThreadId() : -1, objectSize, bumpPtr ));
getThreadId() : -1, objectSize, bumpPtr )); 1590 } 1591 1592 Block *OrphanedBlocks::get(TLSData *tls, unsigned int size) 1593 { 1594 // TODO: try to use index from getAllocationBin 1595 unsigned int index = getIndex(size); 1596 Block *block = bins[index].pop(); 1597 if (block) { 1598 MALLOC_ITT_SYNC_ACQUIRED(bins+index); 1599 block->privatizeOrphaned(tls, index); 1600 } 1601 return block; 1602 } 1603 1604 void OrphanedBlocks::put(intptr_t binTag, Block *block) 1605 { 1606 unsigned int index = getIndex(block->getSize()); 1607 block->shareOrphaned(binTag, index); 1608 MALLOC_ITT_SYNC_RELEASING(bins+index); 1609 bins[index].push(block); 1610 } 1611 1612 void OrphanedBlocks::reset() 1613 { 1614 for (uint32_t i=0; i<numBlockBinLimit; i++) 1615 new (bins+i) LifoList(); 1616 } 1617 1618 bool OrphanedBlocks::cleanup(Backend* backend) 1619 { 1620 bool released = false; 1621 for (uint32_t i=0; i<numBlockBinLimit; i++) { 1622 Block* block = bins[i].grab(); 1623 MALLOC_ITT_SYNC_ACQUIRED(bins+i); 1624 while (block) { 1625 Block* next = block->next; 1626 block->privatizePublicFreeList( /*reset=*/false ); // do not set publicFreeList to nullptr 1627 if (block->empty()) { 1628 block->reset(); 1629 // slab blocks in user's pools do not have valid backRefIdx 1630 if (!backend->inUserPool()) 1631 removeBackRef(*(block->getBackRefIdx())); 1632 backend->putSlabBlock(block); 1633 released = true; 1634 } else { 1635 MALLOC_ITT_SYNC_RELEASING(bins+i); 1636 bins[i].push(block); 1637 } 1638 block = next; 1639 } 1640 } 1641 return released; 1642 } 1643 1644 FreeBlockPool::ResOfGet FreeBlockPool::getBlock() 1645 { 1646 Block *b = head.exchange(nullptr); 1647 1648 if (b) { 1649 size--; 1650 Block *newHead = b->next; 1651 lastAccessMiss = false; 1652 head.store(newHead, std::memory_order_release); 1653 } else { 1654 lastAccessMiss = true; 1655 } 1656 return ResOfGet(b, lastAccessMiss); 1657 } 1658 1659 void FreeBlockPool::returnBlock(Block *block) 1660 { 1661 MALLOC_ASSERT( size <= POOL_HIGH_MARK, ASSERT_TEXT ); 1662 Block *localHead = head.exchange(nullptr); 1663 1664 if (!localHead) { 1665 size = 0; // head was stolen by externalClean, correct size accordingly 1666 } else if (size == POOL_HIGH_MARK) { 1667 // release cold blocks and add hot one, 1668 // so keep POOL_LOW_MARK-1 blocks and add new block to head 1669 Block *headToFree = localHead, *helper; 1670 for (int i=0; i<POOL_LOW_MARK-2; i++) 1671 headToFree = headToFree->next; 1672 Block *last = headToFree; 1673 headToFree = headToFree->next; 1674 last->next = nullptr; 1675 size = POOL_LOW_MARK-1; 1676 for (Block *currBl = headToFree; currBl; currBl = helper) { 1677 helper = currBl->next; 1678 // slab blocks in user's pools do not have valid backRefIdx 1679 if (!backend->inUserPool()) 1680 removeBackRef(currBl->backRefIdx); 1681 backend->putSlabBlock(currBl); 1682 } 1683 } 1684 size++; 1685 block->next = localHead; 1686 head.store(block, std::memory_order_release); 1687 } 1688 1689 bool FreeBlockPool::externalCleanup() 1690 { 1691 Block *helper; 1692 bool released = false; 1693 1694 for (Block *currBl=head.exchange(nullptr); currBl; currBl=helper) { 1695 helper = currBl->next; 1696 // slab blocks in user's pools do not have valid backRefIdx 1697 if (!backend->inUserPool()) 1698 removeBackRef(currBl->backRefIdx); 1699 backend->putSlabBlock(currBl); 1700 released = true; 1701 } 1702 return released; 1703 } 1704 1705 /* Prepare the block for returning to FreeBlockPool */ 1706 void Block::reset() 1707 { 1708 // it is caller's responsibility to ensure no data is lost 
before calling this 1709 MALLOC_ASSERT( allocatedCount==0, ASSERT_TEXT ); 1710 MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT ); 1711 if (!isStartupAllocObject()) 1712 STAT_increment(getThreadId(), getIndex(objectSize), freeBlockBack); 1713 1714 cleanBlockHeader(); 1715 1716 nextPrivatizable.store(nullptr, std::memory_order_relaxed); 1717 1718 objectSize = 0; 1719 // for an empty block, bump pointer should point right after the end of the block 1720 bumpPtr = (FreeObject *)((uintptr_t)this + slabSize); 1721 } 1722 1723 inline void Bin::setActiveBlock (Block *block) 1724 { 1725 // MALLOC_ASSERT( bin, ASSERT_TEXT ); 1726 MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT ); 1727 // it is the caller's responsibility to keep the bin consistent (i.e. to ensure this block is in the bin's list) 1728 activeBlk = block; 1729 } 1730 1731 inline Block* Bin::setPreviousBlockActive() 1732 { 1733 MALLOC_ASSERT( activeBlk, ASSERT_TEXT ); 1734 Block* temp = activeBlk->previous; 1735 if( temp ) { 1736 MALLOC_ASSERT( !(temp->isFull), ASSERT_TEXT ); 1737 activeBlk = temp; 1738 } 1739 return temp; 1740 } 1741 1742 inline bool Block::isOwnedByCurrentThread() const { 1743 return tlsPtr.load(std::memory_order_relaxed) && ownerTid.isCurrentThreadId(); 1744 } 1745 1746 FreeObject *Block::findObjectToFree(const void *object) const 1747 { 1748 FreeObject *objectToFree; 1749 // Due to aligned allocations, a pointer passed to scalable_free 1750 // might differ from the address of the internally allocated object. 1751 // Small objects, however, should always be fine. 1752 if (objectSize <= maxSegregatedObjectSize) 1753 objectToFree = (FreeObject*)object; 1754 // "Fitting size" allocations are suspicious if aligned higher than naturally 1755 else { 1756 if ( ! isAligned(object,2*fittingAlignment) ) 1757 // TODO: the above check is questionable - it gives false negatives in ~50% cases, 1758 // so it might even be slower on average than unconditional use of findAllocatedObject.
1759 // here it should be a "real" object 1760 objectToFree = (FreeObject*)object; 1761 else 1762 // here object can be an aligned address, so applying additional checks 1763 objectToFree = findAllocatedObject(object); 1764 MALLOC_ASSERT( isAligned(objectToFree,fittingAlignment), ASSERT_TEXT ); 1765 } 1766 MALLOC_ASSERT( isProperlyPlaced(objectToFree), ASSERT_TEXT ); 1767 1768 return objectToFree; 1769 } 1770 1771 void TLSData::release() 1772 { 1773 memPool->extMemPool.allLocalCaches.unregisterThread(this); 1774 externalCleanup(/*cleanOnlyUnused=*/false, /*cleanBins=*/false); 1775 1776 for (unsigned index = 0; index < numBlockBins; index++) { 1777 Block *activeBlk = bin[index].getActiveBlock(); 1778 if (!activeBlk) 1779 continue; 1780 Block *threadlessBlock = activeBlk->previous; 1781 bool syncOnMailbox = false; 1782 while (threadlessBlock) { 1783 Block *threadBlock = threadlessBlock->previous; 1784 if (threadlessBlock->empty()) { 1785 /* we destroy the thread, so not use its block pool */ 1786 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false); 1787 } else { 1788 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock); 1789 syncOnMailbox = true; 1790 } 1791 threadlessBlock = threadBlock; 1792 } 1793 threadlessBlock = activeBlk; 1794 while (threadlessBlock) { 1795 Block *threadBlock = threadlessBlock->next; 1796 if (threadlessBlock->empty()) { 1797 /* we destroy the thread, so not use its block pool */ 1798 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false); 1799 } else { 1800 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock); 1801 syncOnMailbox = true; 1802 } 1803 threadlessBlock = threadBlock; 1804 } 1805 bin[index].resetActiveBlock(); 1806 1807 if (syncOnMailbox) { 1808 // Although, we synchronized on nextPrivatizable inside a block, we still need to 1809 // synchronize on the bin lifetime because the thread releasing an object into the public 1810 // free list is touching the bin (mailbox and mailLock) 1811 MallocMutex::scoped_lock scoped_cs(bin[index].mailLock); 1812 } 1813 } 1814 } 1815 1816 1817 #if MALLOC_CHECK_RECURSION 1818 // TODO: Use dedicated heap for this 1819 1820 /* 1821 * It's a special kind of allocation that can be used when malloc is 1822 * not available (either during startup or when malloc was already called and 1823 * we are, say, inside pthread_setspecific's call). 1824 * Block can contain objects of different sizes, 1825 * allocations are performed by moving bump pointer and increasing of object counter, 1826 * releasing is done via counter of objects allocated in the block 1827 * or moving bump pointer if releasing object is on a bound. 1828 * TODO: make bump pointer to grow to the same backward direction as all the others. 
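 *
 * Illustrative layout of a single startup allocation (a sketch derived from
 * allocate(), msize() and free() below; the requested size is first aligned up
 * to sizeof(size_t)):
 *
 *     [ size_t size ][ object of 'size' bytes ]
 *     ^               ^
 *     old bumpPtr     pointer returned to the caller
 *
 * msize(ptr) reads the size_t stored immediately before ptr; free(ptr) decrements
 * allocatedCount (releasing the whole block when the count reaches zero) and rolls
 * bumpPtr back when ptr happens to be the most recently allocated object,
 * i.e. when ptr + msize(ptr) == bumpPtr.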
1829 */ 1830 1831 class StartupBlock : public Block { 1832 size_t availableSize() const { 1833 return slabSize - ((uintptr_t)bumpPtr - (uintptr_t)this); 1834 } 1835 static StartupBlock *getBlock(); 1836 public: 1837 static FreeObject *allocate(size_t size); 1838 static size_t msize(void *ptr) { return *((size_t*)ptr - 1); } 1839 void free(void *ptr); 1840 }; 1841 1842 static MallocMutex startupMallocLock; 1843 static StartupBlock *firstStartupBlock; 1844 1845 StartupBlock *StartupBlock::getBlock() 1846 { 1847 BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/false); 1848 if (backRefIdx.isInvalid()) return nullptr; 1849 1850 StartupBlock *block = static_cast<StartupBlock*>( 1851 defaultMemPool->extMemPool.backend.getSlabBlock(1)); 1852 if (!block) return nullptr; 1853 1854 block->cleanBlockHeader(); 1855 setBackRef(backRefIdx, block); 1856 block->backRefIdx = backRefIdx; 1857 // use startupAllocObjSizeMark to mark objects from startup block marker 1858 block->objectSize = startupAllocObjSizeMark; 1859 block->bumpPtr = (FreeObject *)((uintptr_t)block + sizeof(StartupBlock)); 1860 return block; 1861 } 1862 1863 FreeObject *StartupBlock::allocate(size_t size) 1864 { 1865 FreeObject *result; 1866 StartupBlock *newBlock = nullptr; 1867 1868 /* Objects must be aligned on their natural bounds, 1869 and objects bigger than word on word's bound. */ 1870 size = alignUp(size, sizeof(size_t)); 1871 // We need size of an object to implement msize. 1872 size_t reqSize = size + sizeof(size_t); 1873 { 1874 MallocMutex::scoped_lock scoped_cs(startupMallocLock); 1875 // Re-check whether we need a new block (conditions might have changed) 1876 if (!firstStartupBlock || firstStartupBlock->availableSize() < reqSize) { 1877 if (!newBlock) { 1878 newBlock = StartupBlock::getBlock(); 1879 if (!newBlock) return nullptr; 1880 } 1881 newBlock->next = (Block*)firstStartupBlock; 1882 if (firstStartupBlock) 1883 firstStartupBlock->previous = (Block*)newBlock; 1884 firstStartupBlock = newBlock; 1885 } 1886 result = firstStartupBlock->bumpPtr; 1887 firstStartupBlock->allocatedCount++; 1888 firstStartupBlock->bumpPtr = 1889 (FreeObject *)((uintptr_t)firstStartupBlock->bumpPtr + reqSize); 1890 } 1891 1892 // keep object size at the negative offset 1893 *((size_t*)result) = size; 1894 return (FreeObject*)((size_t*)result+1); 1895 } 1896 1897 void StartupBlock::free(void *ptr) 1898 { 1899 Block* blockToRelease = nullptr; 1900 { 1901 MallocMutex::scoped_lock scoped_cs(startupMallocLock); 1902 1903 MALLOC_ASSERT(firstStartupBlock, ASSERT_TEXT); 1904 MALLOC_ASSERT(startupAllocObjSizeMark==objectSize 1905 && allocatedCount>0, ASSERT_TEXT); 1906 MALLOC_ASSERT((uintptr_t)ptr>=(uintptr_t)this+sizeof(StartupBlock) 1907 && (uintptr_t)ptr+StartupBlock::msize(ptr)<=(uintptr_t)this+slabSize, 1908 ASSERT_TEXT); 1909 if (0 == --allocatedCount) { 1910 if (this == firstStartupBlock) 1911 firstStartupBlock = (StartupBlock*)firstStartupBlock->next; 1912 if (previous) 1913 previous->next = next; 1914 if (next) 1915 next->previous = previous; 1916 blockToRelease = this; 1917 } else if ((uintptr_t)ptr + StartupBlock::msize(ptr) == (uintptr_t)bumpPtr) { 1918 // last object in the block released 1919 FreeObject *newBump = (FreeObject*)((size_t*)ptr - 1); 1920 MALLOC_ASSERT((uintptr_t)newBump>(uintptr_t)this+sizeof(StartupBlock), 1921 ASSERT_TEXT); 1922 bumpPtr = newBump; 1923 } 1924 } 1925 if (blockToRelease) { 1926 blockToRelease->previous = blockToRelease->next = nullptr; 1927 defaultMemPool->returnEmptyBlock(blockToRelease, 
/*poolTheBlock=*/false); 1928 } 1929 } 1930 1931 #endif /* MALLOC_CHECK_RECURSION */ 1932 1933 /********* End thread related code *************/ 1934 1935 /********* Library initialization *************/ 1936 1937 //! Value indicating the state of initialization. 1938 /* 0 = initialization not started. 1939 * 1 = initialization started but not finished. 1940 * 2 = initialization finished. 1941 * In theory, we only need values 0 and 2. But value 1 is nonetheless 1942 * useful for detecting errors in the double-check pattern. 1943 */ 1944 static std::atomic<intptr_t> mallocInitialized{0}; // implicitly initialized to 0 1945 static MallocMutex initMutex; 1946 1947 /** The leading "\0" is here so that applying "strings" to the binary 1948 delivers a clean result. */ 1949 static char VersionString[] = "\0" TBBMALLOC_VERSION_STRINGS; 1950 1951 #if USE_PTHREAD && __TBB_SOURCE_DIRECTLY_INCLUDED 1952 1953 /* Decrease race interval between dynamic library unloading and pthread key 1954 destructor. Protect only Pthreads with supported unloading. */ 1955 class ShutdownSync { 1956 /* flag is the number of threads in pthread key dtor body 1957 (i.e., between threadDtorStart() and threadDtorDone()) 1958 or the signal to skip dtor, if flag < 0 */ 1959 std::atomic<intptr_t> flag; 1960 static const intptr_t skipDtor = INTPTR_MIN/2; 1961 public: 1962 void init() { flag.store(0, std::memory_order_release); } 1963 /* Suppose that 2*abs(skipDtor) or more threads never call threadDtorStart() 1964 simultaneously, so flag never becomes negative because of that. */ 1965 bool threadDtorStart() { 1966 if (flag.load(std::memory_order_acquire) < 0) 1967 return false; 1968 if (++flag <= 0) { // note that new value returned 1969 flag.fetch_sub(1); // flag is spoiled by us, restore it 1970 return false; 1971 } 1972 return true; 1973 } 1974 void threadDtorDone() { 1975 flag.fetch_sub(1); 1976 } 1977 void processExit() { 1978 if (flag.fetch_add(skipDtor) != 0) { 1979 SpinWaitUntilEq(flag, skipDtor); 1980 } 1981 } 1982 }; 1983 1984 #else 1985 1986 class ShutdownSync { 1987 public: 1988 void init() { } 1989 bool threadDtorStart() { return true; } 1990 void threadDtorDone() { } 1991 void processExit() { } 1992 }; 1993 1994 #endif // USE_PTHREAD && __TBB_SOURCE_DIRECTLY_INCLUDED 1995 1996 static ShutdownSync shutdownSync; 1997 1998 inline bool isMallocInitialized() { 1999 // Load must have acquire fence; otherwise thread taking "initialized" path 2000 // might perform textually later loads *before* mallocInitialized becomes 2. 2001 return 2 == mallocInitialized.load(std::memory_order_acquire); 2002 } 2003 2004 /* Caller is responsible for ensuring this routine is called exactly once. */ 2005 extern "C" void MallocInitializeITT() { 2006 #if __TBB_USE_ITT_NOTIFY 2007 if (!usedBySrcIncluded) 2008 tbb::detail::r1::__TBB_load_ittnotify(); 2009 #endif 2010 } 2011 2012 void MemoryPool::initDefaultPool() { 2013 hugePages.init(); 2014 } 2015 2016 /* 2017 * Allocator initialization routine; 2018 * it is called lazily on the very first scalable_malloc call. 
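 * Initialization is guarded by a double-check pattern: isMallocInitialized() performs an
 * acquire load of mallocInitialized, while doInitialization() serializes the first-time
 * setup under initMutex (see below).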
2019 */ 2020 static bool initMemoryManager() 2021 { 2022 TRACEF(( "[ScalableMalloc trace] sizeof(Block) is %d (expected 128); sizeof(uintptr_t) is %d\n", 2023 sizeof(Block), sizeof(uintptr_t) )); 2024 MALLOC_ASSERT( 2*blockHeaderAlignment == sizeof(Block), ASSERT_TEXT ); 2025 MALLOC_ASSERT( sizeof(FreeObject) == sizeof(void*), ASSERT_TEXT ); 2026 MALLOC_ASSERT( isAligned(defaultMemPool, sizeof(intptr_t)), 2027 "Memory pool must be void*-aligned for atomic to work over aligned arguments."); 2028 2029 #if USE_WINTHREAD 2030 const size_t granularity = 64*1024; // granularity of VirtualAlloc 2031 #else 2032 // POSIX.1-2001-compliant way to get page size 2033 const size_t granularity = sysconf(_SC_PAGESIZE); 2034 #endif 2035 if (!defaultMemPool) { 2036 // Do not rely on static constructors; do the assignment explicitly in case 2037 // the library's static section has not been initialized yet at this call. 2038 defaultMemPool = (MemoryPool*)defaultMemPool_space; 2039 } 2040 bool initOk = defaultMemPool-> 2041 extMemPool.init(0, nullptr, nullptr, granularity, 2042 /*keepAllMemory=*/false, /*fixedPool=*/false); 2043 // TODO: extMemPool.init() to not allocate memory 2044 if (!initOk || !initBackRefMain(&defaultMemPool->extMemPool.backend) || !ThreadId::init()) 2045 return false; 2046 MemoryPool::initDefaultPool(); 2047 // init() is required iff initMemoryManager() is called 2048 // after mallocProcessShutdownNotification() 2049 shutdownSync.init(); 2050 #if COLLECT_STATISTICS 2051 initStatisticsCollection(); 2052 #endif 2053 return true; 2054 } 2055 2056 static bool GetBoolEnvironmentVariable(const char* name) { 2057 return tbb::detail::r1::GetBoolEnvironmentVariable(name); 2058 } 2059 2060 //! Ensures that initMemoryManager() is called once and only once. 2061 /** Does not return until initMemoryManager() has been completed by a thread. 2062 There is no need to call this routine if mallocInitialized==2. */ 2063 static bool doInitialization() 2064 { 2065 MallocMutex::scoped_lock lock( initMutex ); 2066 if (mallocInitialized.load(std::memory_order_relaxed)!=2) { 2067 MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==0, ASSERT_TEXT ); 2068 mallocInitialized.store(1, std::memory_order_relaxed); 2069 RecursiveMallocCallProtector scoped; 2070 if (!initMemoryManager()) { 2071 mallocInitialized.store(0, std::memory_order_relaxed); // restore the state and bail out 2072 return false; 2073 } 2074 #ifdef MALLOC_EXTRA_INITIALIZATION 2075 MALLOC_EXTRA_INITIALIZATION; 2076 #endif 2077 #if MALLOC_CHECK_RECURSION 2078 RecursiveMallocCallProtector::detectNaiveOverload(); 2079 #endif 2080 MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==1, ASSERT_TEXT ); 2081 // Store must have release fence, otherwise mallocInitialized==2 2082 // might become remotely visible before side effects of 2083 // initMemoryManager() become remotely visible.
2084 mallocInitialized.store(2, std::memory_order_release); 2085 if( GetBoolEnvironmentVariable("TBB_VERSION") ) { 2086 fputs(VersionString+1,stderr); 2087 hugePages.printStatus(); 2088 } 2089 } 2090 /* It can't be 0 or I would have initialized it */ 2091 MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==2, ASSERT_TEXT ); 2092 return true; 2093 } 2094 2095 /********* End library initialization *************/ 2096 2097 /********* The malloc show begins *************/ 2098 2099 2100 FreeObject *Block::allocateFromFreeList() 2101 { 2102 FreeObject *result; 2103 2104 if (!freeList) return nullptr; 2105 2106 result = freeList; 2107 MALLOC_ASSERT( result, ASSERT_TEXT ); 2108 2109 freeList = result->next; 2110 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 2111 allocatedCount++; 2112 STAT_increment(getThreadId(), getIndex(objectSize), allocFreeListUsed); 2113 2114 return result; 2115 } 2116 2117 FreeObject *Block::allocateFromBumpPtr() 2118 { 2119 FreeObject *result = bumpPtr; 2120 if (result) { 2121 bumpPtr = (FreeObject *) ((uintptr_t) bumpPtr - objectSize); 2122 if ( (uintptr_t)bumpPtr < (uintptr_t)this+sizeof(Block) ) { 2123 bumpPtr = nullptr; 2124 } 2125 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 2126 allocatedCount++; 2127 STAT_increment(getThreadId(), getIndex(objectSize), allocBumpPtrUsed); 2128 } 2129 return result; 2130 } 2131 2132 inline FreeObject* Block::allocate() 2133 { 2134 MALLOC_ASSERT( isOwnedByCurrentThread(), ASSERT_TEXT ); 2135 2136 /* for better cache locality, first looking in the free list. */ 2137 if ( FreeObject *result = allocateFromFreeList() ) { 2138 return result; 2139 } 2140 MALLOC_ASSERT( !freeList, ASSERT_TEXT ); 2141 2142 /* if free list is empty, try thread local bump pointer allocation. */ 2143 if ( FreeObject *result = allocateFromBumpPtr() ) { 2144 return result; 2145 } 2146 MALLOC_ASSERT( !bumpPtr, ASSERT_TEXT ); 2147 2148 /* the block is considered full. */ 2149 isFull = true; 2150 return nullptr; 2151 } 2152 2153 size_t Block::findObjectSize(void *object) const 2154 { 2155 size_t blSize = getSize(); 2156 #if MALLOC_CHECK_RECURSION 2157 // Currently, there is no aligned allocations from startup blocks, 2158 // so we can return just StartupBlock::msize(). 2159 // TODO: This must be extended if we add aligned allocation from startup blocks. 
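// A zero block size indicates a startup block; the real object size was stored by StartupBlock::allocate() immediately before the object.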
2160 if (!blSize) 2161 return StartupBlock::msize(object); 2162 #endif 2163 // object can be aligned, so real size can be less than block's 2164 size_t size = 2165 blSize - ((uintptr_t)object - (uintptr_t)findObjectToFree(object)); 2166 MALLOC_ASSERT(size>0 && size<minLargeObjectSize, ASSERT_TEXT); 2167 return size; 2168 } 2169 2170 void Bin::moveBlockToFront(Block *block) 2171 { 2172 /* move the block to the front of the bin */ 2173 if (block == activeBlk) return; 2174 outofTLSBin(block); 2175 pushTLSBin(block); 2176 } 2177 2178 void Bin::processEmptyBlock(Block *block, bool poolTheBlock) 2179 { 2180 if (block != activeBlk) { 2181 /* We are not using this block; return it to the pool */ 2182 outofTLSBin(block); 2183 block->getMemPool()->returnEmptyBlock(block, poolTheBlock); 2184 } else { 2185 /* all objects are free - let's restore the bump pointer */ 2186 block->restoreBumpPtr(); 2187 } 2188 } 2189 2190 template<int LOW_MARK, int HIGH_MARK> 2191 bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool) 2192 { 2193 const size_t size = object->unalignedSize; 2194 // not spoil cache with too large object, that can cause its total cleanup 2195 if (size > MAX_TOTAL_SIZE) 2196 return false; 2197 LargeMemoryBlock *localHead = head.exchange(nullptr); 2198 2199 object->prev = nullptr; 2200 object->next = localHead; 2201 if (localHead) 2202 localHead->prev = object; 2203 else { 2204 // those might not be cleaned during local cache stealing, correct them 2205 totalSize = 0; 2206 numOfBlocks = 0; 2207 tail = object; 2208 } 2209 localHead = object; 2210 totalSize += size; 2211 numOfBlocks++; 2212 // must meet both size and number of cached objects constrains 2213 if (totalSize > MAX_TOTAL_SIZE || numOfBlocks >= HIGH_MARK) { 2214 // scanning from tail until meet conditions 2215 while (totalSize > MAX_TOTAL_SIZE || numOfBlocks > LOW_MARK) { 2216 totalSize -= tail->unalignedSize; 2217 numOfBlocks--; 2218 tail = tail->prev; 2219 } 2220 LargeMemoryBlock *headToRelease = tail->next; 2221 tail->next = nullptr; 2222 2223 extMemPool->freeLargeObjectList(headToRelease); 2224 } 2225 2226 head.store(localHead, std::memory_order_release); 2227 return true; 2228 } 2229 2230 template<int LOW_MARK, int HIGH_MARK> 2231 LargeMemoryBlock *LocalLOCImpl<LOW_MARK, HIGH_MARK>::get(size_t size) 2232 { 2233 LargeMemoryBlock *localHead, *res = nullptr; 2234 2235 if (size > MAX_TOTAL_SIZE) 2236 return nullptr; 2237 2238 // TBB_REVAMP_TODO: review this line 2239 if (!head.load(std::memory_order_acquire) || (localHead = head.exchange(nullptr)) == nullptr) { 2240 // do not restore totalSize, numOfBlocks and tail at this point, 2241 // as they are used only in put(), where they must be restored 2242 return nullptr; 2243 } 2244 2245 for (LargeMemoryBlock *curr = localHead; curr; curr=curr->next) { 2246 if (curr->unalignedSize == size) { 2247 res = curr; 2248 if (curr->next) 2249 curr->next->prev = curr->prev; 2250 else 2251 tail = curr->prev; 2252 if (curr != localHead) 2253 curr->prev->next = curr->next; 2254 else 2255 localHead = curr->next; 2256 totalSize -= size; 2257 numOfBlocks--; 2258 break; 2259 } 2260 } 2261 2262 head.store(localHead, std::memory_order_release); 2263 return res; 2264 } 2265 2266 template<int LOW_MARK, int HIGH_MARK> 2267 bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::externalCleanup(ExtMemoryPool *extMemPool) 2268 { 2269 if (LargeMemoryBlock *localHead = head.exchange(nullptr)) { 2270 extMemPool->freeLargeObjectList(localHead); 2271 return true; 2272 } 2273 return false; 
2274 } 2275 2276 void *MemoryPool::getFromLLOCache(TLSData* tls, size_t size, size_t alignment) 2277 { 2278 LargeMemoryBlock *lmb = nullptr; 2279 2280 size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr); 2281 size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+alignment); 2282 if (allocationSize < size) // allocationSize is wrapped around after alignToBin 2283 return nullptr; 2284 MALLOC_ASSERT(allocationSize >= alignment, "Overflow must be checked before."); 2285 2286 if (tls) { 2287 tls->markUsed(); 2288 lmb = tls->lloc.get(allocationSize); 2289 } 2290 if (!lmb) 2291 lmb = extMemPool.mallocLargeObject(this, allocationSize); 2292 2293 if (lmb) { 2294 // doing shuffle we suppose that alignment offset guarantees 2295 // that different cache lines are in use 2296 MALLOC_ASSERT(alignment >= estimatedCacheLineSize, ASSERT_TEXT); 2297 2298 void *alignedArea = (void*)alignUp((uintptr_t)lmb+headersSize, alignment); 2299 uintptr_t alignedRight = 2300 alignDown((uintptr_t)lmb+lmb->unalignedSize - size, alignment); 2301 // Has some room to shuffle object between cache lines? 2302 // Note that alignedRight and alignedArea are aligned at alignment. 2303 unsigned ptrDelta = alignedRight - (uintptr_t)alignedArea; 2304 if (ptrDelta && tls) { // !tls is cold path 2305 // for the hot path of alignment==estimatedCacheLineSize, 2306 // allow compilers to use shift for division 2307 // (since estimatedCacheLineSize is a power-of-2 constant) 2308 unsigned numOfPossibleOffsets = alignment == estimatedCacheLineSize? 2309 ptrDelta / estimatedCacheLineSize : 2310 ptrDelta / alignment; 2311 unsigned myCacheIdx = ++tls->currCacheIdx; 2312 unsigned offset = myCacheIdx % numOfPossibleOffsets; 2313 2314 // Move object to a cache line with an offset that is different from 2315 // previous allocation. This supposedly allows us to use cache 2316 // associativity more efficiently. 2317 alignedArea = (void*)((uintptr_t)alignedArea + offset*alignment); 2318 } 2319 MALLOC_ASSERT((uintptr_t)lmb+lmb->unalignedSize >= 2320 (uintptr_t)alignedArea+size, "Object doesn't fit the block."); 2321 LargeObjectHdr *header = (LargeObjectHdr*)alignedArea-1; 2322 header->memoryBlock = lmb; 2323 header->backRefIdx = lmb->backRefIdx; 2324 setBackRef(header->backRefIdx, header); 2325 2326 lmb->objectSize = size; 2327 2328 MALLOC_ASSERT( isLargeObject<unknownMem>(alignedArea), ASSERT_TEXT ); 2329 MALLOC_ASSERT( isAligned(alignedArea, alignment), ASSERT_TEXT ); 2330 2331 return alignedArea; 2332 } 2333 return nullptr; 2334 } 2335 2336 void MemoryPool::putToLLOCache(TLSData *tls, void *object) 2337 { 2338 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2339 // overwrite backRefIdx to simplify double free detection 2340 header->backRefIdx = BackRefIdx(); 2341 2342 if (tls) { 2343 tls->markUsed(); 2344 if (tls->lloc.put(header->memoryBlock, &extMemPool)) 2345 return; 2346 } 2347 extMemPool.freeLargeObject(header->memoryBlock); 2348 } 2349 2350 /* 2351 * All aligned allocations fall into one of the following categories: 2352 * 1. if both request size and alignment are <= maxSegregatedObjectSize, 2353 * we just align the size up, and request this amount, because for every size 2354 * aligned to some power of 2, the allocated object is at least that aligned. 2355 * 2. for size<minLargeObjectSize, check if already guaranteed fittingAlignment is enough. 2356 * 3. 
if size+alignment<minLargeObjectSize, we take an object of fittingSizeN and align 2357 * its address up; given such pointer, scalable_free could find the real object. 2358 * Wrapping of size+alignment is impossible because maximal allowed 2359 * alignment plus minLargeObjectSize can't lead to wrapping. 2360 * 4. otherwise, aligned large object is allocated. 2361 */ 2362 static void *allocateAligned(MemoryPool *memPool, size_t size, size_t alignment) 2363 { 2364 MALLOC_ASSERT( isPowerOfTwo(alignment), ASSERT_TEXT ); 2365 2366 if (!isMallocInitialized()) 2367 if (!doInitialization()) 2368 return nullptr; 2369 2370 void *result; 2371 if (size<=maxSegregatedObjectSize && alignment<=maxSegregatedObjectSize) 2372 result = internalPoolMalloc(memPool, alignUp(size? size: sizeof(size_t), alignment)); 2373 else if (size<minLargeObjectSize) { 2374 if (alignment<=fittingAlignment) 2375 result = internalPoolMalloc(memPool, size); 2376 else if (size+alignment < minLargeObjectSize) { 2377 void *unaligned = internalPoolMalloc(memPool, size+alignment); 2378 if (!unaligned) return nullptr; 2379 result = alignUp(unaligned, alignment); 2380 } else 2381 goto LargeObjAlloc; 2382 } else { 2383 LargeObjAlloc: 2384 TLSData *tls = memPool->getTLS(/*create=*/true); 2385 // take into account only alignment that are higher then natural 2386 result = 2387 memPool->getFromLLOCache(tls, size, largeObjectAlignment>alignment? 2388 largeObjectAlignment: alignment); 2389 } 2390 2391 MALLOC_ASSERT( isAligned(result, alignment), ASSERT_TEXT ); 2392 return result; 2393 } 2394 2395 static void *reallocAligned(MemoryPool *memPool, void *ptr, 2396 size_t newSize, size_t alignment = 0) 2397 { 2398 void *result; 2399 size_t copySize; 2400 2401 if (isLargeObject<ourMem>(ptr)) { 2402 LargeMemoryBlock* lmb = ((LargeObjectHdr *)ptr - 1)->memoryBlock; 2403 copySize = lmb->unalignedSize-((uintptr_t)ptr-(uintptr_t)lmb); 2404 2405 // Apply different strategies if size decreases 2406 if (newSize <= copySize && (0 == alignment || isAligned(ptr, alignment))) { 2407 2408 // For huge objects (that do not fit in backend cache), keep the same space unless 2409 // the new size is at least twice smaller 2410 bool isMemoryBlockHuge = copySize > memPool->extMemPool.backend.getMaxBinnedSize(); 2411 size_t threshold = isMemoryBlockHuge ? copySize / 2 : 0; 2412 if (newSize > threshold) { 2413 lmb->objectSize = newSize; 2414 return ptr; 2415 } 2416 // TODO: For large objects suitable for the backend cache, 2417 // split out the excessive part and put it to the backend. 2418 } 2419 // Reallocate for real 2420 copySize = lmb->objectSize; 2421 #if BACKEND_HAS_MREMAP 2422 if (void *r = memPool->extMemPool.remap(ptr, copySize, newSize, 2423 alignment < largeObjectAlignment ? largeObjectAlignment : alignment)) 2424 return r; 2425 #endif 2426 result = alignment ? allocateAligned(memPool, newSize, alignment) : 2427 internalPoolMalloc(memPool, newSize); 2428 2429 } else { 2430 Block* block = (Block *)alignDown(ptr, slabSize); 2431 copySize = block->findObjectSize(ptr); 2432 2433 // TODO: Move object to another bin if size decreases and the current bin is "empty enough". 2434 // Currently, in case of size decreasing, old pointer is returned 2435 if (newSize <= copySize && (0==alignment || isAligned(ptr, alignment))) { 2436 return ptr; 2437 } else { 2438 result = alignment ? allocateAligned(memPool, newSize, alignment) : 2439 internalPoolMalloc(memPool, newSize); 2440 } 2441 } 2442 if (result) { 2443 memcpy(result, ptr, copySize < newSize ? 
copySize : newSize); 2444 internalPoolFree(memPool, ptr, 0); 2445 } 2446 return result; 2447 } 2448 2449 #if MALLOC_DEBUG 2450 /* A predicate checks if an object is properly placed inside its block */ 2451 inline bool Block::isProperlyPlaced(const void *object) const 2452 { 2453 return 0 == ((uintptr_t)this + slabSize - (uintptr_t)object) % objectSize; 2454 } 2455 #endif 2456 2457 /* Finds the real object inside the block */ 2458 FreeObject *Block::findAllocatedObject(const void *address) const 2459 { 2460 // calculate offset from the end of the block space 2461 uint16_t offset = (uintptr_t)this + slabSize - (uintptr_t)address; 2462 MALLOC_ASSERT( offset<=slabSize-sizeof(Block), ASSERT_TEXT ); 2463 // find offset difference from a multiple of allocation size 2464 offset %= objectSize; 2465 // and move the address down to where the real object starts. 2466 return (FreeObject*)((uintptr_t)address - (offset? objectSize-offset: 0)); 2467 } 2468 2469 /* 2470 * Bad dereference caused by a foreign pointer is possible only here, not earlier in call chain. 2471 * Separate function isolates SEH code, as it has bad influence on compiler optimization. 2472 */ 2473 static inline BackRefIdx safer_dereference (const BackRefIdx *ptr) 2474 { 2475 BackRefIdx id; 2476 #if _MSC_VER 2477 __try { 2478 #endif 2479 id = dereference(ptr); 2480 #if _MSC_VER 2481 } __except( GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION? 2482 EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ) { 2483 id = BackRefIdx(); 2484 } 2485 #endif 2486 return id; 2487 } 2488 2489 template<MemoryOrigin memOrigin> 2490 bool isLargeObject(void *object) 2491 { 2492 if (!isAligned(object, largeObjectAlignment)) 2493 return false; 2494 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2495 BackRefIdx idx = (memOrigin == unknownMem) ? 2496 safer_dereference(&header->backRefIdx) : dereference(&header->backRefIdx); 2497 2498 return idx.isLargeObject() 2499 // in valid LargeObjectHdr memoryBlock is not nullptr 2500 && header->memoryBlock 2501 // in valid LargeObjectHdr memoryBlock points somewhere before header 2502 // TODO: more strict check 2503 && (uintptr_t)header->memoryBlock < (uintptr_t)header 2504 && getBackRef(idx) == header; 2505 } 2506 2507 static inline bool isSmallObject (void *ptr) 2508 { 2509 Block* expectedBlock = (Block*)alignDown(ptr, slabSize); 2510 const BackRefIdx* idx = expectedBlock->getBackRefIdx(); 2511 2512 bool isSmall = expectedBlock == getBackRef(safer_dereference(idx)); 2513 if (isSmall) 2514 expectedBlock->checkFreePrecond(ptr); 2515 return isSmall; 2516 } 2517 2518 /**** Check if an object was allocated by scalable_malloc ****/ 2519 static inline bool isRecognized (void* ptr) 2520 { 2521 return defaultMemPool->extMemPool.backend.ptrCanBeValid(ptr) && 2522 (isLargeObject<unknownMem>(ptr) || isSmallObject(ptr)); 2523 } 2524 2525 static inline void freeSmallObject(void *object) 2526 { 2527 /* mask low bits to get the block */ 2528 Block *block = (Block *)alignDown(object, slabSize); 2529 block->checkFreePrecond(object); 2530 2531 #if MALLOC_CHECK_RECURSION 2532 if (block->isStartupAllocObject()) { 2533 ((StartupBlock *)block)->free(object); 2534 return; 2535 } 2536 #endif 2537 if (block->isOwnedByCurrentThread()) { 2538 block->freeOwnObject(object); 2539 } else { /* Slower path to add to the shared list, the allocatedCount is updated by the owner thread in malloc. 
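The freeing thread only links the object onto the block's publicFreeList; the owning thread later privatizes that list and adjusts allocatedCount (see Block::privatizePublicFreeList()).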
*/ 2540 FreeObject *objectToFree = block->findObjectToFree(object); 2541 block->freePublicObject(objectToFree); 2542 } 2543 } 2544 2545 static void *internalPoolMalloc(MemoryPool* memPool, size_t size) 2546 { 2547 Bin* bin; 2548 Block * mallocBlock; 2549 2550 if (!memPool) return nullptr; 2551 2552 if (!size) size = sizeof(size_t); 2553 2554 TLSData *tls = memPool->getTLS(/*create=*/true); 2555 2556 /* Allocate a large object */ 2557 if (size >= minLargeObjectSize) 2558 return memPool->getFromLLOCache(tls, size, largeObjectAlignment); 2559 2560 if (!tls) return nullptr; 2561 2562 tls->markUsed(); 2563 /* 2564 * Get an element in thread-local array corresponding to the given size; 2565 * It keeps ptr to the active block for allocations of this size 2566 */ 2567 bin = tls->getAllocationBin(size); 2568 if ( !bin ) return nullptr; 2569 2570 /* Get a block to try to allocate in. */ 2571 for( mallocBlock = bin->getActiveBlock(); mallocBlock; 2572 mallocBlock = bin->setPreviousBlockActive() ) // the previous block should be empty enough 2573 { 2574 if( FreeObject *result = mallocBlock->allocate() ) 2575 return result; 2576 } 2577 2578 /* 2579 * else privatize publicly freed objects in some block and allocate from it 2580 */ 2581 mallocBlock = bin->getPrivatizedFreeListBlock(); 2582 if (mallocBlock) { 2583 MALLOC_ASSERT( mallocBlock->freeListNonNull(), ASSERT_TEXT ); 2584 if ( FreeObject *result = mallocBlock->allocateFromFreeList() ) 2585 return result; 2586 /* Else something strange happened, need to retry from the beginning; */ 2587 TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in public free list; reentering.\n" )); 2588 return internalPoolMalloc(memPool, size); 2589 } 2590 2591 /* 2592 * no suitable own blocks, try to get a partial block that some other thread has discarded. 2593 */ 2594 mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size); 2595 while (mallocBlock) { 2596 bin->pushTLSBin(mallocBlock); 2597 bin->setActiveBlock(mallocBlock); // TODO: move under the below condition? 2598 if( FreeObject *result = mallocBlock->allocate() ) 2599 return result; 2600 mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size); 2601 } 2602 2603 /* 2604 * else try to get a new empty block 2605 */ 2606 mallocBlock = memPool->getEmptyBlock(size); 2607 if (mallocBlock) { 2608 bin->pushTLSBin(mallocBlock); 2609 bin->setActiveBlock(mallocBlock); 2610 if( FreeObject *result = mallocBlock->allocate() ) 2611 return result; 2612 /* Else something strange happened, need to retry from the beginning; */ 2613 TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in empty block; reentering.\n" )); 2614 return internalPoolMalloc(memPool, size); 2615 } 2616 /* 2617 * else nothing works so return nullptr 2618 */ 2619 TRACEF(( "[ScalableMalloc trace] No memory found, returning nullptr.\n" )); 2620 return nullptr; 2621 } 2622 2623 // When size==0 (i.e. unknown), detect here whether the object is large. 2624 // For size is known and < minLargeObjectSize, we still need to check 2625 // if the actual object is large, because large objects might be used 2626 // for aligned small allocations. 2627 static bool internalPoolFree(MemoryPool *memPool, void *object, size_t size) 2628 { 2629 if (!memPool || !object) return false; 2630 2631 // The library is initialized at allocation call, so releasing while 2632 // not initialized means foreign object is releasing. 
2633 MALLOC_ASSERT(isMallocInitialized(), ASSERT_TEXT); 2634 MALLOC_ASSERT(memPool->extMemPool.userPool() || isRecognized(object), 2635 "Invalid pointer during object releasing is detected."); 2636 2637 if (size >= minLargeObjectSize || isLargeObject<ourMem>(object)) 2638 memPool->putToLLOCache(memPool->getTLS(/*create=*/false), object); 2639 else 2640 freeSmallObject(object); 2641 return true; 2642 } 2643 2644 static void *internalMalloc(size_t size) 2645 { 2646 if (!size) size = sizeof(size_t); 2647 2648 #if MALLOC_CHECK_RECURSION 2649 if (RecursiveMallocCallProtector::sameThreadActive()) 2650 return size<minLargeObjectSize? StartupBlock::allocate(size) : 2651 // nested allocation, so skip tls 2652 (FreeObject*)defaultMemPool->getFromLLOCache(nullptr, size, slabSize); 2653 #endif 2654 2655 if (!isMallocInitialized()) 2656 if (!doInitialization()) 2657 return nullptr; 2658 return internalPoolMalloc(defaultMemPool, size); 2659 } 2660 2661 static void internalFree(void *object) 2662 { 2663 internalPoolFree(defaultMemPool, object, 0); 2664 } 2665 2666 static size_t internalMsize(void* ptr) 2667 { 2668 MALLOC_ASSERT(ptr, "Invalid pointer passed to internalMsize"); 2669 if (isLargeObject<ourMem>(ptr)) { 2670 // TODO: return the maximum memory size, that can be written to this object 2671 LargeMemoryBlock* lmb = ((LargeObjectHdr*)ptr - 1)->memoryBlock; 2672 return lmb->objectSize; 2673 } else { 2674 Block *block = (Block*)alignDown(ptr, slabSize); 2675 return block->findObjectSize(ptr); 2676 } 2677 } 2678 2679 } // namespace internal 2680 2681 using namespace rml::internal; 2682 2683 // legacy entry point saved for compatibility with binaries complied 2684 // with pre-6003 versions of TBB 2685 TBBMALLOC_EXPORT rml::MemoryPool *pool_create(intptr_t pool_id, const MemPoolPolicy *policy) 2686 { 2687 rml::MemoryPool *pool; 2688 MemPoolPolicy pol(policy->pAlloc, policy->pFree, policy->granularity); 2689 2690 pool_create_v1(pool_id, &pol, &pool); 2691 return pool; 2692 } 2693 2694 rml::MemPoolError pool_create_v1(intptr_t pool_id, const MemPoolPolicy *policy, 2695 rml::MemoryPool **pool) 2696 { 2697 if ( !policy->pAlloc || policy->version<MemPoolPolicy::TBBMALLOC_POOL_VERSION 2698 // empty pFree allowed only for fixed pools 2699 || !(policy->fixedPool || policy->pFree)) { 2700 *pool = nullptr; 2701 return INVALID_POLICY; 2702 } 2703 if ( policy->version>MemPoolPolicy::TBBMALLOC_POOL_VERSION // future versions are not supported 2704 // new flags can be added in place of reserved, but default 2705 // behaviour must be supported by this version 2706 || policy->reserved ) { 2707 *pool = nullptr; 2708 return UNSUPPORTED_POLICY; 2709 } 2710 if (!isMallocInitialized()) 2711 if (!doInitialization()) { 2712 *pool = nullptr; 2713 return NO_MEMORY; 2714 } 2715 rml::internal::MemoryPool *memPool = 2716 (rml::internal::MemoryPool*)internalMalloc((sizeof(rml::internal::MemoryPool))); 2717 if (!memPool) { 2718 *pool = nullptr; 2719 return NO_MEMORY; 2720 } 2721 memset(static_cast<void*>(memPool), 0, sizeof(rml::internal::MemoryPool)); 2722 if (!memPool->init(pool_id, policy)) { 2723 internalFree(memPool); 2724 *pool = nullptr; 2725 return NO_MEMORY; 2726 } 2727 2728 *pool = (rml::MemoryPool*)memPool; 2729 return POOL_OK; 2730 } 2731 2732 bool pool_destroy(rml::MemoryPool* memPool) 2733 { 2734 if (!memPool) return false; 2735 bool ret = ((rml::internal::MemoryPool*)memPool)->destroy(); 2736 internalFree(memPool); 2737 2738 return ret; 2739 } 2740 2741 bool pool_reset(rml::MemoryPool* memPool) 2742 { 2743 if 
(!memPool) return false; 2744 2745 return ((rml::internal::MemoryPool*)memPool)->reset(); 2746 } 2747 2748 void *pool_malloc(rml::MemoryPool* mPool, size_t size) 2749 { 2750 return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size); 2751 } 2752 2753 void *pool_realloc(rml::MemoryPool* mPool, void *object, size_t size) 2754 { 2755 if (!object) 2756 return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size); 2757 if (!size) { 2758 internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0); 2759 return nullptr; 2760 } 2761 return reallocAligned((rml::internal::MemoryPool*)mPool, object, size, 0); 2762 } 2763 2764 void *pool_aligned_malloc(rml::MemoryPool* mPool, size_t size, size_t alignment) 2765 { 2766 if (!isPowerOfTwo(alignment) || 0==size) 2767 return nullptr; 2768 2769 return allocateAligned((rml::internal::MemoryPool*)mPool, size, alignment); 2770 } 2771 2772 void *pool_aligned_realloc(rml::MemoryPool* memPool, void *ptr, size_t size, size_t alignment) 2773 { 2774 if (!isPowerOfTwo(alignment)) 2775 return nullptr; 2776 rml::internal::MemoryPool *mPool = (rml::internal::MemoryPool*)memPool; 2777 void *tmp; 2778 2779 if (!ptr) 2780 tmp = allocateAligned(mPool, size, alignment); 2781 else if (!size) { 2782 internalPoolFree(mPool, ptr, 0); 2783 return nullptr; 2784 } else 2785 tmp = reallocAligned(mPool, ptr, size, alignment); 2786 2787 return tmp; 2788 } 2789 2790 bool pool_free(rml::MemoryPool *mPool, void *object) 2791 { 2792 return internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0); 2793 } 2794 2795 rml::MemoryPool *pool_identify(void *object) 2796 { 2797 rml::internal::MemoryPool *pool; 2798 if (isLargeObject<ourMem>(object)) { 2799 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2800 pool = header->memoryBlock->pool; 2801 } else { 2802 Block *block = (Block*)alignDown(object, slabSize); 2803 pool = block->getMemPool(); 2804 } 2805 // do not return defaultMemPool, as it can't be used in pool_free() etc 2806 __TBB_ASSERT_RELEASE(pool!=defaultMemPool, 2807 "rml::pool_identify() can't be used for scalable_malloc() etc results."); 2808 return (rml::MemoryPool*)pool; 2809 } 2810 2811 size_t pool_msize(rml::MemoryPool *mPool, void* object) 2812 { 2813 if (object) { 2814 // No assert for object recognition, cause objects allocated from non-default 2815 // memory pool do not participate in range checking and do not have valid backreferences for 2816 // small objects. Instead, check that an object belong to the certain memory pool. 2817 MALLOC_ASSERT_EX(mPool == pool_identify(object), "Object does not belong to the specified pool"); 2818 return internalMsize(object); 2819 } 2820 errno = EINVAL; 2821 // Unlike _msize, return 0 in case of parameter error. 2822 // Returning size_t(-1) looks more like the way to troubles. 2823 return 0; 2824 } 2825 2826 } // namespace rml 2827 2828 using namespace rml::internal; 2829 2830 #if MALLOC_TRACE 2831 static unsigned int threadGoingDownCount = 0; 2832 #endif 2833 2834 /* 2835 * When a thread is shutting down this routine should be called to remove all the thread ids 2836 * from the malloc blocks and replace them with a nullptr thread id. 2837 * 2838 * For pthreads, the function is set as a callback in pthread_key_create for TLS bin. 2839 * It will be automatically called at thread exit with the key value as the argument, 2840 * unless that value is nullptr. 2841 * For Windows, it is called from DllMain( DLL_THREAD_DETACH ). 
2842 * 2843 * However neither of the above is called for the main process thread, so the routine 2844 * also needs to be called during the process shutdown. 2845 * 2846 */ 2847 // TODO: Consider making this function part of class MemoryPool. 2848 void doThreadShutdownNotification(TLSData* tls, bool main_thread) 2849 { 2850 TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return start %d\n", 2851 getThreadId(), threadGoingDownCount++ )); 2852 2853 #if USE_PTHREAD 2854 if (tls) { 2855 if (!shutdownSync.threadDtorStart()) return; 2856 tls->getMemPool()->onThreadShutdown(tls); 2857 shutdownSync.threadDtorDone(); 2858 } else 2859 #endif 2860 { 2861 suppress_unused_warning(tls); // not used on Windows 2862 // The default pool is safe to use at this point: 2863 // on Linux, only the main thread can go here before destroying defaultMemPool; 2864 // on Windows, shutdown is synchronized via loader lock and isMallocInitialized(). 2865 // See also __TBB_mallocProcessShutdownNotification() 2866 defaultMemPool->onThreadShutdown(defaultMemPool->getTLS(/*create=*/false)); 2867 // Take lock to walk through other pools; but waiting might be dangerous at this point 2868 // (e.g. on Windows the main thread might deadlock) 2869 bool locked; 2870 MallocMutex::scoped_lock lock(MemoryPool::memPoolListLock, /*wait=*/!main_thread, &locked); 2871 if (locked) { // the list is safe to process 2872 for (MemoryPool *memPool = defaultMemPool->next; memPool; memPool = memPool->next) 2873 memPool->onThreadShutdown(memPool->getTLS(/*create=*/false)); 2874 } 2875 } 2876 2877 TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return end\n", getThreadId() )); 2878 } 2879 2880 #if USE_PTHREAD 2881 void mallocThreadShutdownNotification(void* arg) 2882 { 2883 // The routine is called for each pool (as TLS dtor) on each thread, except for the main thread 2884 if (!isMallocInitialized()) return; 2885 doThreadShutdownNotification((TLSData*)arg, false); 2886 } 2887 #else 2888 extern "C" void __TBB_mallocThreadShutdownNotification() 2889 { 2890 // The routine is called once per thread on Windows 2891 if (!isMallocInitialized()) return; 2892 doThreadShutdownNotification(nullptr, false); 2893 } 2894 #endif 2895 2896 extern "C" void __TBB_mallocProcessShutdownNotification(bool windows_process_dying) 2897 { 2898 if (!isMallocInitialized()) return; 2899 2900 // Don't clean allocator internals if the entire process is exiting 2901 if (!windows_process_dying) { 2902 doThreadShutdownNotification(nullptr, /*main_thread=*/true); 2903 } 2904 #if __TBB_MALLOC_LOCACHE_STAT 2905 printf("cache hit ratio %f, size hit %f\n", 2906 1.*cacheHits/mallocCalls, 1.*memHitKB/memAllocKB); 2907 defaultMemPool->extMemPool.loc.reportStat(stdout); 2908 #endif 2909 2910 shutdownSync.processExit(); 2911 #if __TBB_SOURCE_DIRECTLY_INCLUDED 2912 /* Pthread keys must be deleted as soon as possible to not call key dtor 2913 on thread termination when then the tbbmalloc code can be already unloaded. 
2914 */ 2915 defaultMemPool->destroy(); 2916 destroyBackRefMain(&defaultMemPool->extMemPool.backend); 2917 ThreadId::destroy(); // Delete key for thread id 2918 hugePages.reset(); 2919 // new total malloc initialization is possible after this point 2920 mallocInitialized.store(0, std::memory_order_release); 2921 #endif // __TBB_SOURCE_DIRECTLY_INCLUDED 2922 2923 #if COLLECT_STATISTICS 2924 unsigned nThreads = ThreadId::getMaxThreadId(); 2925 for( int i=1; i<=nThreads && i<MAX_THREADS; ++i ) 2926 STAT_print(i); 2927 #endif 2928 if (!usedBySrcIncluded) { 2929 MALLOC_ITT_FINI_ITTLIB(); 2930 MALLOC_ITT_RELEASE_RESOURCES(); 2931 } 2932 } 2933 2934 extern "C" void * scalable_malloc(size_t size) 2935 { 2936 void *ptr = internalMalloc(size); 2937 if (!ptr) errno = ENOMEM; 2938 return ptr; 2939 } 2940 2941 extern "C" void scalable_free(void *object) 2942 { 2943 internalFree(object); 2944 } 2945 2946 #if MALLOC_ZONE_OVERLOAD_ENABLED 2947 extern "C" void __TBB_malloc_free_definite_size(void *object, size_t size) 2948 { 2949 internalPoolFree(defaultMemPool, object, size); 2950 } 2951 #endif 2952 2953 /* 2954 * A variant that provides additional memory safety, by checking whether the given address 2955 * was obtained with this allocator, and if not redirecting to the provided alternative call. 2956 */ 2957 extern "C" TBBMALLOC_EXPORT void __TBB_malloc_safer_free(void *object, void (*original_free)(void*)) 2958 { 2959 if (!object) 2960 return; 2961 2962 // tbbmalloc can allocate object only when tbbmalloc has been initialized 2963 if (mallocInitialized.load(std::memory_order_acquire) && defaultMemPool->extMemPool.backend.ptrCanBeValid(object)) { 2964 if (isLargeObject<unknownMem>(object)) { 2965 // must check 1st for large object, because small object check touches 4 pages on left, 2966 // and it can be inaccessible 2967 TLSData *tls = defaultMemPool->getTLS(/*create=*/false); 2968 2969 defaultMemPool->putToLLOCache(tls, object); 2970 return; 2971 } else if (isSmallObject(object)) { 2972 freeSmallObject(object); 2973 return; 2974 } 2975 } 2976 if (original_free) 2977 original_free(object); 2978 } 2979 2980 /********* End the free code *************/ 2981 2982 /********* Code for scalable_realloc ***********/ 2983 2984 /* 2985 * From K&R 2986 * "realloc changes the size of the object pointed to by p to size. The contents will 2987 * be unchanged up to the minimum of the old and the new sizes. If the new size is larger, 2988 * the new space is uninitialized. realloc returns a pointer to the new space, or 2989 * nullptr if the request cannot be satisfied, in which case *p is unchanged." 2990 * 2991 */ 2992 extern "C" void* scalable_realloc(void* ptr, size_t size) 2993 { 2994 void *tmp; 2995 2996 if (!ptr) 2997 tmp = internalMalloc(size); 2998 else if (!size) { 2999 internalFree(ptr); 3000 return nullptr; 3001 } else 3002 tmp = reallocAligned(defaultMemPool, ptr, size, 0); 3003 3004 if (!tmp) errno = ENOMEM; 3005 return tmp; 3006 } 3007 3008 /* 3009 * A variant that provides additional memory safety, by checking whether the given address 3010 * was obtained with this allocator, and if not redirecting to the provided alternative call. 
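 * On Windows, original_realloc is expected to point to an orig_ptrs structure carrying the
 * foreign msize/free entry points; on other platforms it is treated as the foreign realloc
 * function itself (see the implementation below).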
3011 */ 3012 extern "C" TBBMALLOC_EXPORT void* __TBB_malloc_safer_realloc(void* ptr, size_t sz, void* original_realloc) 3013 { 3014 void *tmp; // TODO: fix warnings about uninitialized use of tmp 3015 3016 if (!ptr) { 3017 tmp = internalMalloc(sz); 3018 } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) { 3019 if (!sz) { 3020 internalFree(ptr); 3021 return nullptr; 3022 } else { 3023 tmp = reallocAligned(defaultMemPool, ptr, sz, 0); 3024 } 3025 } 3026 #if USE_WINTHREAD 3027 else if (original_realloc && sz) { 3028 orig_ptrs *original_ptrs = static_cast<orig_ptrs*>(original_realloc); 3029 if ( original_ptrs->msize ){ 3030 size_t oldSize = original_ptrs->msize(ptr); 3031 tmp = internalMalloc(sz); 3032 if (tmp) { 3033 memcpy(tmp, ptr, sz<oldSize? sz : oldSize); 3034 if ( original_ptrs->free ){ 3035 original_ptrs->free( ptr ); 3036 } 3037 } 3038 } else 3039 tmp = nullptr; 3040 } 3041 #else 3042 else if (original_realloc) { 3043 typedef void* (*realloc_ptr_t)(void*,size_t); 3044 realloc_ptr_t original_realloc_ptr; 3045 (void *&)original_realloc_ptr = original_realloc; 3046 tmp = original_realloc_ptr(ptr,sz); 3047 } 3048 #endif 3049 else tmp = nullptr; 3050 3051 if (!tmp) errno = ENOMEM; 3052 return tmp; 3053 } 3054 3055 /********* End code for scalable_realloc ***********/ 3056 3057 /********* Code for scalable_calloc ***********/ 3058 3059 /* 3060 * From K&R 3061 * calloc returns a pointer to space for an array of nobj objects, 3062 * each of size size, or nullptr if the request cannot be satisfied. 3063 * The space is initialized to zero bytes. 3064 * 3065 */ 3066 3067 extern "C" void * scalable_calloc(size_t nobj, size_t size) 3068 { 3069 // it's square root of maximal size_t value 3070 const size_t mult_not_overflow = size_t(1) << (sizeof(size_t)*CHAR_BIT/2); 3071 const size_t arraySize = nobj * size; 3072 3073 // check for overflow during multiplication: 3074 if (nobj>=mult_not_overflow || size>=mult_not_overflow) // 1) heuristic check 3075 if (nobj && arraySize / nobj != size) { // 2) exact check 3076 errno = ENOMEM; 3077 return nullptr; 3078 } 3079 void* result = internalMalloc(arraySize); 3080 if (result) 3081 memset(result, 0, arraySize); 3082 else 3083 errno = ENOMEM; 3084 return result; 3085 } 3086 3087 /********* End code for scalable_calloc ***********/ 3088 3089 /********* Code for aligned allocation API **********/ 3090 3091 extern "C" int scalable_posix_memalign(void **memptr, size_t alignment, size_t size) 3092 { 3093 if ( !isPowerOfTwoAtLeast(alignment, sizeof(void*)) ) 3094 return EINVAL; 3095 void *result = allocateAligned(defaultMemPool, size, alignment); 3096 if (!result) 3097 return ENOMEM; 3098 *memptr = result; 3099 return 0; 3100 } 3101 3102 extern "C" void * scalable_aligned_malloc(size_t size, size_t alignment) 3103 { 3104 if (!isPowerOfTwo(alignment) || 0==size) { 3105 errno = EINVAL; 3106 return nullptr; 3107 } 3108 void *tmp = allocateAligned(defaultMemPool, size, alignment); 3109 if (!tmp) errno = ENOMEM; 3110 return tmp; 3111 } 3112 3113 extern "C" void * scalable_aligned_realloc(void *ptr, size_t size, size_t alignment) 3114 { 3115 if (!isPowerOfTwo(alignment)) { 3116 errno = EINVAL; 3117 return nullptr; 3118 } 3119 void *tmp; 3120 3121 if (!ptr) 3122 tmp = allocateAligned(defaultMemPool, size, alignment); 3123 else if (!size) { 3124 scalable_free(ptr); 3125 return nullptr; 3126 } else 3127 tmp = reallocAligned(defaultMemPool, ptr, size, alignment); 3128 3129 if (!tmp) errno = ENOMEM; 3130 return tmp; 3131 } 3132 3133 extern "C" 
TBBMALLOC_EXPORT void * __TBB_malloc_safer_aligned_realloc(void *ptr, size_t size, size_t alignment, void* orig_function) 3134 { 3135 /* corner cases left out of reallocAligned to not deal with errno there */ 3136 if (!isPowerOfTwo(alignment)) { 3137 errno = EINVAL; 3138 return nullptr; 3139 } 3140 void *tmp = nullptr; 3141 3142 if (!ptr) { 3143 tmp = allocateAligned(defaultMemPool, size, alignment); 3144 } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) { 3145 if (!size) { 3146 internalFree(ptr); 3147 return nullptr; 3148 } else { 3149 tmp = reallocAligned(defaultMemPool, ptr, size, alignment); 3150 } 3151 } 3152 #if USE_WINTHREAD 3153 else { 3154 orig_aligned_ptrs *original_ptrs = static_cast<orig_aligned_ptrs*>(orig_function); 3155 if (size) { 3156 // Without orig_msize, we can't do anything with this. 3157 // Just keeping old pointer. 3158 if ( original_ptrs->aligned_msize ){ 3159 // set alignment and offset to have possibly correct oldSize 3160 size_t oldSize = original_ptrs->aligned_msize(ptr, sizeof(void*), 0); 3161 tmp = allocateAligned(defaultMemPool, size, alignment); 3162 if (tmp) { 3163 memcpy(tmp, ptr, size<oldSize? size : oldSize); 3164 if ( original_ptrs->aligned_free ){ 3165 original_ptrs->aligned_free( ptr ); 3166 } 3167 } 3168 } 3169 } else { 3170 if ( original_ptrs->aligned_free ){ 3171 original_ptrs->aligned_free( ptr ); 3172 } 3173 return nullptr; 3174 } 3175 } 3176 #else 3177 // As original_realloc can't align result, and there is no way to find 3178 // size of reallocating object, we are giving up. 3179 suppress_unused_warning(orig_function); 3180 #endif 3181 if (!tmp) errno = ENOMEM; 3182 return tmp; 3183 } 3184 3185 extern "C" void scalable_aligned_free(void *ptr) 3186 { 3187 internalFree(ptr); 3188 } 3189 3190 /********* end code for aligned allocation API **********/ 3191 3192 /********* Code for scalable_msize ***********/ 3193 3194 /* 3195 * Returns the size of a memory block allocated in the heap. 3196 */ 3197 extern "C" size_t scalable_msize(void* ptr) 3198 { 3199 if (ptr) { 3200 MALLOC_ASSERT(isRecognized(ptr), "Invalid pointer in scalable_msize detected."); 3201 return internalMsize(ptr); 3202 } 3203 errno = EINVAL; 3204 // Unlike _msize, return 0 in case of parameter error. 3205 // Returning size_t(-1) looks more like the way to troubles. 3206 return 0; 3207 } 3208 3209 /* 3210 * A variant that provides additional memory safety, by checking whether the given address 3211 * was obtained with this allocator, and if not redirecting to the provided alternative call. 
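 * Returns 0 when the pointer is not recognized and no fallback is supplied; errno is set
 * to EINVAL on Windows only.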
3212 */ 3213 extern "C" TBBMALLOC_EXPORT size_t __TBB_malloc_safer_msize(void *object, size_t (*original_msize)(void*)) 3214 { 3215 if (object) { 3216 // Check if the memory was allocated by scalable_malloc 3217 if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object)) 3218 return internalMsize(object); 3219 else if (original_msize) 3220 return original_msize(object); 3221 } 3222 // object is nullptr or unknown, or foreign and no original_msize 3223 #if USE_WINTHREAD 3224 errno = EINVAL; // errno expected to be set only on this platform 3225 #endif 3226 return 0; 3227 } 3228 3229 /* 3230 * The same as above but for _aligned_msize case 3231 */ 3232 extern "C" TBBMALLOC_EXPORT size_t __TBB_malloc_safer_aligned_msize(void *object, size_t alignment, size_t offset, size_t (*orig_aligned_msize)(void*,size_t,size_t)) 3233 { 3234 if (object) { 3235 // Check if the memory was allocated by scalable_malloc 3236 if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object)) 3237 return internalMsize(object); 3238 else if (orig_aligned_msize) 3239 return orig_aligned_msize(object,alignment,offset); 3240 } 3241 // object is nullptr or unknown 3242 errno = EINVAL; 3243 return 0; 3244 } 3245 3246 /********* End code for scalable_msize ***********/ 3247 3248 extern "C" int scalable_allocation_mode(int param, intptr_t value) 3249 { 3250 if (param == TBBMALLOC_SET_SOFT_HEAP_LIMIT) { 3251 defaultMemPool->extMemPool.backend.setRecommendedMaxSize((size_t)value); 3252 return TBBMALLOC_OK; 3253 } else if (param == USE_HUGE_PAGES) { 3254 #if __unix__ 3255 switch (value) { 3256 case 0: 3257 case 1: 3258 hugePages.setMode(value); 3259 return TBBMALLOC_OK; 3260 default: 3261 return TBBMALLOC_INVALID_PARAM; 3262 } 3263 #else 3264 return TBBMALLOC_NO_EFFECT; 3265 #endif 3266 #if __TBB_SOURCE_DIRECTLY_INCLUDED 3267 } else if (param == TBBMALLOC_INTERNAL_SOURCE_INCLUDED) { 3268 switch (value) { 3269 case 0: // used by dynamic library 3270 case 1: // used by static library or directly included sources 3271 usedBySrcIncluded = value; 3272 return TBBMALLOC_OK; 3273 default: 3274 return TBBMALLOC_INVALID_PARAM; 3275 } 3276 #endif 3277 } else if (param == TBBMALLOC_SET_HUGE_SIZE_THRESHOLD) { 3278 defaultMemPool->extMemPool.loc.setHugeSizeThreshold((size_t)value); 3279 return TBBMALLOC_OK; 3280 } 3281 return TBBMALLOC_INVALID_PARAM; 3282 } 3283 3284 extern "C" int scalable_allocation_command(int cmd, void *param) 3285 { 3286 if (param) 3287 return TBBMALLOC_INVALID_PARAM; 3288 3289 bool released = false; 3290 switch(cmd) { 3291 case TBBMALLOC_CLEAN_THREAD_BUFFERS: 3292 if (TLSData *tls = defaultMemPool->getTLS(/*create=*/false)) 3293 released = tls->externalCleanup(/*cleanOnlyUnused*/false, /*cleanBins=*/true); 3294 break; 3295 case TBBMALLOC_CLEAN_ALL_BUFFERS: 3296 released = defaultMemPool->extMemPool.hardCachesCleanup(); 3297 break; 3298 default: 3299 return TBBMALLOC_INVALID_PARAM; 3300 } 3301 return released ? TBBMALLOC_OK : TBBMALLOC_NO_EFFECT; 3302 } 3303
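/*
 * Illustrative usage sketch (not compiled as part of this file): how a client might combine
 * the public entry points defined above. The header name below is an assumption about the
 * public interface; the entry points and constants are the ones declared in this file.
 *
 *     #include "oneapi/tbb/scalable_allocator.h"   // assumed public header
 *
 *     int example() {
 *         // Set a soft limit on the amount of memory the allocator keeps cached (in bytes).
 *         scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 64*1024*1024);
 *
 *         void *p = scalable_malloc(1024);
 *         if (!p) return 1;                 // scalable_malloc sets errno to ENOMEM on failure
 *         scalable_free(p);
 *
 *         // Release cached buffers; TBBMALLOC_NO_EFFECT means nothing was cached.
 *         int res = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr);
 *         return (res == TBBMALLOC_OK || res == TBBMALLOC_NO_EFFECT) ? 0 : 1;
 *     }
 */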