/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include <errno.h>
#include <new>        /* for placement new */
#include <string.h>   /* for memset */

#include "oneapi/tbb/version.h"
#include "../tbb/environment.h"
#include "../tbb/itt_notify.h" // for __TBB_load_ittnotify()

#if USE_PTHREAD
    #define TlsSetValue_func pthread_setspecific
    #define TlsGetValue_func pthread_getspecific
    #define GetMyTID() pthread_self()
    #include <sched.h>
    inline void do_yield() {sched_yield();}
    extern "C" { static void mallocThreadShutdownNotification(void*); }
    #if __sun || __SUNPRO_CC
    #define __asm__ asm
    #endif
    #include <unistd.h> // sysconf(_SC_PAGESIZE)
#elif USE_WINTHREAD
    #define GetMyTID() GetCurrentThreadId()
#if __TBB_WIN8UI_SUPPORT
    #include <thread>
    #define TlsSetValue_func FlsSetValue
    #define TlsGetValue_func FlsGetValue
    #define TlsAlloc() FlsAlloc(NULL)
    #define TLS_ALLOC_FAILURE FLS_OUT_OF_INDEXES
    #define TlsFree FlsFree
    inline void do_yield() {std::this_thread::yield();}
#else
    #define TlsSetValue_func TlsSetValue
    #define TlsGetValue_func TlsGetValue
    #define TLS_ALLOC_FAILURE TLS_OUT_OF_INDEXES
    inline void do_yield() {SwitchToThread();}
#endif
#else
    #error Must define USE_PTHREAD or USE_WINTHREAD
#endif

#define FREELIST_NONBLOCKING 1

namespace rml {
class MemoryPool;
namespace internal {

class Block;
class MemoryPool;

#if MALLOC_CHECK_RECURSION

inline bool isMallocInitialized();

#endif // MALLOC_CHECK_RECURSION

/** Support for handling the special UNUSABLE pointer state **/
const intptr_t UNUSABLE = 0x1;
inline bool isSolidPtr( void* ptr ) {
    return (UNUSABLE|(intptr_t)ptr)!=UNUSABLE;
}
inline bool isNotForUse( void* ptr ) {
    return (intptr_t)ptr==UNUSABLE;
}

/*
 * Block::objectSize value used to mark blocks allocated by startupAlloc
 */
const uint16_t startupAllocObjSizeMark = ~(uint16_t)0;

/*
 * The following constant is used to define the size of struct Block, the block header.
 * The intent is to have the size of a Block be a multiple of the cache line size; this allows us to
 * get good alignment at the cost of some overhead equal to the amount of padding included in the Block.
 */
const int blockHeaderAlignment = estimatedCacheLineSize;

/********* The data structures and global objects **************/

/*
 * The malloc routines themselves need to be able to occasionally malloc some space,
 * in order to set up the structures used by the thread-local structures. This
 * routine performs that function.
 */
class BootStrapBlocks {
    MallocMutex bootStrapLock;
    Block      *bootStrapBlock;
    Block      *bootStrapBlockUsed;
    FreeObject *bootStrapObjectList;
public:
    void *allocate(MemoryPool *memPool, size_t size);
    void free(void* ptr);
    void reset();
};

#if USE_INTERNAL_TID
class ThreadId {
    static tls_key_t Tid_key;
    static std::atomic<intptr_t> ThreadCount;

    unsigned int id;

    static unsigned int tlsNumber() {
        unsigned int result = reinterpret_cast<intptr_t>(TlsGetValue_func(Tid_key));
        if( !result ) {
            RecursiveMallocCallProtector scoped;
            // Thread-local value is zero -> first call from this thread,
            // need to initialize with next ID value (IDs start from 1)
            result = ++ThreadCount; // returned new value!
            TlsSetValue_func( Tid_key, reinterpret_cast<void*>(result) );
        }
        return result;
    }
public:
    static bool init() {
#if USE_WINTHREAD
        Tid_key = TlsAlloc();
        if (Tid_key == TLS_ALLOC_FAILURE)
            return false;
#else
        int status = pthread_key_create( &Tid_key, NULL );
        if ( status ) {
            fprintf (stderr, "The memory manager cannot create tls key during initialization\n");
            return false;
        }
#endif /* USE_WINTHREAD */
        return true;
    }
#if __TBB_SOURCE_DIRECTLY_INCLUDED
    static void destroy() {
        if( Tid_key ) {
#if USE_WINTHREAD
            BOOL status = !(TlsFree( Tid_key ));  // fail is zero
#else
            int status = pthread_key_delete( Tid_key );
#endif /* USE_WINTHREAD */
            if ( status )
                fprintf (stderr, "The memory manager cannot delete tls key\n");
            Tid_key = 0;
        }
    }
#endif

    ThreadId() : id(ThreadId::tlsNumber()) {}
    bool isCurrentThreadId() const { return id == ThreadId::tlsNumber(); }

#if COLLECT_STATISTICS || MALLOC_TRACE
    friend unsigned int getThreadId() { return ThreadId::tlsNumber(); }
#endif
#if COLLECT_STATISTICS
    static unsigned getMaxThreadId() { return ThreadCount.load(std::memory_order_relaxed); }

    friend int STAT_increment(ThreadId tid, int bin, int ctr);
#endif
};

tls_key_t ThreadId::Tid_key;
std::atomic<intptr_t> ThreadId::ThreadCount;

#if COLLECT_STATISTICS
int STAT_increment(ThreadId tid, int bin, int ctr)
{
    return ::STAT_increment(tid.id, bin, ctr);
}
#endif

#else // USE_INTERNAL_TID

class ThreadId {
#if USE_PTHREAD
    std::atomic<pthread_t> tid;
#else
    std::atomic<DWORD>     tid;
#endif
public:
    ThreadId() : tid(GetMyTID()) {}
#if USE_PTHREAD
    bool isCurrentThreadId() const { return pthread_equal(pthread_self(), tid.load(std::memory_order_relaxed)); }
#else
    bool isCurrentThreadId() const { return GetCurrentThreadId() == tid.load(std::memory_order_relaxed); }
#endif
    ThreadId& operator=(const ThreadId& other) {
        tid.store(other.tid.load(std::memory_order_relaxed), std::memory_order_relaxed);
        return *this;
    }
    static bool init() { return true; }
#if __TBB_SOURCE_DIRECTLY_INCLUDED
    static void destroy() {}
#endif
};

#endif // USE_INTERNAL_TID

/*********** Code to provide thread ID and a thread-local void pointer **********/

bool TLSKey::init()
{
#if USE_WINTHREAD
    TLS_pointer_key = TlsAlloc();
    if (TLS_pointer_key == TLS_ALLOC_FAILURE)
        return false;
#else
    int status = pthread_key_create( &TLS_pointer_key, mallocThreadShutdownNotification );
    if ( status )
        return false;
#endif /* USE_WINTHREAD */
    return true;
}

bool TLSKey::destroy()
{
#if USE_WINTHREAD
    BOOL status1 = !(TlsFree(TLS_pointer_key));  // fail is zero
#else
    int status1 = pthread_key_delete(TLS_pointer_key);
#endif /* USE_WINTHREAD */
    MALLOC_ASSERT(!status1, "The memory manager cannot delete tls key.");
    return status1==0;
}

inline TLSData* TLSKey::getThreadMallocTLS() const
{
    return (TLSData *)TlsGetValue_func( TLS_pointer_key );
}

inline void TLSKey::setThreadMallocTLS( TLSData * newvalue ) {
    RecursiveMallocCallProtector scoped;
    TlsSetValue_func( TLS_pointer_key, newvalue );
}

/* The 'next' field in the block header has to maintain some invariants:
 *   it needs to be on a 16K boundary and the first field in the block.
 *   Any value stored there needs to have the lower 14 bits set to 0
 *   so that various asserts work. This means that if you want to smash this memory
 *   for debugging purposes you will need to obey this invariant.
 * The total size of the header needs to be a power of 2 to simplify
 * the alignment requirements. For now it is a 128 byte structure.
 * To avoid false sharing, the fields changed only locally are separated
 * from the fields changed by foreign threads.
 * Changing the size of the block header would require changing
 * some bin allocation sizes, in particular "fitting" sizes (see above).
 */
class Bin;
class StartupBlock;

class MemoryPool {
    // if no explicit grainsize, expect to see malloc in user's pAlloc
    // and set a reasonably low granularity
    static const size_t defaultGranularity = estimatedCacheLineSize;

    MemoryPool();                  // deny
public:
    static MallocMutex  memPoolListLock;

    // list of all active pools is used to release
    // all TLS data on thread termination or library unload
    MemoryPool     *next,
                   *prev;
    ExtMemoryPool   extMemPool;
    BootStrapBlocks bootStrapBlocks;

    static void initDefaultPool();

    bool init(intptr_t poolId, const MemPoolPolicy* memPoolPolicy);
    bool reset();
    bool destroy();
    void onThreadShutdown(TLSData *tlsData);

    inline TLSData *getTLS(bool create);
    void clearTLS() { extMemPool.tlsPointerKey.setThreadMallocTLS(NULL); }

    Block *getEmptyBlock(size_t size);
    void returnEmptyBlock(Block *block, bool poolTheBlock);

    // get/put large object to/from local large object cache
    void *getFromLLOCache(TLSData *tls, size_t size, size_t alignment);
    void putToLLOCache(TLSData *tls, void *object);
};

static intptr_t defaultMemPool_space[sizeof(MemoryPool)/sizeof(intptr_t) +
                                     (sizeof(MemoryPool)%sizeof(intptr_t)? 1 : 0)];
static MemoryPool *defaultMemPool = (MemoryPool*)defaultMemPool_space;
const size_t MemoryPool::defaultGranularity;
// zero-initialized
MallocMutex MemoryPool::memPoolListLock;
// TODO: move huge page status to the default pool, because it is its state
HugePagesStatus hugePages;
static bool usedBySrcIncluded = false;

// Padding helpers
template<size_t padd>
struct PaddingImpl {
    size_t __padding[padd];
};

template<>
struct PaddingImpl<0> {};

template<int N>
struct Padding : PaddingImpl<N/sizeof(size_t)> {};

// Slab block is 16KB-aligned. To prevent false sharing, separate locally-accessed
// fields from fields commonly accessed by non-owner threads.
class GlobalBlockFields : public BlockI {
protected:
    std::atomic<FreeObject*> publicFreeList;
    std::atomic<Block*>      nextPrivatizable;
    MemoryPool              *poolPtr;
};

class LocalBlockFields : public GlobalBlockFields, Padding<blockHeaderAlignment - sizeof(GlobalBlockFields)> {
protected:
    Block       *next;
    Block       *previous;        /* Use double linked list to speed up removal */
    FreeObject  *bumpPtr;         /* Bump pointer moves from the end to the beginning of a block */
    FreeObject  *freeList;
    /* Pointer to local data for the owner thread. Used for fast finding tls
       when releasing object from a block that current thread owned.
       NULL for orphaned blocks. */
    std::atomic<TLSData*> tlsPtr;
    ThreadId     ownerTid;        /* the ID of the thread that owns or last owned the block */
    BackRefIdx   backRefIdx;
    uint16_t     allocatedCount;  /* Number of objects allocated (obviously by the owning thread) */
    uint16_t     objectSize;
    bool         isFull;

    friend class FreeBlockPool;
    friend class StartupBlock;
    friend class LifoList;
    friend void *BootStrapBlocks::allocate(MemoryPool *, size_t);
    friend bool OrphanedBlocks::cleanup(Backend*);
    friend Block *MemoryPool::getEmptyBlock(size_t);
};

// Use inheritance to guarantee that user data starts on the next cache line.
// Can't use a member for this, because when LocalBlockFields already ends on a cache line,
// there must be no additional memory consumption for any compiler.
class Block : public LocalBlockFields,
              Padding<2*blockHeaderAlignment - sizeof(LocalBlockFields)> {
public:
    bool empty() const {
        if (allocatedCount > 0) return false;
        MALLOC_ASSERT(!isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT);
        return true;
    }
    inline FreeObject* allocate();
    inline FreeObject *allocateFromFreeList();

    inline bool adjustFullness();
    void adjustPositionInBin(Bin* bin = NULL);
#if MALLOC_DEBUG
    bool freeListNonNull() { return freeList; }
#endif
    void freePublicObject(FreeObject *objectToFree);
    inline void freeOwnObject(void *object);
    void reset();
    void privatizePublicFreeList( bool reset = true );
    void restoreBumpPtr();
    void privatizeOrphaned(TLSData *tls, unsigned index);
    bool readyToShare();
    void shareOrphaned(intptr_t binTag, unsigned index);
    unsigned int getSize() const {
        MALLOC_ASSERT(isStartupAllocObject() || objectSize<minLargeObjectSize,
                      "Invalid object size");
        return isStartupAllocObject()? 0 : objectSize;
    }
    const BackRefIdx *getBackRefIdx() const { return &backRefIdx; }
    inline bool isOwnedByCurrentThread() const;
    bool isStartupAllocObject() const { return objectSize == startupAllocObjSizeMark; }
    inline FreeObject *findObjectToFree(const void *object) const;
    void checkFreePrecond(const void *object) const {
#if MALLOC_DEBUG
        const char *msg = "Possible double free or heap corruption.";
        // small objects are always at least sizeof(size_t) bytes aligned;
        // try to check this before the dereference, as for invalid objects
        // this may be unreadable
        MALLOC_ASSERT(isAligned(object, sizeof(size_t)), "Try to free invalid small object");
#if !__TBB_USE_THREAD_SANITIZER
        // releasing to free slab
        MALLOC_ASSERT(allocatedCount>0, msg);
#endif
        // must not point to slab's header
        MALLOC_ASSERT((uintptr_t)object - (uintptr_t)this >= sizeof(Block), msg);
        if (startupAllocObjSizeMark == objectSize) // startup block
            MALLOC_ASSERT(object<=bumpPtr, msg);
        else {
            // non-startup objects are 8 byte aligned
            MALLOC_ASSERT(isAligned(object, 8), "Try to free invalid small object");
            FreeObject *toFree = findObjectToFree(object);
#if !__TBB_USE_THREAD_SANITIZER
            MALLOC_ASSERT(allocatedCount <= (slabSize-sizeof(Block))/objectSize
                          && (!bumpPtr || object>bumpPtr), msg);
            // check against the head of freeList, as this is mostly
            // expected after a double free
            MALLOC_ASSERT(toFree != freeList, msg);
#endif
            // check against the head of publicFreeList, to detect a double free
            // involving a foreign thread
            MALLOC_ASSERT(toFree != publicFreeList.load(std::memory_order_relaxed), msg);
        }
#else
        suppress_unused_warning(object);
#endif
    }
    void initEmptyBlock(TLSData *tls, size_t size);
    size_t findObjectSize(void *object) const;
    MemoryPool *getMemPool() const { return poolPtr; } // do not use on the hot path!

protected:
    void cleanBlockHeader();

private:
    static const float emptyEnoughRatio; /* Threshold on free space needed to "reactivate" a block */

    inline FreeObject *allocateFromBumpPtr();
    inline FreeObject *findAllocatedObject(const void *address) const;
#if MALLOC_DEBUG
    inline bool isProperlyPlaced(const void *object) const;
#endif
    inline void markOwned(TLSData *tls) {
        MALLOC_ASSERT(!tlsPtr.load(std::memory_order_relaxed), ASSERT_TEXT);
        ownerTid = ThreadId(); /* save the ID of the current thread */
        tlsPtr.store(tls, std::memory_order_relaxed);
    }
    inline void markOrphaned() {
        MALLOC_ASSERT(tlsPtr.load(std::memory_order_relaxed), ASSERT_TEXT);
        tlsPtr.store(nullptr, std::memory_order_relaxed);
    }

    friend class Bin;
    friend class TLSData;
    friend bool MemoryPool::destroy();
};

const float Block::emptyEnoughRatio = 1.0 / 4.0;

static_assert(sizeof(Block) <= 2*estimatedCacheLineSize,
    "The class Block does not fit into 2 cache lines on this platform. "
    "Defining USE_INTERNAL_TID may help to fix it.");
" 454 "Defining USE_INTERNAL_TID may help to fix it."); 455 456 class Bin { 457 private: 458 public: 459 Block *activeBlk; 460 std::atomic<Block*> mailbox; 461 MallocMutex mailLock; 462 463 public: 464 inline Block* getActiveBlock() const { return activeBlk; } 465 void resetActiveBlock() { activeBlk = NULL; } 466 inline void setActiveBlock(Block *block); 467 inline Block* setPreviousBlockActive(); 468 Block* getPrivatizedFreeListBlock(); 469 void moveBlockToFront(Block *block); 470 bool cleanPublicFreeLists(); 471 void processEmptyBlock(Block *block, bool poolTheBlock); 472 void addPublicFreeListBlock(Block* block); 473 474 void outofTLSBin(Block* block); 475 void verifyTLSBin(size_t size) const; 476 void pushTLSBin(Block* block); 477 478 #if MALLOC_DEBUG 479 void verifyInitState() const { 480 MALLOC_ASSERT( !activeBlk, ASSERT_TEXT ); 481 MALLOC_ASSERT( !mailbox.load(std::memory_order_relaxed), ASSERT_TEXT ); 482 } 483 #endif 484 485 friend void Block::freePublicObject (FreeObject *objectToFree); 486 }; 487 488 /********* End of the data structures **************/ 489 490 /* 491 * There are bins for all 8 byte aligned objects less than this segregated size; 8 bins in total 492 */ 493 const uint32_t minSmallObjectIndex = 0; 494 const uint32_t numSmallObjectBins = 8; 495 const uint32_t maxSmallObjectSize = 64; 496 497 /* 498 * There are 4 bins between each couple of powers of 2 [64-128-256-...] 499 * from maxSmallObjectSize till this size; 16 bins in total 500 */ 501 const uint32_t minSegregatedObjectIndex = minSmallObjectIndex+numSmallObjectBins; 502 const uint32_t numSegregatedObjectBins = 16; 503 const uint32_t maxSegregatedObjectSize = 1024; 504 505 /* 506 * And there are 5 bins with allocation sizes that are multiples of estimatedCacheLineSize 507 * and selected to fit 9, 6, 4, 3, and 2 allocations in a block. 508 */ 509 const uint32_t minFittingIndex = minSegregatedObjectIndex+numSegregatedObjectBins; 510 const uint32_t numFittingBins = 5; 511 512 const uint32_t fittingAlignment = estimatedCacheLineSize; 513 514 #define SET_FITTING_SIZE(N) ( (slabSize-sizeof(Block))/N ) & ~(fittingAlignment-1) 515 // For blockSize=16*1024, sizeof(Block)=2*estimatedCacheLineSize and fittingAlignment=estimatedCacheLineSize, 516 // the comments show the fitting sizes and the amounts left unused for estimatedCacheLineSize=64/128: 517 const uint32_t fittingSize1 = SET_FITTING_SIZE(9); // 1792/1792 128/000 518 const uint32_t fittingSize2 = SET_FITTING_SIZE(6); // 2688/2688 128/000 519 const uint32_t fittingSize3 = SET_FITTING_SIZE(4); // 4032/3968 128/256 520 const uint32_t fittingSize4 = SET_FITTING_SIZE(3); // 5376/5376 128/000 521 const uint32_t fittingSize5 = SET_FITTING_SIZE(2); // 8128/8064 000/000 522 #undef SET_FITTING_SIZE 523 524 /* 525 * The total number of thread-specific Block-based bins 526 */ 527 const uint32_t numBlockBins = minFittingIndex+numFittingBins; 528 529 /* 530 * Objects of this size and larger are considered large objects. 531 */ 532 const uint32_t minLargeObjectSize = fittingSize5 + 1; 533 534 /* 535 * Per-thread pool of slab blocks. Idea behind it is to not share with other 536 * threads memory that are likely in local cache(s) of our CPU. 
/*
 * Per-thread pool of slab blocks. The idea behind it is to not share with other
 * threads memory that is likely in the local cache(s) of our CPU.
 */
class FreeBlockPool {
private:
    std::atomic<Block*> head;
    int                 size;
    Backend            *backend;
    bool                lastAccessMiss;
public:
    static const int POOL_HIGH_MARK = 32;
    static const int POOL_LOW_MARK  = 8;

    class ResOfGet {
        ResOfGet();
    public:
        Block* block;
        bool   lastAccMiss;
        ResOfGet(Block *b, bool lastMiss) : block(b), lastAccMiss(lastMiss) {}
    };

    // allocated in zero-initialized memory
    FreeBlockPool(Backend *bknd) : backend(bknd) {}
    ResOfGet getBlock();
    void returnBlock(Block *block);
    bool externalCleanup(); // can be called by another thread
};

template<int LOW_MARK, int HIGH_MARK>
class LocalLOCImpl {
private:
    static const size_t MAX_TOTAL_SIZE = 4*1024*1024;
    // TODO: can a singly-linked list be faster here?
    LargeMemoryBlock *tail; // needed when releasing on overflow
    std::atomic<LargeMemoryBlock*> head;
    size_t            totalSize;
    int               numOfBlocks;
public:
    bool put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool);
    LargeMemoryBlock *get(size_t size);
    bool externalCleanup(ExtMemoryPool *extMemPool);
#if __TBB_MALLOC_WHITEBOX_TEST
    LocalLOCImpl() : head(NULL), tail(NULL), totalSize(0), numOfBlocks(0) {}
    static size_t getMaxSize() { return MAX_TOTAL_SIZE; }
    static const int LOC_HIGH_MARK = HIGH_MARK;
#else
    // no ctor, object must be created in zero-initialized memory
#endif
};

typedef LocalLOCImpl<8,32> LocalLOC; // set production code parameters
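// A rough summary of the per-thread caching layers used in this file: Bin keeps per-size-class
// lists of slab blocks owned by the thread; FreeBlockPool keeps a small stock of empty slab
// blocks refilled from and drained back to the Backend; LocalLOC keeps a bounded cache of
// recently freed large objects. TLSData below aggregates all three, and its externalCleanup()
// drains them when memory needs to be reclaimed.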
class TLSData : public TLSRemote {
    MemoryPool   *memPool;
public:
    Bin           bin[numBlockBinLimit];
    FreeBlockPool freeSlabBlocks;
    LocalLOC      lloc;
    unsigned      currCacheIdx;
private:
    std::atomic<bool> unused;
public:
    TLSData(MemoryPool *mPool, Backend *bknd) : memPool(mPool), freeSlabBlocks(bknd) {}
    MemoryPool *getMemPool() const { return memPool; }
    Bin* getAllocationBin(size_t size);
    void release();
    bool externalCleanup(bool cleanOnlyUnused, bool cleanBins) {
        if (!unused.load(std::memory_order_relaxed) && cleanOnlyUnused) return false;
        // Heavy operation in terms of synchronization complexity,
        // should be called only for the current thread
        bool released = cleanBins ? cleanupBlockBins() : false;
        // both cleanups are to be called, and the order is not important
        return released | lloc.externalCleanup(&memPool->extMemPool) | freeSlabBlocks.externalCleanup();
    }
    bool cleanupBlockBins();
    void markUsed() { unused.store(false, std::memory_order_relaxed); }  // called by the owner when TLS is touched
    void markUnused() { unused.store(true, std::memory_order_relaxed); } // can be called by a non-owner thread
};

TLSData *TLSKey::createTLS(MemoryPool *memPool, Backend *backend)
{
    MALLOC_ASSERT( sizeof(TLSData) >= sizeof(Bin) * numBlockBins + sizeof(FreeBlockPool), ASSERT_TEXT );
    TLSData* tls = (TLSData*) memPool->bootStrapBlocks.allocate(memPool, sizeof(TLSData));
    if ( !tls )
        return NULL;
    new(tls) TLSData(memPool, backend);
    /* the block contains zeroes after bootStrapMalloc, so bins are initialized */
#if MALLOC_DEBUG
    for (uint32_t i = 0; i < numBlockBinLimit; i++)
        tls->bin[i].verifyInitState();
#endif
    setThreadMallocTLS(tls);
    memPool->extMemPool.allLocalCaches.registerThread(tls);
    return tls;
}

bool TLSData::cleanupBlockBins()
{
    bool released = false;
    for (uint32_t i = 0; i < numBlockBinLimit; i++) {
        released |= bin[i].cleanPublicFreeLists();
        // After cleaning public free lists, only the active block might be empty.
        // Do not use processEmptyBlock because it will just restore bumpPtr.
        Block *block = bin[i].getActiveBlock();
        if (block && block->empty()) {
            bin[i].outofTLSBin(block);
            memPool->returnEmptyBlock(block, /*poolTheBlock=*/false);
            released = true;
        }
    }
    return released;
}

bool ExtMemoryPool::releaseAllLocalCaches()
{
    // Iterate all registered TLS data and clean LLOC and slab pools
    bool released = allLocalCaches.cleanup(/*cleanOnlyUnused=*/false);

    // Bin privatization is done only for the current thread
    if (TLSData *tlsData = tlsPointerKey.getThreadMallocTLS())
        released |= tlsData->cleanupBlockBins();

    return released;
}

void AllLocalCaches::registerThread(TLSRemote *tls)
{
    tls->prev = NULL;
    MallocMutex::scoped_lock lock(listLock);
    MALLOC_ASSERT(head!=tls, ASSERT_TEXT);
    tls->next = head;
    if (head)
        head->prev = tls;
    head = tls;
    MALLOC_ASSERT(head->next!=head, ASSERT_TEXT);
}

void AllLocalCaches::unregisterThread(TLSRemote *tls)
{
    MallocMutex::scoped_lock lock(listLock);
    MALLOC_ASSERT(head, "Can't unregister thread: no threads are registered.");
    if (head == tls)
        head = tls->next;
    if (tls->next)
        tls->next->prev = tls->prev;
    if (tls->prev)
        tls->prev->next = tls->next;
    MALLOC_ASSERT(!tls->next || tls->next->next!=tls->next, ASSERT_TEXT);
}

bool AllLocalCaches::cleanup(bool cleanOnlyUnused)
{
    bool released = false;
    {
        MallocMutex::scoped_lock lock(listLock);
        for (TLSRemote *curr=head; curr; curr=curr->next)
            released |= static_cast<TLSData*>(curr)->externalCleanup(cleanOnlyUnused, /*cleanBins=*/false);
    }
    return released;
}

void AllLocalCaches::markUnused()
{
    bool locked;
    MallocMutex::scoped_lock lock(listLock, /*block=*/false, &locked);
    if (!locked) // do not wait for marking if someone is doing something with the list
        return;

    for (TLSRemote *curr=head; curr; curr=curr->next)
        static_cast<TLSData*>(curr)->markUnused();
}
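// Informally, the TLSData::unused flag works as follows: the owner thread calls markUsed()
// when it touches its TLS (e.g. in Block::freeOwnObject below), while AllLocalCaches::markUnused()
// above, possibly called from another thread, marks every registered TLS as unused. A later
// cleanup(/*cleanOnlyUnused=*/true) then skips the caches that were touched in between, so only
// caches of apparently idle threads are drained.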
#if MALLOC_CHECK_RECURSION
MallocMutex RecursiveMallocCallProtector::rmc_mutex;
pthread_t   RecursiveMallocCallProtector::owner_thread;
std::atomic<void*> RecursiveMallocCallProtector::autoObjPtr;
bool        RecursiveMallocCallProtector::mallocRecursionDetected;
#if __FreeBSD__
bool        RecursiveMallocCallProtector::canUsePthread;
#endif

#endif

/*********** End code to provide thread ID and a TLS pointer **********/

// Parameter for isLargeObject, keeps our expectations on memory origin.
// Assertions must use unknownMem to reliably report object invalidity.
enum MemoryOrigin {
    ourMem,    // allocated by TBB allocator
    unknownMem // can be allocated by system allocator or TBB allocator
};

template<MemoryOrigin>
#if __TBB_USE_THREAD_SANITIZER
// We have a real race when accessing the large object header for
// non-large objects (e.g. small or foreign objects).
// Therefore, we need to hide this access from the thread sanitizer
__attribute__((no_sanitize("thread")))
#endif
bool isLargeObject(void *object);
static void *internalMalloc(size_t size);
static void internalFree(void *object);
static void *internalPoolMalloc(MemoryPool* mPool, size_t size);
static bool internalPoolFree(MemoryPool *mPool, void *object, size_t size);

#if !MALLOC_DEBUG
#if __INTEL_COMPILER || _MSC_VER
#define NOINLINE(decl) __declspec(noinline) decl
#define ALWAYSINLINE(decl) __forceinline decl
#elif __GNUC__
#define NOINLINE(decl) decl __attribute__ ((noinline))
#define ALWAYSINLINE(decl) decl __attribute__ ((always_inline))
#else
#define NOINLINE(decl) decl
#define ALWAYSINLINE(decl) decl
#endif

static NOINLINE( bool doInitialization() );
ALWAYSINLINE( bool isMallocInitialized() );

#undef ALWAYSINLINE
#undef NOINLINE
#endif /* !MALLOC_DEBUG */


/********* Now some rough utility code to deal with indexing the size bins. **************/

/*
 * Given a number return the highest non-zero bit in it. It is intended to work with 32-bit values only.
 * Moreover, on some platforms, for the sake of simplicity and performance, it is narrowed to only serve for 64 to 1023.
 * This is enough for the current algorithm of distribution of sizes among bins.
 * __TBB_Log2 is not used here to minimize dependencies on TBB specific sources.
 */
#if _WIN64 && _MSC_VER>=1400 && !__INTEL_COMPILER
extern "C" unsigned char _BitScanReverse( unsigned long* i, unsigned long w );
#pragma intrinsic(_BitScanReverse)
#endif
static inline unsigned int highestBitPos(unsigned int n)
{
    MALLOC_ASSERT( n>=64 && n<1024, ASSERT_TEXT ); // only needed for bsr array lookup, but always true
    unsigned int pos;
#if __ARCH_x86_32||__ARCH_x86_64

# if __linux__||__APPLE__||__FreeBSD__||__NetBSD__||__OpenBSD__||__sun||__MINGW32__
    __asm__ ("bsr %1,%0" : "=r"(pos) : "r"(n));
# elif (_WIN32 && (!_WIN64 || __INTEL_COMPILER))
    __asm
    {
        bsr eax, n
        mov pos, eax
    }
# elif _WIN64 && _MSC_VER>=1400
    _BitScanReverse((unsigned long*)&pos, (unsigned long)n);
# else
#   error highestBitPos() not implemented for this platform
# endif
#elif __arm__
    __asm__ __volatile__
    (
       "clz %0, %1\n"
       "rsb %0, %0, %2\n"
       :"=r" (pos) :"r" (n), "I" (31)
    );
#else
    static unsigned int bsr[16] = {0/*N/A*/,6,7,7,8,8,8,8,9,9,9,9,9,9,9,9};
    pos = bsr[ n>>6 ];
#endif /* __ARCH_* */
    return pos;
}


#if __TBB_x86_32 || __aarch32__
unsigned int getSmallObjectIndex(unsigned int size)
{
    return (size-1)>>3;
}
#elif __TBB_x86_64 || __aarch64__
unsigned int getSmallObjectIndex(unsigned int size)
{
    // For 64-bit malloc, 16 byte alignment is needed except for bin 0.
    unsigned int result = (size-1)>>3;
    if (result) result |= 1; // 0,1,3,5,7; bins 2,4,6 are not aligned to 16 bytes
    return result;
}
#endif // __TBB_x86_32 || __aarch32__
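// Worked examples for the helpers above (informal, derived from the code):
//   getSmallObjectIndex(24) == (24-1)>>3 == 2 on 32-bit targets (bin 2, 24-byte objects);
//   on 64-bit targets the result is additionally OR-ed with 1, giving bin 3 (32-byte objects,
//   16-byte aligned).
//   highestBitPos(99) == 6 since 64 <= 99 < 128, which getIndexOrObjectSize() below uses to
//   select the 64..128 segregated group, so a 100-byte request is served from the 112-byte bin.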
/*
 * Depending on indexRequest, for a given size return either the index into the bin
 * for objects of this size, or the actual size of objects in this bin.
 */
template<bool indexRequest>
static unsigned int getIndexOrObjectSize (unsigned int size)
{
    if (size <= maxSmallObjectSize) { // selection from 8/16/24/32/40/48/56/64
        unsigned int index = getSmallObjectIndex( size );
        /* Bin 0 is for 8 bytes, bin 1 is for 16, and so forth */
        return indexRequest ? index : (index+1)<<3;
    }
    else if (size <= maxSegregatedObjectSize ) { // 80/96/112/128 / 160/192/224/256 / 320/384/448/512 / 640/768/896/1024
        unsigned int order = highestBitPos(size-1); // which group of bin sizes?
        MALLOC_ASSERT( 6<=order && order<=9, ASSERT_TEXT );
        if (indexRequest)
            return minSegregatedObjectIndex - (4*6) - 4 + (4*order) + ((size-1)>>(order-2));
        else {
            unsigned int alignment = 128 >> (9-order); // alignment in the group
            MALLOC_ASSERT( alignment==16 || alignment==32 || alignment==64 || alignment==128, ASSERT_TEXT );
            return alignUp(size,alignment);
        }
    }
    else {
        if( size <= fittingSize3 ) {
            if( size <= fittingSize2 ) {
                if( size <= fittingSize1 )
                    return indexRequest ? minFittingIndex : fittingSize1;
                else
                    return indexRequest ? minFittingIndex+1 : fittingSize2;
            } else
                return indexRequest ? minFittingIndex+2 : fittingSize3;
        } else {
            if( size <= fittingSize5 ) {
                if( size <= fittingSize4 )
                    return indexRequest ? minFittingIndex+3 : fittingSize4;
                else
                    return indexRequest ? minFittingIndex+4 : fittingSize5;
            } else {
                MALLOC_ASSERT( 0,ASSERT_TEXT ); // this should not happen
                return ~0U;
            }
        }
    }
}

static unsigned int getIndex (unsigned int size)
{
    return getIndexOrObjectSize</*indexRequest=*/true>(size);
}

static unsigned int getObjectSize (unsigned int size)
{
    return getIndexOrObjectSize</*indexRequest=*/false>(size);
}


void *BootStrapBlocks::allocate(MemoryPool *memPool, size_t size)
{
    FreeObject *result;

    MALLOC_ASSERT( size == sizeof(TLSData), ASSERT_TEXT );

    { // Lock with acquire
        MallocMutex::scoped_lock scoped_cs(bootStrapLock);

        if( bootStrapObjectList) {
            result = bootStrapObjectList;
            bootStrapObjectList = bootStrapObjectList->next;
        } else {
            if (!bootStrapBlock) {
                bootStrapBlock = memPool->getEmptyBlock(size);
                if (!bootStrapBlock) return NULL;
            }
            result = bootStrapBlock->bumpPtr;
            bootStrapBlock->bumpPtr = (FreeObject *)((uintptr_t)bootStrapBlock->bumpPtr - bootStrapBlock->objectSize);
            if ((uintptr_t)bootStrapBlock->bumpPtr < (uintptr_t)bootStrapBlock+sizeof(Block)) {
                bootStrapBlock->bumpPtr = NULL;
                bootStrapBlock->next = bootStrapBlockUsed;
                bootStrapBlockUsed = bootStrapBlock;
                bootStrapBlock = NULL;
            }
        }
    } // Unlock with release
    memset (result, 0, size);
    return (void*)result;
}

void BootStrapBlocks::free(void* ptr)
{
    MALLOC_ASSERT( ptr, ASSERT_TEXT );
    { // Lock with acquire
        MallocMutex::scoped_lock scoped_cs(bootStrapLock);
        ((FreeObject*)ptr)->next = bootStrapObjectList;
        bootStrapObjectList = (FreeObject*)ptr;
    } // Unlock with release
}

void BootStrapBlocks::reset()
{
    bootStrapBlock = bootStrapBlockUsed = NULL;
    bootStrapObjectList = NULL;
}

#if !(FREELIST_NONBLOCKING)
static MallocMutex publicFreeListLock; // lock for changes of publicFreeList
#endif

/********* End rough utility code **************/

/* LifoList assumes zero initialization so a vector of it can be created
 * by just allocating some space with no call to the constructor.
 * On Linux, this seems to be necessary to avoid linking with C++ libraries.
 *
 * By usage convention there is no race on the initialization. */
LifoList::LifoList( ) : top(nullptr)
{
    // MallocMutex assumes zero initialization
    memset(&lock, 0, sizeof(MallocMutex));
}

void LifoList::push(Block *block)
{
    MallocMutex::scoped_lock scoped_cs(lock);
    block->next = top.load(std::memory_order_relaxed);
    top.store(block, std::memory_order_relaxed);
}

Block *LifoList::pop()
{
    Block* block = nullptr;
    if (top.load(std::memory_order_relaxed)) {
        MallocMutex::scoped_lock scoped_cs(lock);
        block = top.load(std::memory_order_relaxed);
        if (block) {
            top.store(block->next, std::memory_order_relaxed);
        }
    }
    return block;
}

Block *LifoList::grab()
{
    Block *block = nullptr;
    if (top.load(std::memory_order_relaxed)) {
        MallocMutex::scoped_lock scoped_cs(lock);
        block = top.load(std::memory_order_relaxed);
        top.store(nullptr, std::memory_order_relaxed);
    }
    return block;
}

/********* Thread and block related code *************/

template<bool poolDestroy> void AllLargeBlocksList::releaseAll(Backend *backend) {
    LargeMemoryBlock *next, *lmb = loHead;
    loHead = NULL;

    for (; lmb; lmb = next) {
        next = lmb->gNext;
        if (poolDestroy) {
            // as it's pool destruction, no need to return object to backend,
            // only remove backrefs, as they are global
            removeBackRef(lmb->backRefIdx);
        } else {
            // clean g(Next|Prev) to prevent removing lmb
            // from AllLargeBlocksList inside returnLargeObject
            lmb->gNext = lmb->gPrev = NULL;
            backend->returnLargeObject(lmb);
        }
    }
}

TLSData* MemoryPool::getTLS(bool create)
{
    TLSData* tls = extMemPool.tlsPointerKey.getThreadMallocTLS();
    if (create && !tls)
        tls = extMemPool.tlsPointerKey.createTLS(this, &extMemPool.backend);
    return tls;
}

/*
 * Return the bin for the given size.
 */
inline Bin* TLSData::getAllocationBin(size_t size)
{
    return bin + getIndex(size);
}
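// Note on the slab lookup below: FreeBlockPool::ResOfGet also reports whether the per-thread
// cache lookup missed. If the calling thread has a TLS cache but it is empty, the backend is
// asked for Backend::numOfSlabAllocOnMiss slabs at once and all but the first are parked in the
// per-thread FreeBlockPool; without TLS only a single slab is requested.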
/* Return an empty uninitialized block in a non-blocking fashion. */
Block *MemoryPool::getEmptyBlock(size_t size)
{
    TLSData* tls = getTLS(/*create=*/false);
    // try to use the per-thread cache, if TLS is available
    FreeBlockPool::ResOfGet resOfGet = tls?
        tls->freeSlabBlocks.getBlock() : FreeBlockPool::ResOfGet(NULL, false);
    Block *result = resOfGet.block;

    if (!result) { // not found in the local cache, ask the backend for slabs
        int num = resOfGet.lastAccMiss? Backend::numOfSlabAllocOnMiss : 1;
        BackRefIdx backRefIdx[Backend::numOfSlabAllocOnMiss];

        result = static_cast<Block*>(extMemPool.backend.getSlabBlock(num));
        if (!result) return NULL;

        if (!extMemPool.userPool())
            for (int i=0; i<num; i++) {
                backRefIdx[i] = BackRefIdx::newBackRef(/*largeObj=*/false);
                if (backRefIdx[i].isInvalid()) {
                    // roll back resource allocation
                    for (int j=0; j<i; j++)
                        removeBackRef(backRefIdx[j]);
                    Block *b = result;
                    for (int j=0; j<num; b=(Block*)((uintptr_t)b+slabSize), j++)
                        extMemPool.backend.putSlabBlock(b);
                    return NULL;
                }
            }
        // resources were allocated, register the blocks
        Block *b = result;
        for (int i=0; i<num; b=(Block*)((uintptr_t)b+slabSize), i++) {
            // slab blocks in user's pool must have an invalid backRefIdx
            if (extMemPool.userPool()) {
                new (&b->backRefIdx) BackRefIdx();
            } else {
                setBackRef(backRefIdx[i], b);
                b->backRefIdx = backRefIdx[i];
            }
            b->tlsPtr.store(tls, std::memory_order_relaxed);
            b->poolPtr = this;
            // all but the first one go to the per-thread pool
            if (i > 0) {
                MALLOC_ASSERT(tls, ASSERT_TEXT);
                tls->freeSlabBlocks.returnBlock(b);
            }
        }
    }
    MALLOC_ASSERT(result, ASSERT_TEXT);
    result->initEmptyBlock(tls, size);
    STAT_increment(getThreadId(), getIndex(result->objectSize), allocBlockNew);
    return result;
}

void MemoryPool::returnEmptyBlock(Block *block, bool poolTheBlock)
{
    block->reset();
    if (poolTheBlock) {
        getTLS(/*create=*/false)->freeSlabBlocks.returnBlock(block);
    } else {
        // slab blocks in user's pools do not have a valid backRefIdx
        if (!extMemPool.userPool())
            removeBackRef(*(block->getBackRefIdx()));
        extMemPool.backend.putSlabBlock(block);
    }
}

bool ExtMemoryPool::init(intptr_t poolId, rawAllocType rawAlloc,
                         rawFreeType rawFree, size_t granularity,
                         bool keepAllMemory, bool fixedPool)
{
    this->poolId = poolId;
    this->rawAlloc = rawAlloc;
    this->rawFree = rawFree;
    this->granularity = granularity;
    this->keepAllMemory = keepAllMemory;
    this->fixedPool = fixedPool;
    this->delayRegsReleasing = false;
    if (!initTLS())
        return false;
    loc.init(this);
    backend.init(this);
    MALLOC_ASSERT(isPoolValid(), NULL);
    return true;
}

bool ExtMemoryPool::initTLS() { return tlsPointerKey.init(); }

bool MemoryPool::init(intptr_t poolId, const MemPoolPolicy *policy)
{
    if (!extMemPool.init(poolId, policy->pAlloc, policy->pFree,
                         policy->granularity? policy->granularity : defaultGranularity,
                         policy->keepAllMemory, policy->fixedPool))
        return false;
    {
        MallocMutex::scoped_lock lock(memPoolListLock);
        next = defaultMemPool->next;
        defaultMemPool->next = this;
        prev = defaultMemPool;
        if (next)
            next->prev = this;
    }
    return true;
}

bool MemoryPool::reset()
{
    MALLOC_ASSERT(extMemPool.userPool(), "No reset for the system pool.");
    // memory is not released during pool reset
    // TODO: mark regions to release unused ones on the next reset()
    extMemPool.delayRegionsReleasing(true);

    bootStrapBlocks.reset();
    extMemPool.lmbList.releaseAll</*poolDestroy=*/false>(&extMemPool.backend);
    if (!extMemPool.reset())
        return false;

    if (!extMemPool.initTLS())
        return false;
    extMemPool.delayRegionsReleasing(false);
    return true;
}

bool MemoryPool::destroy()
{
#if __TBB_MALLOC_LOCACHE_STAT
    extMemPool.loc.reportStat(stdout);
#endif
#if __TBB_MALLOC_BACKEND_STAT
    extMemPool.backend.reportStat(stdout);
#endif
    {
        MallocMutex::scoped_lock lock(memPoolListLock);
        // remove itself from the global pool list
        if (prev)
            prev->next = next;
        if (next)
            next->prev = prev;
    }
    // slab blocks in non-default pools do not have backreferences,
    // only large objects do
    if (extMemPool.userPool())
        extMemPool.lmbList.releaseAll</*poolDestroy=*/true>(&extMemPool.backend);
    else {
        // only one non-userPool() is supported now
        MALLOC_ASSERT(this==defaultMemPool, NULL);
        // Here and below in extMemPool.destroy(), do not restore the initial state
        // for a user pool, because it is just about to be released. But the system
        // pool is restored, because we do not want to zero it on a subsequent reload.
        bootStrapBlocks.reset();
        extMemPool.orphanedBlocks.reset();
    }
    return extMemPool.destroy();
}

void MemoryPool::onThreadShutdown(TLSData *tlsData)
{
    if (tlsData) { // might be called for "empty" TLS
        tlsData->release();
        bootStrapBlocks.free(tlsData);
        clearTLS();
    }
}

#if MALLOC_DEBUG
void Bin::verifyTLSBin (size_t size) const
{
    /* The debug version verifies the TLSBin as needed */
    uint32_t objSize = getObjectSize(size);

    if (activeBlk) {
        MALLOC_ASSERT( activeBlk->isOwnedByCurrentThread(), ASSERT_TEXT );
        MALLOC_ASSERT( activeBlk->objectSize == objSize, ASSERT_TEXT );
#if MALLOC_DEBUG>1
        for (Block* temp = activeBlk->next; temp; temp=temp->next) {
            MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT );
            MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT );
            MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
            MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
            if (temp->next) {
                MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
            }
        }
        for (Block* temp = activeBlk->previous; temp; temp=temp->previous) {
            MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT );
            MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT );
            MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
            MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
            if (temp->previous) {
                MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
            }
        }
#endif /* MALLOC_DEBUG>1 */
    }
}
#else /* MALLOC_DEBUG */
inline void Bin::verifyTLSBin (size_t) const { }
#endif /* MALLOC_DEBUG */

/*
 * Add a block to the start of this tls bin list.
 */
void Bin::pushTLSBin(Block* block)
{
    /* The objectSize should be defined and not a parameter
       because the function is applied to partially filled blocks as well */
    unsigned int size = block->objectSize;

    MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
    MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );
    MALLOC_ASSERT( block->next == NULL, ASSERT_TEXT );
    MALLOC_ASSERT( block->previous == NULL, ASSERT_TEXT );

    MALLOC_ASSERT( this, ASSERT_TEXT );
    verifyTLSBin(size);

    block->next = activeBlk;
    if( activeBlk ) {
        block->previous = activeBlk->previous;
        activeBlk->previous = block;
        if( block->previous )
            block->previous->next = block;
    } else {
        activeBlk = block;
    }

    verifyTLSBin(size);
}

/*
 * Take a block out of its tls bin (e.g. before removal).
 */
void Bin::outofTLSBin(Block* block)
{
    unsigned int size = block->objectSize;

    MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
    MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );

    MALLOC_ASSERT( this, ASSERT_TEXT );
    verifyTLSBin(size);

    if (block == activeBlk) {
        activeBlk = block->previous? block->previous : block->next;
    }
    /* Unlink the block */
    if (block->previous) {
        MALLOC_ASSERT( block->previous->next == block, ASSERT_TEXT );
        block->previous->next = block->next;
    }
    if (block->next) {
        MALLOC_ASSERT( block->next->previous == block, ASSERT_TEXT );
        block->next->previous = block->previous;
    }
    block->next = NULL;
    block->previous = NULL;

    verifyTLSBin(size);
}

Block* Bin::getPrivatizedFreeListBlock()
{
    Block* block;
    MALLOC_ASSERT( this, ASSERT_TEXT );
    // if this method is called, active block usage must be unsuccessful
    MALLOC_ASSERT( !activeBlk && !mailbox.load(std::memory_order_relaxed) || activeBlk && activeBlk->isFull, ASSERT_TEXT );

    // the counter should be changed    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    if (!mailbox.load(std::memory_order_acquire)) // the hot path is an empty mailbox
        return NULL;
    else { // mailbox is not empty, take the lock and inspect it
        MallocMutex::scoped_lock scoped_cs(mailLock);
        block = mailbox.load(std::memory_order_relaxed);
        if( block ) {
            MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
            MALLOC_ASSERT( !isNotForUse(block->nextPrivatizable.load(std::memory_order_relaxed)), ASSERT_TEXT );
            mailbox.store(block->nextPrivatizable.load(std::memory_order_relaxed), std::memory_order_relaxed);
            block->nextPrivatizable.store((Block*)this, std::memory_order_relaxed);
        }
    }
    if( block ) {
        MALLOC_ASSERT( isSolidPtr(block->publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
        block->privatizePublicFreeList();
        block->adjustPositionInBin(this);
    }
    return block;
}

void Bin::addPublicFreeListBlock(Block* block)
{
    MallocMutex::scoped_lock scoped_cs(mailLock);
    block->nextPrivatizable.store(mailbox.load(std::memory_order_relaxed), std::memory_order_relaxed);
    mailbox.store(block, std::memory_order_relaxed);
}

// Process publicly freed objects in all blocks and return empty blocks
// to the backend in order to reduce overall footprint.
bool Bin::cleanPublicFreeLists()
{
    Block* block;
    if (!mailbox.load(std::memory_order_acquire))
        return false;
    else {
        // Grab all the blocks in the mailbox
        MallocMutex::scoped_lock scoped_cs(mailLock);
        block = mailbox.load(std::memory_order_relaxed);
        mailbox.store(NULL, std::memory_order_relaxed);
    }
    bool released = false;
    while (block) {
        MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
        Block* tmp = block->nextPrivatizable.load(std::memory_order_relaxed);
        block->nextPrivatizable.store((Block*)this, std::memory_order_relaxed);
        block->privatizePublicFreeList();
        if (block->empty()) {
            processEmptyBlock(block, /*poolTheBlock=*/false);
            released = true;
        } else
            block->adjustPositionInBin(this);
        block = tmp;
    }
    return released;
}
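// The mailbox protocol used above, in short: a foreign thread that frees an object into a block
// whose publicFreeList was empty registers the block in the owning Bin's mailbox
// (Block::freePublicObject -> Bin::addPublicFreeListBlock). The owner later walks the mailbox
// under mailLock, points nextPrivatizable back at the Bin, and privatizes the publicly freed
// objects (getPrivatizedFreeListBlock and cleanPublicFreeLists above).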
bool Block::adjustFullness()
{
    if (bumpPtr) {
        /* If we are still using a bump ptr for this block it is empty enough to use. */
        STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
        isFull = false;
    } else {
        const float threshold = (slabSize - sizeof(Block)) * (1 - emptyEnoughRatio);
        /* allocatedCount shows how many objects in the block are in use; however it still counts
         * objects freed by other threads; so a prior call to privatizePublicFreeList() is recommended */
        isFull = (allocatedCount*objectSize > threshold) ? true : false;
#if COLLECT_STATISTICS
        if (isFull)
            STAT_increment(getThreadId(), getIndex(objectSize), examineNotEmpty);
        else
            STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
#endif
    }
    return isFull;
}

// This method resides in class Block, and not in class Bin, in order to avoid
// calling getAllocationBin on a reasonably hot path in Block::freeOwnObject
void Block::adjustPositionInBin(Bin* bin/*=NULL*/)
{
    // If the block was full, but became empty enough to use,
    // move it to the front of the list
    if (isFull && !adjustFullness()) {
        if (!bin)
            bin = tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize);
        bin->moveBlockToFront(this);
    }
}

/* Restore the bump pointer for an empty block that is planned to be used */
void Block::restoreBumpPtr()
{
    MALLOC_ASSERT( allocatedCount == 0, ASSERT_TEXT );
    MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
    STAT_increment(getThreadId(), getIndex(objectSize), freeRestoreBumpPtr);
    bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);
    freeList = NULL;
    isFull = false;
}

void Block::freeOwnObject(void *object)
{
    tlsPtr.load(std::memory_order_relaxed)->markUsed();
    allocatedCount--;
    MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
#if COLLECT_STATISTICS
    // Note that getAllocationBin is not called on the hottest path with statistics off.
    if (tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize)->getActiveBlock() != this)
        STAT_increment(getThreadId(), getIndex(objectSize), freeToInactiveBlock);
    else
        STAT_increment(getThreadId(), getIndex(objectSize), freeToActiveBlock);
#endif
    if (empty()) {
        // If the last object of a slab is freed, the slab cannot be marked full
        MALLOC_ASSERT(!isFull, ASSERT_TEXT);
        tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize)->processEmptyBlock(this, /*poolTheBlock=*/true);
    } else { // hot path
        FreeObject *objectToFree = findObjectToFree(object);
        objectToFree->next = freeList;
        freeList = objectToFree;
        adjustPositionInBin();
    }
}

void Block::freePublicObject (FreeObject *objectToFree)
{
    FreeObject* localPublicFreeList{};

    MALLOC_ITT_SYNC_RELEASING(&publicFreeList);
#if FREELIST_NONBLOCKING
    // TBB_REVAMP_TODO: make it non atomic in non-blocking scenario
    localPublicFreeList = publicFreeList.load(std::memory_order_relaxed);
    do {
        objectToFree->next = localPublicFreeList;
        // no backoff necessary because trying to make a change, not waiting for a change
    } while( !publicFreeList.compare_exchange_strong(localPublicFreeList, objectToFree) );
#else
    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    {
        MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
        localPublicFreeList = objectToFree->next = publicFreeList;
        publicFreeList = objectToFree;
    }
#endif

    if( localPublicFreeList==NULL ) {
        // if the block is abandoned, its nextPrivatizable pointer should be UNUSABLE;
        // otherwise, it should point to the bin the block belongs to.
        // reading nextPrivatizable is thread-safe below, because:
        // 1) the executing thread atomically got publicFreeList==NULL and changed it to non-NULL;
        // 2) only the owning thread can change it back to NULL,
        // 3) but that cannot be done until the block is put into the mailbox
        // So the executing thread is now the only one that can change nextPrivatizable
        Block* next = nextPrivatizable.load(std::memory_order_acquire);
        if( !isNotForUse(next) ) {
            MALLOC_ASSERT( next!=nullptr, ASSERT_TEXT );
            Bin* theBin = (Bin*) next;
#if MALLOC_DEBUG && TBB_REVAMP_TODO
            // FIXME: The thread that returns the block is not the block's owner.
            // The below assertion compares 'theBin' against the caller's local bin, thus, it always fails.
            // Need to find a way to get the correct remote bin for comparison.
            { // check that nextPrivatizable points to the bin the block belongs to
                uint32_t index = getIndex( objectSize );
                TLSData* tls = getThreadMallocTLS();
                MALLOC_ASSERT( theBin==tls->bin+index, ASSERT_TEXT );
            }
#endif // MALLOC_DEBUG
            theBin->addPublicFreeListBlock(this);
        }
    }
    STAT_increment(getThreadId(), ThreadCommonCounters, freeToOtherThread);
    STAT_increment(ownerTid.load(std::memory_order_relaxed), getIndex(objectSize), freeByOtherThread);
}
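// As used above and below, publicFreeList effectively has three states (informal summary):
//   NULL      - no publicly freed objects are currently pending;
//   UNUSABLE  - the block is orphaned (no owner thread); foreign frees still push objects onto
//               the list but do not register the block in any mailbox;
//   otherwise - a "solid" pointer (isSolidPtr) to the list of objects freed by other threads.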
// Make objects freed by other threads available for use again
void Block::privatizePublicFreeList( bool reset )
{
    FreeObject *localPublicFreeList;
    // If reset is false, publicFreeList should not be zeroed but set to UNUSABLE
    // to properly synchronize with other threads freeing objects to this slab.
    const intptr_t endMarker = reset ? 0 : UNUSABLE;

    // Only the owner thread may reset the pointer to NULL
    MALLOC_ASSERT( isOwnedByCurrentThread() || !reset, ASSERT_TEXT );
#if FREELIST_NONBLOCKING
    localPublicFreeList = publicFreeList.exchange((FreeObject*)endMarker);
#else
    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    {
        MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
        localPublicFreeList = publicFreeList;
        publicFreeList = endMarker;
    }
#endif
    MALLOC_ITT_SYNC_ACQUIRED(&publicFreeList);
    MALLOC_ASSERT( !(reset && isNotForUse(publicFreeList.load(std::memory_order_relaxed))), ASSERT_TEXT );

    // publicFreeList must have been UNUSABLE or valid, but not NULL
    MALLOC_ASSERT( localPublicFreeList!=NULL, ASSERT_TEXT );
    if( isSolidPtr(localPublicFreeList) ) {
        MALLOC_ASSERT( allocatedCount <= (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
        /* other threads did not change the counter freeing our blocks */
        allocatedCount--;
        FreeObject *temp = localPublicFreeList;
        while( isSolidPtr(temp->next) ){ // the list will end with either NULL or UNUSABLE
            temp = temp->next;
            allocatedCount--;
            MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
        }
        /* merge with local freeList */
        temp->next = freeList;
        freeList = localPublicFreeList;
        STAT_increment(getThreadId(), getIndex(objectSize), allocPrivatized);
    }
}

void Block::privatizeOrphaned(TLSData *tls, unsigned index)
{
    Bin* bin = tls->bin + index;
    STAT_increment(getThreadId(), index, allocBlockPublic);
    next = NULL;
    previous = NULL;
    MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) != NULL, ASSERT_TEXT );
    /* There is not a race here since no other thread owns this block */
    markOwned(tls);
    // It is safe to change nextPrivatizable, as publicFreeList is not null
    MALLOC_ASSERT( isNotForUse(nextPrivatizable.load(std::memory_order_relaxed)), ASSERT_TEXT );
    nextPrivatizable.store((Block*)bin, std::memory_order_relaxed);
    // the next call is required to change publicFreeList to 0
    privatizePublicFreeList();
    if( empty() ) {
        restoreBumpPtr();
    } else {
        adjustFullness(); // check the block fullness and set isFull
    }
    MALLOC_ASSERT( !isNotForUse(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
}


bool Block::readyToShare()
{
    FreeObject* oldVal = NULL;
#if FREELIST_NONBLOCKING
    publicFreeList.compare_exchange_strong(oldVal, (FreeObject*)UNUSABLE);
#else
    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    {
        MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
        if ( (oldVal=publicFreeList)==NULL )
            (intptr_t&)(publicFreeList) = UNUSABLE;
    }
#endif
    return oldVal==NULL;
}
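// readyToShare() above publishes the UNUSABLE marker with a single compare-and-swap: it returns
// true only if publicFreeList was still NULL, i.e. no foreign free slipped in. shareOrphaned()
// below relies on that to decide whether it must wait for a concurrent Block::freePublicObject
// to finish moving the block into a mailbox (which updates nextPrivatizable) before the block
// can safely be orphaned.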
void Block::shareOrphaned(intptr_t binTag, unsigned index)
{
    MALLOC_ASSERT( binTag, ASSERT_TEXT );
    // unreferenced formal parameter warning
    tbb::detail::suppress_unused_warning(index);
    STAT_increment(getThreadId(), index, freeBlockPublic);
    markOrphaned();
    bool syncOnMailbox = false;
    if ((intptr_t)nextPrivatizable.load(std::memory_order_relaxed) == binTag) {
        // First check passed: the block is not in the mailbox yet.
        // Need to set publicFreeList to non-zero, so other threads
        // will not change nextPrivatizable and it can be zeroed.
        if ( !readyToShare() ) {
            // another thread freed an object; we need to wait until it finishes.
            // There is no need for exponential backoff, as the wait here is not for a lock;
            // but we need to yield, so the thread we wait for has a chance to run.
            // TODO: add a pause to also be friendly to hyperthreads
            int count = 256;
            while ((intptr_t)nextPrivatizable.load(std::memory_order_relaxed) == binTag) {
                if (--count==0) {
                    do_yield();
                    count = 256;
                }
            }
        }
    }
    MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) !=NULL, ASSERT_TEXT );
    // now it is safe to change our data
    previous = NULL;
    // it is the caller's responsibility to ensure that the list of blocks
    // formed by nextPrivatizable pointers is kept consistent if required.
    // if only called from thread shutdown code, it does not matter.
    nextPrivatizable.store((Block*)UNUSABLE, std::memory_order_relaxed);
}

void Block::cleanBlockHeader()
{
    next = nullptr;
    previous = nullptr;
    freeList = nullptr;
    allocatedCount = 0;
    isFull = false;
    tlsPtr.store(nullptr, std::memory_order_relaxed);

    publicFreeList.store(nullptr, std::memory_order_relaxed);
}

void Block::initEmptyBlock(TLSData *tls, size_t size)
{
    // Having getIndex and getObjectSize called next to each other
    // allows better compiler optimization as they basically share the code.
    unsigned int index = getIndex(size);
    unsigned int objSz = getObjectSize(size);

    cleanBlockHeader();
    objectSize = objSz;
    markOwned(tls);
    // the bump pointer should be prepared for the first allocation - thus move it down by objectSize
    bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);

    // each block should have the address where the head of the list of "privatizable" blocks is kept;
    // the only exception is a block for bootstrap, which is initialized when TLS is still NULL
    nextPrivatizable.store( tls? (Block*)(tls->bin + index) : nullptr, std::memory_order_relaxed);
    TRACEF(( "[ScalableMalloc trace] Empty block %p is initialized, owner is %ld, objectSize is %d, bumpPtr is %p\n",
             this, tlsPtr.load(std::memory_order_relaxed) ? getThreadId() : -1, objectSize, bumpPtr ));
getThreadId() : -1, objectSize, bumpPtr )); 1599 } 1600 1601 Block *OrphanedBlocks::get(TLSData *tls, unsigned int size) 1602 { 1603 // TODO: try to use index from getAllocationBin 1604 unsigned int index = getIndex(size); 1605 Block *block = bins[index].pop(); 1606 if (block) { 1607 MALLOC_ITT_SYNC_ACQUIRED(bins+index); 1608 block->privatizeOrphaned(tls, index); 1609 } 1610 return block; 1611 } 1612 1613 void OrphanedBlocks::put(intptr_t binTag, Block *block) 1614 { 1615 unsigned int index = getIndex(block->getSize()); 1616 block->shareOrphaned(binTag, index); 1617 MALLOC_ITT_SYNC_RELEASING(bins+index); 1618 bins[index].push(block); 1619 } 1620 1621 void OrphanedBlocks::reset() 1622 { 1623 for (uint32_t i=0; i<numBlockBinLimit; i++) 1624 new (bins+i) LifoList(); 1625 } 1626 1627 bool OrphanedBlocks::cleanup(Backend* backend) 1628 { 1629 bool released = false; 1630 for (uint32_t i=0; i<numBlockBinLimit; i++) { 1631 Block* block = bins[i].grab(); 1632 MALLOC_ITT_SYNC_ACQUIRED(bins+i); 1633 while (block) { 1634 Block* next = block->next; 1635 block->privatizePublicFreeList( /*reset=*/false ); // do not set publicFreeList to NULL 1636 if (block->empty()) { 1637 block->reset(); 1638 // slab blocks in user's pools do not have valid backRefIdx 1639 if (!backend->inUserPool()) 1640 removeBackRef(*(block->getBackRefIdx())); 1641 backend->putSlabBlock(block); 1642 released = true; 1643 } else { 1644 MALLOC_ITT_SYNC_RELEASING(bins+i); 1645 bins[i].push(block); 1646 } 1647 block = next; 1648 } 1649 } 1650 return released; 1651 } 1652 1653 FreeBlockPool::ResOfGet FreeBlockPool::getBlock() 1654 { 1655 Block *b = head.exchange(NULL); 1656 1657 if (b) { 1658 size--; 1659 Block *newHead = b->next; 1660 lastAccessMiss = false; 1661 head.store(newHead, std::memory_order_release); 1662 } else { 1663 lastAccessMiss = true; 1664 } 1665 return ResOfGet(b, lastAccessMiss); 1666 } 1667 1668 void FreeBlockPool::returnBlock(Block *block) 1669 { 1670 MALLOC_ASSERT( size <= POOL_HIGH_MARK, ASSERT_TEXT ); 1671 Block *localHead = head.exchange(NULL); 1672 1673 if (!localHead) { 1674 size = 0; // head was stolen by externalClean, correct size accordingly 1675 } else if (size == POOL_HIGH_MARK) { 1676 // release cold blocks and add hot one, 1677 // so keep POOL_LOW_MARK-1 blocks and add new block to head 1678 Block *headToFree = localHead, *helper; 1679 for (int i=0; i<POOL_LOW_MARK-2; i++) 1680 headToFree = headToFree->next; 1681 Block *last = headToFree; 1682 headToFree = headToFree->next; 1683 last->next = NULL; 1684 size = POOL_LOW_MARK-1; 1685 for (Block *currBl = headToFree; currBl; currBl = helper) { 1686 helper = currBl->next; 1687 // slab blocks in user's pools do not have valid backRefIdx 1688 if (!backend->inUserPool()) 1689 removeBackRef(currBl->backRefIdx); 1690 backend->putSlabBlock(currBl); 1691 } 1692 } 1693 size++; 1694 block->next = localHead; 1695 head.store(block, std::memory_order_release); 1696 } 1697 1698 bool FreeBlockPool::externalCleanup() 1699 { 1700 Block *helper; 1701 bool released = false; 1702 1703 for (Block *currBl=head.exchange(NULL); currBl; currBl=helper) { 1704 helper = currBl->next; 1705 // slab blocks in user's pools do not have valid backRefIdx 1706 if (!backend->inUserPool()) 1707 removeBackRef(currBl->backRefIdx); 1708 backend->putSlabBlock(currBl); 1709 released = true; 1710 } 1711 return released; 1712 } 1713 1714 /* Prepare the block for returning to FreeBlockPool */ 1715 void Block::reset() 1716 { 1717 // it is caller's responsibility to ensure no data is lost before calling 
this 1718 MALLOC_ASSERT( allocatedCount==0, ASSERT_TEXT ); 1719 MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT ); 1720 if (!isStartupAllocObject()) 1721 STAT_increment(getThreadId(), getIndex(objectSize), freeBlockBack); 1722 1723 cleanBlockHeader(); 1724 1725 nextPrivatizable.store(nullptr, std::memory_order_relaxed); 1726 1727 objectSize = 0; 1728 // for an empty block, bump pointer should point right after the end of the block 1729 bumpPtr = (FreeObject *)((uintptr_t)this + slabSize); 1730 } 1731 1732 inline void Bin::setActiveBlock (Block *block) 1733 { 1734 // MALLOC_ASSERT( bin, ASSERT_TEXT ); 1735 MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT ); 1736 // it is the caller responsibility to keep bin consistence (i.e. ensure this block is in the bin list) 1737 activeBlk = block; 1738 } 1739 1740 inline Block* Bin::setPreviousBlockActive() 1741 { 1742 MALLOC_ASSERT( activeBlk, ASSERT_TEXT ); 1743 Block* temp = activeBlk->previous; 1744 if( temp ) { 1745 MALLOC_ASSERT( !(temp->isFull), ASSERT_TEXT ); 1746 activeBlk = temp; 1747 } 1748 return temp; 1749 } 1750 1751 inline bool Block::isOwnedByCurrentThread() const { 1752 return tlsPtr.load(std::memory_order_relaxed) && ownerTid.isCurrentThreadId(); 1753 } 1754 1755 FreeObject *Block::findObjectToFree(const void *object) const 1756 { 1757 FreeObject *objectToFree; 1758 // Due to aligned allocations, a pointer passed to scalable_free 1759 // might differ from the address of internally allocated object. 1760 // Small objects however should always be fine. 1761 if (objectSize <= maxSegregatedObjectSize) 1762 objectToFree = (FreeObject*)object; 1763 // "Fitting size" allocations are suspicious if aligned higher than naturally 1764 else { 1765 if ( ! isAligned(object,2*fittingAlignment) ) 1766 // TODO: the above check is questionable - it gives false negatives in ~50% cases, 1767 // so might even be slower in average than unconditional use of findAllocatedObject. 
1768 // here it should be a "real" object 1769 objectToFree = (FreeObject*)object; 1770 else 1771 // here object can be an aligned address, so applying additional checks 1772 objectToFree = findAllocatedObject(object); 1773 MALLOC_ASSERT( isAligned(objectToFree,fittingAlignment), ASSERT_TEXT ); 1774 } 1775 MALLOC_ASSERT( isProperlyPlaced(objectToFree), ASSERT_TEXT ); 1776 1777 return objectToFree; 1778 } 1779 1780 void TLSData::release() 1781 { 1782 memPool->extMemPool.allLocalCaches.unregisterThread(this); 1783 externalCleanup(/*cleanOnlyUnused=*/false, /*cleanBins=*/false); 1784 1785 for (unsigned index = 0; index < numBlockBins; index++) { 1786 Block *activeBlk = bin[index].getActiveBlock(); 1787 if (!activeBlk) 1788 continue; 1789 Block *threadlessBlock = activeBlk->previous; 1790 bool syncOnMailbox = false; 1791 while (threadlessBlock) { 1792 Block *threadBlock = threadlessBlock->previous; 1793 if (threadlessBlock->empty()) { 1794 /* we destroy the thread, so not use its block pool */ 1795 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false); 1796 } else { 1797 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock); 1798 syncOnMailbox = true; 1799 } 1800 threadlessBlock = threadBlock; 1801 } 1802 threadlessBlock = activeBlk; 1803 while (threadlessBlock) { 1804 Block *threadBlock = threadlessBlock->next; 1805 if (threadlessBlock->empty()) { 1806 /* we destroy the thread, so not use its block pool */ 1807 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false); 1808 } else { 1809 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock); 1810 syncOnMailbox = true; 1811 } 1812 threadlessBlock = threadBlock; 1813 } 1814 bin[index].resetActiveBlock(); 1815 1816 if (syncOnMailbox) { 1817 // Although, we synchronized on nextPrivatizable inside a block, we still need to 1818 // synchronize on the bin lifetime because the thread releasing an object into the public 1819 // free list is touching the bin (mailbox and mailLock) 1820 MallocMutex::scoped_lock scoped_cs(bin[index].mailLock); 1821 } 1822 } 1823 } 1824 1825 1826 #if MALLOC_CHECK_RECURSION 1827 // TODO: Use dedicated heap for this 1828 1829 /* 1830 * It's a special kind of allocation that can be used when malloc is 1831 * not available (either during startup or when malloc was already called and 1832 * we are, say, inside pthread_setspecific's call). 1833 * Block can contain objects of different sizes, 1834 * allocations are performed by moving bump pointer and increasing of object counter, 1835 * releasing is done via counter of objects allocated in the block 1836 * or moving bump pointer if releasing object is on a bound. 1837 * TODO: make bump pointer to grow to the same backward direction as all the others. 
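 *
 * Illustrative layout sketch (for clarity only):
 *
 *   [StartupBlock header][size0][object0][size1][object1] ... <- bumpPtr -> free space up to slabSize
 *
 * allocate() stores each object's size in the size_t word right before the object,
 * so msize(ptr) simply reads *((size_t*)ptr - 1).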
1838 */ 1839 1840 class StartupBlock : public Block { 1841 size_t availableSize() const { 1842 return slabSize - ((uintptr_t)bumpPtr - (uintptr_t)this); 1843 } 1844 static StartupBlock *getBlock(); 1845 public: 1846 static FreeObject *allocate(size_t size); 1847 static size_t msize(void *ptr) { return *((size_t*)ptr - 1); } 1848 void free(void *ptr); 1849 }; 1850 1851 static MallocMutex startupMallocLock; 1852 static StartupBlock *firstStartupBlock; 1853 1854 StartupBlock *StartupBlock::getBlock() 1855 { 1856 BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/false); 1857 if (backRefIdx.isInvalid()) return NULL; 1858 1859 StartupBlock *block = static_cast<StartupBlock*>( 1860 defaultMemPool->extMemPool.backend.getSlabBlock(1)); 1861 if (!block) return NULL; 1862 1863 block->cleanBlockHeader(); 1864 setBackRef(backRefIdx, block); 1865 block->backRefIdx = backRefIdx; 1866 // use startupAllocObjSizeMark to mark objects from startup block marker 1867 block->objectSize = startupAllocObjSizeMark; 1868 block->bumpPtr = (FreeObject *)((uintptr_t)block + sizeof(StartupBlock)); 1869 return block; 1870 } 1871 1872 FreeObject *StartupBlock::allocate(size_t size) 1873 { 1874 FreeObject *result; 1875 StartupBlock *newBlock = NULL; 1876 bool newBlockUnused = false; 1877 1878 /* Objects must be aligned on their natural bounds, 1879 and objects bigger than word on word's bound. */ 1880 size = alignUp(size, sizeof(size_t)); 1881 // We need size of an object to implement msize. 1882 size_t reqSize = size + sizeof(size_t); 1883 { 1884 MallocMutex::scoped_lock scoped_cs(startupMallocLock); 1885 // Re-check whether we need a new block (conditions might have changed) 1886 if (!firstStartupBlock || firstStartupBlock->availableSize() < reqSize) { 1887 if (!newBlock) { 1888 newBlock = StartupBlock::getBlock(); 1889 if (!newBlock) return NULL; 1890 } 1891 newBlock->next = (Block*)firstStartupBlock; 1892 if (firstStartupBlock) 1893 firstStartupBlock->previous = (Block*)newBlock; 1894 firstStartupBlock = newBlock; 1895 } 1896 result = firstStartupBlock->bumpPtr; 1897 firstStartupBlock->allocatedCount++; 1898 firstStartupBlock->bumpPtr = 1899 (FreeObject *)((uintptr_t)firstStartupBlock->bumpPtr + reqSize); 1900 } 1901 1902 // keep object size at the negative offset 1903 *((size_t*)result) = size; 1904 return (FreeObject*)((size_t*)result+1); 1905 } 1906 1907 void StartupBlock::free(void *ptr) 1908 { 1909 Block* blockToRelease = NULL; 1910 { 1911 MallocMutex::scoped_lock scoped_cs(startupMallocLock); 1912 1913 MALLOC_ASSERT(firstStartupBlock, ASSERT_TEXT); 1914 MALLOC_ASSERT(startupAllocObjSizeMark==objectSize 1915 && allocatedCount>0, ASSERT_TEXT); 1916 MALLOC_ASSERT((uintptr_t)ptr>=(uintptr_t)this+sizeof(StartupBlock) 1917 && (uintptr_t)ptr+StartupBlock::msize(ptr)<=(uintptr_t)this+slabSize, 1918 ASSERT_TEXT); 1919 if (0 == --allocatedCount) { 1920 if (this == firstStartupBlock) 1921 firstStartupBlock = (StartupBlock*)firstStartupBlock->next; 1922 if (previous) 1923 previous->next = next; 1924 if (next) 1925 next->previous = previous; 1926 blockToRelease = this; 1927 } else if ((uintptr_t)ptr + StartupBlock::msize(ptr) == (uintptr_t)bumpPtr) { 1928 // last object in the block released 1929 FreeObject *newBump = (FreeObject*)((size_t*)ptr - 1); 1930 MALLOC_ASSERT((uintptr_t)newBump>(uintptr_t)this+sizeof(StartupBlock), 1931 ASSERT_TEXT); 1932 bumpPtr = newBump; 1933 } 1934 } 1935 if (blockToRelease) { 1936 blockToRelease->previous = blockToRelease->next = NULL; 1937 
defaultMemPool->returnEmptyBlock(blockToRelease, /*poolTheBlock=*/false); 1938 } 1939 } 1940 1941 #endif /* MALLOC_CHECK_RECURSION */ 1942 1943 /********* End thread related code *************/ 1944 1945 /********* Library initialization *************/ 1946 1947 //! Value indicating the state of initialization. 1948 /* 0 = initialization not started. 1949 * 1 = initialization started but not finished. 1950 * 2 = initialization finished. 1951 * In theory, we only need values 0 and 2. But value 1 is nonetheless 1952 * useful for detecting errors in the double-check pattern. 1953 */ 1954 static std::atomic<intptr_t> mallocInitialized{0}; // implicitly initialized to 0 1955 static MallocMutex initMutex; 1956 1957 /** The leading "\0" is here so that applying "strings" to the binary 1958 delivers a clean result. */ 1959 static char VersionString[] = "\0" TBBMALLOC_VERSION_STRINGS; 1960 1961 #if USE_PTHREAD && (__TBB_SOURCE_DIRECTLY_INCLUDED || __TBB_USE_DLOPEN_REENTRANCY_WORKAROUND) 1962 1963 /* Decrease race interval between dynamic library unloading and pthread key 1964 destructor. Protect only Pthreads with supported unloading. */ 1965 class ShutdownSync { 1966 /* flag is the number of threads in pthread key dtor body 1967 (i.e., between threadDtorStart() and threadDtorDone()) 1968 or the signal to skip dtor, if flag < 0 */ 1969 std::atomic<intptr_t> flag; 1970 static const intptr_t skipDtor = INTPTR_MIN/2; 1971 public: 1972 void init() { flag.store(0, std::memory_order_release); } 1973 /* Suppose that 2*abs(skipDtor) or more threads never call threadDtorStart() 1974 simultaneously, so flag never becomes negative because of that. */ 1975 bool threadDtorStart() { 1976 if (flag.load(std::memory_order_acquire) < 0) 1977 return false; 1978 if (++flag <= 0) { // note that new value returned 1979 flag.fetch_sub(1); // flag is spoiled by us, restore it 1980 return false; 1981 } 1982 return true; 1983 } 1984 void threadDtorDone() { 1985 flag.fetch_sub(1); 1986 } 1987 void processExit() { 1988 if (flag.fetch_add(skipDtor) != 0) { 1989 SpinWaitUntilEq(flag, skipDtor); 1990 } 1991 } 1992 }; 1993 1994 #else 1995 1996 class ShutdownSync { 1997 public: 1998 void init() { } 1999 bool threadDtorStart() { return true; } 2000 void threadDtorDone() { } 2001 void processExit() { } 2002 }; 2003 2004 #endif // USE_PTHREAD && (__TBB_SOURCE_DIRECTLY_INCLUDED || __TBB_USE_DLOPEN_REENTRANCY_WORKAROUND) 2005 2006 static ShutdownSync shutdownSync; 2007 2008 inline bool isMallocInitialized() { 2009 // Load must have acquire fence; otherwise thread taking "initialized" path 2010 // might perform textually later loads *before* mallocInitialized becomes 2. 2011 return 2 == mallocInitialized.load(std::memory_order_acquire); 2012 } 2013 2014 /* Caller is responsible for ensuring this routine is called exactly once. */ 2015 extern "C" void MallocInitializeITT() { 2016 #if __TBB_USE_ITT_NOTIFY 2017 if (!usedBySrcIncluded) 2018 tbb::detail::r1::__TBB_load_ittnotify(); 2019 #endif 2020 } 2021 2022 void MemoryPool::initDefaultPool() { 2023 hugePages.init(); 2024 } 2025 2026 /* 2027 * Allocator initialization routine; 2028 * it is called lazily on the very first scalable_malloc call. 
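 *
 * Typical call pattern on the allocation paths below (see internalMalloc() and allocateAligned()):
 *
 *   if (!isMallocInitialized())     // cheap acquire-load fast path
 *       if (!doInitialization())    // serialized, one-time initialization
 *           return NULL;            // initialization failed (e.g. out of memory)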
 */
static bool initMemoryManager()
{
    TRACEF(( "[ScalableMalloc trace] sizeof(Block) is %d (expected 128); sizeof(uintptr_t) is %d\n",
             sizeof(Block), sizeof(uintptr_t) ));
    MALLOC_ASSERT( 2*blockHeaderAlignment == sizeof(Block), ASSERT_TEXT );
    MALLOC_ASSERT( sizeof(FreeObject) == sizeof(void*), ASSERT_TEXT );
    MALLOC_ASSERT( isAligned(defaultMemPool, sizeof(intptr_t)),
                   "Memory pool must be void*-aligned for atomic to work over aligned arguments.");

#if USE_WINTHREAD
    const size_t granularity = 64*1024; // granularity of VirtualAlloc
#else
    // POSIX.1-2001-compliant way to get page size
    const size_t granularity = sysconf(_SC_PAGESIZE);
#endif
    if (!defaultMemPool) {
        // Do not rely on static constructors; do the assignment here in case
        // the library's static data section has not been initialized at this call yet.
        defaultMemPool = (MemoryPool*)defaultMemPool_space;
    }
    bool initOk = defaultMemPool->
        extMemPool.init(0, NULL, NULL, granularity,
                        /*keepAllMemory=*/false, /*fixedPool=*/false);
    // TODO: extMemPool.init() to not allocate memory
    if (!initOk || !initBackRefMaster(&defaultMemPool->extMemPool.backend) || !ThreadId::init())
        return false;
    MemoryPool::initDefaultPool();
    // init() is required iff initMemoryManager() is called
    // after mallocProcessShutdownNotification()
    shutdownSync.init();
#if COLLECT_STATISTICS
    initStatisticsCollection();
#endif
    return true;
}

static bool GetBoolEnvironmentVariable(const char* name) {
    return tbb::detail::r1::GetBoolEnvironmentVariable(name);
}

//! Ensures that initMemoryManager() is called once and only once.
/** Does not return until initMemoryManager() has been completed by some thread.
    There is no need to call this routine if mallocInitialized==2. */
static bool doInitialization()
{
    MallocMutex::scoped_lock lock( initMutex );
    if (mallocInitialized.load(std::memory_order_relaxed)!=2) {
        MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==0, ASSERT_TEXT );
        mallocInitialized.store(1, std::memory_order_relaxed);
        RecursiveMallocCallProtector scoped;
        if (!initMemoryManager()) {
            mallocInitialized.store(0, std::memory_order_relaxed); // restore the state and bail out
            return false;
        }
#ifdef MALLOC_EXTRA_INITIALIZATION
        MALLOC_EXTRA_INITIALIZATION;
#endif
#if MALLOC_CHECK_RECURSION
        RecursiveMallocCallProtector::detectNaiveOverload();
#endif
        MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==1, ASSERT_TEXT );
        // Store must have release fence, otherwise mallocInitialized==2
        // might become remotely visible before side effects of
        // initMemoryManager() become remotely visible.
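        // (This release store pairs with the acquire load in isMallocInitialized().)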
2094 mallocInitialized.store(2, std::memory_order_release); 2095 if( GetBoolEnvironmentVariable("TBB_VERSION") ) { 2096 fputs(VersionString+1,stderr); 2097 hugePages.printStatus(); 2098 } 2099 } 2100 /* It can't be 0 or I would have initialized it */ 2101 MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==2, ASSERT_TEXT ); 2102 return true; 2103 } 2104 2105 /********* End library initialization *************/ 2106 2107 /********* The malloc show begins *************/ 2108 2109 2110 FreeObject *Block::allocateFromFreeList() 2111 { 2112 FreeObject *result; 2113 2114 if (!freeList) return NULL; 2115 2116 result = freeList; 2117 MALLOC_ASSERT( result, ASSERT_TEXT ); 2118 2119 freeList = result->next; 2120 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 2121 allocatedCount++; 2122 STAT_increment(getThreadId(), getIndex(objectSize), allocFreeListUsed); 2123 2124 return result; 2125 } 2126 2127 FreeObject *Block::allocateFromBumpPtr() 2128 { 2129 FreeObject *result = bumpPtr; 2130 if (result) { 2131 bumpPtr = (FreeObject *) ((uintptr_t) bumpPtr - objectSize); 2132 if ( (uintptr_t)bumpPtr < (uintptr_t)this+sizeof(Block) ) { 2133 bumpPtr = NULL; 2134 } 2135 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 2136 allocatedCount++; 2137 STAT_increment(getThreadId(), getIndex(objectSize), allocBumpPtrUsed); 2138 } 2139 return result; 2140 } 2141 2142 inline FreeObject* Block::allocate() 2143 { 2144 MALLOC_ASSERT( isOwnedByCurrentThread(), ASSERT_TEXT ); 2145 2146 /* for better cache locality, first looking in the free list. */ 2147 if ( FreeObject *result = allocateFromFreeList() ) { 2148 return result; 2149 } 2150 MALLOC_ASSERT( !freeList, ASSERT_TEXT ); 2151 2152 /* if free list is empty, try thread local bump pointer allocation. */ 2153 if ( FreeObject *result = allocateFromBumpPtr() ) { 2154 return result; 2155 } 2156 MALLOC_ASSERT( !bumpPtr, ASSERT_TEXT ); 2157 2158 /* the block is considered full. */ 2159 isFull = true; 2160 return NULL; 2161 } 2162 2163 size_t Block::findObjectSize(void *object) const 2164 { 2165 size_t blSize = getSize(); 2166 #if MALLOC_CHECK_RECURSION 2167 // Currently, there is no aligned allocations from startup blocks, 2168 // so we can return just StartupBlock::msize(). 2169 // TODO: This must be extended if we add aligned allocation from startup blocks. 
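    // A zero block size identifies a startup block here; such objects keep their size in the
    // size_t word just before the object (see StartupBlock::allocate() and msize()).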
2170 if (!blSize) 2171 return StartupBlock::msize(object); 2172 #endif 2173 // object can be aligned, so real size can be less than block's 2174 size_t size = 2175 blSize - ((uintptr_t)object - (uintptr_t)findObjectToFree(object)); 2176 MALLOC_ASSERT(size>0 && size<minLargeObjectSize, ASSERT_TEXT); 2177 return size; 2178 } 2179 2180 void Bin::moveBlockToFront(Block *block) 2181 { 2182 /* move the block to the front of the bin */ 2183 if (block == activeBlk) return; 2184 outofTLSBin(block); 2185 pushTLSBin(block); 2186 } 2187 2188 void Bin::processEmptyBlock(Block *block, bool poolTheBlock) 2189 { 2190 if (block != activeBlk) { 2191 /* We are not using this block; return it to the pool */ 2192 outofTLSBin(block); 2193 block->getMemPool()->returnEmptyBlock(block, poolTheBlock); 2194 } else { 2195 /* all objects are free - let's restore the bump pointer */ 2196 block->restoreBumpPtr(); 2197 } 2198 } 2199 2200 template<int LOW_MARK, int HIGH_MARK> 2201 bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool) 2202 { 2203 const size_t size = object->unalignedSize; 2204 // not spoil cache with too large object, that can cause its total cleanup 2205 if (size > MAX_TOTAL_SIZE) 2206 return false; 2207 LargeMemoryBlock *localHead = head.exchange(NULL); 2208 2209 object->prev = NULL; 2210 object->next = localHead; 2211 if (localHead) 2212 localHead->prev = object; 2213 else { 2214 // those might not be cleaned during local cache stealing, correct them 2215 totalSize = 0; 2216 numOfBlocks = 0; 2217 tail = object; 2218 } 2219 localHead = object; 2220 totalSize += size; 2221 numOfBlocks++; 2222 // must meet both size and number of cached objects constrains 2223 if (totalSize > MAX_TOTAL_SIZE || numOfBlocks >= HIGH_MARK) { 2224 // scanning from tail until meet conditions 2225 while (totalSize > MAX_TOTAL_SIZE || numOfBlocks > LOW_MARK) { 2226 totalSize -= tail->unalignedSize; 2227 numOfBlocks--; 2228 tail = tail->prev; 2229 } 2230 LargeMemoryBlock *headToRelease = tail->next; 2231 tail->next = NULL; 2232 2233 extMemPool->freeLargeObjectList(headToRelease); 2234 } 2235 2236 head.store(localHead, std::memory_order_release); 2237 return true; 2238 } 2239 2240 template<int LOW_MARK, int HIGH_MARK> 2241 LargeMemoryBlock *LocalLOCImpl<LOW_MARK, HIGH_MARK>::get(size_t size) 2242 { 2243 LargeMemoryBlock *localHead, *res = NULL; 2244 2245 if (size > MAX_TOTAL_SIZE) 2246 return NULL; 2247 2248 // TBB_REVAMP_TODO: review this line 2249 if (!head.load(std::memory_order_acquire) || (localHead = head.exchange(NULL)) == NULL) { 2250 // do not restore totalSize, numOfBlocks and tail at this point, 2251 // as they are used only in put(), where they must be restored 2252 return NULL; 2253 } 2254 2255 for (LargeMemoryBlock *curr = localHead; curr; curr=curr->next) { 2256 if (curr->unalignedSize == size) { 2257 res = curr; 2258 if (curr->next) 2259 curr->next->prev = curr->prev; 2260 else 2261 tail = curr->prev; 2262 if (curr != localHead) 2263 curr->prev->next = curr->next; 2264 else 2265 localHead = curr->next; 2266 totalSize -= size; 2267 numOfBlocks--; 2268 break; 2269 } 2270 } 2271 2272 head.store(localHead, std::memory_order_release); 2273 return res; 2274 } 2275 2276 template<int LOW_MARK, int HIGH_MARK> 2277 bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::externalCleanup(ExtMemoryPool *extMemPool) 2278 { 2279 if (LargeMemoryBlock *localHead = head.exchange(NULL)) { 2280 extMemPool->freeLargeObjectList(localHead); 2281 return true; 2282 } 2283 return false; 2284 } 2285 2286 void 
*MemoryPool::getFromLLOCache(TLSData* tls, size_t size, size_t alignment) 2287 { 2288 LargeMemoryBlock *lmb = NULL; 2289 2290 size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr); 2291 size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+alignment); 2292 if (allocationSize < size) // allocationSize is wrapped around after alignToBin 2293 return NULL; 2294 MALLOC_ASSERT(allocationSize >= alignment, "Overflow must be checked before."); 2295 2296 if (tls) { 2297 tls->markUsed(); 2298 lmb = tls->lloc.get(allocationSize); 2299 } 2300 if (!lmb) 2301 lmb = extMemPool.mallocLargeObject(this, allocationSize); 2302 2303 if (lmb) { 2304 // doing shuffle we suppose that alignment offset guarantees 2305 // that different cache lines are in use 2306 MALLOC_ASSERT(alignment >= estimatedCacheLineSize, ASSERT_TEXT); 2307 2308 void *alignedArea = (void*)alignUp((uintptr_t)lmb+headersSize, alignment); 2309 uintptr_t alignedRight = 2310 alignDown((uintptr_t)lmb+lmb->unalignedSize - size, alignment); 2311 // Has some room to shuffle object between cache lines? 2312 // Note that alignedRight and alignedArea are aligned at alignment. 2313 unsigned ptrDelta = alignedRight - (uintptr_t)alignedArea; 2314 if (ptrDelta && tls) { // !tls is cold path 2315 // for the hot path of alignment==estimatedCacheLineSize, 2316 // allow compilers to use shift for division 2317 // (since estimatedCacheLineSize is a power-of-2 constant) 2318 unsigned numOfPossibleOffsets = alignment == estimatedCacheLineSize? 2319 ptrDelta / estimatedCacheLineSize : 2320 ptrDelta / alignment; 2321 unsigned myCacheIdx = ++tls->currCacheIdx; 2322 unsigned offset = myCacheIdx % numOfPossibleOffsets; 2323 2324 // Move object to a cache line with an offset that is different from 2325 // previous allocation. This supposedly allows us to use cache 2326 // associativity more efficiently. 2327 alignedArea = (void*)((uintptr_t)alignedArea + offset*alignment); 2328 } 2329 MALLOC_ASSERT((uintptr_t)lmb+lmb->unalignedSize >= 2330 (uintptr_t)alignedArea+size, "Object doesn't fit the block."); 2331 LargeObjectHdr *header = (LargeObjectHdr*)alignedArea-1; 2332 header->memoryBlock = lmb; 2333 header->backRefIdx = lmb->backRefIdx; 2334 setBackRef(header->backRefIdx, header); 2335 2336 lmb->objectSize = size; 2337 2338 MALLOC_ASSERT( isLargeObject<unknownMem>(alignedArea), ASSERT_TEXT ); 2339 MALLOC_ASSERT( isAligned(alignedArea, alignment), ASSERT_TEXT ); 2340 2341 return alignedArea; 2342 } 2343 return NULL; 2344 } 2345 2346 void MemoryPool::putToLLOCache(TLSData *tls, void *object) 2347 { 2348 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2349 // overwrite backRefIdx to simplify double free detection 2350 header->backRefIdx = BackRefIdx(); 2351 2352 if (tls) { 2353 tls->markUsed(); 2354 if (tls->lloc.put(header->memoryBlock, &extMemPool)) 2355 return; 2356 } 2357 extMemPool.freeLargeObject(header->memoryBlock); 2358 } 2359 2360 /* 2361 * All aligned allocations fall into one of the following categories: 2362 * 1. if both request size and alignment are <= maxSegregatedObjectSize, 2363 * we just align the size up, and request this amount, because for every size 2364 * aligned to some power of 2, the allocated object is at least that aligned. 2365 * 2. for size<minLargeObjectSize, check if already guaranteed fittingAlignment is enough. 2366 * 3. if size+alignment<minLargeObjectSize, we take an object of fittingSizeN and align 2367 * its address up; given such pointer, scalable_free could find the real object. 
2368 * Wrapping of size+alignment is impossible because maximal allowed 2369 * alignment plus minLargeObjectSize can't lead to wrapping. 2370 * 4. otherwise, aligned large object is allocated. 2371 */ 2372 static void *allocateAligned(MemoryPool *memPool, size_t size, size_t alignment) 2373 { 2374 MALLOC_ASSERT( isPowerOfTwo(alignment), ASSERT_TEXT ); 2375 2376 if (!isMallocInitialized()) 2377 if (!doInitialization()) 2378 return NULL; 2379 2380 void *result; 2381 if (size<=maxSegregatedObjectSize && alignment<=maxSegregatedObjectSize) 2382 result = internalPoolMalloc(memPool, alignUp(size? size: sizeof(size_t), alignment)); 2383 else if (size<minLargeObjectSize) { 2384 if (alignment<=fittingAlignment) 2385 result = internalPoolMalloc(memPool, size); 2386 else if (size+alignment < minLargeObjectSize) { 2387 void *unaligned = internalPoolMalloc(memPool, size+alignment); 2388 if (!unaligned) return NULL; 2389 result = alignUp(unaligned, alignment); 2390 } else 2391 goto LargeObjAlloc; 2392 } else { 2393 LargeObjAlloc: 2394 TLSData *tls = memPool->getTLS(/*create=*/true); 2395 // take into account only alignment that are higher then natural 2396 result = 2397 memPool->getFromLLOCache(tls, size, largeObjectAlignment>alignment? 2398 largeObjectAlignment: alignment); 2399 } 2400 2401 MALLOC_ASSERT( isAligned(result, alignment), ASSERT_TEXT ); 2402 return result; 2403 } 2404 2405 static void *reallocAligned(MemoryPool *memPool, void *ptr, 2406 size_t newSize, size_t alignment = 0) 2407 { 2408 void *result; 2409 size_t copySize; 2410 2411 if (isLargeObject<ourMem>(ptr)) { 2412 LargeMemoryBlock* lmb = ((LargeObjectHdr *)ptr - 1)->memoryBlock; 2413 copySize = lmb->unalignedSize-((uintptr_t)ptr-(uintptr_t)lmb); 2414 2415 // Apply different strategies if size decreases 2416 if (newSize <= copySize && (0 == alignment || isAligned(ptr, alignment))) { 2417 2418 // For huge objects (that do not fit in backend cache), keep the same space unless 2419 // the new size is at least twice smaller 2420 bool isMemoryBlockHuge = copySize > memPool->extMemPool.backend.getMaxBinnedSize(); 2421 size_t threshold = isMemoryBlockHuge ? copySize / 2 : 0; 2422 if (newSize > threshold) { 2423 lmb->objectSize = newSize; 2424 return ptr; 2425 } 2426 // TODO: For large objects suitable for the backend cache, 2427 // split out the excessive part and put it to the backend. 2428 } 2429 // Reallocate for real 2430 copySize = lmb->objectSize; 2431 #if BACKEND_HAS_MREMAP 2432 if (void *r = memPool->extMemPool.remap(ptr, copySize, newSize, 2433 alignment < largeObjectAlignment ? largeObjectAlignment : alignment)) 2434 return r; 2435 #endif 2436 result = alignment ? allocateAligned(memPool, newSize, alignment) : 2437 internalPoolMalloc(memPool, newSize); 2438 2439 } else { 2440 Block* block = (Block *)alignDown(ptr, slabSize); 2441 copySize = block->findObjectSize(ptr); 2442 2443 // TODO: Move object to another bin if size decreases and the current bin is "empty enough". 2444 // Currently, in case of size decreasing, old pointer is returned 2445 if (newSize <= copySize && (0==alignment || isAligned(ptr, alignment))) { 2446 return ptr; 2447 } else { 2448 result = alignment ? allocateAligned(memPool, newSize, alignment) : 2449 internalPoolMalloc(memPool, newSize); 2450 } 2451 } 2452 if (result) { 2453 memcpy(result, ptr, copySize < newSize ? 
copySize : newSize); 2454 internalPoolFree(memPool, ptr, 0); 2455 } 2456 return result; 2457 } 2458 2459 #if MALLOC_DEBUG 2460 /* A predicate checks if an object is properly placed inside its block */ 2461 inline bool Block::isProperlyPlaced(const void *object) const 2462 { 2463 return 0 == ((uintptr_t)this + slabSize - (uintptr_t)object) % objectSize; 2464 } 2465 #endif 2466 2467 /* Finds the real object inside the block */ 2468 FreeObject *Block::findAllocatedObject(const void *address) const 2469 { 2470 // calculate offset from the end of the block space 2471 uint16_t offset = (uintptr_t)this + slabSize - (uintptr_t)address; 2472 MALLOC_ASSERT( offset<=slabSize-sizeof(Block), ASSERT_TEXT ); 2473 // find offset difference from a multiple of allocation size 2474 offset %= objectSize; 2475 // and move the address down to where the real object starts. 2476 return (FreeObject*)((uintptr_t)address - (offset? objectSize-offset: 0)); 2477 } 2478 2479 /* 2480 * Bad dereference caused by a foreign pointer is possible only here, not earlier in call chain. 2481 * Separate function isolates SEH code, as it has bad influence on compiler optimization. 2482 */ 2483 static inline BackRefIdx safer_dereference (const BackRefIdx *ptr) 2484 { 2485 BackRefIdx id; 2486 #if _MSC_VER 2487 __try { 2488 #endif 2489 id = dereference(ptr); 2490 #if _MSC_VER 2491 } __except( GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION? 2492 EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ) { 2493 id = BackRefIdx(); 2494 } 2495 #endif 2496 return id; 2497 } 2498 2499 template<MemoryOrigin memOrigin> 2500 bool isLargeObject(void *object) 2501 { 2502 if (!isAligned(object, largeObjectAlignment)) 2503 return false; 2504 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2505 BackRefIdx idx = (memOrigin == unknownMem) ? 2506 safer_dereference(&header->backRefIdx) : dereference(&header->backRefIdx); 2507 2508 return idx.isLargeObject() 2509 // in valid LargeObjectHdr memoryBlock is not NULL 2510 && header->memoryBlock 2511 // in valid LargeObjectHdr memoryBlock points somewhere before header 2512 // TODO: more strict check 2513 && (uintptr_t)header->memoryBlock < (uintptr_t)header 2514 && getBackRef(idx) == header; 2515 } 2516 2517 static inline bool isSmallObject (void *ptr) 2518 { 2519 Block* expectedBlock = (Block*)alignDown(ptr, slabSize); 2520 const BackRefIdx* idx = expectedBlock->getBackRefIdx(); 2521 2522 bool isSmall = expectedBlock == getBackRef(safer_dereference(idx)); 2523 if (isSmall) 2524 expectedBlock->checkFreePrecond(ptr); 2525 return isSmall; 2526 } 2527 2528 /**** Check if an object was allocated by scalable_malloc ****/ 2529 static inline bool isRecognized (void* ptr) 2530 { 2531 return defaultMemPool->extMemPool.backend.ptrCanBeValid(ptr) && 2532 (isLargeObject<unknownMem>(ptr) || isSmallObject(ptr)); 2533 } 2534 2535 static inline void freeSmallObject(void *object) 2536 { 2537 /* mask low bits to get the block */ 2538 Block *block = (Block *)alignDown(object, slabSize); 2539 block->checkFreePrecond(object); 2540 2541 #if MALLOC_CHECK_RECURSION 2542 if (block->isStartupAllocObject()) { 2543 ((StartupBlock *)block)->free(object); 2544 return; 2545 } 2546 #endif 2547 if (block->isOwnedByCurrentThread()) { 2548 block->freeOwnObject(object); 2549 } else { /* Slower path to add to the shared list, the allocatedCount is updated by the owner thread in malloc. 
*/ 2550 FreeObject *objectToFree = block->findObjectToFree(object); 2551 block->freePublicObject(objectToFree); 2552 } 2553 } 2554 2555 static void *internalPoolMalloc(MemoryPool* memPool, size_t size) 2556 { 2557 Bin* bin; 2558 Block * mallocBlock; 2559 2560 if (!memPool) return NULL; 2561 2562 if (!size) size = sizeof(size_t); 2563 2564 TLSData *tls = memPool->getTLS(/*create=*/true); 2565 2566 /* Allocate a large object */ 2567 if (size >= minLargeObjectSize) 2568 return memPool->getFromLLOCache(tls, size, largeObjectAlignment); 2569 2570 if (!tls) return NULL; 2571 2572 tls->markUsed(); 2573 /* 2574 * Get an element in thread-local array corresponding to the given size; 2575 * It keeps ptr to the active block for allocations of this size 2576 */ 2577 bin = tls->getAllocationBin(size); 2578 if ( !bin ) return NULL; 2579 2580 /* Get a block to try to allocate in. */ 2581 for( mallocBlock = bin->getActiveBlock(); mallocBlock; 2582 mallocBlock = bin->setPreviousBlockActive() ) // the previous block should be empty enough 2583 { 2584 if( FreeObject *result = mallocBlock->allocate() ) 2585 return result; 2586 } 2587 2588 /* 2589 * else privatize publicly freed objects in some block and allocate from it 2590 */ 2591 mallocBlock = bin->getPrivatizedFreeListBlock(); 2592 if (mallocBlock) { 2593 MALLOC_ASSERT( mallocBlock->freeListNonNull(), ASSERT_TEXT ); 2594 if ( FreeObject *result = mallocBlock->allocateFromFreeList() ) 2595 return result; 2596 /* Else something strange happened, need to retry from the beginning; */ 2597 TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in public free list; reentering.\n" )); 2598 return internalPoolMalloc(memPool, size); 2599 } 2600 2601 /* 2602 * no suitable own blocks, try to get a partial block that some other thread has discarded. 2603 */ 2604 mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size); 2605 while (mallocBlock) { 2606 bin->pushTLSBin(mallocBlock); 2607 bin->setActiveBlock(mallocBlock); // TODO: move under the below condition? 2608 if( FreeObject *result = mallocBlock->allocate() ) 2609 return result; 2610 mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size); 2611 } 2612 2613 /* 2614 * else try to get a new empty block 2615 */ 2616 mallocBlock = memPool->getEmptyBlock(size); 2617 if (mallocBlock) { 2618 bin->pushTLSBin(mallocBlock); 2619 bin->setActiveBlock(mallocBlock); 2620 if( FreeObject *result = mallocBlock->allocate() ) 2621 return result; 2622 /* Else something strange happened, need to retry from the beginning; */ 2623 TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in empty block; reentering.\n" )); 2624 return internalPoolMalloc(memPool, size); 2625 } 2626 /* 2627 * else nothing works so return NULL 2628 */ 2629 TRACEF(( "[ScalableMalloc trace] No memory found, returning NULL.\n" )); 2630 return NULL; 2631 } 2632 2633 // When size==0 (i.e. unknown), detect here whether the object is large. 2634 // For size is known and < minLargeObjectSize, we still need to check 2635 // if the actual object is large, because large objects might be used 2636 // for aligned small allocations. 2637 static bool internalPoolFree(MemoryPool *memPool, void *object, size_t size) 2638 { 2639 if (!memPool || !object) return false; 2640 2641 // The library is initialized at allocation call, so releasing while 2642 // not initialized means foreign object is releasing. 
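    // Note: for objects from user-created pools the recognition check in the assertion below
    // is skipped, because small objects in such pools do not carry valid backreferences.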
2643 MALLOC_ASSERT(isMallocInitialized(), ASSERT_TEXT); 2644 MALLOC_ASSERT(memPool->extMemPool.userPool() || isRecognized(object), 2645 "Invalid pointer during object releasing is detected."); 2646 2647 if (size >= minLargeObjectSize || isLargeObject<ourMem>(object)) 2648 memPool->putToLLOCache(memPool->getTLS(/*create=*/false), object); 2649 else 2650 freeSmallObject(object); 2651 return true; 2652 } 2653 2654 static void *internalMalloc(size_t size) 2655 { 2656 if (!size) size = sizeof(size_t); 2657 2658 #if MALLOC_CHECK_RECURSION 2659 if (RecursiveMallocCallProtector::sameThreadActive()) 2660 return size<minLargeObjectSize? StartupBlock::allocate(size) : 2661 // nested allocation, so skip tls 2662 (FreeObject*)defaultMemPool->getFromLLOCache(NULL, size, slabSize); 2663 #endif 2664 2665 if (!isMallocInitialized()) 2666 if (!doInitialization()) 2667 return NULL; 2668 return internalPoolMalloc(defaultMemPool, size); 2669 } 2670 2671 static void internalFree(void *object) 2672 { 2673 internalPoolFree(defaultMemPool, object, 0); 2674 } 2675 2676 static size_t internalMsize(void* ptr) 2677 { 2678 MALLOC_ASSERT(ptr, "Invalid pointer passed to internalMsize"); 2679 if (isLargeObject<ourMem>(ptr)) { 2680 // TODO: return the maximum memory size, that can be written to this object 2681 LargeMemoryBlock* lmb = ((LargeObjectHdr*)ptr - 1)->memoryBlock; 2682 return lmb->objectSize; 2683 } else { 2684 Block *block = (Block*)alignDown(ptr, slabSize); 2685 return block->findObjectSize(ptr); 2686 } 2687 } 2688 2689 } // namespace internal 2690 2691 using namespace rml::internal; 2692 2693 // legacy entry point saved for compatibility with binaries complied 2694 // with pre-6003 versions of TBB 2695 rml::MemoryPool *pool_create(intptr_t pool_id, const MemPoolPolicy *policy) 2696 { 2697 rml::MemoryPool *pool; 2698 MemPoolPolicy pol(policy->pAlloc, policy->pFree, policy->granularity); 2699 2700 pool_create_v1(pool_id, &pol, &pool); 2701 return pool; 2702 } 2703 2704 rml::MemPoolError pool_create_v1(intptr_t pool_id, const MemPoolPolicy *policy, 2705 rml::MemoryPool **pool) 2706 { 2707 if ( !policy->pAlloc || policy->version<MemPoolPolicy::TBBMALLOC_POOL_VERSION 2708 // empty pFree allowed only for fixed pools 2709 || !(policy->fixedPool || policy->pFree)) { 2710 *pool = NULL; 2711 return INVALID_POLICY; 2712 } 2713 if ( policy->version>MemPoolPolicy::TBBMALLOC_POOL_VERSION // future versions are not supported 2714 // new flags can be added in place of reserved, but default 2715 // behaviour must be supported by this version 2716 || policy->reserved ) { 2717 *pool = NULL; 2718 return UNSUPPORTED_POLICY; 2719 } 2720 if (!isMallocInitialized()) 2721 if (!doInitialization()) { 2722 *pool = NULL; 2723 return NO_MEMORY; 2724 } 2725 rml::internal::MemoryPool *memPool = 2726 (rml::internal::MemoryPool*)internalMalloc((sizeof(rml::internal::MemoryPool))); 2727 if (!memPool) { 2728 *pool = NULL; 2729 return NO_MEMORY; 2730 } 2731 memset(memPool, 0, sizeof(rml::internal::MemoryPool)); 2732 if (!memPool->init(pool_id, policy)) { 2733 internalFree(memPool); 2734 *pool = NULL; 2735 return NO_MEMORY; 2736 } 2737 2738 *pool = (rml::MemoryPool*)memPool; 2739 return POOL_OK; 2740 } 2741 2742 bool pool_destroy(rml::MemoryPool* memPool) 2743 { 2744 if (!memPool) return false; 2745 bool ret = ((rml::internal::MemoryPool*)memPool)->destroy(); 2746 internalFree(memPool); 2747 2748 return ret; 2749 } 2750 2751 bool pool_reset(rml::MemoryPool* memPool) 2752 { 2753 if (!memPool) return false; 2754 2755 return 
((rml::internal::MemoryPool*)memPool)->reset(); 2756 } 2757 2758 void *pool_malloc(rml::MemoryPool* mPool, size_t size) 2759 { 2760 return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size); 2761 } 2762 2763 void *pool_realloc(rml::MemoryPool* mPool, void *object, size_t size) 2764 { 2765 if (!object) 2766 return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size); 2767 if (!size) { 2768 internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0); 2769 return NULL; 2770 } 2771 return reallocAligned((rml::internal::MemoryPool*)mPool, object, size, 0); 2772 } 2773 2774 void *pool_aligned_malloc(rml::MemoryPool* mPool, size_t size, size_t alignment) 2775 { 2776 if (!isPowerOfTwo(alignment) || 0==size) 2777 return NULL; 2778 2779 return allocateAligned((rml::internal::MemoryPool*)mPool, size, alignment); 2780 } 2781 2782 void *pool_aligned_realloc(rml::MemoryPool* memPool, void *ptr, size_t size, size_t alignment) 2783 { 2784 if (!isPowerOfTwo(alignment)) 2785 return NULL; 2786 rml::internal::MemoryPool *mPool = (rml::internal::MemoryPool*)memPool; 2787 void *tmp; 2788 2789 if (!ptr) 2790 tmp = allocateAligned(mPool, size, alignment); 2791 else if (!size) { 2792 internalPoolFree(mPool, ptr, 0); 2793 return NULL; 2794 } else 2795 tmp = reallocAligned(mPool, ptr, size, alignment); 2796 2797 return tmp; 2798 } 2799 2800 bool pool_free(rml::MemoryPool *mPool, void *object) 2801 { 2802 return internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0); 2803 } 2804 2805 rml::MemoryPool *pool_identify(void *object) 2806 { 2807 rml::internal::MemoryPool *pool; 2808 if (isLargeObject<ourMem>(object)) { 2809 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2810 pool = header->memoryBlock->pool; 2811 } else { 2812 Block *block = (Block*)alignDown(object, slabSize); 2813 pool = block->getMemPool(); 2814 } 2815 // do not return defaultMemPool, as it can't be used in pool_free() etc 2816 __TBB_ASSERT_RELEASE(pool!=defaultMemPool, 2817 "rml::pool_identify() can't be used for scalable_malloc() etc results."); 2818 return (rml::MemoryPool*)pool; 2819 } 2820 2821 size_t pool_msize(rml::MemoryPool *mPool, void* object) 2822 { 2823 if (object) { 2824 // No assert for object recognition, cause objects allocated from non-default 2825 // memory pool do not participate in range checking and do not have valid backreferences for 2826 // small objects. Instead, check that an object belong to the certain memory pool. 2827 MALLOC_ASSERT_EX(mPool == pool_identify(object), "Object does not belong to the specified pool"); 2828 return internalMsize(object); 2829 } 2830 errno = EINVAL; 2831 // Unlike _msize, return 0 in case of parameter error. 2832 // Returning size_t(-1) looks more like the way to troubles. 2833 return 0; 2834 } 2835 2836 } // namespace rml 2837 2838 using namespace rml::internal; 2839 2840 #if MALLOC_TRACE 2841 static unsigned int threadGoingDownCount = 0; 2842 #endif 2843 2844 /* 2845 * When a thread is shutting down this routine should be called to remove all the thread ids 2846 * from the malloc blocks and replace them with a NULL thread id. 2847 * 2848 * For pthreads, the function is set as a callback in pthread_key_create for TLS bin. 2849 * It will be automatically called at thread exit with the key value as the argument, 2850 * unless that value is NULL. 2851 * For Windows, it is called from DllMain( DLL_THREAD_DETACH ). 
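 * (on Windows the actual entry point is __TBB_mallocThreadShutdownNotification(), defined below).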
2852 * 2853 * However neither of the above is called for the main process thread, so the routine 2854 * also needs to be called during the process shutdown. 2855 * 2856 */ 2857 // TODO: Consider making this function part of class MemoryPool. 2858 void doThreadShutdownNotification(TLSData* tls, bool main_thread) 2859 { 2860 TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return start %d\n", 2861 getThreadId(), threadGoingDownCount++ )); 2862 2863 #if USE_PTHREAD 2864 if (tls) { 2865 if (!shutdownSync.threadDtorStart()) return; 2866 tls->getMemPool()->onThreadShutdown(tls); 2867 shutdownSync.threadDtorDone(); 2868 } else 2869 #endif 2870 { 2871 suppress_unused_warning(tls); // not used on Windows 2872 // The default pool is safe to use at this point: 2873 // on Linux, only the main thread can go here before destroying defaultMemPool; 2874 // on Windows, shutdown is synchronized via loader lock and isMallocInitialized(). 2875 // See also __TBB_mallocProcessShutdownNotification() 2876 defaultMemPool->onThreadShutdown(defaultMemPool->getTLS(/*create=*/false)); 2877 // Take lock to walk through other pools; but waiting might be dangerous at this point 2878 // (e.g. on Windows the main thread might deadlock) 2879 bool locked; 2880 MallocMutex::scoped_lock lock(MemoryPool::memPoolListLock, /*wait=*/!main_thread, &locked); 2881 if (locked) { // the list is safe to process 2882 for (MemoryPool *memPool = defaultMemPool->next; memPool; memPool = memPool->next) 2883 memPool->onThreadShutdown(memPool->getTLS(/*create=*/false)); 2884 } 2885 } 2886 2887 TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return end\n", getThreadId() )); 2888 } 2889 2890 #if USE_PTHREAD 2891 void mallocThreadShutdownNotification(void* arg) 2892 { 2893 // The routine is called for each pool (as TLS dtor) on each thread, except for the main thread 2894 if (!isMallocInitialized()) return; 2895 doThreadShutdownNotification((TLSData*)arg, false); 2896 } 2897 #else 2898 extern "C" void __TBB_mallocThreadShutdownNotification() 2899 { 2900 // The routine is called once per thread on Windows 2901 if (!isMallocInitialized()) return; 2902 doThreadShutdownNotification(NULL, false); 2903 } 2904 #endif 2905 2906 extern "C" void __TBB_mallocProcessShutdownNotification(bool windows_process_dying) 2907 { 2908 if (!isMallocInitialized()) return; 2909 2910 // Don't clean allocator internals if the entire process is exiting 2911 if (!windows_process_dying) { 2912 doThreadShutdownNotification(NULL, /*main_thread=*/true); 2913 } 2914 #if __TBB_MALLOC_LOCACHE_STAT 2915 printf("cache hit ratio %f, size hit %f\n", 2916 1.*cacheHits/mallocCalls, 1.*memHitKB/memAllocKB); 2917 defaultMemPool->extMemPool.loc.reportStat(stdout); 2918 #endif 2919 2920 shutdownSync.processExit(); 2921 #if __TBB_SOURCE_DIRECTLY_INCLUDED 2922 /* Pthread keys must be deleted as soon as possible to not call key dtor 2923 on thread termination when then the tbbmalloc code can be already unloaded. 2924 */ 2925 defaultMemPool->destroy(); 2926 destroyBackRefMaster(&defaultMemPool->extMemPool.backend); 2927 ThreadId::destroy(); // Delete key for thread id 2928 hugePages.reset(); 2929 // new total malloc initialization is possible after this point 2930 mallocInitialized.store(0, std::memory_order_release); 2931 #elif __TBB_USE_DLOPEN_REENTRANCY_WORKAROUND 2932 /* In most cases we prevent unloading tbbmalloc, and don't clean up memory 2933 on process shutdown. 
When impossible to prevent, library unload results 2934 in shutdown notification, and it makes sense to release unused memory 2935 at that point (we can't release all memory because it's possible that 2936 it will be accessed after this point). 2937 TODO: better support systems where we can't prevent unloading by removing 2938 pthread destructors and releasing caches. 2939 */ 2940 defaultMemPool->extMemPool.hardCachesCleanup(); 2941 #endif // __TBB_SOURCE_DIRECTLY_INCLUDED 2942 2943 #if COLLECT_STATISTICS 2944 unsigned nThreads = ThreadId::getMaxThreadId(); 2945 for( int i=1; i<=nThreads && i<MAX_THREADS; ++i ) 2946 STAT_print(i); 2947 #endif 2948 if (!usedBySrcIncluded) 2949 MALLOC_ITT_FINI_ITTLIB(); 2950 } 2951 2952 extern "C" void * scalable_malloc(size_t size) 2953 { 2954 void *ptr = internalMalloc(size); 2955 if (!ptr) errno = ENOMEM; 2956 return ptr; 2957 } 2958 2959 extern "C" void scalable_free(void *object) 2960 { 2961 internalFree(object); 2962 } 2963 2964 #if MALLOC_ZONE_OVERLOAD_ENABLED 2965 extern "C" void __TBB_malloc_free_definite_size(void *object, size_t size) 2966 { 2967 internalPoolFree(defaultMemPool, object, size); 2968 } 2969 #endif 2970 2971 /* 2972 * A variant that provides additional memory safety, by checking whether the given address 2973 * was obtained with this allocator, and if not redirecting to the provided alternative call. 2974 */ 2975 extern "C" void __TBB_malloc_safer_free(void *object, void (*original_free)(void*)) 2976 { 2977 if (!object) 2978 return; 2979 2980 // tbbmalloc can allocate object only when tbbmalloc has been initialized 2981 if (mallocInitialized.load(std::memory_order_acquire) && defaultMemPool->extMemPool.backend.ptrCanBeValid(object)) { 2982 if (isLargeObject<unknownMem>(object)) { 2983 // must check 1st for large object, because small object check touches 4 pages on left, 2984 // and it can be inaccessible 2985 TLSData *tls = defaultMemPool->getTLS(/*create=*/false); 2986 2987 defaultMemPool->putToLLOCache(tls, object); 2988 return; 2989 } else if (isSmallObject(object)) { 2990 freeSmallObject(object); 2991 return; 2992 } 2993 } 2994 if (original_free) 2995 original_free(object); 2996 } 2997 2998 /********* End the free code *************/ 2999 3000 /********* Code for scalable_realloc ***********/ 3001 3002 /* 3003 * From K&R 3004 * "realloc changes the size of the object pointed to by p to size. The contents will 3005 * be unchanged up to the minimum of the old and the new sizes. If the new size is larger, 3006 * the new space is uninitialized. realloc returns a pointer to the new space, or 3007 * NULL if the request cannot be satisfied, in which case *p is unchanged." 3008 * 3009 */ 3010 extern "C" void* scalable_realloc(void* ptr, size_t size) 3011 { 3012 void *tmp; 3013 3014 if (!ptr) 3015 tmp = internalMalloc(size); 3016 else if (!size) { 3017 internalFree(ptr); 3018 return NULL; 3019 } else 3020 tmp = reallocAligned(defaultMemPool, ptr, size, 0); 3021 3022 if (!tmp) errno = ENOMEM; 3023 return tmp; 3024 } 3025 3026 /* 3027 * A variant that provides additional memory safety, by checking whether the given address 3028 * was obtained with this allocator, and if not redirecting to the provided alternative call. 
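 *
 * Illustrative caller-side sketch for the POSIX branch (a hypothetical wrapper, not part of this library):
 *
 *   static void* (*orig_realloc_fn)(void*, size_t);   // e.g. resolved via dlsym(RTLD_NEXT, "realloc")
 *   void* wrapped_realloc(void* p, size_t n) {
 *       return __TBB_malloc_safer_realloc(p, n, (void*)orig_realloc_fn);
 *   }
 *
 * On Windows the last parameter is instead expected to point to an orig_ptrs structure that
 * provides the original msize/free entry points (see the USE_WINTHREAD branch below).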
3029 */ 3030 extern "C" void* __TBB_malloc_safer_realloc(void* ptr, size_t sz, void* original_realloc) 3031 { 3032 void *tmp; // TODO: fix warnings about uninitialized use of tmp 3033 3034 if (!ptr) { 3035 tmp = internalMalloc(sz); 3036 } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) { 3037 if (!sz) { 3038 internalFree(ptr); 3039 return NULL; 3040 } else { 3041 tmp = reallocAligned(defaultMemPool, ptr, sz, 0); 3042 } 3043 } 3044 #if USE_WINTHREAD 3045 else if (original_realloc && sz) { 3046 orig_ptrs *original_ptrs = static_cast<orig_ptrs*>(original_realloc); 3047 if ( original_ptrs->msize ){ 3048 size_t oldSize = original_ptrs->msize(ptr); 3049 tmp = internalMalloc(sz); 3050 if (tmp) { 3051 memcpy(tmp, ptr, sz<oldSize? sz : oldSize); 3052 if ( original_ptrs->free ){ 3053 original_ptrs->free( ptr ); 3054 } 3055 } 3056 } else 3057 tmp = NULL; 3058 } 3059 #else 3060 else if (original_realloc) { 3061 typedef void* (*realloc_ptr_t)(void*,size_t); 3062 realloc_ptr_t original_realloc_ptr; 3063 (void *&)original_realloc_ptr = original_realloc; 3064 tmp = original_realloc_ptr(ptr,sz); 3065 } 3066 #endif 3067 else tmp = NULL; 3068 3069 if (!tmp) errno = ENOMEM; 3070 return tmp; 3071 } 3072 3073 /********* End code for scalable_realloc ***********/ 3074 3075 /********* Code for scalable_calloc ***********/ 3076 3077 /* 3078 * From K&R 3079 * calloc returns a pointer to space for an array of nobj objects, 3080 * each of size size, or NULL if the request cannot be satisfied. 3081 * The space is initialized to zero bytes. 3082 * 3083 */ 3084 3085 extern "C" void * scalable_calloc(size_t nobj, size_t size) 3086 { 3087 // it's square root of maximal size_t value 3088 const size_t mult_not_overflow = size_t(1) << (sizeof(size_t)*CHAR_BIT/2); 3089 const size_t arraySize = nobj * size; 3090 3091 // check for overflow during multiplication: 3092 if (nobj>=mult_not_overflow || size>=mult_not_overflow) // 1) heuristic check 3093 if (nobj && arraySize / nobj != size) { // 2) exact check 3094 errno = ENOMEM; 3095 return NULL; 3096 } 3097 void* result = internalMalloc(arraySize); 3098 if (result) 3099 memset(result, 0, arraySize); 3100 else 3101 errno = ENOMEM; 3102 return result; 3103 } 3104 3105 /********* End code for scalable_calloc ***********/ 3106 3107 /********* Code for aligned allocation API **********/ 3108 3109 extern "C" int scalable_posix_memalign(void **memptr, size_t alignment, size_t size) 3110 { 3111 if ( !isPowerOfTwoAtLeast(alignment, sizeof(void*)) ) 3112 return EINVAL; 3113 void *result = allocateAligned(defaultMemPool, size, alignment); 3114 if (!result) 3115 return ENOMEM; 3116 *memptr = result; 3117 return 0; 3118 } 3119 3120 extern "C" void * scalable_aligned_malloc(size_t size, size_t alignment) 3121 { 3122 if (!isPowerOfTwo(alignment) || 0==size) { 3123 errno = EINVAL; 3124 return NULL; 3125 } 3126 void *tmp = allocateAligned(defaultMemPool, size, alignment); 3127 if (!tmp) errno = ENOMEM; 3128 return tmp; 3129 } 3130 3131 extern "C" void * scalable_aligned_realloc(void *ptr, size_t size, size_t alignment) 3132 { 3133 if (!isPowerOfTwo(alignment)) { 3134 errno = EINVAL; 3135 return NULL; 3136 } 3137 void *tmp; 3138 3139 if (!ptr) 3140 tmp = allocateAligned(defaultMemPool, size, alignment); 3141 else if (!size) { 3142 scalable_free(ptr); 3143 return NULL; 3144 } else 3145 tmp = reallocAligned(defaultMemPool, ptr, size, alignment); 3146 3147 if (!tmp) errno = ENOMEM; 3148 return tmp; 3149 } 3150 3151 extern "C" void * 
__TBB_malloc_safer_aligned_realloc(void *ptr, size_t size, size_t alignment, void* orig_function) 3152 { 3153 /* corner cases left out of reallocAligned to not deal with errno there */ 3154 if (!isPowerOfTwo(alignment)) { 3155 errno = EINVAL; 3156 return NULL; 3157 } 3158 void *tmp = NULL; 3159 3160 if (!ptr) { 3161 tmp = allocateAligned(defaultMemPool, size, alignment); 3162 } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) { 3163 if (!size) { 3164 internalFree(ptr); 3165 return NULL; 3166 } else { 3167 tmp = reallocAligned(defaultMemPool, ptr, size, alignment); 3168 } 3169 } 3170 #if USE_WINTHREAD 3171 else { 3172 orig_aligned_ptrs *original_ptrs = static_cast<orig_aligned_ptrs*>(orig_function); 3173 if (size) { 3174 // Without orig_msize, we can't do anything with this. 3175 // Just keeping old pointer. 3176 if ( original_ptrs->aligned_msize ){ 3177 // set alignment and offset to have possibly correct oldSize 3178 size_t oldSize = original_ptrs->aligned_msize(ptr, sizeof(void*), 0); 3179 tmp = allocateAligned(defaultMemPool, size, alignment); 3180 if (tmp) { 3181 memcpy(tmp, ptr, size<oldSize? size : oldSize); 3182 if ( original_ptrs->aligned_free ){ 3183 original_ptrs->aligned_free( ptr ); 3184 } 3185 } 3186 } 3187 } else { 3188 if ( original_ptrs->aligned_free ){ 3189 original_ptrs->aligned_free( ptr ); 3190 } 3191 return NULL; 3192 } 3193 } 3194 #else 3195 // As original_realloc can't align result, and there is no way to find 3196 // size of reallocating object, we are giving up. 3197 suppress_unused_warning(orig_function); 3198 #endif 3199 if (!tmp) errno = ENOMEM; 3200 return tmp; 3201 } 3202 3203 extern "C" void scalable_aligned_free(void *ptr) 3204 { 3205 internalFree(ptr); 3206 } 3207 3208 /********* end code for aligned allocation API **********/ 3209 3210 /********* Code for scalable_msize ***********/ 3211 3212 /* 3213 * Returns the size of a memory block allocated in the heap. 3214 */ 3215 extern "C" size_t scalable_msize(void* ptr) 3216 { 3217 if (ptr) { 3218 MALLOC_ASSERT(isRecognized(ptr), "Invalid pointer in scalable_msize detected."); 3219 return internalMsize(ptr); 3220 } 3221 errno = EINVAL; 3222 // Unlike _msize, return 0 in case of parameter error. 3223 // Returning size_t(-1) looks more like the way to troubles. 3224 return 0; 3225 } 3226 3227 /* 3228 * A variant that provides additional memory safety, by checking whether the given address 3229 * was obtained with this allocator, and if not redirecting to the provided alternative call. 
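 * Returns 0 for a NULL pointer, and for an unrecognized pointer when no fallback is
 * provided (setting errno to EINVAL on Windows).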
3230 */ 3231 extern "C" size_t __TBB_malloc_safer_msize(void *object, size_t (*original_msize)(void*)) 3232 { 3233 if (object) { 3234 // Check if the memory was allocated by scalable_malloc 3235 if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object)) 3236 return internalMsize(object); 3237 else if (original_msize) 3238 return original_msize(object); 3239 } 3240 // object is NULL or unknown, or foreign and no original_msize 3241 #if USE_WINTHREAD 3242 errno = EINVAL; // errno expected to be set only on this platform 3243 #endif 3244 return 0; 3245 } 3246 3247 /* 3248 * The same as above but for _aligned_msize case 3249 */ 3250 extern "C" size_t __TBB_malloc_safer_aligned_msize(void *object, size_t alignment, size_t offset, size_t (*orig_aligned_msize)(void*,size_t,size_t)) 3251 { 3252 if (object) { 3253 // Check if the memory was allocated by scalable_malloc 3254 if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object)) 3255 return internalMsize(object); 3256 else if (orig_aligned_msize) 3257 return orig_aligned_msize(object,alignment,offset); 3258 } 3259 // object is NULL or unknown 3260 errno = EINVAL; 3261 return 0; 3262 } 3263 3264 /********* End code for scalable_msize ***********/ 3265 3266 extern "C" int scalable_allocation_mode(int param, intptr_t value) 3267 { 3268 if (param == TBBMALLOC_SET_SOFT_HEAP_LIMIT) { 3269 defaultMemPool->extMemPool.backend.setRecommendedMaxSize((size_t)value); 3270 return TBBMALLOC_OK; 3271 } else if (param == USE_HUGE_PAGES) { 3272 #if __linux__ 3273 switch (value) { 3274 case 0: 3275 case 1: 3276 hugePages.setMode(value); 3277 return TBBMALLOC_OK; 3278 default: 3279 return TBBMALLOC_INVALID_PARAM; 3280 } 3281 #else 3282 return TBBMALLOC_NO_EFFECT; 3283 #endif 3284 #if __TBB_SOURCE_DIRECTLY_INCLUDED 3285 } else if (param == TBBMALLOC_INTERNAL_SOURCE_INCLUDED) { 3286 switch (value) { 3287 case 0: // used by dynamic library 3288 case 1: // used by static library or directly included sources 3289 usedBySrcIncluded = value; 3290 return TBBMALLOC_OK; 3291 default: 3292 return TBBMALLOC_INVALID_PARAM; 3293 } 3294 #endif 3295 } else if (param == TBBMALLOC_SET_HUGE_SIZE_THRESHOLD) { 3296 defaultMemPool->extMemPool.loc.setHugeSizeThreshold((size_t)value); 3297 return TBBMALLOC_OK; 3298 } 3299 return TBBMALLOC_INVALID_PARAM; 3300 } 3301 3302 extern "C" int scalable_allocation_command(int cmd, void *param) 3303 { 3304 if (param) 3305 return TBBMALLOC_INVALID_PARAM; 3306 3307 bool released = false; 3308 switch(cmd) { 3309 case TBBMALLOC_CLEAN_THREAD_BUFFERS: 3310 if (TLSData *tls = defaultMemPool->getTLS(/*create=*/false)) 3311 released = tls->externalCleanup(/*cleanOnlyUnused*/false, /*cleanBins=*/true); 3312 break; 3313 case TBBMALLOC_CLEAN_ALL_BUFFERS: 3314 released = defaultMemPool->extMemPool.hardCachesCleanup(); 3315 break; 3316 default: 3317 return TBBMALLOC_INVALID_PARAM; 3318 } 3319 return released ? TBBMALLOC_OK : TBBMALLOC_NO_EFFECT; 3320 } 3321
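/*
 * Illustrative usage sketch of the public entry points defined above (application side;
 * assumes the declarations from the public scalable_allocator.h header):
 *
 *   void *p = scalable_malloc(1024);             // errno is set to ENOMEM on failure
 *   p = scalable_realloc(p, 4096);
 *   size_t sz = scalable_msize(p);               // usable size of the object, at least 4096
 *   scalable_free(p);
 *
 *   void *a = scalable_aligned_malloc(256, 64);  // 64-byte aligned; errno==EINVAL for a bad alignment
 *   scalable_aligned_free(a);
 *
 *   // Release cached memory back to the backend/OS; the param argument must be NULL:
 *   scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL);
 */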