/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include <errno.h>
#include <new>        /* for placement new */
#include <string.h>   /* for memset */

#include "oneapi/tbb/version.h"
#include "../tbb/environment.h"
#include "../tbb/itt_notify.h" // for __TBB_load_ittnotify()

#if USE_PTHREAD
    #define TlsSetValue_func pthread_setspecific
    #define TlsGetValue_func pthread_getspecific
    #define GetMyTID() pthread_self()
    #include <sched.h>
    inline void do_yield() {sched_yield();}
    extern "C" { static void mallocThreadShutdownNotification(void*); }
    #if __sun || __SUNPRO_CC
        #define __asm__ asm
    #endif
    #include <unistd.h> // sysconf(_SC_PAGESIZE)
#elif USE_WINTHREAD
    #define GetMyTID() GetCurrentThreadId()
    #if __TBB_WIN8UI_SUPPORT
        #include <thread>
        #define TlsSetValue_func FlsSetValue
        #define TlsGetValue_func FlsGetValue
        #define TlsAlloc() FlsAlloc(NULL)
        #define TLS_ALLOC_FAILURE FLS_OUT_OF_INDEXES
        #define TlsFree FlsFree
        inline void do_yield() {std::this_thread::yield();}
    #else
        #define TlsSetValue_func TlsSetValue
        #define TlsGetValue_func TlsGetValue
        #define TLS_ALLOC_FAILURE TLS_OUT_OF_INDEXES
        inline void do_yield() {SwitchToThread();}
    #endif
#else
    #error Must define USE_PTHREAD or USE_WINTHREAD
#endif

#define FREELIST_NONBLOCKING 1

namespace rml {
class MemoryPool;
namespace internal {

class Block;
class MemoryPool;

#if MALLOC_CHECK_RECURSION

inline bool isMallocInitialized();

#endif // MALLOC_CHECK_RECURSION

/** Support for handling the special UNUSABLE pointer state **/
const intptr_t UNUSABLE = 0x1;
inline bool isSolidPtr( void* ptr ) {
    return (UNUSABLE|(intptr_t)ptr)!=UNUSABLE;
}
inline bool isNotForUse( void* ptr ) {
    return (intptr_t)ptr==UNUSABLE;
}
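/* Editor's illustration (not part of the original sources): these helpers classify the
 * low-bit-encoded states used by pointer fields such as Block::publicFreeList. Assuming a
 * FreeObject* value p, three cases are distinguished:
 *     p == NULL            - no publicly freed objects are pending;
 *     p == (void*)UNUSABLE - the list must not be used (e.g. the block is orphaned);
 *     isSolidPtr(p)        - p is a real object address, i.e. a non-empty list.
 * For example, isSolidPtr((void*)UNUSABLE) == false and isNotForUse((void*)UNUSABLE) == true,
 * while any properly aligned object pointer is "solid" and not "not-for-use".
 */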

/*
 * Block::objectSize value used to mark blocks allocated by startupAlloc
 */
const uint16_t startupAllocObjSizeMark = ~(uint16_t)0;

/*
 * The following constant is used to define the size of struct Block, the block header.
 * The intent is to have the size of a Block be a multiple of the cache line size, which allows us
 * to get good alignment at the cost of some overhead equal to the amount of padding included in the Block.
 */
const int blockHeaderAlignment = estimatedCacheLineSize;

/********* The data structures and global objects **************/

/*
 * The malloc routines themselves need to be able to occasionally malloc some space,
 * in order to set up the structures used by the thread-local structures. This
 * routine performs that function.
 */
class BootStrapBlocks {
    MallocMutex bootStrapLock;
    Block      *bootStrapBlock;
    Block      *bootStrapBlockUsed;
    FreeObject *bootStrapObjectList;
public:
    void *allocate(MemoryPool *memPool, size_t size);
    void free(void* ptr);
    void reset();
};

#if USE_INTERNAL_TID
class ThreadId {
    static tls_key_t Tid_key;
    static std::atomic<intptr_t> ThreadCount;

    unsigned int id;

    static unsigned int tlsNumber() {
        unsigned int result = reinterpret_cast<intptr_t>(TlsGetValue_func(Tid_key));
        if( !result ) {
            RecursiveMallocCallProtector scoped;
            // Thread-local value is zero -> first call from this thread,
            // need to initialize with next ID value (IDs start from 1)
            result = ++ThreadCount; // returned new value!
            TlsSetValue_func( Tid_key, reinterpret_cast<void*>(result) );
        }
        return result;
    }
public:
    static bool init() {
#if USE_WINTHREAD
        Tid_key = TlsAlloc();
        if (Tid_key == TLS_ALLOC_FAILURE)
            return false;
#else
        int status = pthread_key_create( &Tid_key, NULL );
        if ( status ) {
            fprintf (stderr, "The memory manager cannot create tls key during initialization\n");
            return false;
        }
#endif /* USE_WINTHREAD */
        return true;
    }
#if __TBB_SOURCE_DIRECTLY_INCLUDED
    static void destroy() {
        if( Tid_key ) {
#if USE_WINTHREAD
            BOOL status = !(TlsFree( Tid_key ));  // fail is zero
#else
            int status = pthread_key_delete( Tid_key );
#endif /* USE_WINTHREAD */
            if ( status )
                fprintf (stderr, "The memory manager cannot delete tls key\n");
            Tid_key = 0;
        }
    }
#endif

    ThreadId() : id(ThreadId::tlsNumber()) {}
    bool isCurrentThreadId() const { return id == ThreadId::tlsNumber(); }

#if COLLECT_STATISTICS || MALLOC_TRACE
    friend unsigned int getThreadId() { return ThreadId::tlsNumber(); }
#endif
#if COLLECT_STATISTICS
    static unsigned getMaxThreadId() { return ThreadCount.load(std::memory_order_relaxed); }

    friend int STAT_increment(ThreadId tid, int bin, int ctr);
#endif
};

tls_key_t ThreadId::Tid_key;
std::atomic<intptr_t> ThreadId::ThreadCount;

#if COLLECT_STATISTICS
int STAT_increment(ThreadId tid, int bin, int ctr)
{
    return ::STAT_increment(tid.id, bin, ctr);
}
#endif

#else // USE_INTERNAL_TID

class ThreadId {
#if USE_PTHREAD
    std::atomic<pthread_t> tid;
#else
    std::atomic<DWORD>     tid;
#endif
public:
    ThreadId() : tid(GetMyTID()) {}
#if USE_PTHREAD
    bool isCurrentThreadId() const { return pthread_equal(pthread_self(), tid.load(std::memory_order_relaxed)); }
#else
    bool isCurrentThreadId() const { return GetCurrentThreadId() == tid.load(std::memory_order_relaxed); }
#endif
    ThreadId& operator=(const ThreadId& other) {
        tid.store(other.tid.load(std::memory_order_relaxed), std::memory_order_relaxed);
        return *this;
    }
    static bool init() { return true; }
#if __TBB_SOURCE_DIRECTLY_INCLUDED
    static void destroy() {}
#endif
};

#endif // USE_INTERNAL_TID

/*********** Code to provide thread ID and a thread-local void pointer **********/

bool TLSKey::init()
{
#if USE_WINTHREAD
    TLS_pointer_key = TlsAlloc();
    if (TLS_pointer_key == TLS_ALLOC_FAILURE)
        return false;
#else
    int status = pthread_key_create( &TLS_pointer_key, mallocThreadShutdownNotification );
    if ( status )
        return false;
#endif /* USE_WINTHREAD */
    return true;
}

bool TLSKey::destroy()
{
#if USE_WINTHREAD
    BOOL status1 = !(TlsFree(TLS_pointer_key));  // fail is zero
#else
    int status1 = pthread_key_delete(TLS_pointer_key);
#endif /* USE_WINTHREAD */
    MALLOC_ASSERT(!status1, "The memory manager cannot delete tls key.");
    return status1==0;
}

inline TLSData* TLSKey::getThreadMallocTLS() const
{
    return (TLSData *)TlsGetValue_func( TLS_pointer_key );
}

inline void TLSKey::setThreadMallocTLS( TLSData * newvalue ) {
    RecursiveMallocCallProtector scoped;
    TlsSetValue_func( TLS_pointer_key, newvalue );
}

/* The 'next' field in the block header has to maintain some invariants:
 *   it needs to be on a 16K boundary and the first field in the block.
 *   Any value stored there needs to have the lower 14 bits set to 0
 *   so that various asserts work. This means that if you want to smash this memory
 *   for debugging purposes you will need to obey this invariant.
 * The total size of the header needs to be a power of 2 to simplify
 * the alignment requirements. For now it is a 128 byte structure.
 * To avoid false sharing, the fields changed only locally are separated
 * from the fields changed by foreign threads.
 * Changing the size of the block header would require changing
 * some bin allocation sizes, in particular the "fitting" sizes (see above).
 */
class Bin;
class StartupBlock;

class MemoryPool {
    // if no explicit grainsize, expect to see malloc in user's pAlloc
    // and set a reasonably low granularity
    static const size_t defaultGranularity = estimatedCacheLineSize;

    MemoryPool();                  // deny
public:
    static MallocMutex memPoolListLock;

    // list of all active pools is used to release
    // all TLS data on thread termination or library unload
    MemoryPool     *next,
                   *prev;
    ExtMemoryPool   extMemPool;
    BootStrapBlocks bootStrapBlocks;

    static void initDefaultPool();

    bool init(intptr_t poolId, const MemPoolPolicy* memPoolPolicy);
    bool reset();
    bool destroy();
    void onThreadShutdown(TLSData *tlsData);

    inline TLSData *getTLS(bool create);
    void clearTLS() { extMemPool.tlsPointerKey.setThreadMallocTLS(NULL); }

    Block *getEmptyBlock(size_t size);
    void returnEmptyBlock(Block *block, bool poolTheBlock);

    // get/put large object to/from local large object cache
    void *getFromLLOCache(TLSData *tls, size_t size, size_t alignment);
    void putToLLOCache(TLSData *tls, void *object);
};

static intptr_t defaultMemPool_space[sizeof(MemoryPool)/sizeof(intptr_t) +
                                     (sizeof(MemoryPool)%sizeof(intptr_t)? 1 : 0)];
static MemoryPool *defaultMemPool = (MemoryPool*)defaultMemPool_space;
const size_t MemoryPool::defaultGranularity;
// zero-initialized
MallocMutex MemoryPool::memPoolListLock;
// TODO: move huge page status to the default pool, because it is part of that pool's state
HugePagesStatus hugePages;
static bool usedBySrcIncluded = false;

// Padding helpers
template<size_t padd>
struct PaddingImpl {
    size_t __padding[padd];
};

template<>
struct PaddingImpl<0> {};

template<int N>
struct Padding : PaddingImpl<N/sizeof(size_t)> {};
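/* Editor's illustration (not part of the original sources; assumes a 64-bit target with
 * estimatedCacheLineSize == 64): Padding<N> simply expands to N/sizeof(size_t) unused
 * size_t fields, e.g. Padding<40> becomes PaddingImpl<5>, i.e. five size_t members.
 * Below, LocalBlockFields inherits Padding<blockHeaderAlignment - sizeof(GlobalBlockFields)>
 * so the fields shared with foreign threads fill exactly one cache line, and Block adds
 * Padding<2*blockHeaderAlignment - sizeof(LocalBlockFields)> so the whole header spans two
 * cache lines (128 bytes with 64-byte lines); the static_assert after class Block checks
 * that bound.
 */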
// Slab block is 16KB-aligned. To prevent false sharing, separate the locally-accessed
// fields from the fields commonly accessed by non-owner threads.
class GlobalBlockFields : public BlockI {
protected:
    std::atomic<FreeObject*> publicFreeList;
    std::atomic<Block*> nextPrivatizable;
    MemoryPool *poolPtr;
};

class LocalBlockFields : public GlobalBlockFields, Padding<blockHeaderAlignment - sizeof(GlobalBlockFields)> {
protected:
    Block      *next;
    Block      *previous;        /* Use double linked list to speed up removal */
    FreeObject *bumpPtr;         /* Bump pointer moves from the end to the beginning of a block */
    FreeObject *freeList;
    /* Pointer to local data for the owner thread. Used for fast finding tls
       when releasing object from a block that current thread owned.
       NULL for orphaned blocks. */
    std::atomic<TLSData*> tlsPtr;
    ThreadId    ownerTid;        /* the ID of the thread that owns or last owned the block */
    BackRefIdx  backRefIdx;
    uint16_t    allocatedCount;  /* Number of objects allocated (obviously by the owning thread) */
    uint16_t    objectSize;
    bool        isFull;

    friend class FreeBlockPool;
    friend class StartupBlock;
    friend class LifoList;
    friend void *BootStrapBlocks::allocate(MemoryPool *, size_t);
    friend bool OrphanedBlocks::cleanup(Backend*);
    friend Block *MemoryPool::getEmptyBlock(size_t);
};

// Use inheritance to guarantee that user data starts on the next cache line.
// A padding member cannot be used for this, because when LocalBlockFields already ends
// on a cache line boundary, there must be no additional memory consumption with any compiler.
class Block : public LocalBlockFields,
              Padding<2*blockHeaderAlignment - sizeof(LocalBlockFields)> {
public:
    bool empty() const {
        if (allocatedCount > 0) return false;
        MALLOC_ASSERT(!isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT);
        return true;
    }
    inline FreeObject* allocate();
    inline FreeObject *allocateFromFreeList();

    inline bool adjustFullness();
    void adjustPositionInBin(Bin* bin = NULL);
#if MALLOC_DEBUG
    bool freeListNonNull() { return freeList; }
#endif
    void freePublicObject(FreeObject *objectToFree);
    inline void freeOwnObject(void *object);
    void reset();
    void privatizePublicFreeList( bool reset = true );
    void restoreBumpPtr();
    void privatizeOrphaned(TLSData *tls, unsigned index);
    bool readyToShare();
    void shareOrphaned(intptr_t binTag, unsigned index);
    unsigned int getSize() const {
        MALLOC_ASSERT(isStartupAllocObject() || objectSize<minLargeObjectSize,
                      "Invalid object size");
        return isStartupAllocObject()?
            0 : objectSize;
    }
    const BackRefIdx *getBackRefIdx() const { return &backRefIdx; }
    inline bool isOwnedByCurrentThread() const;
    bool isStartupAllocObject() const { return objectSize == startupAllocObjSizeMark; }
    inline FreeObject *findObjectToFree(const void *object) const;
    void checkFreePrecond(const void *object) const {
#if MALLOC_DEBUG
        const char *msg = "Possible double free or heap corruption.";
        // small objects are always at least sizeof(size_t) Byte aligned,
        // try to check this before the dereference below, as for invalid objects
        // the memory may be unreadable
        MALLOC_ASSERT(isAligned(object, sizeof(size_t)), "Try to free invalid small object");
#if !__TBB_USE_THREAD_SANITIZER
        // releasing to free slab
        MALLOC_ASSERT(allocatedCount>0, msg);
#endif
        // must not point to slab's header
        MALLOC_ASSERT((uintptr_t)object - (uintptr_t)this >= sizeof(Block), msg);
        if (startupAllocObjSizeMark == objectSize) // startup block
            MALLOC_ASSERT(object<=bumpPtr, msg);
        else {
            // non-startup objects are 8 Byte aligned
            MALLOC_ASSERT(isAligned(object, 8), "Try to free invalid small object");
            FreeObject *toFree = findObjectToFree(object);
#if !__TBB_USE_THREAD_SANITIZER
            MALLOC_ASSERT(allocatedCount <= (slabSize-sizeof(Block))/objectSize
                          && (!bumpPtr || object>bumpPtr), msg);
            // check against head of freeList, as this is mostly
            // expected after double free
            MALLOC_ASSERT(toFree != freeList, msg);
#endif
            // check against head of publicFreeList, to detect double free
            // involving a foreign thread
            MALLOC_ASSERT(toFree != publicFreeList.load(std::memory_order_relaxed), msg);
        }
#else
        suppress_unused_warning(object);
#endif
    }
    void initEmptyBlock(TLSData *tls, size_t size);
    size_t findObjectSize(void *object) const;
    MemoryPool *getMemPool() const { return poolPtr; } // do not use on the hot path!

protected:
    void cleanBlockHeader();

private:
    static const float emptyEnoughRatio; /* Threshold on free space needed to "reactivate" a block */

    inline FreeObject *allocateFromBumpPtr();
    inline FreeObject *findAllocatedObject(const void *address) const;
#if MALLOC_DEBUG
    inline bool isProperlyPlaced(const void *object) const;
#endif
    inline void markOwned(TLSData *tls) {
        MALLOC_ASSERT(!tlsPtr.load(std::memory_order_relaxed), ASSERT_TEXT);
        ownerTid = ThreadId(); /* save the ID of the current thread */
        tlsPtr.store(tls, std::memory_order_relaxed);
    }
    inline void markOrphaned() {
        MALLOC_ASSERT(tlsPtr.load(std::memory_order_relaxed), ASSERT_TEXT);
        tlsPtr.store(nullptr, std::memory_order_relaxed);
    }

    friend class Bin;
    friend class TLSData;
    friend bool MemoryPool::destroy();
};

const float Block::emptyEnoughRatio = 1.0 / 4.0;

static_assert(sizeof(Block) <= 2*estimatedCacheLineSize,
    "The class Block does not fit into 2 cache lines on this platform. "
    "Defining USE_INTERNAL_TID may help to fix it.");

class Bin {
private:
public:
    Block      *activeBlk;
    std::atomic<Block*> mailbox;
    MallocMutex mailLock;

public:
    inline Block* getActiveBlock() const { return activeBlk; }
    void resetActiveBlock() { activeBlk = NULL; }
    inline void setActiveBlock(Block *block);
    inline Block* setPreviousBlockActive();
    Block* getPrivatizedFreeListBlock();
    void moveBlockToFront(Block *block);
    bool cleanPublicFreeLists();
    void processEmptyBlock(Block *block, bool poolTheBlock);
    void addPublicFreeListBlock(Block* block);

    void outofTLSBin(Block* block);
    void verifyTLSBin(size_t size) const;
    void pushTLSBin(Block* block);

#if MALLOC_DEBUG
    void verifyInitState() const {
        MALLOC_ASSERT( !activeBlk, ASSERT_TEXT );
        MALLOC_ASSERT( !mailbox.load(std::memory_order_relaxed), ASSERT_TEXT );
    }
#endif

    friend void Block::freePublicObject (FreeObject *objectToFree);
};

/********* End of the data structures **************/

/*
 * There are bins for all 8 byte aligned objects less than this segregated size; 8 bins in total
 */
const uint32_t minSmallObjectIndex = 0;
const uint32_t numSmallObjectBins = 8;
const uint32_t maxSmallObjectSize = 64;

/*
 * There are 4 bins between each pair of powers of 2 [64-128-256-...]
 * from maxSmallObjectSize till this size; 16 bins in total
 */
const uint32_t minSegregatedObjectIndex = minSmallObjectIndex+numSmallObjectBins;
const uint32_t numSegregatedObjectBins = 16;
const uint32_t maxSegregatedObjectSize = 1024;

/*
 * And there are 5 bins with allocation sizes that are multiples of estimatedCacheLineSize
 * and selected to fit 9, 6, 4, 3, and 2 allocations in a block.
 */
const uint32_t minFittingIndex = minSegregatedObjectIndex+numSegregatedObjectBins;
const uint32_t numFittingBins = 5;

const uint32_t fittingAlignment = estimatedCacheLineSize;

#define SET_FITTING_SIZE(N) ( (slabSize-sizeof(Block))/N ) & ~(fittingAlignment-1)
// For blockSize=16*1024, sizeof(Block)=2*estimatedCacheLineSize and fittingAlignment=estimatedCacheLineSize,
// the comments show the fitting sizes and the amounts left unused for estimatedCacheLineSize=64/128:
const uint32_t fittingSize1 = SET_FITTING_SIZE(9); // 1792/1792 128/000
const uint32_t fittingSize2 = SET_FITTING_SIZE(6); // 2688/2688 128/000
const uint32_t fittingSize3 = SET_FITTING_SIZE(4); // 4032/3968 128/256
const uint32_t fittingSize4 = SET_FITTING_SIZE(3); // 5376/5376 128/000
const uint32_t fittingSize5 = SET_FITTING_SIZE(2); // 8128/8064 000/000
#undef SET_FITTING_SIZE

/*
 * The total number of thread-specific Block-based bins
 */
const uint32_t numBlockBins = minFittingIndex+numFittingBins;

/*
 * Objects of this size and larger are considered large objects.
 */
const uint32_t minLargeObjectSize = fittingSize5 + 1;
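/* Editor's worked example (not part of the original sources; assumes slabSize == 16*1024
 * and sizeof(Block) == 128, i.e. estimatedCacheLineSize == 64):
 *     SET_FITTING_SIZE(9) = ((16384 - 128) / 9) & ~63 = 1806 & ~63 = 1792  -> fittingSize1
 *     SET_FITTING_SIZE(2) = ((16384 - 128) / 2) & ~63 = 8128               -> fittingSize5
 * which matches the 64-byte column of the comments above. Hence any request above
 * 8128 bytes (minLargeObjectSize == 8129 here) bypasses the slab bins entirely and is
 * served through the large-object path instead.
 */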

/*
 * Per-thread pool of slab blocks. The idea behind it is to avoid sharing with other
 * threads memory that is likely to be in the local cache(s) of our CPU.
 */
class FreeBlockPool {
private:
    std::atomic<Block*> head;
    int      size;
    Backend *backend;
    bool     lastAccessMiss;
public:
    static const int POOL_HIGH_MARK = 32;
    static const int POOL_LOW_MARK  = 8;

    class ResOfGet {
        ResOfGet();
    public:
        Block* block;
        bool   lastAccMiss;
        ResOfGet(Block *b, bool lastMiss) : block(b), lastAccMiss(lastMiss) {}
    };

    // allocated in zero-initialized memory
    FreeBlockPool(Backend *bknd) : backend(bknd) {}
    ResOfGet getBlock();
    void returnBlock(Block *block);
    bool externalCleanup(); // can be called by another thread
};

template<int LOW_MARK, int HIGH_MARK>
class LocalLOCImpl {
private:
    static const size_t MAX_TOTAL_SIZE = 4*1024*1024;
    // TODO: can a single-linked list be faster here?
    LargeMemoryBlock *tail; // needed when releasing on overflow
    std::atomic<LargeMemoryBlock*> head;
    size_t totalSize;
    int    numOfBlocks;
public:
    bool put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool);
    LargeMemoryBlock *get(size_t size);
    bool externalCleanup(ExtMemoryPool *extMemPool);
#if __TBB_MALLOC_WHITEBOX_TEST
    LocalLOCImpl() : head(NULL), tail(NULL), totalSize(0), numOfBlocks(0) {}
    static size_t getMaxSize() { return MAX_TOTAL_SIZE; }
    static const int LOC_HIGH_MARK = HIGH_MARK;
#else
    // no ctor, object must be created in zero-initialized memory
#endif
};

typedef LocalLOCImpl<8,32> LocalLOC; // set production code parameters

class TLSData : public TLSRemote {
    MemoryPool   *memPool;
public:
    Bin           bin[numBlockBinLimit];
    FreeBlockPool freeSlabBlocks;
    LocalLOC      lloc;
    unsigned      currCacheIdx;
private:
    std::atomic<bool> unused;
public:
    TLSData(MemoryPool *mPool, Backend *bknd) : memPool(mPool), freeSlabBlocks(bknd) {}
    MemoryPool *getMemPool() const { return memPool; }
    Bin* getAllocationBin(size_t size);
    void release();
    bool externalCleanup(bool cleanOnlyUnused, bool cleanBins) {
        if (!unused.load(std::memory_order_relaxed) && cleanOnlyUnused) return false;
        // Heavy operation in terms of synchronization complexity,
        // should be called only for the current thread
        bool released = cleanBins ? cleanupBlockBins() : false;
        // both cleanups are to be called, and the order is not important
        return released | lloc.externalCleanup(&memPool->extMemPool) | freeSlabBlocks.externalCleanup();
    }
    bool cleanupBlockBins();
    void markUsed() { unused.store(false, std::memory_order_relaxed); }   // called by the owner when TLS is touched
    void markUnused() { unused.store(true, std::memory_order_relaxed); }  // can be called by a non-owner thread
};

TLSData *TLSKey::createTLS(MemoryPool *memPool, Backend *backend)
{
    MALLOC_ASSERT( sizeof(TLSData) >= sizeof(Bin) * numBlockBins + sizeof(FreeBlockPool), ASSERT_TEXT );
    TLSData* tls = (TLSData*) memPool->bootStrapBlocks.allocate(memPool, sizeof(TLSData));
    if ( !tls )
        return NULL;
    new(tls) TLSData(memPool, backend);
    /* the block contains zeroes after bootStrapMalloc, so bins are initialized */
#if MALLOC_DEBUG
    for (uint32_t i = 0; i < numBlockBinLimit; i++)
        tls->bin[i].verifyInitState();
#endif
    setThreadMallocTLS(tls);
    memPool->extMemPool.allLocalCaches.registerThread(tls);
    return tls;
}

bool TLSData::cleanupBlockBins()
{
    bool released = false;
    for (uint32_t i = 0; i < numBlockBinLimit; i++) {
        released |= bin[i].cleanPublicFreeLists();
        // After cleaning public free lists, only the active block might be empty.
        // Do not use processEmptyBlock because it will just restore bumpPtr.
        Block *block = bin[i].getActiveBlock();
        if (block && block->empty()) {
            bin[i].outofTLSBin(block);
            memPool->returnEmptyBlock(block, /*poolTheBlock=*/false);
            released = true;
        }
    }
    return released;
}

bool ExtMemoryPool::releaseAllLocalCaches()
{
    // Iterate all registered TLS data and clean LLOC and Slab pools
    bool released = allLocalCaches.cleanup(/*cleanOnlyUnused=*/false);

    // Bins privatization is done only for the current thread
    if (TLSData *tlsData = tlsPointerKey.getThreadMallocTLS())
        released |= tlsData->cleanupBlockBins();

    return released;
}

void AllLocalCaches::registerThread(TLSRemote *tls)
{
    tls->prev = NULL;
    MallocMutex::scoped_lock lock(listLock);
    MALLOC_ASSERT(head!=tls, ASSERT_TEXT);
    tls->next = head;
    if (head)
        head->prev = tls;
    head = tls;
    MALLOC_ASSERT(head->next!=head, ASSERT_TEXT);
}

void AllLocalCaches::unregisterThread(TLSRemote *tls)
{
    MallocMutex::scoped_lock lock(listLock);
    MALLOC_ASSERT(head, "Can't unregister thread: no threads are registered.");
    if (head == tls)
        head = tls->next;
    if (tls->next)
        tls->next->prev = tls->prev;
    if (tls->prev)
        tls->prev->next = tls->next;
    MALLOC_ASSERT(!tls->next || tls->next->next!=tls->next, ASSERT_TEXT);
}

bool AllLocalCaches::cleanup(bool cleanOnlyUnused)
{
    bool released = false;
    {
        MallocMutex::scoped_lock lock(listLock);
        for (TLSRemote *curr=head; curr; curr=curr->next)
            released |= static_cast<TLSData*>(curr)->externalCleanup(cleanOnlyUnused, /*cleanBins=*/false);
    }
    return released;
}

void AllLocalCaches::markUnused()
{
    bool locked;
    MallocMutex::scoped_lock lock(listLock, /*block=*/false, &locked);
    if (!locked) // do not wait to mark if another thread is busy with the list
        return;

    for (TLSRemote *curr=head; curr; curr=curr->next)
        static_cast<TLSData*>(curr)->markUnused();
}
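/* Editor's note (not part of the original sources): the TLS life cycle around here is:
 * the first allocation on a thread reaches TLSKey::createTLS(), which bootstrap-allocates
 * a TLSData, publishes it via setThreadMallocTLS() and registers it in AllLocalCaches;
 * TLSKey::init() registers mallocThreadShutdownNotification() as the pthread key
 * destructor, and MemoryPool::onThreadShutdown() (defined later in this file) releases the
 * per-thread caches and returns the TLSData to the bootstrap allocator. In between,
 * AllLocalCaches::cleanup() lets any thread trim another thread's freeSlabBlocks
 * (bounded by POOL_HIGH_MARK == 32 blocks) and lloc caches via their externalCleanup()
 * methods, while block bins are privatized only by their owning thread.
 */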

#if MALLOC_CHECK_RECURSION
MallocMutex RecursiveMallocCallProtector::rmc_mutex;
std::atomic<pthread_t> RecursiveMallocCallProtector::owner_thread;
std::atomic<void*> RecursiveMallocCallProtector::autoObjPtr;
bool RecursiveMallocCallProtector::mallocRecursionDetected;
#if __FreeBSD__
bool RecursiveMallocCallProtector::canUsePthread;
#endif

#endif

/*********** End code to provide thread ID and a TLS pointer **********/

// Parameter for isLargeObject, keeps our expectations on memory origin.
// Assertions must use unknownMem to reliably report object invalidity.
enum MemoryOrigin {
    ourMem,    // allocated by TBB allocator
    unknownMem // can be allocated by system allocator or TBB allocator
};

template<MemoryOrigin>
#if __TBB_USE_THREAD_SANITIZER
// We have a real race when accessing the large object header for
// non-large objects (e.g. small or foreign objects).
// Therefore, we need to hide this access from the thread sanitizer
__attribute__((no_sanitize("thread")))
#endif
bool isLargeObject(void *object);
static void *internalMalloc(size_t size);
static void internalFree(void *object);
static void *internalPoolMalloc(MemoryPool* mPool, size_t size);
static bool internalPoolFree(MemoryPool *mPool, void *object, size_t size);

#if !MALLOC_DEBUG
#if __INTEL_COMPILER || _MSC_VER
#define NOINLINE(decl) __declspec(noinline) decl
#define ALWAYSINLINE(decl) __forceinline decl
#elif __GNUC__
#define NOINLINE(decl) decl __attribute__ ((noinline))
#define ALWAYSINLINE(decl) decl __attribute__ ((always_inline))
#else
#define NOINLINE(decl) decl
#define ALWAYSINLINE(decl) decl
#endif

static NOINLINE( bool doInitialization() );
ALWAYSINLINE( bool isMallocInitialized() );

#undef ALWAYSINLINE
#undef NOINLINE
#endif /* !MALLOC_DEBUG */


/********* Now some rough utility code to deal with indexing the size bins. **************/

/*
 * Given a number, return the position of its highest non-zero bit. It is intended to work with 32-bit values only.
 * Moreover, on some platforms, for the sake of simplicity and performance, it is narrowed to only serve for 64 to 1023.
 * This is enough for the current algorithm of distribution of sizes among bins.
 * __TBB_Log2 is not used here to minimize dependencies on TBB specific sources.
 */
#if _WIN64 && _MSC_VER>=1400 && !__INTEL_COMPILER
extern "C" unsigned char _BitScanReverse( unsigned long* i, unsigned long w );
#pragma intrinsic(_BitScanReverse)
#endif
static inline unsigned int highestBitPos(unsigned int n)
{
    MALLOC_ASSERT( n>=64 && n<1024, ASSERT_TEXT ); // only needed for bsr array lookup, but always true
    unsigned int pos;
#if __ARCH_x86_32||__ARCH_x86_64

# if __linux__||__APPLE__||__FreeBSD__||__NetBSD__||__OpenBSD__||__sun||__MINGW32__
    __asm__ ("bsr %1,%0" : "=r"(pos) : "r"(n));
# elif (_WIN32 && (!_WIN64 || __INTEL_COMPILER))
    __asm
    {
        bsr eax, n
        mov pos, eax
    }
# elif _WIN64 && _MSC_VER>=1400
    _BitScanReverse((unsigned long*)&pos, (unsigned long)n);
# else
#   error highestBitPos() not implemented for this platform
# endif
#elif __arm__
    __asm__ __volatile__
    (
       "clz %0, %1\n"
       "rsb %0, %0, %2\n"
       :"=r" (pos) :"r" (n), "I" (31)
    );
#else
    static unsigned int bsr[16] = {0/*N/A*/,6,7,7,8,8,8,8,9,9,9,9,9,9,9,9};
    pos = bsr[ n>>6 ];
#endif /* __ARCH_* */
    return pos;
}

unsigned int getSmallObjectIndex(unsigned int size)
{
    unsigned int result = (size-1)>>3;
    if (sizeof(void*)==8) {
        // For 64-bit malloc, 16 byte alignment is needed except for bin 0.
        if (result) result |= 1; // 0,1,3,5,7; bins 2,4,6 are not aligned to 16 bytes
    }
    return result;
}
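/* Editor's worked example (not part of the original sources; 64-bit build): the mapping
 * above serves requests of 1..64 bytes with bins holding 8/16/32/48/64-byte objects,
 * skipping the 16-byte-misaligned bins 2, 4 and 6:
 *     size 8  -> (8-1)>>3  = 0            -> bin 0 (8-byte objects)
 *     size 24 -> (24-1)>>3 = 2, |1 -> 3   -> bin 3 (32-byte objects)
 *     size 64 -> (64-1)>>3 = 7, |1 -> 7   -> bin 7 (64-byte objects)
 * getIndexOrObjectSize() below extends the same idea to the segregated and "fitting" bins.
 */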

/*
 * Depending on indexRequest, for a given size return either the index into the bin
 * for objects of this size, or the actual size of objects in this bin.
 */
template<bool indexRequest>
static unsigned int getIndexOrObjectSize (unsigned int size)
{
    if (size <= maxSmallObjectSize) { // selection from 8/16/24/32/40/48/56/64
        unsigned int index = getSmallObjectIndex( size );
        /* Bin 0 is for 8 bytes, bin 1 is for 16, and so forth */
        return indexRequest ? index : (index+1)<<3;
    }
    else if (size <= maxSegregatedObjectSize ) { // 80/96/112/128 / 160/192/224/256 / 320/384/448/512 / 640/768/896/1024
        unsigned int order = highestBitPos(size-1); // which group of bin sizes?
        MALLOC_ASSERT( 6<=order && order<=9, ASSERT_TEXT );
        if (indexRequest)
            return minSegregatedObjectIndex - (4*6) - 4 + (4*order) + ((size-1)>>(order-2));
        else {
            unsigned int alignment = 128 >> (9-order); // alignment in the group
            MALLOC_ASSERT( alignment==16 || alignment==32 || alignment==64 || alignment==128, ASSERT_TEXT );
            return alignUp(size,alignment);
        }
    }
    else {
        if( size <= fittingSize3 ) {
            if( size <= fittingSize2 ) {
                if( size <= fittingSize1 )
                    return indexRequest ? minFittingIndex : fittingSize1;
                else
                    return indexRequest ? minFittingIndex+1 : fittingSize2;
            } else
                return indexRequest ? minFittingIndex+2 : fittingSize3;
        } else {
            if( size <= fittingSize5 ) {
                if( size <= fittingSize4 )
                    return indexRequest ? minFittingIndex+3 : fittingSize4;
                else
                    return indexRequest ? minFittingIndex+4 : fittingSize5;
            } else {
                MALLOC_ASSERT( 0,ASSERT_TEXT ); // this should not happen
                return ~0U;
            }
        }
    }
}

static unsigned int getIndex (unsigned int size)
{
    return getIndexOrObjectSize</*indexRequest=*/true>(size);
}

static unsigned int getObjectSize (unsigned int size)
{
    return getIndexOrObjectSize</*indexRequest=*/false>(size);
}


void *BootStrapBlocks::allocate(MemoryPool *memPool, size_t size)
{
    FreeObject *result;

    MALLOC_ASSERT( size == sizeof(TLSData), ASSERT_TEXT );

    { // Lock with acquire
        MallocMutex::scoped_lock scoped_cs(bootStrapLock);

        if( bootStrapObjectList) {
            result = bootStrapObjectList;
            bootStrapObjectList = bootStrapObjectList->next;
        } else {
            if (!bootStrapBlock) {
                bootStrapBlock = memPool->getEmptyBlock(size);
                if (!bootStrapBlock) return NULL;
            }
            result = bootStrapBlock->bumpPtr;
            bootStrapBlock->bumpPtr = (FreeObject *)((uintptr_t)bootStrapBlock->bumpPtr - bootStrapBlock->objectSize);
            if ((uintptr_t)bootStrapBlock->bumpPtr < (uintptr_t)bootStrapBlock+sizeof(Block)) {
                bootStrapBlock->bumpPtr = NULL;
                bootStrapBlock->next = bootStrapBlockUsed;
                bootStrapBlockUsed = bootStrapBlock;
                bootStrapBlock = NULL;
            }
        }
    } // Unlock with release
    memset (result, 0, size);
    return (void*)result;
}

void BootStrapBlocks::free(void* ptr)
{
    MALLOC_ASSERT( ptr, ASSERT_TEXT );
    { // Lock with acquire
        MallocMutex::scoped_lock scoped_cs(bootStrapLock);
        ((FreeObject*)ptr)->next = bootStrapObjectList;
        bootStrapObjectList = (FreeObject*)ptr;
    } // Unlock with release
}

void BootStrapBlocks::reset()
{
    bootStrapBlock = bootStrapBlockUsed = NULL;
    bootStrapObjectList = NULL;
}

#if !(FREELIST_NONBLOCKING)
static MallocMutex publicFreeListLock; // lock for changes of publicFreeList
#endif

/********* End rough utility code  **************/

/* LifoList assumes zero initialization so a vector of it can be created
 * by just allocating some space with no call to constructor.
 * On Linux, it seems to be necessary to avoid linking with C++ libraries.
 *
 * By usage convention there is no race on the initialization.
 */
LifoList::LifoList( ) : top(nullptr)
{
    // MallocMutex assumes zero initialization
    memset(&lock, 0, sizeof(MallocMutex));
}

void LifoList::push(Block *block)
{
    MallocMutex::scoped_lock scoped_cs(lock);
    block->next = top.load(std::memory_order_relaxed);
    top.store(block, std::memory_order_relaxed);
}

Block *LifoList::pop()
{
    Block* block = nullptr;
    if (top.load(std::memory_order_relaxed)) {
        MallocMutex::scoped_lock scoped_cs(lock);
        block = top.load(std::memory_order_relaxed);
        if (block) {
            top.store(block->next, std::memory_order_relaxed);
        }
    }
    return block;
}

Block *LifoList::grab()
{
    Block *block = nullptr;
    if (top.load(std::memory_order_relaxed)) {
        MallocMutex::scoped_lock scoped_cs(lock);
        block = top.load(std::memory_order_relaxed);
        top.store(nullptr, std::memory_order_relaxed);
    }
    return block;
}
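/* Editor's note (not part of the original sources): pop() hands out one block at a time
 * and is used by OrphanedBlocks::get(), while grab() detaches the whole chain at once for
 * OrphanedBlocks::cleanup(). Both first test 'top' with a relaxed load outside the mutex,
 * so the common empty-list case costs a single atomic read and no lock acquisition.
 */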

/********* Thread and block related code      *************/

template<bool poolDestroy> void AllLargeBlocksList::releaseAll(Backend *backend) {
    LargeMemoryBlock *next, *lmb = loHead;
    loHead = NULL;

    for (; lmb; lmb = next) {
        next = lmb->gNext;
        if (poolDestroy) {
            // as it's pool destruction, no need to return the object to the backend,
            // only remove backrefs, as they are global
            removeBackRef(lmb->backRefIdx);
        } else {
            // clean g(Next|Prev) to prevent removing lmb
            // from AllLargeBlocksList inside returnLargeObject
            lmb->gNext = lmb->gPrev = NULL;
            backend->returnLargeObject(lmb);
        }
    }
}

TLSData* MemoryPool::getTLS(bool create)
{
    TLSData* tls = extMemPool.tlsPointerKey.getThreadMallocTLS();
    if (create && !tls)
        tls = extMemPool.tlsPointerKey.createTLS(this, &extMemPool.backend);
    return tls;
}

/*
 * Return the bin for the given size.
 */
inline Bin* TLSData::getAllocationBin(size_t size)
{
    return bin + getIndex(size);
}

/* Return an empty uninitialized block in a non-blocking fashion. */
Block *MemoryPool::getEmptyBlock(size_t size)
{
    TLSData* tls = getTLS(/*create=*/false);
    // try to use the per-thread cache, if TLS is available
    FreeBlockPool::ResOfGet resOfGet = tls?
        tls->freeSlabBlocks.getBlock() : FreeBlockPool::ResOfGet(NULL, false);
    Block *result = resOfGet.block;

    if (!result) { // not found in the local cache, ask the backend for slabs
        int num = resOfGet.lastAccMiss? Backend::numOfSlabAllocOnMiss : 1;
        BackRefIdx backRefIdx[Backend::numOfSlabAllocOnMiss];

        result = static_cast<Block*>(extMemPool.backend.getSlabBlock(num));
        if (!result) return NULL;

        if (!extMemPool.userPool())
            for (int i=0; i<num; i++) {
                backRefIdx[i] = BackRefIdx::newBackRef(/*largeObj=*/false);
                if (backRefIdx[i].isInvalid()) {
                    // roll back resource allocation
                    for (int j=0; j<i; j++)
                        removeBackRef(backRefIdx[j]);
                    Block *b = result;
                    for (int j=0; j<num; b=(Block*)((uintptr_t)b+slabSize), j++)
                        extMemPool.backend.putSlabBlock(b);
                    return NULL;
                }
            }
        // resources were allocated, register blocks
        Block *b = result;
        for (int i=0; i<num; b=(Block*)((uintptr_t)b+slabSize), i++) {
            // slab block in user's pool must have invalid backRefIdx
            if (extMemPool.userPool()) {
                new (&b->backRefIdx) BackRefIdx();
            } else {
                setBackRef(backRefIdx[i], b);
                b->backRefIdx = backRefIdx[i];
            }
            b->tlsPtr.store(tls, std::memory_order_relaxed);
            b->poolPtr = this;
            // all but the first one go to the per-thread pool
            if (i > 0) {
                MALLOC_ASSERT(tls, ASSERT_TEXT);
                tls->freeSlabBlocks.returnBlock(b);
            }
        }
    }
    MALLOC_ASSERT(result, ASSERT_TEXT);
    result->initEmptyBlock(tls, size);
    STAT_increment(getThreadId(), getIndex(result->objectSize), allocBlockNew);
    return result;
}

void MemoryPool::returnEmptyBlock(Block *block, bool poolTheBlock)
{
    block->reset();
    if (poolTheBlock) {
        getTLS(/*create=*/false)->freeSlabBlocks.returnBlock(block);
    } else {
        // slab blocks in user's pools do not have a valid backRefIdx
        if (!extMemPool.userPool())
            removeBackRef(*(block->getBackRefIdx()));
        extMemPool.backend.putSlabBlock(block);
    }
}

bool ExtMemoryPool::init(intptr_t poolId, rawAllocType rawAlloc,
                         rawFreeType rawFree, size_t granularity,
                         bool keepAllMemory, bool fixedPool)
{
    this->poolId = poolId;
    this->rawAlloc = rawAlloc;
    this->rawFree = rawFree;
    this->granularity = granularity;
    this->keepAllMemory = keepAllMemory;
    this->fixedPool = fixedPool;
    this->delayRegsReleasing = false;
    if (!initTLS())
        return false;
    loc.init(this);
    backend.init(this);
    MALLOC_ASSERT(isPoolValid(), NULL);
    return true;
}

bool ExtMemoryPool::initTLS() { return tlsPointerKey.init(); }

bool MemoryPool::init(intptr_t poolId, const MemPoolPolicy *policy)
{
    if (!extMemPool.init(poolId, policy->pAlloc, policy->pFree,
                         policy->granularity?
                             policy->granularity : defaultGranularity,
                         policy->keepAllMemory, policy->fixedPool))
        return false;
    {
        MallocMutex::scoped_lock lock(memPoolListLock);
        next = defaultMemPool->next;
        defaultMemPool->next = this;
        prev = defaultMemPool;
        if (next)
            next->prev = this;
    }
    return true;
}

bool MemoryPool::reset()
{
    MALLOC_ASSERT(extMemPool.userPool(), "No reset for the system pool.");
    // memory is not released during pool reset
    // TODO: mark regions to release the unused ones on the next reset()
    extMemPool.delayRegionsReleasing(true);

    bootStrapBlocks.reset();
    extMemPool.lmbList.releaseAll</*poolDestroy=*/false>(&extMemPool.backend);
    if (!extMemPool.reset())
        return false;

    if (!extMemPool.initTLS())
        return false;
    extMemPool.delayRegionsReleasing(false);
    return true;
}

bool MemoryPool::destroy()
{
#if __TBB_MALLOC_LOCACHE_STAT
    extMemPool.loc.reportStat(stdout);
#endif
#if __TBB_MALLOC_BACKEND_STAT
    extMemPool.backend.reportStat(stdout);
#endif
    {
        MallocMutex::scoped_lock lock(memPoolListLock);
        // remove itself from the global pool list
        if (prev)
            prev->next = next;
        if (next)
            next->prev = prev;
    }
    // slab blocks in a non-default pool do not have backreferences,
    // only large objects do
    if (extMemPool.userPool())
        extMemPool.lmbList.releaseAll</*poolDestroy=*/true>(&extMemPool.backend);
    else {
        // only one non-userPool() is supported now
        MALLOC_ASSERT(this==defaultMemPool, NULL);
        // Here and below in extMemPool.destroy(), do not restore the initial state
        // for a user pool, because it is just about to be released. But the system
        // pool is restored, so that it does not need to be zeroed again on a subsequent reload.
        bootStrapBlocks.reset();
        extMemPool.orphanedBlocks.reset();
    }
    return extMemPool.destroy();
}

void MemoryPool::onThreadShutdown(TLSData *tlsData)
{
    if (tlsData) { // might be called for "empty" TLS
        tlsData->release();
        bootStrapBlocks.free(tlsData);
        clearTLS();
    }
}

#if MALLOC_DEBUG
void Bin::verifyTLSBin (size_t size) const
{
    /* The debug version verifies the TLSBin as needed */
    uint32_t objSize = getObjectSize(size);

    if (activeBlk) {
        MALLOC_ASSERT( activeBlk->isOwnedByCurrentThread(), ASSERT_TEXT );
        MALLOC_ASSERT( activeBlk->objectSize == objSize, ASSERT_TEXT );
#if MALLOC_DEBUG>1
        for (Block* temp = activeBlk->next; temp; temp=temp->next) {
            MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT );
            MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT );
            MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
            MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
            if (temp->next) {
                MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
            }
        }
        for (Block* temp = activeBlk->previous; temp; temp=temp->previous) {
            MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT );
            MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT );
            MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
            MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
            if (temp->previous) {
                MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
            }
        }
#endif /* MALLOC_DEBUG>1 */
    }
}
#else /* MALLOC_DEBUG */
inline void Bin::verifyTLSBin (size_t) const { }
#endif /* MALLOC_DEBUG */

/*
 * Add a block to the start of this tls bin list.
 */
void Bin::pushTLSBin(Block* block)
{
    /* The objectSize should be defined and not a parameter
       because the function is applied to partially filled blocks as well */
    unsigned int size = block->objectSize;

    MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
    MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );
    MALLOC_ASSERT( block->next == NULL, ASSERT_TEXT );
    MALLOC_ASSERT( block->previous == NULL, ASSERT_TEXT );

    MALLOC_ASSERT( this, ASSERT_TEXT );
    verifyTLSBin(size);

    block->next = activeBlk;
    if( activeBlk ) {
        block->previous = activeBlk->previous;
        activeBlk->previous = block;
        if( block->previous )
            block->previous->next = block;
    } else {
        activeBlk = block;
    }

    verifyTLSBin(size);
}

/*
 * Take a block out of its tls bin (e.g. before removal).
 */
void Bin::outofTLSBin(Block* block)
{
    unsigned int size = block->objectSize;

    MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
    MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );

    MALLOC_ASSERT( this, ASSERT_TEXT );
    verifyTLSBin(size);

    if (block == activeBlk) {
        activeBlk = block->previous?
            block->previous : block->next;
    }
    /* Unlink the block */
    if (block->previous) {
        MALLOC_ASSERT( block->previous->next == block, ASSERT_TEXT );
        block->previous->next = block->next;
    }
    if (block->next) {
        MALLOC_ASSERT( block->next->previous == block, ASSERT_TEXT );
        block->next->previous = block->previous;
    }
    block->next = NULL;
    block->previous = NULL;

    verifyTLSBin(size);
}

Block* Bin::getPrivatizedFreeListBlock()
{
    Block* block;
    MALLOC_ASSERT( this, ASSERT_TEXT );
    // if this method is called, active block usage must be unsuccessful
    MALLOC_ASSERT( !activeBlk && !mailbox.load(std::memory_order_relaxed) || activeBlk && activeBlk->isFull, ASSERT_TEXT );

    // the counter should be changed    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    if (!mailbox.load(std::memory_order_acquire)) // hotpath is empty mailbox
        return NULL;
    else { // mailbox is not empty, take lock and inspect it
        MallocMutex::scoped_lock scoped_cs(mailLock);
        block = mailbox.load(std::memory_order_relaxed);
        if( block ) {
            MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
            MALLOC_ASSERT( !isNotForUse(block->nextPrivatizable.load(std::memory_order_relaxed)), ASSERT_TEXT );
            mailbox.store(block->nextPrivatizable.load(std::memory_order_relaxed), std::memory_order_relaxed);
            block->nextPrivatizable.store((Block*)this, std::memory_order_relaxed);
        }
    }
    if( block ) {
        MALLOC_ASSERT( isSolidPtr(block->publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
        block->privatizePublicFreeList();
        block->adjustPositionInBin(this);
    }
    return block;
}

void Bin::addPublicFreeListBlock(Block* block)
{
    MallocMutex::scoped_lock scoped_cs(mailLock);
    block->nextPrivatizable.store(mailbox.load(std::memory_order_relaxed), std::memory_order_relaxed);
    mailbox.store(block, std::memory_order_relaxed);
}

// Process publicly freed objects in all blocks and return empty blocks
// to the backend in order to reduce overall footprint.
bool Bin::cleanPublicFreeLists()
{
    Block* block;
    if (!mailbox.load(std::memory_order_acquire))
        return false;
    else {
        // Grab all the blocks in the mailbox
        MallocMutex::scoped_lock scoped_cs(mailLock);
        block = mailbox.load(std::memory_order_relaxed);
        mailbox.store(NULL, std::memory_order_relaxed);
    }
    bool released = false;
    while (block) {
        MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
        Block* tmp = block->nextPrivatizable.load(std::memory_order_relaxed);
        block->nextPrivatizable.store((Block*)this, std::memory_order_relaxed);
        block->privatizePublicFreeList();
        if (block->empty()) {
            processEmptyBlock(block, /*poolTheBlock=*/false);
            released = true;
        } else
            block->adjustPositionInBin(this);
        block = tmp;
    }
    return released;
}
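/* Editor's sketch of the foreign-free protocol used around here (not part of the original
 * sources): when thread B frees an object that belongs to a block owned by thread A,
 *     B: Block::freePublicObject(obj)      // CAS-pushes obj onto publicFreeList
 *        -> Bin::addPublicFreeListBlock()  // on the first public free, links the block
 *                                          //    into owner A's mailbox under mailLock
 *     A: Bin::getPrivatizedFreeListBlock() or Bin::cleanPublicFreeLists()
 *        -> Block::privatizePublicFreeList()  // exchanges publicFreeList and merges the
 *                                             //    objects into the private freeList
 * After privatization, objects freed remotely can be reallocated by the owner without
 * further synchronization.
 */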

bool Block::adjustFullness()
{
    if (bumpPtr) {
        /* If we are still using a bump ptr for this block, it is empty enough to use. */
        STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
        isFull = false;
    } else {
        const float threshold = (slabSize - sizeof(Block)) * (1 - emptyEnoughRatio);
        /* allocatedCount shows how many objects in the block are in use; however it still counts
         * objects freed by other threads; so a prior call to privatizePublicFreeList() is recommended */
        isFull = (allocatedCount*objectSize > threshold) ? true : false;
#if COLLECT_STATISTICS
        if (isFull)
            STAT_increment(getThreadId(), getIndex(objectSize), examineNotEmpty);
        else
            STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
#endif
    }
    return isFull;
}

// This method resides in class Block, and not in class Bin, in order to avoid
// calling getAllocationBin on a reasonably hot path in Block::freeOwnObject
void Block::adjustPositionInBin(Bin* bin/*=NULL*/)
{
    // If the block was full, but became empty enough to use,
    // move it to the front of the list
    if (isFull && !adjustFullness()) {
        if (!bin)
            bin = tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize);
        bin->moveBlockToFront(this);
    }
}

/* Restore the bump pointer for an empty block that is planned for use */
void Block::restoreBumpPtr()
{
    MALLOC_ASSERT( allocatedCount == 0, ASSERT_TEXT );
    MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
    STAT_increment(getThreadId(), getIndex(objectSize), freeRestoreBumpPtr);
    bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);
    freeList = NULL;
    isFull = false;
}

void Block::freeOwnObject(void *object)
{
    tlsPtr.load(std::memory_order_relaxed)->markUsed();
    allocatedCount--;
    MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
#if COLLECT_STATISTICS
    // Note that getAllocationBin is not called on the hottest path with statistics off.
    if (tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize)->getActiveBlock() != this)
        STAT_increment(getThreadId(), getIndex(objectSize), freeToInactiveBlock);
    else
        STAT_increment(getThreadId(), getIndex(objectSize), freeToActiveBlock);
#endif
    if (empty()) {
        // If the last object of a slab is freed, the slab cannot be marked full
        MALLOC_ASSERT(!isFull, ASSERT_TEXT);
        tlsPtr.load(std::memory_order_relaxed)->getAllocationBin(objectSize)->processEmptyBlock(this, /*poolTheBlock=*/true);
    } else { // hot path
        FreeObject *objectToFree = findObjectToFree(object);
        objectToFree->next = freeList;
        freeList = objectToFree;
        adjustPositionInBin();
    }
}

void Block::freePublicObject (FreeObject *objectToFree)
{
    FreeObject* localPublicFreeList{};

    MALLOC_ITT_SYNC_RELEASING(&publicFreeList);
#if FREELIST_NONBLOCKING
    // TBB_REVAMP_TODO: make it non atomic in non-blocking scenario
    localPublicFreeList = publicFreeList.load(std::memory_order_relaxed);
    do {
        objectToFree->next = localPublicFreeList;
        // no backoff necessary because trying to make change, not waiting for a change
    } while( !publicFreeList.compare_exchange_strong(localPublicFreeList, objectToFree) );
#else
    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    {
        MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
        localPublicFreeList = objectToFree->next = publicFreeList;
        publicFreeList = objectToFree;
    }
#endif

    if( localPublicFreeList==NULL ) {
        // if the block is abandoned, its nextPrivatizable pointer should be UNUSABLE
        // otherwise, it should point to the bin the block belongs to.
        // reading nextPrivatizable is thread-safe below, because:
        // 1) the executing thread atomically got publicFreeList==NULL and changed it to non-NULL;
        // 2) only the owning thread can change it back to NULL,
        // 3) but it can not be done until the block is put to the mailbox
        // So the executing thread is now the only one that can change nextPrivatizable
        Block* next = nextPrivatizable.load(std::memory_order_acquire);
        if( !isNotForUse(next) ) {
            MALLOC_ASSERT( next!=nullptr, ASSERT_TEXT );
            Bin* theBin = (Bin*) next;
#if MALLOC_DEBUG && TBB_REVAMP_TODO
            // FIXME: The thread that returns the block is not the block's owner.
            // The below assertion compares 'theBin' against the caller's local bin, thus, it always fails.
            // Need to find a way to get the correct remote bin for comparison.
            { // check that nextPrivatizable points to the bin the block belongs to
                uint32_t index = getIndex( objectSize );
                TLSData* tls = getThreadMallocTLS();
                MALLOC_ASSERT( theBin==tls->bin+index, ASSERT_TEXT );
            }
#endif // MALLOC_DEBUG
            theBin->addPublicFreeListBlock(this);
        }
    }
    STAT_increment(getThreadId(), ThreadCommonCounters, freeToOtherThread);
    STAT_increment(ownerTid.load(std::memory_order_relaxed), getIndex(objectSize), freeByOtherThread);
}

// Make objects freed by other threads available for use again
void Block::privatizePublicFreeList( bool reset )
{
    FreeObject *localPublicFreeList;
    // If reset is false, publicFreeList should not be zeroed but set to UNUSABLE
    // to properly synchronize with other threads freeing objects to this slab.
    const intptr_t endMarker = reset ?
        0 : UNUSABLE;

    // Only the owner thread may reset the pointer to NULL
    MALLOC_ASSERT( isOwnedByCurrentThread() || !reset, ASSERT_TEXT );
#if FREELIST_NONBLOCKING
    localPublicFreeList = publicFreeList.exchange((FreeObject*)endMarker);
#else
    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    {
        MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
        localPublicFreeList = publicFreeList;
        publicFreeList = endMarker;
    }
#endif
    MALLOC_ITT_SYNC_ACQUIRED(&publicFreeList);
    MALLOC_ASSERT( !(reset && isNotForUse(publicFreeList)), ASSERT_TEXT );

    // publicFreeList must have been UNUSABLE or valid, but not NULL
    MALLOC_ASSERT( localPublicFreeList!=NULL, ASSERT_TEXT );
    if( isSolidPtr(localPublicFreeList) ) {
        MALLOC_ASSERT( allocatedCount <= (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
        /* other threads did not change the counter freeing our blocks */
        allocatedCount--;
        FreeObject *temp = localPublicFreeList;
        while( isSolidPtr(temp->next) ){ // the list will end with either NULL or UNUSABLE
            temp = temp->next;
            allocatedCount--;
            MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
        }
        /* merge with local freeList */
        temp->next = freeList;
        freeList = localPublicFreeList;
        STAT_increment(getThreadId(), getIndex(objectSize), allocPrivatized);
    }
}

void Block::privatizeOrphaned(TLSData *tls, unsigned index)
{
    Bin* bin = tls->bin + index;
    STAT_increment(getThreadId(), index, allocBlockPublic);
    next = NULL;
    previous = NULL;
    MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) != NULL, ASSERT_TEXT );
    /* There is not a race here since no other thread owns this block */
    markOwned(tls);
    // It is safe to change nextPrivatizable, as publicFreeList is not null
    MALLOC_ASSERT( isNotForUse(nextPrivatizable.load(std::memory_order_relaxed)), ASSERT_TEXT );
    nextPrivatizable.store((Block*)bin, std::memory_order_relaxed);
    // the next call is required to change publicFreeList to 0
    privatizePublicFreeList();
    if( empty() ) {
        restoreBumpPtr();
    } else {
        adjustFullness(); // check the block fullness and set isFull
    }
    MALLOC_ASSERT( !isNotForUse(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
}


bool Block::readyToShare()
{
    FreeObject* oldVal = NULL;
#if FREELIST_NONBLOCKING
    publicFreeList.compare_exchange_strong(oldVal, (FreeObject*)UNUSABLE);
#else
    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    {
        MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
        if ( (oldVal=publicFreeList)==NULL )
            (intptr_t&)(publicFreeList) = UNUSABLE;
    }
#endif
    return oldVal==NULL;
}

void Block::shareOrphaned(intptr_t binTag, unsigned index)
{
    MALLOC_ASSERT( binTag, ASSERT_TEXT );
    // unreferenced formal parameter warning
    tbb::detail::suppress_unused_warning(index);
    STAT_increment(getThreadId(), index, freeBlockPublic);
    markOrphaned();
    bool syncOnMailbox = false;
    if ((intptr_t)nextPrivatizable.load(std::memory_order_relaxed) == binTag) {
        // First check passed: the block is not in the mailbox yet.
        // Need to set publicFreeList to non-zero, so other threads
        // will not change nextPrivatizable and it can be zeroed.
        if ( !readyToShare() ) {
            // another thread freed an object; we need to wait until it finishes.
            // There is no need for exponential backoff, as the wait here is not for a lock;
            // but we need to yield, so the thread we are waiting for has a chance to run.
            // TODO: add a pause to also be friendly to hyperthreads
            int count = 256;
            while ((intptr_t)nextPrivatizable.load(std::memory_order_relaxed) == binTag) {
                if (--count==0) {
                    do_yield();
                    count = 256;
                }
            }
        }
    }
    MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) !=NULL, ASSERT_TEXT );
    // now it is safe to change our data
    previous = NULL;
    // it is the caller's responsibility to ensure that the list of blocks
    // formed by nextPrivatizable pointers is kept consistent if required.
    // if only called from thread shutdown code, it does not matter.
    nextPrivatizable.store((Block*)UNUSABLE, std::memory_order_relaxed);
}

void Block::cleanBlockHeader()
{
    next = nullptr;
    previous = nullptr;
    freeList = nullptr;
    allocatedCount = 0;
    isFull = false;
    tlsPtr.store(nullptr, std::memory_order_relaxed);

    publicFreeList.store(nullptr, std::memory_order_relaxed);
}

void Block::initEmptyBlock(TLSData *tls, size_t size)
{
    // Having getIndex and getObjectSize called next to each other
    // allows better compiler optimization as they basically share the code.
    unsigned int index = getIndex(size);
    unsigned int objSz = getObjectSize(size);

    cleanBlockHeader();
    objectSize = objSz;
    markOwned(tls);
    // the bump pointer should be prepared for the first allocation - thus move it down by objectSize
    bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);

    // each block should have the address where the head of the list of "privatizable" blocks is kept
    // the only exception is a block for bootstrap which is initialized when TLS is yet NULL
    nextPrivatizable.store( tls? (Block*)(tls->bin + index) : nullptr, std::memory_order_relaxed);
    TRACEF(( "[ScalableMalloc trace] Empty block %p is initialized, owner is %ld, objectSize is %d, bumpPtr is %p\n",
             this, tlsPtr.load(std::memory_order_relaxed) ?
getThreadId() : -1, objectSize, bumpPtr )); 1593 } 1594 1595 Block *OrphanedBlocks::get(TLSData *tls, unsigned int size) 1596 { 1597 // TODO: try to use index from getAllocationBin 1598 unsigned int index = getIndex(size); 1599 Block *block = bins[index].pop(); 1600 if (block) { 1601 MALLOC_ITT_SYNC_ACQUIRED(bins+index); 1602 block->privatizeOrphaned(tls, index); 1603 } 1604 return block; 1605 } 1606 1607 void OrphanedBlocks::put(intptr_t binTag, Block *block) 1608 { 1609 unsigned int index = getIndex(block->getSize()); 1610 block->shareOrphaned(binTag, index); 1611 MALLOC_ITT_SYNC_RELEASING(bins+index); 1612 bins[index].push(block); 1613 } 1614 1615 void OrphanedBlocks::reset() 1616 { 1617 for (uint32_t i=0; i<numBlockBinLimit; i++) 1618 new (bins+i) LifoList(); 1619 } 1620 1621 bool OrphanedBlocks::cleanup(Backend* backend) 1622 { 1623 bool released = false; 1624 for (uint32_t i=0; i<numBlockBinLimit; i++) { 1625 Block* block = bins[i].grab(); 1626 MALLOC_ITT_SYNC_ACQUIRED(bins+i); 1627 while (block) { 1628 Block* next = block->next; 1629 block->privatizePublicFreeList( /*reset=*/false ); // do not set publicFreeList to NULL 1630 if (block->empty()) { 1631 block->reset(); 1632 // slab blocks in user's pools do not have valid backRefIdx 1633 if (!backend->inUserPool()) 1634 removeBackRef(*(block->getBackRefIdx())); 1635 backend->putSlabBlock(block); 1636 released = true; 1637 } else { 1638 MALLOC_ITT_SYNC_RELEASING(bins+i); 1639 bins[i].push(block); 1640 } 1641 block = next; 1642 } 1643 } 1644 return released; 1645 } 1646 1647 FreeBlockPool::ResOfGet FreeBlockPool::getBlock() 1648 { 1649 Block *b = head.exchange(NULL); 1650 1651 if (b) { 1652 size--; 1653 Block *newHead = b->next; 1654 lastAccessMiss = false; 1655 head.store(newHead, std::memory_order_release); 1656 } else { 1657 lastAccessMiss = true; 1658 } 1659 return ResOfGet(b, lastAccessMiss); 1660 } 1661 1662 void FreeBlockPool::returnBlock(Block *block) 1663 { 1664 MALLOC_ASSERT( size <= POOL_HIGH_MARK, ASSERT_TEXT ); 1665 Block *localHead = head.exchange(NULL); 1666 1667 if (!localHead) { 1668 size = 0; // head was stolen by externalClean, correct size accordingly 1669 } else if (size == POOL_HIGH_MARK) { 1670 // release cold blocks and add hot one, 1671 // so keep POOL_LOW_MARK-1 blocks and add new block to head 1672 Block *headToFree = localHead, *helper; 1673 for (int i=0; i<POOL_LOW_MARK-2; i++) 1674 headToFree = headToFree->next; 1675 Block *last = headToFree; 1676 headToFree = headToFree->next; 1677 last->next = NULL; 1678 size = POOL_LOW_MARK-1; 1679 for (Block *currBl = headToFree; currBl; currBl = helper) { 1680 helper = currBl->next; 1681 // slab blocks in user's pools do not have valid backRefIdx 1682 if (!backend->inUserPool()) 1683 removeBackRef(currBl->backRefIdx); 1684 backend->putSlabBlock(currBl); 1685 } 1686 } 1687 size++; 1688 block->next = localHead; 1689 head.store(block, std::memory_order_release); 1690 } 1691 1692 bool FreeBlockPool::externalCleanup() 1693 { 1694 Block *helper; 1695 bool released = false; 1696 1697 for (Block *currBl=head.exchange(NULL); currBl; currBl=helper) { 1698 helper = currBl->next; 1699 // slab blocks in user's pools do not have valid backRefIdx 1700 if (!backend->inUserPool()) 1701 removeBackRef(currBl->backRefIdx); 1702 backend->putSlabBlock(currBl); 1703 released = true; 1704 } 1705 return released; 1706 } 1707 1708 /* Prepare the block for returning to FreeBlockPool */ 1709 void Block::reset() 1710 { 1711 // it is caller's responsibility to ensure no data is lost before calling 
this 1712 MALLOC_ASSERT( allocatedCount==0, ASSERT_TEXT ); 1713 MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT ); 1714 if (!isStartupAllocObject()) 1715 STAT_increment(getThreadId(), getIndex(objectSize), freeBlockBack); 1716 1717 cleanBlockHeader(); 1718 1719 nextPrivatizable.store(nullptr, std::memory_order_relaxed); 1720 1721 objectSize = 0; 1722 // for an empty block, bump pointer should point right after the end of the block 1723 bumpPtr = (FreeObject *)((uintptr_t)this + slabSize); 1724 } 1725 1726 inline void Bin::setActiveBlock (Block *block) 1727 { 1728 // MALLOC_ASSERT( bin, ASSERT_TEXT ); 1729 MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT ); 1730 // it is the caller responsibility to keep bin consistence (i.e. ensure this block is in the bin list) 1731 activeBlk = block; 1732 } 1733 1734 inline Block* Bin::setPreviousBlockActive() 1735 { 1736 MALLOC_ASSERT( activeBlk, ASSERT_TEXT ); 1737 Block* temp = activeBlk->previous; 1738 if( temp ) { 1739 MALLOC_ASSERT( !(temp->isFull), ASSERT_TEXT ); 1740 activeBlk = temp; 1741 } 1742 return temp; 1743 } 1744 1745 inline bool Block::isOwnedByCurrentThread() const { 1746 return tlsPtr.load(std::memory_order_relaxed) && ownerTid.isCurrentThreadId(); 1747 } 1748 1749 FreeObject *Block::findObjectToFree(const void *object) const 1750 { 1751 FreeObject *objectToFree; 1752 // Due to aligned allocations, a pointer passed to scalable_free 1753 // might differ from the address of internally allocated object. 1754 // Small objects however should always be fine. 1755 if (objectSize <= maxSegregatedObjectSize) 1756 objectToFree = (FreeObject*)object; 1757 // "Fitting size" allocations are suspicious if aligned higher than naturally 1758 else { 1759 if ( ! isAligned(object,2*fittingAlignment) ) 1760 // TODO: the above check is questionable - it gives false negatives in ~50% cases, 1761 // so might even be slower in average than unconditional use of findAllocatedObject. 
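            // (an address not aligned to 2*fittingAlignment cannot have come from the
            //  align-up path in allocateAligned, so it must be the object's own start)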
1762 // here it should be a "real" object 1763 objectToFree = (FreeObject*)object; 1764 else 1765 // here object can be an aligned address, so applying additional checks 1766 objectToFree = findAllocatedObject(object); 1767 MALLOC_ASSERT( isAligned(objectToFree,fittingAlignment), ASSERT_TEXT ); 1768 } 1769 MALLOC_ASSERT( isProperlyPlaced(objectToFree), ASSERT_TEXT ); 1770 1771 return objectToFree; 1772 } 1773 1774 void TLSData::release() 1775 { 1776 memPool->extMemPool.allLocalCaches.unregisterThread(this); 1777 externalCleanup(/*cleanOnlyUnused=*/false, /*cleanBins=*/false); 1778 1779 for (unsigned index = 0; index < numBlockBins; index++) { 1780 Block *activeBlk = bin[index].getActiveBlock(); 1781 if (!activeBlk) 1782 continue; 1783 Block *threadlessBlock = activeBlk->previous; 1784 bool syncOnMailbox = false; 1785 while (threadlessBlock) { 1786 Block *threadBlock = threadlessBlock->previous; 1787 if (threadlessBlock->empty()) { 1788 /* we destroy the thread, so not use its block pool */ 1789 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false); 1790 } else { 1791 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock); 1792 syncOnMailbox = true; 1793 } 1794 threadlessBlock = threadBlock; 1795 } 1796 threadlessBlock = activeBlk; 1797 while (threadlessBlock) { 1798 Block *threadBlock = threadlessBlock->next; 1799 if (threadlessBlock->empty()) { 1800 /* we destroy the thread, so not use its block pool */ 1801 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false); 1802 } else { 1803 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock); 1804 syncOnMailbox = true; 1805 } 1806 threadlessBlock = threadBlock; 1807 } 1808 bin[index].resetActiveBlock(); 1809 1810 if (syncOnMailbox) { 1811 // Although, we synchronized on nextPrivatizable inside a block, we still need to 1812 // synchronize on the bin lifetime because the thread releasing an object into the public 1813 // free list is touching the bin (mailbox and mailLock) 1814 MallocMutex::scoped_lock scoped_cs(bin[index].mailLock); 1815 } 1816 } 1817 } 1818 1819 1820 #if MALLOC_CHECK_RECURSION 1821 // TODO: Use dedicated heap for this 1822 1823 /* 1824 * It's a special kind of allocation that can be used when malloc is 1825 * not available (either during startup or when malloc was already called and 1826 * we are, say, inside pthread_setspecific's call). 1827 * Block can contain objects of different sizes, 1828 * allocations are performed by moving bump pointer and increasing of object counter, 1829 * releasing is done via counter of objects allocated in the block 1830 * or moving bump pointer if releasing object is on a bound. 1831 * TODO: make bump pointer to grow to the same backward direction as all the others. 
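 * Each object is prefixed with a size_t holding its requested size; StartupBlock::msize()
 * reads that prefix, and free() uses it to roll the bump pointer back when the most
 * recently allocated object is released.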
1832 */ 1833 1834 class StartupBlock : public Block { 1835 size_t availableSize() const { 1836 return slabSize - ((uintptr_t)bumpPtr - (uintptr_t)this); 1837 } 1838 static StartupBlock *getBlock(); 1839 public: 1840 static FreeObject *allocate(size_t size); 1841 static size_t msize(void *ptr) { return *((size_t*)ptr - 1); } 1842 void free(void *ptr); 1843 }; 1844 1845 static MallocMutex startupMallocLock; 1846 static StartupBlock *firstStartupBlock; 1847 1848 StartupBlock *StartupBlock::getBlock() 1849 { 1850 BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/false); 1851 if (backRefIdx.isInvalid()) return NULL; 1852 1853 StartupBlock *block = static_cast<StartupBlock*>( 1854 defaultMemPool->extMemPool.backend.getSlabBlock(1)); 1855 if (!block) return NULL; 1856 1857 block->cleanBlockHeader(); 1858 setBackRef(backRefIdx, block); 1859 block->backRefIdx = backRefIdx; 1860 // use startupAllocObjSizeMark to mark objects from startup block marker 1861 block->objectSize = startupAllocObjSizeMark; 1862 block->bumpPtr = (FreeObject *)((uintptr_t)block + sizeof(StartupBlock)); 1863 return block; 1864 } 1865 1866 FreeObject *StartupBlock::allocate(size_t size) 1867 { 1868 FreeObject *result; 1869 StartupBlock *newBlock = NULL; 1870 bool newBlockUnused = false; 1871 1872 /* Objects must be aligned on their natural bounds, 1873 and objects bigger than word on word's bound. */ 1874 size = alignUp(size, sizeof(size_t)); 1875 // We need size of an object to implement msize. 1876 size_t reqSize = size + sizeof(size_t); 1877 { 1878 MallocMutex::scoped_lock scoped_cs(startupMallocLock); 1879 // Re-check whether we need a new block (conditions might have changed) 1880 if (!firstStartupBlock || firstStartupBlock->availableSize() < reqSize) { 1881 if (!newBlock) { 1882 newBlock = StartupBlock::getBlock(); 1883 if (!newBlock) return NULL; 1884 } 1885 newBlock->next = (Block*)firstStartupBlock; 1886 if (firstStartupBlock) 1887 firstStartupBlock->previous = (Block*)newBlock; 1888 firstStartupBlock = newBlock; 1889 } 1890 result = firstStartupBlock->bumpPtr; 1891 firstStartupBlock->allocatedCount++; 1892 firstStartupBlock->bumpPtr = 1893 (FreeObject *)((uintptr_t)firstStartupBlock->bumpPtr + reqSize); 1894 } 1895 1896 // keep object size at the negative offset 1897 *((size_t*)result) = size; 1898 return (FreeObject*)((size_t*)result+1); 1899 } 1900 1901 void StartupBlock::free(void *ptr) 1902 { 1903 Block* blockToRelease = NULL; 1904 { 1905 MallocMutex::scoped_lock scoped_cs(startupMallocLock); 1906 1907 MALLOC_ASSERT(firstStartupBlock, ASSERT_TEXT); 1908 MALLOC_ASSERT(startupAllocObjSizeMark==objectSize 1909 && allocatedCount>0, ASSERT_TEXT); 1910 MALLOC_ASSERT((uintptr_t)ptr>=(uintptr_t)this+sizeof(StartupBlock) 1911 && (uintptr_t)ptr+StartupBlock::msize(ptr)<=(uintptr_t)this+slabSize, 1912 ASSERT_TEXT); 1913 if (0 == --allocatedCount) { 1914 if (this == firstStartupBlock) 1915 firstStartupBlock = (StartupBlock*)firstStartupBlock->next; 1916 if (previous) 1917 previous->next = next; 1918 if (next) 1919 next->previous = previous; 1920 blockToRelease = this; 1921 } else if ((uintptr_t)ptr + StartupBlock::msize(ptr) == (uintptr_t)bumpPtr) { 1922 // last object in the block released 1923 FreeObject *newBump = (FreeObject*)((size_t*)ptr - 1); 1924 MALLOC_ASSERT((uintptr_t)newBump>(uintptr_t)this+sizeof(StartupBlock), 1925 ASSERT_TEXT); 1926 bumpPtr = newBump; 1927 } 1928 } 1929 if (blockToRelease) { 1930 blockToRelease->previous = blockToRelease->next = NULL; 1931 
defaultMemPool->returnEmptyBlock(blockToRelease, /*poolTheBlock=*/false); 1932 } 1933 } 1934 1935 #endif /* MALLOC_CHECK_RECURSION */ 1936 1937 /********* End thread related code *************/ 1938 1939 /********* Library initialization *************/ 1940 1941 //! Value indicating the state of initialization. 1942 /* 0 = initialization not started. 1943 * 1 = initialization started but not finished. 1944 * 2 = initialization finished. 1945 * In theory, we only need values 0 and 2. But value 1 is nonetheless 1946 * useful for detecting errors in the double-check pattern. 1947 */ 1948 static std::atomic<intptr_t> mallocInitialized{0}; // implicitly initialized to 0 1949 static MallocMutex initMutex; 1950 1951 /** The leading "\0" is here so that applying "strings" to the binary 1952 delivers a clean result. */ 1953 static char VersionString[] = "\0" TBBMALLOC_VERSION_STRINGS; 1954 1955 #if USE_PTHREAD && (__TBB_SOURCE_DIRECTLY_INCLUDED || __TBB_USE_DLOPEN_REENTRANCY_WORKAROUND) 1956 1957 /* Decrease race interval between dynamic library unloading and pthread key 1958 destructor. Protect only Pthreads with supported unloading. */ 1959 class ShutdownSync { 1960 /* flag is the number of threads in pthread key dtor body 1961 (i.e., between threadDtorStart() and threadDtorDone()) 1962 or the signal to skip dtor, if flag < 0 */ 1963 std::atomic<intptr_t> flag; 1964 static const intptr_t skipDtor = INTPTR_MIN/2; 1965 public: 1966 void init() { flag.store(0, std::memory_order_release); } 1967 /* Suppose that 2*abs(skipDtor) or more threads never call threadDtorStart() 1968 simultaneously, so flag never becomes negative because of that. */ 1969 bool threadDtorStart() { 1970 if (flag.load(std::memory_order_acquire) < 0) 1971 return false; 1972 if (++flag <= 0) { // note that new value returned 1973 flag.fetch_sub(1); // flag is spoiled by us, restore it 1974 return false; 1975 } 1976 return true; 1977 } 1978 void threadDtorDone() { 1979 flag.fetch_sub(1); 1980 } 1981 void processExit() { 1982 if (flag.fetch_add(skipDtor) != 0) { 1983 SpinWaitUntilEq(flag, skipDtor); 1984 } 1985 } 1986 }; 1987 1988 #else 1989 1990 class ShutdownSync { 1991 public: 1992 void init() { } 1993 bool threadDtorStart() { return true; } 1994 void threadDtorDone() { } 1995 void processExit() { } 1996 }; 1997 1998 #endif // USE_PTHREAD && (__TBB_SOURCE_DIRECTLY_INCLUDED || __TBB_USE_DLOPEN_REENTRANCY_WORKAROUND) 1999 2000 static ShutdownSync shutdownSync; 2001 2002 inline bool isMallocInitialized() { 2003 // Load must have acquire fence; otherwise thread taking "initialized" path 2004 // might perform textually later loads *before* mallocInitialized becomes 2. 2005 return 2 == mallocInitialized.load(std::memory_order_acquire); 2006 } 2007 2008 /* Caller is responsible for ensuring this routine is called exactly once. */ 2009 extern "C" void MallocInitializeITT() { 2010 #if __TBB_USE_ITT_NOTIFY 2011 if (!usedBySrcIncluded) 2012 tbb::detail::r1::__TBB_load_ittnotify(); 2013 #endif 2014 } 2015 2016 void MemoryPool::initDefaultPool() { 2017 hugePages.init(); 2018 } 2019 2020 /* 2021 * Allocator initialization routine; 2022 * it is called lazily on the very first scalable_malloc call. 
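 * It runs under initMutex with RecursiveMallocCallProtector active (see doInitialization()).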
2023 */ 2024 static bool initMemoryManager() 2025 { 2026 TRACEF(( "[ScalableMalloc trace] sizeof(Block) is %d (expected 128); sizeof(uintptr_t) is %d\n", 2027 sizeof(Block), sizeof(uintptr_t) )); 2028 MALLOC_ASSERT( 2*blockHeaderAlignment == sizeof(Block), ASSERT_TEXT ); 2029 MALLOC_ASSERT( sizeof(FreeObject) == sizeof(void*), ASSERT_TEXT ); 2030 MALLOC_ASSERT( isAligned(defaultMemPool, sizeof(intptr_t)), 2031 "Memory pool must be void*-aligned for atomic to work over aligned arguments."); 2032 2033 #if USE_WINTHREAD 2034 const size_t granularity = 64*1024; // granulatity of VirtualAlloc 2035 #else 2036 // POSIX.1-2001-compliant way to get page size 2037 const size_t granularity = sysconf(_SC_PAGESIZE); 2038 #endif 2039 if (!defaultMemPool) { 2040 // Do not rely on static constructors and do the assignment in case 2041 // of library static section not initialized at this call yet. 2042 defaultMemPool = (MemoryPool*)defaultMemPool_space; 2043 } 2044 bool initOk = defaultMemPool-> 2045 extMemPool.init(0, NULL, NULL, granularity, 2046 /*keepAllMemory=*/false, /*fixedPool=*/false); 2047 // TODO: extMemPool.init() to not allocate memory 2048 if (!initOk || !initBackRefMaster(&defaultMemPool->extMemPool.backend) || !ThreadId::init()) 2049 return false; 2050 MemoryPool::initDefaultPool(); 2051 // init() is required iff initMemoryManager() is called 2052 // after mallocProcessShutdownNotification() 2053 shutdownSync.init(); 2054 #if COLLECT_STATISTICS 2055 initStatisticsCollection(); 2056 #endif 2057 return true; 2058 } 2059 2060 static bool GetBoolEnvironmentVariable(const char* name) { 2061 return tbb::detail::r1::GetBoolEnvironmentVariable(name); 2062 } 2063 2064 //! Ensures that initMemoryManager() is called once and only once. 2065 /** Does not return until initMemoryManager() has been completed by a thread. 2066 There is no need to call this routine if mallocInitialized==2 . */ 2067 static bool doInitialization() 2068 { 2069 MallocMutex::scoped_lock lock( initMutex ); 2070 if (mallocInitialized.load(std::memory_order_relaxed)!=2) { 2071 MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==0, ASSERT_TEXT ); 2072 mallocInitialized.store(1, std::memory_order_relaxed); 2073 RecursiveMallocCallProtector scoped; 2074 if (!initMemoryManager()) { 2075 mallocInitialized.store(0, std::memory_order_relaxed); // restore and out 2076 return false; 2077 } 2078 #ifdef MALLOC_EXTRA_INITIALIZATION 2079 MALLOC_EXTRA_INITIALIZATION; 2080 #endif 2081 #if MALLOC_CHECK_RECURSION 2082 RecursiveMallocCallProtector::detectNaiveOverload(); 2083 #endif 2084 MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==1, ASSERT_TEXT ); 2085 // Store must have release fence, otherwise mallocInitialized==2 2086 // might become remotely visible before side effects of 2087 // initMemoryManager() become remotely visible. 
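        // (This release store pairs with the acquire load in isMallocInitialized().)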
2088 mallocInitialized.store(2, std::memory_order_release); 2089 if( GetBoolEnvironmentVariable("TBB_VERSION") ) { 2090 fputs(VersionString+1,stderr); 2091 hugePages.printStatus(); 2092 } 2093 } 2094 /* It can't be 0 or I would have initialized it */ 2095 MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==2, ASSERT_TEXT ); 2096 return true; 2097 } 2098 2099 /********* End library initialization *************/ 2100 2101 /********* The malloc show begins *************/ 2102 2103 2104 FreeObject *Block::allocateFromFreeList() 2105 { 2106 FreeObject *result; 2107 2108 if (!freeList) return NULL; 2109 2110 result = freeList; 2111 MALLOC_ASSERT( result, ASSERT_TEXT ); 2112 2113 freeList = result->next; 2114 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 2115 allocatedCount++; 2116 STAT_increment(getThreadId(), getIndex(objectSize), allocFreeListUsed); 2117 2118 return result; 2119 } 2120 2121 FreeObject *Block::allocateFromBumpPtr() 2122 { 2123 FreeObject *result = bumpPtr; 2124 if (result) { 2125 bumpPtr = (FreeObject *) ((uintptr_t) bumpPtr - objectSize); 2126 if ( (uintptr_t)bumpPtr < (uintptr_t)this+sizeof(Block) ) { 2127 bumpPtr = NULL; 2128 } 2129 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 2130 allocatedCount++; 2131 STAT_increment(getThreadId(), getIndex(objectSize), allocBumpPtrUsed); 2132 } 2133 return result; 2134 } 2135 2136 inline FreeObject* Block::allocate() 2137 { 2138 MALLOC_ASSERT( isOwnedByCurrentThread(), ASSERT_TEXT ); 2139 2140 /* for better cache locality, first looking in the free list. */ 2141 if ( FreeObject *result = allocateFromFreeList() ) { 2142 return result; 2143 } 2144 MALLOC_ASSERT( !freeList, ASSERT_TEXT ); 2145 2146 /* if free list is empty, try thread local bump pointer allocation. */ 2147 if ( FreeObject *result = allocateFromBumpPtr() ) { 2148 return result; 2149 } 2150 MALLOC_ASSERT( !bumpPtr, ASSERT_TEXT ); 2151 2152 /* the block is considered full. */ 2153 isFull = true; 2154 return NULL; 2155 } 2156 2157 size_t Block::findObjectSize(void *object) const 2158 { 2159 size_t blSize = getSize(); 2160 #if MALLOC_CHECK_RECURSION 2161 // Currently, there is no aligned allocations from startup blocks, 2162 // so we can return just StartupBlock::msize(). 2163 // TODO: This must be extended if we add aligned allocation from startup blocks. 
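    // getSize() reports 0 for startup blocks; their object sizes live in per-object prefixes.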
2164 if (!blSize) 2165 return StartupBlock::msize(object); 2166 #endif 2167 // object can be aligned, so real size can be less than block's 2168 size_t size = 2169 blSize - ((uintptr_t)object - (uintptr_t)findObjectToFree(object)); 2170 MALLOC_ASSERT(size>0 && size<minLargeObjectSize, ASSERT_TEXT); 2171 return size; 2172 } 2173 2174 void Bin::moveBlockToFront(Block *block) 2175 { 2176 /* move the block to the front of the bin */ 2177 if (block == activeBlk) return; 2178 outofTLSBin(block); 2179 pushTLSBin(block); 2180 } 2181 2182 void Bin::processEmptyBlock(Block *block, bool poolTheBlock) 2183 { 2184 if (block != activeBlk) { 2185 /* We are not using this block; return it to the pool */ 2186 outofTLSBin(block); 2187 block->getMemPool()->returnEmptyBlock(block, poolTheBlock); 2188 } else { 2189 /* all objects are free - let's restore the bump pointer */ 2190 block->restoreBumpPtr(); 2191 } 2192 } 2193 2194 template<int LOW_MARK, int HIGH_MARK> 2195 bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool) 2196 { 2197 const size_t size = object->unalignedSize; 2198 // not spoil cache with too large object, that can cause its total cleanup 2199 if (size > MAX_TOTAL_SIZE) 2200 return false; 2201 LargeMemoryBlock *localHead = head.exchange(NULL); 2202 2203 object->prev = NULL; 2204 object->next = localHead; 2205 if (localHead) 2206 localHead->prev = object; 2207 else { 2208 // those might not be cleaned during local cache stealing, correct them 2209 totalSize = 0; 2210 numOfBlocks = 0; 2211 tail = object; 2212 } 2213 localHead = object; 2214 totalSize += size; 2215 numOfBlocks++; 2216 // must meet both size and number of cached objects constrains 2217 if (totalSize > MAX_TOTAL_SIZE || numOfBlocks >= HIGH_MARK) { 2218 // scanning from tail until meet conditions 2219 while (totalSize > MAX_TOTAL_SIZE || numOfBlocks > LOW_MARK) { 2220 totalSize -= tail->unalignedSize; 2221 numOfBlocks--; 2222 tail = tail->prev; 2223 } 2224 LargeMemoryBlock *headToRelease = tail->next; 2225 tail->next = NULL; 2226 2227 extMemPool->freeLargeObjectList(headToRelease); 2228 } 2229 2230 head.store(localHead, std::memory_order_release); 2231 return true; 2232 } 2233 2234 template<int LOW_MARK, int HIGH_MARK> 2235 LargeMemoryBlock *LocalLOCImpl<LOW_MARK, HIGH_MARK>::get(size_t size) 2236 { 2237 LargeMemoryBlock *localHead, *res = NULL; 2238 2239 if (size > MAX_TOTAL_SIZE) 2240 return NULL; 2241 2242 // TBB_REVAMP_TODO: review this line 2243 if (!head.load(std::memory_order_acquire) || (localHead = head.exchange(NULL)) == NULL) { 2244 // do not restore totalSize, numOfBlocks and tail at this point, 2245 // as they are used only in put(), where they must be restored 2246 return NULL; 2247 } 2248 2249 for (LargeMemoryBlock *curr = localHead; curr; curr=curr->next) { 2250 if (curr->unalignedSize == size) { 2251 res = curr; 2252 if (curr->next) 2253 curr->next->prev = curr->prev; 2254 else 2255 tail = curr->prev; 2256 if (curr != localHead) 2257 curr->prev->next = curr->next; 2258 else 2259 localHead = curr->next; 2260 totalSize -= size; 2261 numOfBlocks--; 2262 break; 2263 } 2264 } 2265 2266 head.store(localHead, std::memory_order_release); 2267 return res; 2268 } 2269 2270 template<int LOW_MARK, int HIGH_MARK> 2271 bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::externalCleanup(ExtMemoryPool *extMemPool) 2272 { 2273 if (LargeMemoryBlock *localHead = head.exchange(NULL)) { 2274 extMemPool->freeLargeObjectList(localHead); 2275 return true; 2276 } 2277 return false; 2278 } 2279 2280 void 
*MemoryPool::getFromLLOCache(TLSData* tls, size_t size, size_t alignment) 2281 { 2282 LargeMemoryBlock *lmb = NULL; 2283 2284 size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr); 2285 size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+alignment); 2286 if (allocationSize < size) // allocationSize is wrapped around after alignToBin 2287 return NULL; 2288 MALLOC_ASSERT(allocationSize >= alignment, "Overflow must be checked before."); 2289 2290 if (tls) { 2291 tls->markUsed(); 2292 lmb = tls->lloc.get(allocationSize); 2293 } 2294 if (!lmb) 2295 lmb = extMemPool.mallocLargeObject(this, allocationSize); 2296 2297 if (lmb) { 2298 // doing shuffle we suppose that alignment offset guarantees 2299 // that different cache lines are in use 2300 MALLOC_ASSERT(alignment >= estimatedCacheLineSize, ASSERT_TEXT); 2301 2302 void *alignedArea = (void*)alignUp((uintptr_t)lmb+headersSize, alignment); 2303 uintptr_t alignedRight = 2304 alignDown((uintptr_t)lmb+lmb->unalignedSize - size, alignment); 2305 // Has some room to shuffle object between cache lines? 2306 // Note that alignedRight and alignedArea are aligned at alignment. 2307 unsigned ptrDelta = alignedRight - (uintptr_t)alignedArea; 2308 if (ptrDelta && tls) { // !tls is cold path 2309 // for the hot path of alignment==estimatedCacheLineSize, 2310 // allow compilers to use shift for division 2311 // (since estimatedCacheLineSize is a power-of-2 constant) 2312 unsigned numOfPossibleOffsets = alignment == estimatedCacheLineSize? 2313 ptrDelta / estimatedCacheLineSize : 2314 ptrDelta / alignment; 2315 unsigned myCacheIdx = ++tls->currCacheIdx; 2316 unsigned offset = myCacheIdx % numOfPossibleOffsets; 2317 2318 // Move object to a cache line with an offset that is different from 2319 // previous allocation. This supposedly allows us to use cache 2320 // associativity more efficiently. 2321 alignedArea = (void*)((uintptr_t)alignedArea + offset*alignment); 2322 } 2323 MALLOC_ASSERT((uintptr_t)lmb+lmb->unalignedSize >= 2324 (uintptr_t)alignedArea+size, "Object doesn't fit the block."); 2325 LargeObjectHdr *header = (LargeObjectHdr*)alignedArea-1; 2326 header->memoryBlock = lmb; 2327 header->backRefIdx = lmb->backRefIdx; 2328 setBackRef(header->backRefIdx, header); 2329 2330 lmb->objectSize = size; 2331 2332 MALLOC_ASSERT( isLargeObject<unknownMem>(alignedArea), ASSERT_TEXT ); 2333 MALLOC_ASSERT( isAligned(alignedArea, alignment), ASSERT_TEXT ); 2334 2335 return alignedArea; 2336 } 2337 return NULL; 2338 } 2339 2340 void MemoryPool::putToLLOCache(TLSData *tls, void *object) 2341 { 2342 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2343 // overwrite backRefIdx to simplify double free detection 2344 header->backRefIdx = BackRefIdx(); 2345 2346 if (tls) { 2347 tls->markUsed(); 2348 if (tls->lloc.put(header->memoryBlock, &extMemPool)) 2349 return; 2350 } 2351 extMemPool.freeLargeObject(header->memoryBlock); 2352 } 2353 2354 /* 2355 * All aligned allocations fall into one of the following categories: 2356 * 1. if both request size and alignment are <= maxSegregatedObjectSize, 2357 * we just align the size up, and request this amount, because for every size 2358 * aligned to some power of 2, the allocated object is at least that aligned. 2359 * 2. for size<minLargeObjectSize, check if already guaranteed fittingAlignment is enough. 2360 * 3. if size+alignment<minLargeObjectSize, we take an object of fittingSizeN and align 2361 * its address up; given such pointer, scalable_free could find the real object. 
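 *    (so a category-3 request for 100 bytes at 128-byte alignment allocates a 228-byte
 *    object and returns the first 128-byte boundary inside it).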
2362 * Wrapping of size+alignment is impossible because maximal allowed 2363 * alignment plus minLargeObjectSize can't lead to wrapping. 2364 * 4. otherwise, aligned large object is allocated. 2365 */ 2366 static void *allocateAligned(MemoryPool *memPool, size_t size, size_t alignment) 2367 { 2368 MALLOC_ASSERT( isPowerOfTwo(alignment), ASSERT_TEXT ); 2369 2370 if (!isMallocInitialized()) 2371 if (!doInitialization()) 2372 return NULL; 2373 2374 void *result; 2375 if (size<=maxSegregatedObjectSize && alignment<=maxSegregatedObjectSize) 2376 result = internalPoolMalloc(memPool, alignUp(size? size: sizeof(size_t), alignment)); 2377 else if (size<minLargeObjectSize) { 2378 if (alignment<=fittingAlignment) 2379 result = internalPoolMalloc(memPool, size); 2380 else if (size+alignment < minLargeObjectSize) { 2381 void *unaligned = internalPoolMalloc(memPool, size+alignment); 2382 if (!unaligned) return NULL; 2383 result = alignUp(unaligned, alignment); 2384 } else 2385 goto LargeObjAlloc; 2386 } else { 2387 LargeObjAlloc: 2388 TLSData *tls = memPool->getTLS(/*create=*/true); 2389 // take into account only alignment that are higher then natural 2390 result = 2391 memPool->getFromLLOCache(tls, size, largeObjectAlignment>alignment? 2392 largeObjectAlignment: alignment); 2393 } 2394 2395 MALLOC_ASSERT( isAligned(result, alignment), ASSERT_TEXT ); 2396 return result; 2397 } 2398 2399 static void *reallocAligned(MemoryPool *memPool, void *ptr, 2400 size_t newSize, size_t alignment = 0) 2401 { 2402 void *result; 2403 size_t copySize; 2404 2405 if (isLargeObject<ourMem>(ptr)) { 2406 LargeMemoryBlock* lmb = ((LargeObjectHdr *)ptr - 1)->memoryBlock; 2407 copySize = lmb->unalignedSize-((uintptr_t)ptr-(uintptr_t)lmb); 2408 2409 // Apply different strategies if size decreases 2410 if (newSize <= copySize && (0 == alignment || isAligned(ptr, alignment))) { 2411 2412 // For huge objects (that do not fit in backend cache), keep the same space unless 2413 // the new size is at least twice smaller 2414 bool isMemoryBlockHuge = copySize > memPool->extMemPool.backend.getMaxBinnedSize(); 2415 size_t threshold = isMemoryBlockHuge ? copySize / 2 : 0; 2416 if (newSize > threshold) { 2417 lmb->objectSize = newSize; 2418 return ptr; 2419 } 2420 // TODO: For large objects suitable for the backend cache, 2421 // split out the excessive part and put it to the backend. 2422 } 2423 // Reallocate for real 2424 copySize = lmb->objectSize; 2425 #if BACKEND_HAS_MREMAP 2426 if (void *r = memPool->extMemPool.remap(ptr, copySize, newSize, 2427 alignment < largeObjectAlignment ? largeObjectAlignment : alignment)) 2428 return r; 2429 #endif 2430 result = alignment ? allocateAligned(memPool, newSize, alignment) : 2431 internalPoolMalloc(memPool, newSize); 2432 2433 } else { 2434 Block* block = (Block *)alignDown(ptr, slabSize); 2435 copySize = block->findObjectSize(ptr); 2436 2437 // TODO: Move object to another bin if size decreases and the current bin is "empty enough". 2438 // Currently, in case of size decreasing, old pointer is returned 2439 if (newSize <= copySize && (0==alignment || isAligned(ptr, alignment))) { 2440 return ptr; 2441 } else { 2442 result = alignment ? allocateAligned(memPool, newSize, alignment) : 2443 internalPoolMalloc(memPool, newSize); 2444 } 2445 } 2446 if (result) { 2447 memcpy(result, ptr, copySize < newSize ? 
copySize : newSize); 2448 internalPoolFree(memPool, ptr, 0); 2449 } 2450 return result; 2451 } 2452 2453 #if MALLOC_DEBUG 2454 /* A predicate checks if an object is properly placed inside its block */ 2455 inline bool Block::isProperlyPlaced(const void *object) const 2456 { 2457 return 0 == ((uintptr_t)this + slabSize - (uintptr_t)object) % objectSize; 2458 } 2459 #endif 2460 2461 /* Finds the real object inside the block */ 2462 FreeObject *Block::findAllocatedObject(const void *address) const 2463 { 2464 // calculate offset from the end of the block space 2465 uint16_t offset = (uintptr_t)this + slabSize - (uintptr_t)address; 2466 MALLOC_ASSERT( offset<=slabSize-sizeof(Block), ASSERT_TEXT ); 2467 // find offset difference from a multiple of allocation size 2468 offset %= objectSize; 2469 // and move the address down to where the real object starts. 2470 return (FreeObject*)((uintptr_t)address - (offset? objectSize-offset: 0)); 2471 } 2472 2473 /* 2474 * Bad dereference caused by a foreign pointer is possible only here, not earlier in call chain. 2475 * Separate function isolates SEH code, as it has bad influence on compiler optimization. 2476 */ 2477 static inline BackRefIdx safer_dereference (const BackRefIdx *ptr) 2478 { 2479 BackRefIdx id; 2480 #if _MSC_VER 2481 __try { 2482 #endif 2483 id = dereference(ptr); 2484 #if _MSC_VER 2485 } __except( GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION? 2486 EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ) { 2487 id = BackRefIdx(); 2488 } 2489 #endif 2490 return id; 2491 } 2492 2493 template<MemoryOrigin memOrigin> 2494 bool isLargeObject(void *object) 2495 { 2496 if (!isAligned(object, largeObjectAlignment)) 2497 return false; 2498 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2499 BackRefIdx idx = (memOrigin == unknownMem) ? 2500 safer_dereference(&header->backRefIdx) : dereference(&header->backRefIdx); 2501 2502 return idx.isLargeObject() 2503 // in valid LargeObjectHdr memoryBlock is not NULL 2504 && header->memoryBlock 2505 // in valid LargeObjectHdr memoryBlock points somewhere before header 2506 // TODO: more strict check 2507 && (uintptr_t)header->memoryBlock < (uintptr_t)header 2508 && getBackRef(idx) == header; 2509 } 2510 2511 static inline bool isSmallObject (void *ptr) 2512 { 2513 Block* expectedBlock = (Block*)alignDown(ptr, slabSize); 2514 const BackRefIdx* idx = expectedBlock->getBackRefIdx(); 2515 2516 bool isSmall = expectedBlock == getBackRef(safer_dereference(idx)); 2517 if (isSmall) 2518 expectedBlock->checkFreePrecond(ptr); 2519 return isSmall; 2520 } 2521 2522 /**** Check if an object was allocated by scalable_malloc ****/ 2523 static inline bool isRecognized (void* ptr) 2524 { 2525 return defaultMemPool->extMemPool.backend.ptrCanBeValid(ptr) && 2526 (isLargeObject<unknownMem>(ptr) || isSmallObject(ptr)); 2527 } 2528 2529 static inline void freeSmallObject(void *object) 2530 { 2531 /* mask low bits to get the block */ 2532 Block *block = (Block *)alignDown(object, slabSize); 2533 block->checkFreePrecond(object); 2534 2535 #if MALLOC_CHECK_RECURSION 2536 if (block->isStartupAllocObject()) { 2537 ((StartupBlock *)block)->free(object); 2538 return; 2539 } 2540 #endif 2541 if (block->isOwnedByCurrentThread()) { 2542 block->freeOwnObject(object); 2543 } else { /* Slower path to add to the shared list, the allocatedCount is updated by the owner thread in malloc. 
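               The object is pushed onto the block's publicFreeList and privatized later by the owning thread.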
*/ 2544 FreeObject *objectToFree = block->findObjectToFree(object); 2545 block->freePublicObject(objectToFree); 2546 } 2547 } 2548 2549 static void *internalPoolMalloc(MemoryPool* memPool, size_t size) 2550 { 2551 Bin* bin; 2552 Block * mallocBlock; 2553 2554 if (!memPool) return NULL; 2555 2556 if (!size) size = sizeof(size_t); 2557 2558 TLSData *tls = memPool->getTLS(/*create=*/true); 2559 2560 /* Allocate a large object */ 2561 if (size >= minLargeObjectSize) 2562 return memPool->getFromLLOCache(tls, size, largeObjectAlignment); 2563 2564 if (!tls) return NULL; 2565 2566 tls->markUsed(); 2567 /* 2568 * Get an element in thread-local array corresponding to the given size; 2569 * It keeps ptr to the active block for allocations of this size 2570 */ 2571 bin = tls->getAllocationBin(size); 2572 if ( !bin ) return NULL; 2573 2574 /* Get a block to try to allocate in. */ 2575 for( mallocBlock = bin->getActiveBlock(); mallocBlock; 2576 mallocBlock = bin->setPreviousBlockActive() ) // the previous block should be empty enough 2577 { 2578 if( FreeObject *result = mallocBlock->allocate() ) 2579 return result; 2580 } 2581 2582 /* 2583 * else privatize publicly freed objects in some block and allocate from it 2584 */ 2585 mallocBlock = bin->getPrivatizedFreeListBlock(); 2586 if (mallocBlock) { 2587 MALLOC_ASSERT( mallocBlock->freeListNonNull(), ASSERT_TEXT ); 2588 if ( FreeObject *result = mallocBlock->allocateFromFreeList() ) 2589 return result; 2590 /* Else something strange happened, need to retry from the beginning; */ 2591 TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in public free list; reentering.\n" )); 2592 return internalPoolMalloc(memPool, size); 2593 } 2594 2595 /* 2596 * no suitable own blocks, try to get a partial block that some other thread has discarded. 2597 */ 2598 mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size); 2599 while (mallocBlock) { 2600 bin->pushTLSBin(mallocBlock); 2601 bin->setActiveBlock(mallocBlock); // TODO: move under the below condition? 2602 if( FreeObject *result = mallocBlock->allocate() ) 2603 return result; 2604 mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size); 2605 } 2606 2607 /* 2608 * else try to get a new empty block 2609 */ 2610 mallocBlock = memPool->getEmptyBlock(size); 2611 if (mallocBlock) { 2612 bin->pushTLSBin(mallocBlock); 2613 bin->setActiveBlock(mallocBlock); 2614 if( FreeObject *result = mallocBlock->allocate() ) 2615 return result; 2616 /* Else something strange happened, need to retry from the beginning; */ 2617 TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in empty block; reentering.\n" )); 2618 return internalPoolMalloc(memPool, size); 2619 } 2620 /* 2621 * else nothing works so return NULL 2622 */ 2623 TRACEF(( "[ScalableMalloc trace] No memory found, returning NULL.\n" )); 2624 return NULL; 2625 } 2626 2627 // When size==0 (i.e. unknown), detect here whether the object is large. 2628 // For size is known and < minLargeObjectSize, we still need to check 2629 // if the actual object is large, because large objects might be used 2630 // for aligned small allocations. 2631 static bool internalPoolFree(MemoryPool *memPool, void *object, size_t size) 2632 { 2633 if (!memPool || !object) return false; 2634 2635 // The library is initialized at allocation call, so releasing while 2636 // not initialized means foreign object is releasing. 
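    // Note: a caller-supplied size >= minLargeObjectSize short-circuits the isLargeObject()
    // header check below; smaller known sizes still need it, because aligned small requests
    // may have been served by a large object.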
2637 MALLOC_ASSERT(isMallocInitialized(), ASSERT_TEXT); 2638 MALLOC_ASSERT(memPool->extMemPool.userPool() || isRecognized(object), 2639 "Invalid pointer during object releasing is detected."); 2640 2641 if (size >= minLargeObjectSize || isLargeObject<ourMem>(object)) 2642 memPool->putToLLOCache(memPool->getTLS(/*create=*/false), object); 2643 else 2644 freeSmallObject(object); 2645 return true; 2646 } 2647 2648 static void *internalMalloc(size_t size) 2649 { 2650 if (!size) size = sizeof(size_t); 2651 2652 #if MALLOC_CHECK_RECURSION 2653 if (RecursiveMallocCallProtector::sameThreadActive()) 2654 return size<minLargeObjectSize? StartupBlock::allocate(size) : 2655 // nested allocation, so skip tls 2656 (FreeObject*)defaultMemPool->getFromLLOCache(NULL, size, slabSize); 2657 #endif 2658 2659 if (!isMallocInitialized()) 2660 if (!doInitialization()) 2661 return NULL; 2662 return internalPoolMalloc(defaultMemPool, size); 2663 } 2664 2665 static void internalFree(void *object) 2666 { 2667 internalPoolFree(defaultMemPool, object, 0); 2668 } 2669 2670 static size_t internalMsize(void* ptr) 2671 { 2672 MALLOC_ASSERT(ptr, "Invalid pointer passed to internalMsize"); 2673 if (isLargeObject<ourMem>(ptr)) { 2674 // TODO: return the maximum memory size, that can be written to this object 2675 LargeMemoryBlock* lmb = ((LargeObjectHdr*)ptr - 1)->memoryBlock; 2676 return lmb->objectSize; 2677 } else { 2678 Block *block = (Block*)alignDown(ptr, slabSize); 2679 return block->findObjectSize(ptr); 2680 } 2681 } 2682 2683 } // namespace internal 2684 2685 using namespace rml::internal; 2686 2687 // legacy entry point saved for compatibility with binaries complied 2688 // with pre-6003 versions of TBB 2689 rml::MemoryPool *pool_create(intptr_t pool_id, const MemPoolPolicy *policy) 2690 { 2691 rml::MemoryPool *pool; 2692 MemPoolPolicy pol(policy->pAlloc, policy->pFree, policy->granularity); 2693 2694 pool_create_v1(pool_id, &pol, &pool); 2695 return pool; 2696 } 2697 2698 rml::MemPoolError pool_create_v1(intptr_t pool_id, const MemPoolPolicy *policy, 2699 rml::MemoryPool **pool) 2700 { 2701 if ( !policy->pAlloc || policy->version<MemPoolPolicy::TBBMALLOC_POOL_VERSION 2702 // empty pFree allowed only for fixed pools 2703 || !(policy->fixedPool || policy->pFree)) { 2704 *pool = NULL; 2705 return INVALID_POLICY; 2706 } 2707 if ( policy->version>MemPoolPolicy::TBBMALLOC_POOL_VERSION // future versions are not supported 2708 // new flags can be added in place of reserved, but default 2709 // behaviour must be supported by this version 2710 || policy->reserved ) { 2711 *pool = NULL; 2712 return UNSUPPORTED_POLICY; 2713 } 2714 if (!isMallocInitialized()) 2715 if (!doInitialization()) { 2716 *pool = NULL; 2717 return NO_MEMORY; 2718 } 2719 rml::internal::MemoryPool *memPool = 2720 (rml::internal::MemoryPool*)internalMalloc((sizeof(rml::internal::MemoryPool))); 2721 if (!memPool) { 2722 *pool = NULL; 2723 return NO_MEMORY; 2724 } 2725 memset(memPool, 0, sizeof(rml::internal::MemoryPool)); 2726 if (!memPool->init(pool_id, policy)) { 2727 internalFree(memPool); 2728 *pool = NULL; 2729 return NO_MEMORY; 2730 } 2731 2732 *pool = (rml::MemoryPool*)memPool; 2733 return POOL_OK; 2734 } 2735 2736 bool pool_destroy(rml::MemoryPool* memPool) 2737 { 2738 if (!memPool) return false; 2739 bool ret = ((rml::internal::MemoryPool*)memPool)->destroy(); 2740 internalFree(memPool); 2741 2742 return ret; 2743 } 2744 2745 bool pool_reset(rml::MemoryPool* memPool) 2746 { 2747 if (!memPool) return false; 2748 2749 return 
((rml::internal::MemoryPool*)memPool)->reset(); 2750 } 2751 2752 void *pool_malloc(rml::MemoryPool* mPool, size_t size) 2753 { 2754 return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size); 2755 } 2756 2757 void *pool_realloc(rml::MemoryPool* mPool, void *object, size_t size) 2758 { 2759 if (!object) 2760 return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size); 2761 if (!size) { 2762 internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0); 2763 return NULL; 2764 } 2765 return reallocAligned((rml::internal::MemoryPool*)mPool, object, size, 0); 2766 } 2767 2768 void *pool_aligned_malloc(rml::MemoryPool* mPool, size_t size, size_t alignment) 2769 { 2770 if (!isPowerOfTwo(alignment) || 0==size) 2771 return NULL; 2772 2773 return allocateAligned((rml::internal::MemoryPool*)mPool, size, alignment); 2774 } 2775 2776 void *pool_aligned_realloc(rml::MemoryPool* memPool, void *ptr, size_t size, size_t alignment) 2777 { 2778 if (!isPowerOfTwo(alignment)) 2779 return NULL; 2780 rml::internal::MemoryPool *mPool = (rml::internal::MemoryPool*)memPool; 2781 void *tmp; 2782 2783 if (!ptr) 2784 tmp = allocateAligned(mPool, size, alignment); 2785 else if (!size) { 2786 internalPoolFree(mPool, ptr, 0); 2787 return NULL; 2788 } else 2789 tmp = reallocAligned(mPool, ptr, size, alignment); 2790 2791 return tmp; 2792 } 2793 2794 bool pool_free(rml::MemoryPool *mPool, void *object) 2795 { 2796 return internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0); 2797 } 2798 2799 rml::MemoryPool *pool_identify(void *object) 2800 { 2801 rml::internal::MemoryPool *pool; 2802 if (isLargeObject<ourMem>(object)) { 2803 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2804 pool = header->memoryBlock->pool; 2805 } else { 2806 Block *block = (Block*)alignDown(object, slabSize); 2807 pool = block->getMemPool(); 2808 } 2809 // do not return defaultMemPool, as it can't be used in pool_free() etc 2810 __TBB_ASSERT_RELEASE(pool!=defaultMemPool, 2811 "rml::pool_identify() can't be used for scalable_malloc() etc results."); 2812 return (rml::MemoryPool*)pool; 2813 } 2814 2815 size_t pool_msize(rml::MemoryPool *mPool, void* object) 2816 { 2817 if (object) { 2818 // No assert for object recognition, cause objects allocated from non-default 2819 // memory pool do not participate in range checking and do not have valid backreferences for 2820 // small objects. Instead, check that an object belong to the certain memory pool. 2821 MALLOC_ASSERT_EX(mPool == pool_identify(object), "Object does not belong to the specified pool"); 2822 return internalMsize(object); 2823 } 2824 errno = EINVAL; 2825 // Unlike _msize, return 0 in case of parameter error. 2826 // Returning size_t(-1) looks more like the way to troubles. 2827 return 0; 2828 } 2829 2830 } // namespace rml 2831 2832 using namespace rml::internal; 2833 2834 #if MALLOC_TRACE 2835 static unsigned int threadGoingDownCount = 0; 2836 #endif 2837 2838 /* 2839 * When a thread is shutting down this routine should be called to remove all the thread ids 2840 * from the malloc blocks and replace them with a NULL thread id. 2841 * 2842 * For pthreads, the function is set as a callback in pthread_key_create for TLS bin. 2843 * It will be automatically called at thread exit with the key value as the argument, 2844 * unless that value is NULL. 2845 * For Windows, it is called from DllMain( DLL_THREAD_DETACH ). 
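 * (On Windows no TLS value is passed, so the routine walks the memory pool list itself.)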
2846 * 2847 * However neither of the above is called for the main process thread, so the routine 2848 * also needs to be called during the process shutdown. 2849 * 2850 */ 2851 // TODO: Consider making this function part of class MemoryPool. 2852 void doThreadShutdownNotification(TLSData* tls, bool main_thread) 2853 { 2854 TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return start %d\n", 2855 getThreadId(), threadGoingDownCount++ )); 2856 2857 #if USE_PTHREAD 2858 if (tls) { 2859 if (!shutdownSync.threadDtorStart()) return; 2860 tls->getMemPool()->onThreadShutdown(tls); 2861 shutdownSync.threadDtorDone(); 2862 } else 2863 #endif 2864 { 2865 suppress_unused_warning(tls); // not used on Windows 2866 // The default pool is safe to use at this point: 2867 // on Linux, only the main thread can go here before destroying defaultMemPool; 2868 // on Windows, shutdown is synchronized via loader lock and isMallocInitialized(). 2869 // See also __TBB_mallocProcessShutdownNotification() 2870 defaultMemPool->onThreadShutdown(defaultMemPool->getTLS(/*create=*/false)); 2871 // Take lock to walk through other pools; but waiting might be dangerous at this point 2872 // (e.g. on Windows the main thread might deadlock) 2873 bool locked; 2874 MallocMutex::scoped_lock lock(MemoryPool::memPoolListLock, /*wait=*/!main_thread, &locked); 2875 if (locked) { // the list is safe to process 2876 for (MemoryPool *memPool = defaultMemPool->next; memPool; memPool = memPool->next) 2877 memPool->onThreadShutdown(memPool->getTLS(/*create=*/false)); 2878 } 2879 } 2880 2881 TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return end\n", getThreadId() )); 2882 } 2883 2884 #if USE_PTHREAD 2885 void mallocThreadShutdownNotification(void* arg) 2886 { 2887 // The routine is called for each pool (as TLS dtor) on each thread, except for the main thread 2888 if (!isMallocInitialized()) return; 2889 doThreadShutdownNotification((TLSData*)arg, false); 2890 } 2891 #else 2892 extern "C" void __TBB_mallocThreadShutdownNotification() 2893 { 2894 // The routine is called once per thread on Windows 2895 if (!isMallocInitialized()) return; 2896 doThreadShutdownNotification(NULL, false); 2897 } 2898 #endif 2899 2900 extern "C" void __TBB_mallocProcessShutdownNotification(bool windows_process_dying) 2901 { 2902 if (!isMallocInitialized()) return; 2903 2904 // Don't clean allocator internals if the entire process is exiting 2905 if (!windows_process_dying) { 2906 doThreadShutdownNotification(NULL, /*main_thread=*/true); 2907 } 2908 #if __TBB_MALLOC_LOCACHE_STAT 2909 printf("cache hit ratio %f, size hit %f\n", 2910 1.*cacheHits/mallocCalls, 1.*memHitKB/memAllocKB); 2911 defaultMemPool->extMemPool.loc.reportStat(stdout); 2912 #endif 2913 2914 shutdownSync.processExit(); 2915 #if __TBB_SOURCE_DIRECTLY_INCLUDED 2916 /* Pthread keys must be deleted as soon as possible to not call key dtor 2917 on thread termination when then the tbbmalloc code can be already unloaded. 2918 */ 2919 defaultMemPool->destroy(); 2920 destroyBackRefMaster(&defaultMemPool->extMemPool.backend); 2921 ThreadId::destroy(); // Delete key for thread id 2922 hugePages.reset(); 2923 // new total malloc initialization is possible after this point 2924 mallocInitialized.store(0, std::memory_order_release); 2925 #elif __TBB_USE_DLOPEN_REENTRANCY_WORKAROUND 2926 /* In most cases we prevent unloading tbbmalloc, and don't clean up memory 2927 on process shutdown. 
When impossible to prevent, library unload results 2928 in shutdown notification, and it makes sense to release unused memory 2929 at that point (we can't release all memory because it's possible that 2930 it will be accessed after this point). 2931 TODO: better support systems where we can't prevent unloading by removing 2932 pthread destructors and releasing caches. 2933 */ 2934 defaultMemPool->extMemPool.hardCachesCleanup(); 2935 #endif // __TBB_SOURCE_DIRECTLY_INCLUDED 2936 2937 #if COLLECT_STATISTICS 2938 unsigned nThreads = ThreadId::getMaxThreadId(); 2939 for( int i=1; i<=nThreads && i<MAX_THREADS; ++i ) 2940 STAT_print(i); 2941 #endif 2942 if (!usedBySrcIncluded) 2943 MALLOC_ITT_FINI_ITTLIB(); 2944 } 2945 2946 extern "C" void * scalable_malloc(size_t size) 2947 { 2948 void *ptr = internalMalloc(size); 2949 if (!ptr) errno = ENOMEM; 2950 return ptr; 2951 } 2952 2953 extern "C" void scalable_free(void *object) 2954 { 2955 internalFree(object); 2956 } 2957 2958 #if MALLOC_ZONE_OVERLOAD_ENABLED 2959 extern "C" void __TBB_malloc_free_definite_size(void *object, size_t size) 2960 { 2961 internalPoolFree(defaultMemPool, object, size); 2962 } 2963 #endif 2964 2965 /* 2966 * A variant that provides additional memory safety, by checking whether the given address 2967 * was obtained with this allocator, and if not redirecting to the provided alternative call. 2968 */ 2969 extern "C" void __TBB_malloc_safer_free(void *object, void (*original_free)(void*)) 2970 { 2971 if (!object) 2972 return; 2973 2974 // tbbmalloc can allocate object only when tbbmalloc has been initialized 2975 if (mallocInitialized.load(std::memory_order_acquire) && defaultMemPool->extMemPool.backend.ptrCanBeValid(object)) { 2976 if (isLargeObject<unknownMem>(object)) { 2977 // must check 1st for large object, because small object check touches 4 pages on left, 2978 // and it can be inaccessible 2979 TLSData *tls = defaultMemPool->getTLS(/*create=*/false); 2980 2981 defaultMemPool->putToLLOCache(tls, object); 2982 return; 2983 } else if (isSmallObject(object)) { 2984 freeSmallObject(object); 2985 return; 2986 } 2987 } 2988 if (original_free) 2989 original_free(object); 2990 } 2991 2992 /********* End the free code *************/ 2993 2994 /********* Code for scalable_realloc ***********/ 2995 2996 /* 2997 * From K&R 2998 * "realloc changes the size of the object pointed to by p to size. The contents will 2999 * be unchanged up to the minimum of the old and the new sizes. If the new size is larger, 3000 * the new space is uninitialized. realloc returns a pointer to the new space, or 3001 * NULL if the request cannot be satisfied, in which case *p is unchanged." 3002 * 3003 */ 3004 extern "C" void* scalable_realloc(void* ptr, size_t size) 3005 { 3006 void *tmp; 3007 3008 if (!ptr) 3009 tmp = internalMalloc(size); 3010 else if (!size) { 3011 internalFree(ptr); 3012 return NULL; 3013 } else 3014 tmp = reallocAligned(defaultMemPool, ptr, size, 0); 3015 3016 if (!tmp) errno = ENOMEM; 3017 return tmp; 3018 } 3019 3020 /* 3021 * A variant that provides additional memory safety, by checking whether the given address 3022 * was obtained with this allocator, and if not redirecting to the provided alternative call. 
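 * On Windows the alternative is passed as a struct of function pointers (orig_ptrs), so the
 * foreign object can be measured, copied and freed; elsewhere it is the original realloc itself.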
3023 */ 3024 extern "C" void* __TBB_malloc_safer_realloc(void* ptr, size_t sz, void* original_realloc) 3025 { 3026 void *tmp; // TODO: fix warnings about uninitialized use of tmp 3027 3028 if (!ptr) { 3029 tmp = internalMalloc(sz); 3030 } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) { 3031 if (!sz) { 3032 internalFree(ptr); 3033 return NULL; 3034 } else { 3035 tmp = reallocAligned(defaultMemPool, ptr, sz, 0); 3036 } 3037 } 3038 #if USE_WINTHREAD 3039 else if (original_realloc && sz) { 3040 orig_ptrs *original_ptrs = static_cast<orig_ptrs*>(original_realloc); 3041 if ( original_ptrs->msize ){ 3042 size_t oldSize = original_ptrs->msize(ptr); 3043 tmp = internalMalloc(sz); 3044 if (tmp) { 3045 memcpy(tmp, ptr, sz<oldSize? sz : oldSize); 3046 if ( original_ptrs->free ){ 3047 original_ptrs->free( ptr ); 3048 } 3049 } 3050 } else 3051 tmp = NULL; 3052 } 3053 #else 3054 else if (original_realloc) { 3055 typedef void* (*realloc_ptr_t)(void*,size_t); 3056 realloc_ptr_t original_realloc_ptr; 3057 (void *&)original_realloc_ptr = original_realloc; 3058 tmp = original_realloc_ptr(ptr,sz); 3059 } 3060 #endif 3061 else tmp = NULL; 3062 3063 if (!tmp) errno = ENOMEM; 3064 return tmp; 3065 } 3066 3067 /********* End code for scalable_realloc ***********/ 3068 3069 /********* Code for scalable_calloc ***********/ 3070 3071 /* 3072 * From K&R 3073 * calloc returns a pointer to space for an array of nobj objects, 3074 * each of size size, or NULL if the request cannot be satisfied. 3075 * The space is initialized to zero bytes. 3076 * 3077 */ 3078 3079 extern "C" void * scalable_calloc(size_t nobj, size_t size) 3080 { 3081 // it's square root of maximal size_t value 3082 const size_t mult_not_overflow = size_t(1) << (sizeof(size_t)*CHAR_BIT/2); 3083 const size_t arraySize = nobj * size; 3084 3085 // check for overflow during multiplication: 3086 if (nobj>=mult_not_overflow || size>=mult_not_overflow) // 1) heuristic check 3087 if (nobj && arraySize / nobj != size) { // 2) exact check 3088 errno = ENOMEM; 3089 return NULL; 3090 } 3091 void* result = internalMalloc(arraySize); 3092 if (result) 3093 memset(result, 0, arraySize); 3094 else 3095 errno = ENOMEM; 3096 return result; 3097 } 3098 3099 /********* End code for scalable_calloc ***********/ 3100 3101 /********* Code for aligned allocation API **********/ 3102 3103 extern "C" int scalable_posix_memalign(void **memptr, size_t alignment, size_t size) 3104 { 3105 if ( !isPowerOfTwoAtLeast(alignment, sizeof(void*)) ) 3106 return EINVAL; 3107 void *result = allocateAligned(defaultMemPool, size, alignment); 3108 if (!result) 3109 return ENOMEM; 3110 *memptr = result; 3111 return 0; 3112 } 3113 3114 extern "C" void * scalable_aligned_malloc(size_t size, size_t alignment) 3115 { 3116 if (!isPowerOfTwo(alignment) || 0==size) { 3117 errno = EINVAL; 3118 return NULL; 3119 } 3120 void *tmp = allocateAligned(defaultMemPool, size, alignment); 3121 if (!tmp) errno = ENOMEM; 3122 return tmp; 3123 } 3124 3125 extern "C" void * scalable_aligned_realloc(void *ptr, size_t size, size_t alignment) 3126 { 3127 if (!isPowerOfTwo(alignment)) { 3128 errno = EINVAL; 3129 return NULL; 3130 } 3131 void *tmp; 3132 3133 if (!ptr) 3134 tmp = allocateAligned(defaultMemPool, size, alignment); 3135 else if (!size) { 3136 scalable_free(ptr); 3137 return NULL; 3138 } else 3139 tmp = reallocAligned(defaultMemPool, ptr, size, alignment); 3140 3141 if (!tmp) errno = ENOMEM; 3142 return tmp; 3143 } 3144 3145 extern "C" void * 
__TBB_malloc_safer_aligned_realloc(void *ptr, size_t size, size_t alignment, void* orig_function) 3146 { 3147 /* corner cases left out of reallocAligned to not deal with errno there */ 3148 if (!isPowerOfTwo(alignment)) { 3149 errno = EINVAL; 3150 return NULL; 3151 } 3152 void *tmp = NULL; 3153 3154 if (!ptr) { 3155 tmp = allocateAligned(defaultMemPool, size, alignment); 3156 } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) { 3157 if (!size) { 3158 internalFree(ptr); 3159 return NULL; 3160 } else { 3161 tmp = reallocAligned(defaultMemPool, ptr, size, alignment); 3162 } 3163 } 3164 #if USE_WINTHREAD 3165 else { 3166 orig_aligned_ptrs *original_ptrs = static_cast<orig_aligned_ptrs*>(orig_function); 3167 if (size) { 3168 // Without orig_msize, we can't do anything with this. 3169 // Just keeping old pointer. 3170 if ( original_ptrs->aligned_msize ){ 3171 // set alignment and offset to have possibly correct oldSize 3172 size_t oldSize = original_ptrs->aligned_msize(ptr, sizeof(void*), 0); 3173 tmp = allocateAligned(defaultMemPool, size, alignment); 3174 if (tmp) { 3175 memcpy(tmp, ptr, size<oldSize? size : oldSize); 3176 if ( original_ptrs->aligned_free ){ 3177 original_ptrs->aligned_free( ptr ); 3178 } 3179 } 3180 } 3181 } else { 3182 if ( original_ptrs->aligned_free ){ 3183 original_ptrs->aligned_free( ptr ); 3184 } 3185 return NULL; 3186 } 3187 } 3188 #else 3189 // As original_realloc can't align result, and there is no way to find 3190 // size of reallocating object, we are giving up. 3191 suppress_unused_warning(orig_function); 3192 #endif 3193 if (!tmp) errno = ENOMEM; 3194 return tmp; 3195 } 3196 3197 extern "C" void scalable_aligned_free(void *ptr) 3198 { 3199 internalFree(ptr); 3200 } 3201 3202 /********* end code for aligned allocation API **********/ 3203 3204 /********* Code for scalable_msize ***********/ 3205 3206 /* 3207 * Returns the size of a memory block allocated in the heap. 3208 */ 3209 extern "C" size_t scalable_msize(void* ptr) 3210 { 3211 if (ptr) { 3212 MALLOC_ASSERT(isRecognized(ptr), "Invalid pointer in scalable_msize detected."); 3213 return internalMsize(ptr); 3214 } 3215 errno = EINVAL; 3216 // Unlike _msize, return 0 in case of parameter error. 3217 // Returning size_t(-1) looks more like the way to troubles. 3218 return 0; 3219 } 3220 3221 /* 3222 * A variant that provides additional memory safety, by checking whether the given address 3223 * was obtained with this allocator, and if not redirecting to the provided alternative call. 
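 * Returns 0 (and, on Windows, sets errno) for a NULL pointer, or for a foreign pointer
 * when no alternative msize is supplied.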
3224 */ 3225 extern "C" size_t __TBB_malloc_safer_msize(void *object, size_t (*original_msize)(void*)) 3226 { 3227 if (object) { 3228 // Check if the memory was allocated by scalable_malloc 3229 if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object)) 3230 return internalMsize(object); 3231 else if (original_msize) 3232 return original_msize(object); 3233 } 3234 // object is NULL or unknown, or foreign and no original_msize 3235 #if USE_WINTHREAD 3236 errno = EINVAL; // errno expected to be set only on this platform 3237 #endif 3238 return 0; 3239 } 3240 3241 /* 3242 * The same as above but for _aligned_msize case 3243 */ 3244 extern "C" size_t __TBB_malloc_safer_aligned_msize(void *object, size_t alignment, size_t offset, size_t (*orig_aligned_msize)(void*,size_t,size_t)) 3245 { 3246 if (object) { 3247 // Check if the memory was allocated by scalable_malloc 3248 if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object)) 3249 return internalMsize(object); 3250 else if (orig_aligned_msize) 3251 return orig_aligned_msize(object,alignment,offset); 3252 } 3253 // object is NULL or unknown 3254 errno = EINVAL; 3255 return 0; 3256 } 3257 3258 /********* End code for scalable_msize ***********/ 3259 3260 extern "C" int scalable_allocation_mode(int param, intptr_t value) 3261 { 3262 if (param == TBBMALLOC_SET_SOFT_HEAP_LIMIT) { 3263 defaultMemPool->extMemPool.backend.setRecommendedMaxSize((size_t)value); 3264 return TBBMALLOC_OK; 3265 } else if (param == USE_HUGE_PAGES) { 3266 #if __linux__ 3267 switch (value) { 3268 case 0: 3269 case 1: 3270 hugePages.setMode(value); 3271 return TBBMALLOC_OK; 3272 default: 3273 return TBBMALLOC_INVALID_PARAM; 3274 } 3275 #else 3276 return TBBMALLOC_NO_EFFECT; 3277 #endif 3278 #if __TBB_SOURCE_DIRECTLY_INCLUDED 3279 } else if (param == TBBMALLOC_INTERNAL_SOURCE_INCLUDED) { 3280 switch (value) { 3281 case 0: // used by dynamic library 3282 case 1: // used by static library or directly included sources 3283 usedBySrcIncluded = value; 3284 return TBBMALLOC_OK; 3285 default: 3286 return TBBMALLOC_INVALID_PARAM; 3287 } 3288 #endif 3289 } else if (param == TBBMALLOC_SET_HUGE_SIZE_THRESHOLD) { 3290 defaultMemPool->extMemPool.loc.setHugeSizeThreshold((size_t)value); 3291 return TBBMALLOC_OK; 3292 } 3293 return TBBMALLOC_INVALID_PARAM; 3294 } 3295 3296 extern "C" int scalable_allocation_command(int cmd, void *param) 3297 { 3298 if (param) 3299 return TBBMALLOC_INVALID_PARAM; 3300 3301 bool released = false; 3302 switch(cmd) { 3303 case TBBMALLOC_CLEAN_THREAD_BUFFERS: 3304 if (TLSData *tls = defaultMemPool->getTLS(/*create=*/false)) 3305 released = tls->externalCleanup(/*cleanOnlyUnused*/false, /*cleanBins=*/true); 3306 break; 3307 case TBBMALLOC_CLEAN_ALL_BUFFERS: 3308 released = defaultMemPool->extMemPool.hardCachesCleanup(); 3309 break; 3310 default: 3311 return TBBMALLOC_INVALID_PARAM; 3312 } 3313 return released ? TBBMALLOC_OK : TBBMALLOC_NO_EFFECT; 3314 } 3315