/*
    Copyright (c) 2005-2020 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include <errno.h>
#include <new>        /* for placement new */
#include <string.h>   /* for memset */

#include "oneapi/tbb/version.h"
#include "../tbb/environment.h"
#include "../tbb/itt_notify.h" // for __TBB_load_ittnotify()

#if USE_PTHREAD
    #define TlsSetValue_func pthread_setspecific
    #define TlsGetValue_func pthread_getspecific
    #define GetMyTID() pthread_self()
    #include <sched.h>
    inline void do_yield() {sched_yield();}
    extern "C" { static void mallocThreadShutdownNotification(void*); }
    #if __sun || __SUNPRO_CC
    #define __asm__ asm
    #endif
    #include <unistd.h> // sysconf(_SC_PAGESIZE)
#elif USE_WINTHREAD
    #define GetMyTID() GetCurrentThreadId()
#if __TBB_WIN8UI_SUPPORT
    #include<thread>
    #define TlsSetValue_func FlsSetValue
    #define TlsGetValue_func FlsGetValue
    #define TlsAlloc() FlsAlloc(NULL)
    #define TLS_ALLOC_FAILURE FLS_OUT_OF_INDEXES
    #define TlsFree FlsFree
    inline void do_yield() {std::this_thread::yield();}
#else
    #define TlsSetValue_func TlsSetValue
    #define TlsGetValue_func TlsGetValue
    #define TLS_ALLOC_FAILURE TLS_OUT_OF_INDEXES
    inline void do_yield() {SwitchToThread();}
#endif
#else
    #error Must define USE_PTHREAD or USE_WINTHREAD
#endif

#define FREELIST_NONBLOCKING 1

namespace rml {
class MemoryPool;
namespace internal {

class Block;
class MemoryPool;

#if MALLOC_CHECK_RECURSION

inline bool isMallocInitialized();

bool RecursiveMallocCallProtector::noRecursion() {
    MALLOC_ASSERT(isMallocInitialized(),
                  "Recursion status can be checked only when initialization was done.");
    return !mallocRecursionDetected;
}

#endif // MALLOC_CHECK_RECURSION

/** Support for handling the special UNUSABLE pointer state **/
const intptr_t UNUSABLE = 0x1;
inline bool isSolidPtr( void* ptr ) {
    return (UNUSABLE|(intptr_t)ptr)!=UNUSABLE;
}
inline bool isNotForUse( void* ptr ) {
    return (intptr_t)ptr==UNUSABLE;
}

/*
 * Block::objectSize value used to mark blocks allocated by startupAlloc
 */
const uint16_t startupAllocObjSizeMark = ~(uint16_t)0;

/*
 * The following constant is used to define the size of struct Block, the block header.
 * The intent is to have the size of a Block be a multiple of the cache line size; this allows us to
 * get good alignment at the cost of some overhead equal to the amount of padding included in the Block.
 */
const int blockHeaderAlignment = estimatedCacheLineSize;

/********* The data structures and global objects **************/

/*
 * The malloc routines themselves need to be able to occasionally malloc some space,
 * in order to set up the thread-local structures. This
 * routine performs that function.
 */
class BootStrapBlocks {
    MallocMutex bootStrapLock;
    Block      *bootStrapBlock;
    Block      *bootStrapBlockUsed;
    FreeObject *bootStrapObjectList;
public:
    void *allocate(MemoryPool *memPool, size_t size);
    void free(void* ptr);
    void reset();
};

#if USE_INTERNAL_TID
class ThreadId {
    static tls_key_t Tid_key;
    std::atomic<intptr_t> ThreadCount;

    unsigned int id;

    static unsigned int tlsNumber() {
        unsigned int result = reinterpret_cast<intptr_t>(TlsGetValue_func(Tid_key));
        if( !result ) {
            RecursiveMallocCallProtector scoped;
            // Thread-local value is zero -> first call from this thread,
            // need to initialize with next ID value (IDs start from 1)
            result = ++ThreadCount; // returned new value!
            TlsSetValue_func( Tid_key, reinterpret_cast<void*>(result) );
        }
        return result;
    }
public:
    static bool init() {
#if USE_WINTHREAD
        Tid_key = TlsAlloc();
        if (Tid_key == TLS_ALLOC_FAILURE)
            return false;
#else
        int status = pthread_key_create( &Tid_key, NULL );
        if ( status ) {
            fprintf (stderr, "The memory manager cannot create tls key during initialization\n");
            return false;
        }
#endif /* USE_WINTHREAD */
        return true;
    }
    static void destroy() {
        if( Tid_key ) {
#if USE_WINTHREAD
            BOOL status = !(TlsFree( Tid_key ));  // fail is zero
#else
            int status = pthread_key_delete( Tid_key );
#endif /* USE_WINTHREAD */
            if ( status )
                fprintf (stderr, "The memory manager cannot delete tls key\n");
            Tid_key = 0;
        }
    }

    ThreadId() : id(ThreadId::tlsNumber()) {}
    bool isCurrentThreadId() const { return id == ThreadId::tlsNumber(); }

#if COLLECT_STATISTICS || MALLOC_TRACE
    friend unsigned int getThreadId() { return ThreadId::tlsNumber(); }
#endif
#if COLLECT_STATISTICS
    static unsigned getMaxThreadId() { return ThreadCount.load(std::memory_order_relaxed); }

    friend int STAT_increment(ThreadId tid, int bin, int ctr);
#endif
};

tls_key_t ThreadId::Tid_key;
intptr_t ThreadId::ThreadCount;

#if COLLECT_STATISTICS
int STAT_increment(ThreadId tid, int bin, int ctr)
{
    return ::STAT_increment(tid.id, bin, ctr);
}
#endif

#else // USE_INTERNAL_TID

class ThreadId {
#if USE_PTHREAD
    pthread_t tid;
#else
    DWORD     tid;
#endif
public:
    ThreadId() : tid(GetMyTID()) {}
#if USE_PTHREAD
    bool isCurrentThreadId() const { return pthread_equal(pthread_self(), tid); }
#else
    bool isCurrentThreadId() const { return GetCurrentThreadId() == tid; }
#endif
    static bool init() { return true; }
    static void destroy() {}
};

#endif // USE_INTERNAL_TID

/*********** Code to provide thread ID and a thread-local void pointer **********/

bool TLSKey::init()
{
#if USE_WINTHREAD
    TLS_pointer_key = TlsAlloc();
    if (TLS_pointer_key == TLS_ALLOC_FAILURE)
        return false;
#else
    int status = pthread_key_create( &TLS_pointer_key, mallocThreadShutdownNotification );
    if ( status )
        return false;
#endif /* USE_WINTHREAD */
    return true;
}

bool TLSKey::destroy()
{
#if USE_WINTHREAD
    BOOL status1 = !(TlsFree(TLS_pointer_key)); // fail is zero
#else
    int status1 = pthread_key_delete(TLS_pointer_key);
#endif /* USE_WINTHREAD */
    MALLOC_ASSERT(!status1, "The memory manager cannot delete tls key.");
    return status1==0;
}

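// Illustrative sketch (kept disabled, not part of the allocator): the portable TLS
// pattern that ThreadId and TLSKey build on, shown for the USE_PTHREAD branch only.
// The names exampleKey/exampleInit/exampleSet/exampleGet are hypothetical and exist
// solely to show how the TlsSetValue_func/TlsGetValue_func macros map onto the
// pthread API; it assumes tls_key_t aliases pthread_key_t in this configuration.
#if 0
static tls_key_t exampleKey;

static bool exampleInit()
{
    // pthread_key_create returns 0 on success; a non-NULL destructor would be
    // invoked with the stored value on thread exit, as done for TLS_pointer_key.
    return pthread_key_create( &exampleKey, NULL ) == 0;
}

static void  exampleSet(void *payload) { TlsSetValue_func( exampleKey, payload ); }
static void *exampleGet()              { return TlsGetValue_func( exampleKey ); }
#endif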
inline TLSData* TLSKey::getThreadMallocTLS() const
{
    return (TLSData *)TlsGetValue_func( TLS_pointer_key );
}

inline void TLSKey::setThreadMallocTLS( TLSData * newvalue ) {
    RecursiveMallocCallProtector scoped;
    TlsSetValue_func( TLS_pointer_key, newvalue );
}

/* The 'next' field in the block header has to maintain some invariants:
 *   it needs to be on a 16K boundary and the first field in the block.
 *   Any value stored there needs to have the lower 14 bits set to 0
 *   so that various asserts work. This means that if you want to smash this memory
 *   for debugging purposes you will need to obey this invariant.
 * The total size of the header needs to be a power of 2 to simplify
 * the alignment requirements. For now it is a 128 byte structure.
 * To avoid false sharing, the fields changed only locally are separated
 * from the fields changed by foreign threads.
 * Changing the size of the block header would require changing
 * some bin allocation sizes, in particular "fitting" sizes (see above).
 */
class Bin;
class StartupBlock;

class MemoryPool {
    // if no explicit grainsize is given, expect to see malloc in the user's pAlloc
    // and set a reasonably low granularity
    static const size_t defaultGranularity = estimatedCacheLineSize;

    MemoryPool();                  // deny
public:
    static MallocMutex  memPoolListLock;

    // list of all active pools is used to release
    // all TLS data on thread termination or library unload
    MemoryPool     *next,
                   *prev;
    ExtMemoryPool   extMemPool;
    BootStrapBlocks bootStrapBlocks;

    static void initDefaultPool();

    bool init(intptr_t poolId, const MemPoolPolicy* memPoolPolicy);
    bool reset();
    bool destroy();
    void onThreadShutdown(TLSData *tlsData);

    inline TLSData *getTLS(bool create);
    void clearTLS() { extMemPool.tlsPointerKey.setThreadMallocTLS(NULL); }

    Block *getEmptyBlock(size_t size);
    void returnEmptyBlock(Block *block, bool poolTheBlock);

    // get/put large object to/from local large object cache
    void *getFromLLOCache(TLSData *tls, size_t size, size_t alignment);
    void putToLLOCache(TLSData *tls, void *object);
};

static intptr_t defaultMemPool_space[sizeof(MemoryPool)/sizeof(intptr_t) +
                                     (sizeof(MemoryPool)%sizeof(intptr_t)? 1 : 0)];
static MemoryPool *defaultMemPool = (MemoryPool*)defaultMemPool_space;
const size_t MemoryPool::defaultGranularity;
// zero-initialized
MallocMutex  MemoryPool::memPoolListLock;
// TODO: move huge page status to the default pool, because it is its state
HugePagesStatus hugePages;
static bool usedBySrcIncluded = false;

// Padding helpers
template<size_t padd>
struct PaddingImpl {
    size_t       __padding[padd];
};

template<>
struct PaddingImpl<0> {};

template<int N>
struct Padding : PaddingImpl<N/sizeof(size_t)> {};

// Slab block is 16KB-aligned. To prevent false sharing, separate locally-accessed
// fields from fields commonly accessed by non-owner threads.
317 class GlobalBlockFields : public BlockI { 318 protected: 319 std::atomic<FreeObject*> publicFreeList; 320 Block *nextPrivatizable; 321 MemoryPool *poolPtr; 322 }; 323 324 class LocalBlockFields : public GlobalBlockFields, Padding<blockHeaderAlignment - sizeof(GlobalBlockFields)> { 325 protected: 326 Block *next; 327 Block *previous; /* Use double linked list to speed up removal */ 328 FreeObject *bumpPtr; /* Bump pointer moves from the end to the beginning of a block */ 329 FreeObject *freeList; 330 /* Pointer to local data for the owner thread. Used for fast finding tls 331 when releasing object from a block that current thread owned. 332 NULL for orphaned blocks. */ 333 TLSData *tlsPtr; 334 ThreadId ownerTid; /* the ID of the thread that owns or last owned the block */ 335 BackRefIdx backRefIdx; 336 uint16_t allocatedCount; /* Number of objects allocated (obviously by the owning thread) */ 337 uint16_t objectSize; 338 bool isFull; 339 340 friend class FreeBlockPool; 341 friend class StartupBlock; 342 friend class LifoList; 343 friend void *BootStrapBlocks::allocate(MemoryPool *, size_t); 344 friend bool OrphanedBlocks::cleanup(Backend*); 345 friend Block *MemoryPool::getEmptyBlock(size_t); 346 }; 347 348 // Use inheritance to guarantee that a user data start on next cache line. 349 // Can't use member for it, because when LocalBlockFields already on cache line, 350 // we must have no additional memory consumption for all compilers. 351 class Block : public LocalBlockFields, 352 Padding<2*blockHeaderAlignment - sizeof(LocalBlockFields)> { 353 public: 354 bool empty() const { 355 if (allocatedCount > 0) return false; 356 MALLOC_ASSERT(!isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT); 357 return true; 358 } 359 inline FreeObject* allocate(); 360 inline FreeObject *allocateFromFreeList(); 361 362 inline bool adjustFullness(); 363 void adjustPositionInBin(Bin* bin = NULL); 364 365 bool freeListNonNull() { return freeList; } 366 void freePublicObject(FreeObject *objectToFree); 367 inline void freeOwnObject(void *object); 368 void reset(); 369 void privatizePublicFreeList( bool reset = true ); 370 void restoreBumpPtr(); 371 void privatizeOrphaned(TLSData *tls, unsigned index); 372 bool readyToShare(); 373 void shareOrphaned(intptr_t binTag, unsigned index); 374 unsigned int getSize() const { 375 MALLOC_ASSERT(isStartupAllocObject() || objectSize<minLargeObjectSize, 376 "Invalid object size"); 377 return isStartupAllocObject()? 
0 : objectSize; 378 } 379 const BackRefIdx *getBackRefIdx() const { return &backRefIdx; } 380 inline bool isOwnedByCurrentThread() const; 381 bool isStartupAllocObject() const { return objectSize == startupAllocObjSizeMark; } 382 inline FreeObject *findObjectToFree(const void *object) const; 383 void checkFreePrecond(const void *object) const { 384 #if MALLOC_DEBUG 385 const char *msg = "Possible double free or heap corruption."; 386 // small objects are always at least sizeof(size_t) Byte aligned, 387 // try to check this before this dereference as for invalid objects 388 // this may be unreadable 389 MALLOC_ASSERT(isAligned(object, sizeof(size_t)), "Try to free invalid small object"); 390 // releasing to free slab 391 MALLOC_ASSERT(allocatedCount>0, msg); 392 // must not point to slab's header 393 MALLOC_ASSERT((uintptr_t)object - (uintptr_t)this >= sizeof(Block), msg); 394 if (startupAllocObjSizeMark == objectSize) // startup block 395 MALLOC_ASSERT(object<=bumpPtr, msg); 396 else { 397 // non-startup objects are 8 Byte aligned 398 MALLOC_ASSERT(isAligned(object, 8), "Try to free invalid small object"); 399 MALLOC_ASSERT(allocatedCount <= (slabSize-sizeof(Block))/objectSize 400 && (!bumpPtr || object>bumpPtr), msg); 401 FreeObject *toFree = findObjectToFree(object); 402 // check against head of freeList, as this is mostly 403 // expected after double free 404 MALLOC_ASSERT(toFree != freeList, msg); 405 // check against head of publicFreeList, to detect double free 406 // involving foreign thread 407 MALLOC_ASSERT(toFree != publicFreeList.load(std::memory_order_relaxed), msg); 408 } 409 #else 410 suppress_unused_warning(object); 411 #endif 412 } 413 void initEmptyBlock(TLSData *tls, size_t size); 414 size_t findObjectSize(void *object) const; 415 MemoryPool *getMemPool() const { return poolPtr; } // do not use on the hot path! 416 417 protected: 418 void cleanBlockHeader(); 419 420 private: 421 static const float emptyEnoughRatio; /* Threshold on free space needed to "reactivate" a block */ 422 423 inline FreeObject *allocateFromBumpPtr(); 424 inline FreeObject *findAllocatedObject(const void *address) const; 425 inline bool isProperlyPlaced(const void *object) const; 426 inline void markOwned(TLSData *tls) { 427 MALLOC_ASSERT(!tlsPtr, ASSERT_TEXT); 428 ownerTid = ThreadId(); /* save the ID of the current thread */ 429 tlsPtr = tls; 430 } 431 inline void markOrphaned() { 432 MALLOC_ASSERT(tlsPtr, ASSERT_TEXT); 433 tlsPtr = NULL; 434 } 435 436 friend class Bin; 437 friend class TLSData; 438 friend bool MemoryPool::destroy(); 439 }; 440 441 const float Block::emptyEnoughRatio = 1.0 / 4.0; 442 443 static_assert(sizeof(Block) <= 2*estimatedCacheLineSize, 444 "The class Block does not fit into 2 cache lines on this platform. 
" 445 "Defining USE_INTERNAL_TID may help to fix it."); 446 447 class Bin { 448 private: 449 Block *activeBlk; 450 std::atomic<Block*> mailbox; 451 MallocMutex mailLock; 452 453 public: 454 inline Block* getActiveBlock() const { return activeBlk; } 455 void resetActiveBlock() { activeBlk = NULL; } 456 bool activeBlockUnused() const { return activeBlk && !activeBlk->allocatedCount; } 457 inline void setActiveBlock(Block *block); 458 inline Block* setPreviousBlockActive(); 459 Block* getPrivatizedFreeListBlock(); 460 void moveBlockToFront(Block *block); 461 bool cleanPublicFreeLists(); 462 void processEmptyBlock(Block *block, bool poolTheBlock); 463 void addPublicFreeListBlock(Block* block); 464 465 void outofTLSBin(Block* block); 466 void verifyTLSBin(size_t size) const; 467 void pushTLSBin(Block* block); 468 469 void verifyInitState() const { 470 MALLOC_ASSERT( !activeBlk, ASSERT_TEXT ); 471 MALLOC_ASSERT( !mailbox.load(std::memory_order_relaxed), ASSERT_TEXT ); 472 } 473 474 friend void Block::freePublicObject (FreeObject *objectToFree); 475 }; 476 477 /********* End of the data structures **************/ 478 479 /* 480 * There are bins for all 8 byte aligned objects less than this segregated size; 8 bins in total 481 */ 482 const uint32_t minSmallObjectIndex = 0; 483 const uint32_t numSmallObjectBins = 8; 484 const uint32_t maxSmallObjectSize = 64; 485 486 /* 487 * There are 4 bins between each couple of powers of 2 [64-128-256-...] 488 * from maxSmallObjectSize till this size; 16 bins in total 489 */ 490 const uint32_t minSegregatedObjectIndex = minSmallObjectIndex+numSmallObjectBins; 491 const uint32_t numSegregatedObjectBins = 16; 492 const uint32_t maxSegregatedObjectSize = 1024; 493 494 /* 495 * And there are 5 bins with allocation sizes that are multiples of estimatedCacheLineSize 496 * and selected to fit 9, 6, 4, 3, and 2 allocations in a block. 497 */ 498 const uint32_t minFittingIndex = minSegregatedObjectIndex+numSegregatedObjectBins; 499 const uint32_t numFittingBins = 5; 500 501 const uint32_t fittingAlignment = estimatedCacheLineSize; 502 503 #define SET_FITTING_SIZE(N) ( (slabSize-sizeof(Block))/N ) & ~(fittingAlignment-1) 504 // For blockSize=16*1024, sizeof(Block)=2*estimatedCacheLineSize and fittingAlignment=estimatedCacheLineSize, 505 // the comments show the fitting sizes and the amounts left unused for estimatedCacheLineSize=64/128: 506 const uint32_t fittingSize1 = SET_FITTING_SIZE(9); // 1792/1792 128/000 507 const uint32_t fittingSize2 = SET_FITTING_SIZE(6); // 2688/2688 128/000 508 const uint32_t fittingSize3 = SET_FITTING_SIZE(4); // 4032/3968 128/256 509 const uint32_t fittingSize4 = SET_FITTING_SIZE(3); // 5376/5376 128/000 510 const uint32_t fittingSize5 = SET_FITTING_SIZE(2); // 8128/8064 000/000 511 #undef SET_FITTING_SIZE 512 513 /* 514 * The total number of thread-specific Block-based bins 515 */ 516 const uint32_t numBlockBins = minFittingIndex+numFittingBins; 517 518 /* 519 * Objects of this size and larger are considered large objects. 520 */ 521 const uint32_t minLargeObjectSize = fittingSize5 + 1; 522 523 /* 524 * Per-thread pool of slab blocks. Idea behind it is to not share with other 525 * threads memory that are likely in local cache(s) of our CPU. 
526 */ 527 class FreeBlockPool { 528 private: 529 std::atomic<Block*> head; 530 int size; 531 Backend *backend; 532 bool lastAccessMiss; 533 public: 534 static const int POOL_HIGH_MARK = 32; 535 static const int POOL_LOW_MARK = 8; 536 537 class ResOfGet { 538 ResOfGet(); 539 public: 540 Block* block; 541 bool lastAccMiss; 542 ResOfGet(Block *b, bool lastMiss) : block(b), lastAccMiss(lastMiss) {} 543 }; 544 545 // allocated in zero-initialized memory 546 FreeBlockPool(Backend *bknd) : backend(bknd) {} 547 ResOfGet getBlock(); 548 void returnBlock(Block *block); 549 bool externalCleanup(); // can be called by another thread 550 }; 551 552 template<int LOW_MARK, int HIGH_MARK> 553 class LocalLOCImpl { 554 private: 555 static const size_t MAX_TOTAL_SIZE = 4*1024*1024; 556 // TODO: can single-linked list be faster here? 557 LargeMemoryBlock *tail; // need it when do releasing on overflow 558 std::atomic<LargeMemoryBlock*> head; 559 size_t totalSize; 560 int numOfBlocks; 561 public: 562 bool put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool); 563 LargeMemoryBlock *get(size_t size); 564 bool externalCleanup(ExtMemoryPool *extMemPool); 565 #if __TBB_MALLOC_WHITEBOX_TEST 566 LocalLOCImpl() : head(NULL), tail(NULL), totalSize(0), numOfBlocks(0) {} 567 static size_t getMaxSize() { return MAX_TOTAL_SIZE; } 568 static const int LOC_HIGH_MARK = HIGH_MARK; 569 #else 570 // no ctor, object must be created in zero-initialized memory 571 #endif 572 }; 573 574 typedef LocalLOCImpl<8,32> LocalLOC; // set production code parameters 575 576 class TLSData : public TLSRemote { 577 MemoryPool *memPool; 578 public: 579 Bin bin[numBlockBinLimit]; 580 FreeBlockPool freeSlabBlocks; 581 LocalLOC lloc; 582 unsigned currCacheIdx; 583 private: 584 bool unused; 585 public: 586 TLSData(MemoryPool *mPool, Backend *bknd) : memPool(mPool), freeSlabBlocks(bknd) {} 587 MemoryPool *getMemPool() const { return memPool; } 588 Bin* getAllocationBin(size_t size); 589 void release(); 590 bool externalCleanup(bool cleanOnlyUnused, bool cleanBins) { 591 if (!unused && cleanOnlyUnused) return false; 592 // Heavy operation in terms of synchronization complexity, 593 // should be called only for the current thread 594 bool released = cleanBins ? cleanupBlockBins() : false; 595 // both cleanups to be called, and the order is not important 596 return released | lloc.externalCleanup(&memPool->extMemPool) | freeSlabBlocks.externalCleanup(); 597 } 598 bool cleanupBlockBins(); 599 void markUsed() { unused = false; } // called by owner when TLS touched 600 void markUnused() { unused = true; } // can be called by not owner thread 601 }; 602 603 TLSData *TLSKey::createTLS(MemoryPool *memPool, Backend *backend) 604 { 605 MALLOC_ASSERT( sizeof(TLSData) >= sizeof(Bin) * numBlockBins + sizeof(FreeBlockPool), ASSERT_TEXT ); 606 TLSData* tls = (TLSData*) memPool->bootStrapBlocks.allocate(memPool, sizeof(TLSData)); 607 if ( !tls ) 608 return NULL; 609 new(tls) TLSData(memPool, backend); 610 /* the block contains zeroes after bootStrapMalloc, so bins are initialized */ 611 #if MALLOC_DEBUG 612 for (uint32_t i = 0; i < numBlockBinLimit; i++) 613 tls->bin[i].verifyInitState(); 614 #endif 615 setThreadMallocTLS(tls); 616 memPool->extMemPool.allLocalCaches.registerThread(tls); 617 return tls; 618 } 619 620 bool TLSData::cleanupBlockBins() 621 { 622 bool released = false; 623 for (uint32_t i = 0; i < numBlockBinLimit; i++) { 624 released |= bin[i].cleanPublicFreeLists(); 625 // After cleaning public free lists, only the active block might be empty. 
626 // Do not use processEmptyBlock because it will just restore bumpPtr. 627 Block *block = bin[i].getActiveBlock(); 628 if (block && block->empty()) { 629 bin[i].outofTLSBin(block); 630 memPool->returnEmptyBlock(block, /*poolTheBlock=*/false); 631 released = true; 632 } 633 } 634 return released; 635 } 636 637 bool ExtMemoryPool::releaseAllLocalCaches() 638 { 639 // Iterate all registered TLS data and clean LLOC and Slab pools 640 bool released = allLocalCaches.cleanup(/*cleanOnlyUnused=*/false); 641 642 // Bins privatization is done only for the current thread 643 if (TLSData *tlsData = tlsPointerKey.getThreadMallocTLS()) 644 released |= tlsData->cleanupBlockBins(); 645 646 return released; 647 } 648 649 void AllLocalCaches::registerThread(TLSRemote *tls) 650 { 651 tls->prev = NULL; 652 MallocMutex::scoped_lock lock(listLock); 653 MALLOC_ASSERT(head!=tls, ASSERT_TEXT); 654 tls->next = head; 655 if (head) 656 head->prev = tls; 657 head = tls; 658 MALLOC_ASSERT(head->next!=head, ASSERT_TEXT); 659 } 660 661 void AllLocalCaches::unregisterThread(TLSRemote *tls) 662 { 663 MallocMutex::scoped_lock lock(listLock); 664 MALLOC_ASSERT(head, "Can't unregister thread: no threads are registered."); 665 if (head == tls) 666 head = tls->next; 667 if (tls->next) 668 tls->next->prev = tls->prev; 669 if (tls->prev) 670 tls->prev->next = tls->next; 671 MALLOC_ASSERT(!tls->next || tls->next->next!=tls->next, ASSERT_TEXT); 672 } 673 674 bool AllLocalCaches::cleanup(bool cleanOnlyUnused) 675 { 676 bool released = false; 677 { 678 MallocMutex::scoped_lock lock(listLock); 679 for (TLSRemote *curr=head; curr; curr=curr->next) 680 released |= static_cast<TLSData*>(curr)->externalCleanup(cleanOnlyUnused, /*cleanBins=*/false); 681 } 682 return released; 683 } 684 685 void AllLocalCaches::markUnused() 686 { 687 bool locked; 688 MallocMutex::scoped_lock lock(listLock, /*block=*/false, &locked); 689 if (!locked) // not wait for marking if someone doing something with it 690 return; 691 692 for (TLSRemote *curr=head; curr; curr=curr->next) 693 static_cast<TLSData*>(curr)->markUnused(); 694 } 695 696 #if MALLOC_CHECK_RECURSION 697 MallocMutex RecursiveMallocCallProtector::rmc_mutex; 698 pthread_t RecursiveMallocCallProtector::owner_thread; 699 void *RecursiveMallocCallProtector::autoObjPtr; 700 bool RecursiveMallocCallProtector::mallocRecursionDetected; 701 #if __FreeBSD__ 702 bool RecursiveMallocCallProtector::canUsePthread; 703 #endif 704 705 #endif 706 707 /*********** End code to provide thread ID and a TLS pointer **********/ 708 709 // Parameter for isLargeObject, keeps our expectations on memory origin. 710 // Assertions must use unknownMem to reliably report object invalidity. 
711 enum MemoryOrigin { 712 ourMem, // allocated by TBB allocator 713 unknownMem // can be allocated by system allocator or TBB allocator 714 }; 715 716 template<MemoryOrigin> bool isLargeObject(void *object); 717 static void *internalMalloc(size_t size); 718 static void internalFree(void *object); 719 static void *internalPoolMalloc(MemoryPool* mPool, size_t size); 720 static bool internalPoolFree(MemoryPool *mPool, void *object, size_t size); 721 722 #if !MALLOC_DEBUG 723 #if __INTEL_COMPILER || _MSC_VER 724 #define NOINLINE(decl) __declspec(noinline) decl 725 #define ALWAYSINLINE(decl) __forceinline decl 726 #elif __GNUC__ 727 #define NOINLINE(decl) decl __attribute__ ((noinline)) 728 #define ALWAYSINLINE(decl) decl __attribute__ ((always_inline)) 729 #else 730 #define NOINLINE(decl) decl 731 #define ALWAYSINLINE(decl) decl 732 #endif 733 734 static NOINLINE( bool doInitialization() ); 735 ALWAYSINLINE( bool isMallocInitialized() ); 736 737 #undef ALWAYSINLINE 738 #undef NOINLINE 739 #endif /* !MALLOC_DEBUG */ 740 741 742 /********* Now some rough utility code to deal with indexing the size bins. **************/ 743 744 /* 745 * Given a number return the highest non-zero bit in it. It is intended to work with 32-bit values only. 746 * Moreover, on IPF, for sake of simplicity and performance, it is narrowed to only serve for 64 to 1023. 747 * This is enough for current algorithm of distribution of sizes among bins. 748 * __TBB_Log2 is not used here to minimize dependencies on TBB specific sources. 749 */ 750 #if _WIN64 && _MSC_VER>=1400 && !__INTEL_COMPILER 751 extern "C" unsigned char _BitScanReverse( unsigned long* i, unsigned long w ); 752 #pragma intrinsic(_BitScanReverse) 753 #endif 754 static inline unsigned int highestBitPos(unsigned int n) 755 { 756 MALLOC_ASSERT( n>=64 && n<1024, ASSERT_TEXT ); // only needed for bsr array lookup, but always true 757 unsigned int pos; 758 #if __ARCH_x86_32||__ARCH_x86_64 759 760 # if __linux__||__APPLE__||__FreeBSD__||__NetBSD__||__OpenBSD__||__sun||__MINGW32__ 761 __asm__ ("bsr %1,%0" : "=r"(pos) : "r"(n)); 762 # elif (_WIN32 && (!_WIN64 || __INTEL_COMPILER)) 763 __asm 764 { 765 bsr eax, n 766 mov pos, eax 767 } 768 # elif _WIN64 && _MSC_VER>=1400 769 _BitScanReverse((unsigned long*)&pos, (unsigned long)n); 770 # else 771 # error highestBitPos() not implemented for this platform 772 # endif 773 #elif __arm__ 774 __asm__ __volatile__ 775 ( 776 "clz %0, %1\n" 777 "rsb %0, %0, %2\n" 778 :"=r" (pos) :"r" (n), "I" (31) 779 ); 780 #else 781 static unsigned int bsr[16] = {0/*N/A*/,6,7,7,8,8,8,8,9,9,9,9,9,9,9,9}; 782 pos = bsr[ n>>6 ]; 783 #endif /* __ARCH_* */ 784 return pos; 785 } 786 787 template<bool Is32Bit> 788 unsigned int getSmallObjectIndex(unsigned int size) 789 { 790 return (size-1)>>3; 791 } 792 template<> 793 unsigned int getSmallObjectIndex</*Is32Bit=*/false>(unsigned int size) 794 { 795 // For 64-bit malloc, 16 byte alignment is needed except for bin 0. 796 unsigned int result = (size-1)>>3; 797 if (result) result |= 1; // 0,1,3,5,7; bins 2,4,6 are not aligned to 16 bytes 798 return result; 799 } 800 /* 801 * Depending on indexRequest, for a given size return either the index into the bin 802 * for objects of this size, or the actual size of objects in this bin. 
803 */ 804 template<bool indexRequest> 805 static unsigned int getIndexOrObjectSize (unsigned int size) 806 { 807 if (size <= maxSmallObjectSize) { // selection from 8/16/24/32/40/48/56/64 808 unsigned int index = getSmallObjectIndex</*Is32Bit=*/(sizeof(size_t)<=4)>( size ); 809 /* Bin 0 is for 8 bytes, bin 1 is for 16, and so forth */ 810 return indexRequest ? index : (index+1)<<3; 811 } 812 else if (size <= maxSegregatedObjectSize ) { // 80/96/112/128 / 160/192/224/256 / 320/384/448/512 / 640/768/896/1024 813 unsigned int order = highestBitPos(size-1); // which group of bin sizes? 814 MALLOC_ASSERT( 6<=order && order<=9, ASSERT_TEXT ); 815 if (indexRequest) 816 return minSegregatedObjectIndex - (4*6) - 4 + (4*order) + ((size-1)>>(order-2)); 817 else { 818 unsigned int alignment = 128 >> (9-order); // alignment in the group 819 MALLOC_ASSERT( alignment==16 || alignment==32 || alignment==64 || alignment==128, ASSERT_TEXT ); 820 return alignUp(size,alignment); 821 } 822 } 823 else { 824 if( size <= fittingSize3 ) { 825 if( size <= fittingSize2 ) { 826 if( size <= fittingSize1 ) 827 return indexRequest ? minFittingIndex : fittingSize1; 828 else 829 return indexRequest ? minFittingIndex+1 : fittingSize2; 830 } else 831 return indexRequest ? minFittingIndex+2 : fittingSize3; 832 } else { 833 if( size <= fittingSize5 ) { 834 if( size <= fittingSize4 ) 835 return indexRequest ? minFittingIndex+3 : fittingSize4; 836 else 837 return indexRequest ? minFittingIndex+4 : fittingSize5; 838 } else { 839 MALLOC_ASSERT( 0,ASSERT_TEXT ); // this should not happen 840 return ~0U; 841 } 842 } 843 } 844 } 845 846 static unsigned int getIndex (unsigned int size) 847 { 848 return getIndexOrObjectSize</*indexRequest=*/true>(size); 849 } 850 851 static unsigned int getObjectSize (unsigned int size) 852 { 853 return getIndexOrObjectSize</*indexRequest=*/false>(size); 854 } 855 856 857 void *BootStrapBlocks::allocate(MemoryPool *memPool, size_t size) 858 { 859 FreeObject *result; 860 861 MALLOC_ASSERT( size == sizeof(TLSData), ASSERT_TEXT ); 862 863 { // Lock with acquire 864 MallocMutex::scoped_lock scoped_cs(bootStrapLock); 865 866 if( bootStrapObjectList) { 867 result = bootStrapObjectList; 868 bootStrapObjectList = bootStrapObjectList->next; 869 } else { 870 if (!bootStrapBlock) { 871 bootStrapBlock = memPool->getEmptyBlock(size); 872 if (!bootStrapBlock) return NULL; 873 } 874 result = bootStrapBlock->bumpPtr; 875 bootStrapBlock->bumpPtr = (FreeObject *)((uintptr_t)bootStrapBlock->bumpPtr - bootStrapBlock->objectSize); 876 if ((uintptr_t)bootStrapBlock->bumpPtr < (uintptr_t)bootStrapBlock+sizeof(Block)) { 877 bootStrapBlock->bumpPtr = NULL; 878 bootStrapBlock->next = bootStrapBlockUsed; 879 bootStrapBlockUsed = bootStrapBlock; 880 bootStrapBlock = NULL; 881 } 882 } 883 } // Unlock with release 884 885 memset (result, 0, size); 886 return (void*)result; 887 } 888 889 void BootStrapBlocks::free(void* ptr) 890 { 891 MALLOC_ASSERT( ptr, ASSERT_TEXT ); 892 { // Lock with acquire 893 MallocMutex::scoped_lock scoped_cs(bootStrapLock); 894 ((FreeObject*)ptr)->next = bootStrapObjectList; 895 bootStrapObjectList = (FreeObject*)ptr; 896 } // Unlock with release 897 } 898 899 void BootStrapBlocks::reset() 900 { 901 bootStrapBlock = bootStrapBlockUsed = NULL; 902 bootStrapObjectList = NULL; 903 } 904 905 #if !(FREELIST_NONBLOCKING) 906 static MallocMutex publicFreeListLock; // lock for changes of publicFreeList 907 #endif 908 909 /********* End rough utility code **************/ 910 911 /* LifoList assumes zero 
initialization so a vector of it can be created 912 * by just allocating some space with no call to constructor. 913 * On Linux, it seems to be necessary to avoid linking with C++ libraries. 914 * 915 * By usage convention there is no race on the initialization. */ 916 LifoList::LifoList( ) : top(NULL) 917 { 918 // MallocMutex assumes zero initialization 919 memset(&lock, 0, sizeof(MallocMutex)); 920 } 921 922 void LifoList::push(Block *block) 923 { 924 MallocMutex::scoped_lock scoped_cs(lock); 925 block->next = top; 926 top = block; 927 } 928 929 Block *LifoList::pop() 930 { 931 Block *block=NULL; 932 if (top) { 933 MallocMutex::scoped_lock scoped_cs(lock); 934 if (top) { 935 block = top; 936 top = block->next; 937 } 938 } 939 return block; 940 } 941 942 Block *LifoList::grab() 943 { 944 Block *block = NULL; 945 if (top) { 946 MallocMutex::scoped_lock scoped_cs(lock); 947 block = top; 948 top = NULL; 949 } 950 return block; 951 } 952 953 /********* Thread and block related code *************/ 954 955 template<bool poolDestroy> void AllLargeBlocksList::releaseAll(Backend *backend) { 956 LargeMemoryBlock *next, *lmb = loHead; 957 loHead = NULL; 958 959 for (; lmb; lmb = next) { 960 next = lmb->gNext; 961 if (poolDestroy) { 962 // as it's pool destruction, no need to return object to backend, 963 // only remove backrefs, as they are global 964 removeBackRef(lmb->backRefIdx); 965 } else { 966 // clean g(Next|Prev) to prevent removing lmb 967 // from AllLargeBlocksList inside returnLargeObject 968 lmb->gNext = lmb->gPrev = NULL; 969 backend->returnLargeObject(lmb); 970 } 971 } 972 } 973 974 TLSData* MemoryPool::getTLS(bool create) 975 { 976 TLSData* tls = extMemPool.tlsPointerKey.getThreadMallocTLS(); 977 if (create && !tls) 978 tls = extMemPool.tlsPointerKey.createTLS(this, &extMemPool.backend); 979 return tls; 980 } 981 982 /* 983 * Return the bin for the given size. 984 */ 985 inline Bin* TLSData::getAllocationBin(size_t size) 986 { 987 return bin + getIndex(size); 988 } 989 990 /* Return an empty uninitialized block in a non-blocking fashion. */ 991 Block *MemoryPool::getEmptyBlock(size_t size) 992 { 993 TLSData* tls = getTLS(/*create=*/false); 994 // try to use per-thread cache, if TLS available 995 FreeBlockPool::ResOfGet resOfGet = tls? 996 tls->freeSlabBlocks.getBlock() : FreeBlockPool::ResOfGet(NULL, false); 997 Block *result = resOfGet.block; 998 999 if (!result) { // not found in local cache, asks backend for slabs 1000 int num = resOfGet.lastAccMiss? 
Backend::numOfSlabAllocOnMiss : 1; 1001 BackRefIdx backRefIdx[Backend::numOfSlabAllocOnMiss]; 1002 1003 result = static_cast<Block*>(extMemPool.backend.getSlabBlock(num)); 1004 if (!result) return NULL; 1005 1006 if (!extMemPool.userPool()) 1007 for (int i=0; i<num; i++) { 1008 backRefIdx[i] = BackRefIdx::newBackRef(/*largeObj=*/false); 1009 if (backRefIdx[i].isInvalid()) { 1010 // roll back resource allocation 1011 for (int j=0; j<i; j++) 1012 removeBackRef(backRefIdx[j]); 1013 Block *b = result; 1014 for (int j=0; j<num; b=(Block*)((uintptr_t)b+slabSize), j++) 1015 extMemPool.backend.putSlabBlock(b); 1016 return NULL; 1017 } 1018 } 1019 // resources were allocated, register blocks 1020 Block *b = result; 1021 for (int i=0; i<num; b=(Block*)((uintptr_t)b+slabSize), i++) { 1022 // slab block in user's pool must have invalid backRefIdx 1023 if (extMemPool.userPool()) { 1024 new (&b->backRefIdx) BackRefIdx(); 1025 } else { 1026 setBackRef(backRefIdx[i], b); 1027 b->backRefIdx = backRefIdx[i]; 1028 } 1029 b->tlsPtr = tls; 1030 b->poolPtr = this; 1031 // all but first one go to per-thread pool 1032 if (i > 0) { 1033 MALLOC_ASSERT(tls, ASSERT_TEXT); 1034 tls->freeSlabBlocks.returnBlock(b); 1035 } 1036 } 1037 } 1038 MALLOC_ASSERT(result, ASSERT_TEXT); 1039 result->initEmptyBlock(tls, size); 1040 STAT_increment(getThreadId(), getIndex(result->objectSize), allocBlockNew); 1041 return result; 1042 } 1043 1044 void MemoryPool::returnEmptyBlock(Block *block, bool poolTheBlock) 1045 { 1046 block->reset(); 1047 if (poolTheBlock) { 1048 getTLS(/*create=*/false)->freeSlabBlocks.returnBlock(block); 1049 } else { 1050 // slab blocks in user's pools do not have valid backRefIdx 1051 if (!extMemPool.userPool()) 1052 removeBackRef(*(block->getBackRefIdx())); 1053 extMemPool.backend.putSlabBlock(block); 1054 } 1055 } 1056 1057 bool ExtMemoryPool::init(intptr_t poolId, rawAllocType rawAlloc, 1058 rawFreeType rawFree, size_t granularity, 1059 bool keepAllMemory, bool fixedPool) 1060 { 1061 this->poolId = poolId; 1062 this->rawAlloc = rawAlloc; 1063 this->rawFree = rawFree; 1064 this->granularity = granularity; 1065 this->keepAllMemory = keepAllMemory; 1066 this->fixedPool = fixedPool; 1067 this->delayRegsReleasing = false; 1068 if (!initTLS()) 1069 return false; 1070 loc.init(this); 1071 backend.init(this); 1072 MALLOC_ASSERT(isPoolValid(), NULL); 1073 return true; 1074 } 1075 1076 bool ExtMemoryPool::initTLS() { return tlsPointerKey.init(); } 1077 1078 bool MemoryPool::init(intptr_t poolId, const MemPoolPolicy *policy) 1079 { 1080 if (!extMemPool.init(poolId, policy->pAlloc, policy->pFree, 1081 policy->granularity? 
policy->granularity : defaultGranularity, 1082 policy->keepAllMemory, policy->fixedPool)) 1083 return false; 1084 { 1085 MallocMutex::scoped_lock lock(memPoolListLock); 1086 next = defaultMemPool->next; 1087 defaultMemPool->next = this; 1088 prev = defaultMemPool; 1089 if (next) 1090 next->prev = this; 1091 } 1092 return true; 1093 } 1094 1095 bool MemoryPool::reset() 1096 { 1097 MALLOC_ASSERT(extMemPool.userPool(), "No reset for the system pool."); 1098 // memory is not releasing during pool reset 1099 // TODO: mark regions to release unused on next reset() 1100 extMemPool.delayRegionsReleasing(true); 1101 1102 bootStrapBlocks.reset(); 1103 extMemPool.lmbList.releaseAll</*poolDestroy=*/false>(&extMemPool.backend); 1104 if (!extMemPool.reset()) 1105 return false; 1106 1107 if (!extMemPool.initTLS()) 1108 return false; 1109 extMemPool.delayRegionsReleasing(false); 1110 return true; 1111 } 1112 1113 bool MemoryPool::destroy() 1114 { 1115 #if __TBB_MALLOC_LOCACHE_STAT 1116 extMemPool.loc.reportStat(stdout); 1117 #endif 1118 #if __TBB_MALLOC_BACKEND_STAT 1119 extMemPool.backend.reportStat(stdout); 1120 #endif 1121 { 1122 MallocMutex::scoped_lock lock(memPoolListLock); 1123 // remove itself from global pool list 1124 if (prev) 1125 prev->next = next; 1126 if (next) 1127 next->prev = prev; 1128 } 1129 // slab blocks in non-default pool do not have backreferences, 1130 // only large objects do 1131 if (extMemPool.userPool()) 1132 extMemPool.lmbList.releaseAll</*poolDestroy=*/true>(&extMemPool.backend); 1133 else { 1134 // only one non-userPool() is supported now 1135 MALLOC_ASSERT(this==defaultMemPool, NULL); 1136 // There and below in extMemPool.destroy(), do not restore initial state 1137 // for user pool, because it's just about to be released. But for system 1138 // pool restoring, we do not want to do zeroing of it on subsequent reload. 
1139 bootStrapBlocks.reset(); 1140 extMemPool.orphanedBlocks.reset(); 1141 } 1142 return extMemPool.destroy(); 1143 } 1144 1145 void MemoryPool::onThreadShutdown(TLSData *tlsData) 1146 { 1147 if (tlsData) { // might be called for "empty" TLS 1148 tlsData->release(); 1149 bootStrapBlocks.free(tlsData); 1150 clearTLS(); 1151 } 1152 } 1153 1154 #if MALLOC_DEBUG 1155 void Bin::verifyTLSBin (size_t size) const 1156 { 1157 /* The debug version verifies the TLSBin as needed */ 1158 uint32_t objSize = getObjectSize(size); 1159 1160 if (activeBlk) { 1161 MALLOC_ASSERT( activeBlk->isOwnedByCurrentThread(), ASSERT_TEXT ); 1162 MALLOC_ASSERT( activeBlk->objectSize == objSize, ASSERT_TEXT ); 1163 #if MALLOC_DEBUG>1 1164 for (Block* temp = activeBlk->next; temp; temp=temp->next) { 1165 MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT ); 1166 MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT ); 1167 MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT ); 1168 MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT ); 1169 if (temp->next) { 1170 MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT ); 1171 } 1172 } 1173 for (Block* temp = activeBlk->previous; temp; temp=temp->previous) { 1174 MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT ); 1175 MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT ); 1176 MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT ); 1177 MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT ); 1178 if (temp->previous) { 1179 MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT ); 1180 } 1181 } 1182 #endif /* MALLOC_DEBUG>1 */ 1183 } 1184 } 1185 #else /* MALLOC_DEBUG */ 1186 inline void Bin::verifyTLSBin (size_t) const { } 1187 #endif /* MALLOC_DEBUG */ 1188 1189 /* 1190 * Add a block to the start of this tls bin list. 1191 */ 1192 void Bin::pushTLSBin(Block* block) 1193 { 1194 /* The objectSize should be defined and not a parameter 1195 because the function is applied to partially filled blocks as well */ 1196 unsigned int size = block->objectSize; 1197 1198 MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT ); 1199 MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT ); 1200 MALLOC_ASSERT( block->next == NULL, ASSERT_TEXT ); 1201 MALLOC_ASSERT( block->previous == NULL, ASSERT_TEXT ); 1202 1203 MALLOC_ASSERT( this, ASSERT_TEXT ); 1204 verifyTLSBin(size); 1205 1206 block->next = activeBlk; 1207 if( activeBlk ) { 1208 block->previous = activeBlk->previous; 1209 activeBlk->previous = block; 1210 if( block->previous ) 1211 block->previous->next = block; 1212 } else { 1213 activeBlk = block; 1214 } 1215 1216 verifyTLSBin(size); 1217 } 1218 1219 /* 1220 * Take a block out of its tls bin (e.g. before removal). 1221 */ 1222 void Bin::outofTLSBin(Block* block) 1223 { 1224 unsigned int size = block->objectSize; 1225 1226 MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT ); 1227 MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT ); 1228 1229 MALLOC_ASSERT( this, ASSERT_TEXT ); 1230 verifyTLSBin(size); 1231 1232 if (block == activeBlk) { 1233 activeBlk = block->previous? 
block->previous : block->next; 1234 } 1235 /* Unlink the block */ 1236 if (block->previous) { 1237 MALLOC_ASSERT( block->previous->next == block, ASSERT_TEXT ); 1238 block->previous->next = block->next; 1239 } 1240 if (block->next) { 1241 MALLOC_ASSERT( block->next->previous == block, ASSERT_TEXT ); 1242 block->next->previous = block->previous; 1243 } 1244 block->next = NULL; 1245 block->previous = NULL; 1246 1247 verifyTLSBin(size); 1248 } 1249 1250 Block* Bin::getPrivatizedFreeListBlock() 1251 { 1252 Block* block; 1253 MALLOC_ASSERT( this, ASSERT_TEXT ); 1254 // if this method is called, active block usage must be unsuccessful 1255 MALLOC_ASSERT( !activeBlk && !mailbox.load(std::memory_order_relaxed) || activeBlk && activeBlk->isFull, ASSERT_TEXT ); 1256 1257 // the counter should be changed STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList); 1258 if (!mailbox.load(std::memory_order_acquire)) // hotpath is empty mailbox 1259 return NULL; 1260 else { // mailbox is not empty, take lock and inspect it 1261 MallocMutex::scoped_lock scoped_cs(mailLock); 1262 block = mailbox.load(std::memory_order_relaxed); 1263 if( block ) { 1264 MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT ); 1265 MALLOC_ASSERT( !isNotForUse(block->nextPrivatizable), ASSERT_TEXT ); 1266 mailbox.store(block->nextPrivatizable, std::memory_order_relaxed); 1267 block->nextPrivatizable = (Block*) this; 1268 } 1269 } 1270 if( block ) { 1271 MALLOC_ASSERT( isSolidPtr(block->publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT ); 1272 block->privatizePublicFreeList(); 1273 block->adjustPositionInBin(this); 1274 } 1275 return block; 1276 } 1277 1278 void Bin::addPublicFreeListBlock(Block* block) 1279 { 1280 MallocMutex::scoped_lock scoped_cs(mailLock); 1281 block->nextPrivatizable = mailbox.load(std::memory_order_relaxed); 1282 mailbox.store(block, std::memory_order_relaxed); 1283 } 1284 1285 // Process publicly freed objects in all blocks and return empty blocks 1286 // to the backend in order to reduce overall footprint. 1287 bool Bin::cleanPublicFreeLists() 1288 { 1289 Block* block; 1290 if (!mailbox.load(std::memory_order_acquire)) 1291 return false; 1292 else { 1293 // Grab all the blocks in the mailbox 1294 MallocMutex::scoped_lock scoped_cs(mailLock); 1295 block = mailbox.load(std::memory_order_relaxed); 1296 mailbox.store(NULL, std::memory_order_relaxed); 1297 } 1298 bool released = false; 1299 while (block) { 1300 MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT ); 1301 Block* tmp = block->nextPrivatizable; 1302 block->nextPrivatizable = (Block*) this; 1303 block->privatizePublicFreeList(); 1304 if (block->empty()) { 1305 processEmptyBlock(block, /*poolTheBlock=*/false); 1306 released = true; 1307 } else 1308 block->adjustPositionInBin(this); 1309 block = tmp; 1310 } 1311 return released; 1312 } 1313 1314 bool Block::adjustFullness() 1315 { 1316 if (bumpPtr) { 1317 /* If we are still using a bump ptr for this block it is empty enough to use. */ 1318 STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough); 1319 isFull = false; 1320 } else { 1321 const float threshold = (slabSize - sizeof(Block)) * (1 - emptyEnoughRatio); 1322 /* allocatedCount shows how many objects in the block are in use; however it still counts 1323 * blocks freed by other threads; so prior call to privatizePublicFreeList() is recommended */ 1324 isFull = (allocatedCount*objectSize > threshold) ? 
true : false; 1325 #if COLLECT_STATISTICS 1326 if (isFull) 1327 STAT_increment(getThreadId(), getIndex(objectSize), examineNotEmpty); 1328 else 1329 STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough); 1330 #endif 1331 } 1332 return isFull; 1333 } 1334 1335 // This method resides in class Block, and not in class Bin, in order to avoid 1336 // calling getAllocationBin on a reasonably hot path in Block::freeOwnObject 1337 void Block::adjustPositionInBin(Bin* bin/*=NULL*/) 1338 { 1339 // If the block were full, but became empty enough to use, 1340 // move it to the front of the list 1341 if (isFull && !adjustFullness()) { 1342 if (!bin) 1343 bin = tlsPtr->getAllocationBin(objectSize); 1344 bin->moveBlockToFront(this); 1345 } 1346 } 1347 1348 /* Restore the bump pointer for an empty block that is planned to use */ 1349 void Block::restoreBumpPtr() 1350 { 1351 MALLOC_ASSERT( allocatedCount == 0, ASSERT_TEXT ); 1352 MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT ); 1353 STAT_increment(getThreadId(), getIndex(objectSize), freeRestoreBumpPtr); 1354 bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize); 1355 freeList = NULL; 1356 isFull = false; 1357 } 1358 1359 void Block::freeOwnObject(void *object) 1360 { 1361 tlsPtr->markUsed(); 1362 allocatedCount--; 1363 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 1364 #if COLLECT_STATISTICS 1365 // Note that getAllocationBin is not called on the hottest path with statistics off. 1366 if (tlsPtr->getAllocationBin(objectSize)->getActiveBlock() != this) 1367 STAT_increment(getThreadId(), getIndex(objectSize), freeToInactiveBlock); 1368 else 1369 STAT_increment(getThreadId(), getIndex(objectSize), freeToActiveBlock); 1370 #endif 1371 if (empty()) { 1372 // If the last object of a slab is freed, the slab cannot be marked full 1373 MALLOC_ASSERT(!isFull, ASSERT_TEXT); 1374 tlsPtr->getAllocationBin(objectSize)->processEmptyBlock(this, /*poolTheBlock=*/true); 1375 } else { // hot path 1376 FreeObject *objectToFree = findObjectToFree(object); 1377 objectToFree->next = freeList; 1378 freeList = objectToFree; 1379 adjustPositionInBin(); 1380 } 1381 } 1382 1383 void Block::freePublicObject (FreeObject *objectToFree) 1384 { 1385 FreeObject *localPublicFreeList; 1386 1387 MALLOC_ITT_SYNC_RELEASING(&publicFreeList); 1388 #if FREELIST_NONBLOCKING 1389 // TBB_REVAMP_TODO: make it non atomic in non-blocking scenario 1390 FreeObject *temp = publicFreeList.load(std::memory_order_acquire); 1391 do { 1392 localPublicFreeList = objectToFree->next = temp; 1393 // no backoff necessary because trying to make change, not waiting for a change 1394 } while( !publicFreeList.compare_exchange_strong(temp, objectToFree) ); 1395 #else 1396 STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList); 1397 { 1398 MallocMutex::scoped_lock scoped_cs(publicFreeListLock); 1399 localPublicFreeList = objectToFree->next = publicFreeList; 1400 publicFreeList = objectToFree; 1401 } 1402 #endif 1403 1404 if( localPublicFreeList==NULL ) { 1405 // if the block is abandoned, its nextPrivatizable pointer should be UNUSABLE 1406 // otherwise, it should point to the bin the block belongs to. 
1407 // reading nextPrivatizable is thread-safe below, because: 1408 // 1) the executing thread atomically got publicFreeList==NULL and changed it to non-NULL; 1409 // 2) only owning thread can change it back to NULL, 1410 // 3) but it can not be done until the block is put to the mailbox 1411 // So the executing thread is now the only one that can change nextPrivatizable 1412 if( !isNotForUse(nextPrivatizable) ) { 1413 MALLOC_ASSERT( nextPrivatizable!=NULL, ASSERT_TEXT ); 1414 Bin* theBin = (Bin*) nextPrivatizable; 1415 #if MALLOC_DEBUG && TBB_REVAMP_TODO 1416 // FIXME: The thread that returns the block is not the block's owner. 1417 // The below assertion compares 'theBin' against the caller's local bin, thus, it always fails. 1418 // Need to find a way to get the correct remote bin for comparison. 1419 { // check that nextPrivatizable points to the bin the block belongs to 1420 uint32_t index = getIndex( objectSize ); 1421 TLSData* tls = getThreadMallocTLS(); 1422 MALLOC_ASSERT( theBin==tls->bin+index, ASSERT_TEXT ); 1423 } 1424 #endif // MALLOC_DEBUG 1425 theBin->addPublicFreeListBlock(this); 1426 } 1427 } 1428 STAT_increment(getThreadId(), ThreadCommonCounters, freeToOtherThread); 1429 STAT_increment(ownerTid, getIndex(objectSize), freeByOtherThread); 1430 } 1431 1432 // Make objects freed by other threads available for use again 1433 void Block::privatizePublicFreeList( bool reset ) 1434 { 1435 FreeObject *localPublicFreeList; 1436 // If reset is false, publicFreeList should not be zeroed but set to UNUSABLE 1437 // to properly synchronize with other threads freeing objects to this slab. 1438 const intptr_t endMarker = reset ? 0 : UNUSABLE; 1439 1440 // Only the owner thread may reset the pointer to NULL 1441 MALLOC_ASSERT( isOwnedByCurrentThread() || !reset, ASSERT_TEXT ); 1442 #if FREELIST_NONBLOCKING 1443 localPublicFreeList = publicFreeList.exchange((FreeObject*)endMarker); 1444 #else 1445 STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList); 1446 { 1447 MallocMutex::scoped_lock scoped_cs(publicFreeListLock); 1448 localPublicFreeList = publicFreeList; 1449 publicFreeList = endMarker; 1450 } 1451 #endif 1452 MALLOC_ITT_SYNC_ACQUIRED(&publicFreeList); 1453 MALLOC_ASSERT( !(reset && isNotForUse(publicFreeList)), ASSERT_TEXT ); 1454 1455 // publicFreeList must have been UNUSABLE or valid, but not NULL 1456 MALLOC_ASSERT( localPublicFreeList!=NULL, ASSERT_TEXT ); 1457 if( isSolidPtr(localPublicFreeList) ) { 1458 MALLOC_ASSERT( allocatedCount <= (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 1459 /* other threads did not change the counter freeing our blocks */ 1460 allocatedCount--; 1461 FreeObject *temp = localPublicFreeList; 1462 while( isSolidPtr(temp->next) ){ // the list will end with either NULL or UNUSABLE 1463 temp = temp->next; 1464 allocatedCount--; 1465 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 1466 } 1467 /* merge with local freeList */ 1468 temp->next = freeList; 1469 freeList = localPublicFreeList; 1470 STAT_increment(getThreadId(), getIndex(objectSize), allocPrivatized); 1471 } 1472 } 1473 1474 void Block::privatizeOrphaned(TLSData *tls, unsigned index) 1475 { 1476 Bin* bin = tls->bin + index; 1477 STAT_increment(getThreadId(), index, allocBlockPublic); 1478 next = NULL; 1479 previous = NULL; 1480 MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) != NULL, ASSERT_TEXT ); 1481 /* There is not a race here since no other thread owns this block */ 1482 markOwned(tls); 1483 // It is safe to change 
nextPrivatizable, as publicFreeList is not null 1484 MALLOC_ASSERT( isNotForUse(nextPrivatizable), ASSERT_TEXT ); 1485 nextPrivatizable = (Block*)bin; 1486 // the next call is required to change publicFreeList to 0 1487 privatizePublicFreeList(); 1488 if( empty() ) { 1489 restoreBumpPtr(); 1490 } else { 1491 adjustFullness(); // check the block fullness and set isFull 1492 } 1493 MALLOC_ASSERT( !isNotForUse(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT ); 1494 } 1495 1496 1497 bool Block::readyToShare() 1498 { 1499 FreeObject* oldVal = NULL; 1500 #if FREELIST_NONBLOCKING 1501 publicFreeList.compare_exchange_strong(oldVal, (FreeObject*)UNUSABLE); 1502 #else 1503 STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList); 1504 { 1505 MallocMutex::scoped_lock scoped_cs(publicFreeListLock); 1506 if ( (oldVal=publicFreeList)==NULL ) 1507 (intptr_t&)(publicFreeList) = UNUSABLE; 1508 } 1509 #endif 1510 return oldVal==NULL; 1511 } 1512 1513 void Block::shareOrphaned(intptr_t binTag, unsigned index) 1514 { 1515 MALLOC_ASSERT( binTag, ASSERT_TEXT ); 1516 // unreferenced formal parameter warning 1517 tbb::detail::suppress_unused_warning(index); 1518 STAT_increment(getThreadId(), index, freeBlockPublic); 1519 markOrphaned(); 1520 if ((intptr_t)nextPrivatizable==binTag) { 1521 // First check passed: the block is not in mailbox yet. 1522 // Need to set publicFreeList to non-zero, so other threads 1523 // will not change nextPrivatizable and it can be zeroed. 1524 if ( !readyToShare() ) { 1525 // another thread freed an object; we need to wait until it finishes. 1526 // There is no need for exponential backoff, as the wait here is not for a lock; 1527 // but need to yield, so the thread we wait has a chance to run. 1528 // TODO: add a pause to also be friendly to hyperthreads 1529 int count = 256; 1530 while( (intptr_t)const_cast<Block* volatile &>(nextPrivatizable)==binTag ) { 1531 if (--count==0) { 1532 do_yield(); 1533 count = 256; 1534 } 1535 } 1536 } 1537 } 1538 MALLOC_ASSERT( publicFreeList.load(std::memory_order_relaxed) !=NULL, ASSERT_TEXT ); 1539 // now it is safe to change our data 1540 previous = NULL; 1541 // it is caller responsibility to ensure that the list of blocks 1542 // formed by nextPrivatizable pointers is kept consistent if required. 1543 // if only called from thread shutdown code, it does not matter. 1544 (intptr_t&)(nextPrivatizable) = UNUSABLE; 1545 } 1546 1547 void Block::cleanBlockHeader() 1548 { 1549 next = NULL; 1550 previous = NULL; 1551 freeList = NULL; 1552 allocatedCount = 0; 1553 isFull = false; 1554 tlsPtr = NULL; 1555 1556 publicFreeList.store(NULL, std::memory_order_relaxed); 1557 } 1558 1559 void Block::initEmptyBlock(TLSData *tls, size_t size) 1560 { 1561 // Having getIndex and getObjectSize called next to each other 1562 // allows better compiler optimization as they basically share the code. 1563 unsigned int index = getIndex(size); 1564 unsigned int objSz = getObjectSize(size); 1565 1566 cleanBlockHeader(); 1567 objectSize = objSz; 1568 markOwned(tls); 1569 // bump pointer should be prepared for first allocation - thus mode it down to objectSize 1570 bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize); 1571 1572 // each block should have the address where the head of the list of "privatizable" blocks is kept 1573 // the only exception is a block for boot strap which is initialized when TLS is yet NULL 1574 nextPrivatizable = tls? 
(Block*)(tls->bin + index) : NULL; 1575 TRACEF(( "[ScalableMalloc trace] Empty block %p is initialized, owner is %ld, objectSize is %d, bumpPtr is %p\n", 1576 this, tlsPtr ? getThreadId() : -1, objectSize, bumpPtr )); 1577 } 1578 1579 Block *OrphanedBlocks::get(TLSData *tls, unsigned int size) 1580 { 1581 // TODO: try to use index from getAllocationBin 1582 unsigned int index = getIndex(size); 1583 Block *block = bins[index].pop(); 1584 if (block) { 1585 MALLOC_ITT_SYNC_ACQUIRED(bins+index); 1586 block->privatizeOrphaned(tls, index); 1587 } 1588 return block; 1589 } 1590 1591 void OrphanedBlocks::put(intptr_t binTag, Block *block) 1592 { 1593 unsigned int index = getIndex(block->getSize()); 1594 block->shareOrphaned(binTag, index); 1595 MALLOC_ITT_SYNC_RELEASING(bins+index); 1596 bins[index].push(block); 1597 } 1598 1599 void OrphanedBlocks::reset() 1600 { 1601 for (uint32_t i=0; i<numBlockBinLimit; i++) 1602 new (bins+i) LifoList(); 1603 } 1604 1605 bool OrphanedBlocks::cleanup(Backend* backend) 1606 { 1607 bool released = false; 1608 for (uint32_t i=0; i<numBlockBinLimit; i++) { 1609 Block* block = bins[i].grab(); 1610 MALLOC_ITT_SYNC_ACQUIRED(bins+i); 1611 while (block) { 1612 Block* next = block->next; 1613 block->privatizePublicFreeList( /*reset=*/false ); // do not set publicFreeList to NULL 1614 if (block->empty()) { 1615 block->reset(); 1616 // slab blocks in user's pools do not have valid backRefIdx 1617 if (!backend->inUserPool()) 1618 removeBackRef(*(block->getBackRefIdx())); 1619 backend->putSlabBlock(block); 1620 released = true; 1621 } else { 1622 MALLOC_ITT_SYNC_RELEASING(bins+i); 1623 bins[i].push(block); 1624 } 1625 block = next; 1626 } 1627 } 1628 return released; 1629 } 1630 1631 FreeBlockPool::ResOfGet FreeBlockPool::getBlock() 1632 { 1633 Block *b = head.exchange(NULL); 1634 1635 if (b) { 1636 size--; 1637 Block *newHead = b->next; 1638 lastAccessMiss = false; 1639 head.store(newHead, std::memory_order_release); 1640 } else { 1641 lastAccessMiss = true; 1642 } 1643 return ResOfGet(b, lastAccessMiss); 1644 } 1645 1646 void FreeBlockPool::returnBlock(Block *block) 1647 { 1648 MALLOC_ASSERT( size <= POOL_HIGH_MARK, ASSERT_TEXT ); 1649 Block *localHead = head.exchange(NULL); 1650 1651 if (!localHead) { 1652 size = 0; // head was stolen by externalClean, correct size accordingly 1653 } else if (size == POOL_HIGH_MARK) { 1654 // release cold blocks and add hot one, 1655 // so keep POOL_LOW_MARK-1 blocks and add new block to head 1656 Block *headToFree = localHead, *helper; 1657 for (int i=0; i<POOL_LOW_MARK-2; i++) 1658 headToFree = headToFree->next; 1659 Block *last = headToFree; 1660 headToFree = headToFree->next; 1661 last->next = NULL; 1662 size = POOL_LOW_MARK-1; 1663 for (Block *currBl = headToFree; currBl; currBl = helper) { 1664 helper = currBl->next; 1665 // slab blocks in user's pools do not have valid backRefIdx 1666 if (!backend->inUserPool()) 1667 removeBackRef(currBl->backRefIdx); 1668 backend->putSlabBlock(currBl); 1669 } 1670 } 1671 size++; 1672 block->next = localHead; 1673 head.store(block, std::memory_order_release); 1674 } 1675 1676 bool FreeBlockPool::externalCleanup() 1677 { 1678 Block *helper; 1679 bool released = false; 1680 1681 for (Block *currBl=head.exchange(NULL); currBl; currBl=helper) { 1682 helper = currBl->next; 1683 // slab blocks in user's pools do not have valid backRefIdx 1684 if (!backend->inUserPool()) 1685 removeBackRef(currBl->backRefIdx); 1686 backend->putSlabBlock(currBl); 1687 released = true; 1688 } 1689 return released; 1690 } 
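/* Illustrative sketch (kept disabled, not part of the allocator): the high/low
 * water-mark trimming used by FreeBlockPool::returnBlock, restated on a plain
 * singly-linked list. Node, keepCount and release are hypothetical stand-ins for
 * Block, POOL_LOW_MARK-1 and the backend hand-back (which in the real code also
 * removes the back-reference for non-user pools). */
#if 0
struct Node { Node *next; };

// Keep the first (hottest) keepCount nodes of the list; hand the cold tail back
// one node at a time through release(). Returns the retained head.
template<typename ReleaseFunc>
Node *trimColdTail(Node *head, int keepCount, ReleaseFunc release)
{
    Node *last = head;
    for (int i = 0; last && i < keepCount - 1; i++)
        last = last->next;
    if (!last)
        return head;              // fewer than keepCount nodes, nothing to trim
    Node *cold = last->next;
    last->next = NULL;            // cut the cold tail off the cached list
    while (cold) {                // return cold nodes, mirroring putSlabBlock()
        Node *next = cold->next;
        release(cold);
        cold = next;
    }
    return head;
}
#endif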
1691
1692 /* Prepare the block for returning to FreeBlockPool */
1693 void Block::reset()
1694 {
1695     // it is caller's responsibility to ensure no data is lost before calling this
1696     MALLOC_ASSERT( allocatedCount==0, ASSERT_TEXT );
1697     MALLOC_ASSERT( !isSolidPtr(publicFreeList.load(std::memory_order_relaxed)), ASSERT_TEXT );
1698     if (!isStartupAllocObject())
1699         STAT_increment(getThreadId(), getIndex(objectSize), freeBlockBack);
1700
1701     cleanBlockHeader();
1702
1703     nextPrivatizable = NULL;
1704
1705     objectSize = 0;
1706     // for an empty block, bump pointer should point right after the end of the block
1707     bumpPtr = (FreeObject *)((uintptr_t)this + slabSize);
1708 }
1709
1710 inline void Bin::setActiveBlock (Block *block)
1711 {
1712 //  MALLOC_ASSERT( bin, ASSERT_TEXT );
1713     MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
1714     // it is the caller's responsibility to keep the bin consistent (i.e. ensure this block is in the bin list)
1715     activeBlk = block;
1716 }
1717
1718 inline Block* Bin::setPreviousBlockActive()
1719 {
1720     MALLOC_ASSERT( activeBlk, ASSERT_TEXT );
1721     Block* temp = activeBlk->previous;
1722     if( temp ) {
1723         MALLOC_ASSERT( !(temp->isFull), ASSERT_TEXT );
1724         activeBlk = temp;
1725     }
1726     return temp;
1727 }
1728
1729 inline bool Block::isOwnedByCurrentThread() const {
1730     return tlsPtr && ownerTid.isCurrentThreadId();
1731 }
1732
1733 FreeObject *Block::findObjectToFree(const void *object) const
1734 {
1735     FreeObject *objectToFree;
1736     // Due to aligned allocations, a pointer passed to scalable_free
1737     // might differ from the address of the internally allocated object.
1738     // Small objects, however, should always be fine.
1739     if (objectSize <= maxSegregatedObjectSize)
1740         objectToFree = (FreeObject*)object;
1741     // "Fitting size" allocations are suspicious if aligned higher than naturally
1742     else {
1743         if ( ! isAligned(object,2*fittingAlignment) )
1744             // TODO: the above check is questionable - it gives false negatives in ~50% of cases,
1745             // so it might even be slower on average than unconditional use of findAllocatedObject.
1746             // here it should be a "real" object
1747             objectToFree = (FreeObject*)object;
1748         else
1749             // here the object can be an aligned address, so apply additional checks
1750             objectToFree = findAllocatedObject(object);
1751         MALLOC_ASSERT( isAligned(objectToFree,fittingAlignment), ASSERT_TEXT );
1752     }
1753     MALLOC_ASSERT( isProperlyPlaced(objectToFree), ASSERT_TEXT );
1754
1755     return objectToFree;
1756 }
1757
1758 void TLSData::release()
1759 {
1760     memPool->extMemPool.allLocalCaches.unregisterThread(this);
1761     externalCleanup(/*cleanOnlyUnused=*/false, /*cleanBins=*/false);
1762
1763     for (unsigned index = 0; index < numBlockBins; index++) {
1764         Block *activeBlk = bin[index].getActiveBlock();
1765         if (!activeBlk)
1766             continue;
1767         Block *threadlessBlock = activeBlk->previous;
1768         while (threadlessBlock) {
1769             Block *threadBlock = threadlessBlock->previous;
1770             if (threadlessBlock->empty()) {
1771                 /* we are destroying the thread, so do not use its block pool */
1772                 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false);
1773             } else {
1774                 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock);
1775             }
1776             threadlessBlock = threadBlock;
1777         }
1778         threadlessBlock = activeBlk;
1779         while (threadlessBlock) {
1780             Block *threadBlock = threadlessBlock->next;
1781             if (threadlessBlock->empty()) {
1782                 /* we are destroying the thread, so do not use its block pool */
1783                 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false);
1784             } else {
1785                 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock);
1786             }
1787             threadlessBlock = threadBlock;
1788         }
1789         bin[index].resetActiveBlock();
1790     }
1791 }
1792
1793
1794 #if MALLOC_CHECK_RECURSION
1795 // TODO: Use dedicated heap for this
1796
1797 /*
1798  * It's a special kind of allocation that can be used when malloc is
1799  * not available (either during startup or when malloc was already called and
1800  * we are, say, inside a pthread_setspecific call).
1801  * A block can contain objects of different sizes;
1802  * allocations are performed by moving the bump pointer and increasing the object counter,
1803  * and releasing is done via the counter of objects allocated in the block,
1804  * or by moving the bump pointer back if the released object is on the boundary.
1805  * TODO: make the bump pointer grow in the same backward direction as all the others.
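*/
// Illustrative, stand-alone sketch (not from the tbbmalloc sources) of the
// startup-allocation scheme described in the comment above; the class name and
// the fixed 4096-byte capacity are hypothetical. Objects are carved out by a
// forward-moving bump pointer, each object is prefixed with its size so that
// msize() can be answered, and space is reclaimed either when the counter of
// live objects drops to zero or when the most recently allocated object is freed.
#if 0 // not compiled; illustration only
#include <cstddef>

class ToyStartupArena {
    alignas(sizeof(size_t)) char buf[4096];
    char  *bump = buf;        // next free byte
    size_t liveObjects = 0;
public:
    void *allocate(size_t size) {
        size = (size + sizeof(size_t) - 1) & ~(sizeof(size_t) - 1);    // align up to a word
        size_t need = size + sizeof(size_t);                           // + size prefix
        if (need > (size_t)(buf + sizeof(buf) - bump)) return nullptr; // arena exhausted
        *(size_t*)bump = size;                   // keep the object size at negative offset
        void *obj = bump + sizeof(size_t);
        bump += need;
        ++liveObjects;
        return obj;
    }
    static size_t msize(void *ptr) { return *((size_t*)ptr - 1); }
    void free(void *ptr) {
        if (--liveObjects == 0)
            bump = buf;                          // the whole arena is empty again
        else if ((char*)ptr + msize(ptr) == bump)
            bump = (char*)ptr - sizeof(size_t);  // last object freed: roll the bump pointer back
        // otherwise the space is reclaimed only when liveObjects reaches zero
    }
};
#endif
/* (end of illustrative sketch)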
1806 */ 1807 1808 class StartupBlock : public Block { 1809 size_t availableSize() const { 1810 return slabSize - ((uintptr_t)bumpPtr - (uintptr_t)this); 1811 } 1812 static StartupBlock *getBlock(); 1813 public: 1814 static FreeObject *allocate(size_t size); 1815 static size_t msize(void *ptr) { return *((size_t*)ptr - 1); } 1816 void free(void *ptr); 1817 }; 1818 1819 static MallocMutex startupMallocLock; 1820 static StartupBlock *firstStartupBlock; 1821 1822 StartupBlock *StartupBlock::getBlock() 1823 { 1824 BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/false); 1825 if (backRefIdx.isInvalid()) return NULL; 1826 1827 StartupBlock *block = static_cast<StartupBlock*>( 1828 defaultMemPool->extMemPool.backend.getSlabBlock(1)); 1829 if (!block) return NULL; 1830 1831 block->cleanBlockHeader(); 1832 setBackRef(backRefIdx, block); 1833 block->backRefIdx = backRefIdx; 1834 // use startupAllocObjSizeMark to mark objects from startup block marker 1835 block->objectSize = startupAllocObjSizeMark; 1836 block->bumpPtr = (FreeObject *)((uintptr_t)block + sizeof(StartupBlock)); 1837 return block; 1838 } 1839 1840 FreeObject *StartupBlock::allocate(size_t size) 1841 { 1842 FreeObject *result; 1843 StartupBlock *newBlock = NULL; 1844 bool newBlockUnused = false; 1845 1846 /* Objects must be aligned on their natural bounds, 1847 and objects bigger than word on word's bound. */ 1848 size = alignUp(size, sizeof(size_t)); 1849 // We need size of an object to implement msize. 1850 size_t reqSize = size + sizeof(size_t); 1851 // speculatively allocates newBlock to try avoid allocation while holding lock 1852 /* TODO: The function is called when malloc nested call is detected, 1853 so simultaneous usage from different threads seems unlikely. 1854 If pre-allocation is found useless, the code might be simplified. 
*/ 1855 if (!firstStartupBlock || firstStartupBlock->availableSize() < reqSize) { 1856 newBlock = StartupBlock::getBlock(); 1857 if (!newBlock) return NULL; 1858 } 1859 { 1860 MallocMutex::scoped_lock scoped_cs(startupMallocLock); 1861 // Re-check whether we need a new block (conditions might have changed) 1862 if (!firstStartupBlock || firstStartupBlock->availableSize() < reqSize) { 1863 if (!newBlock) { 1864 newBlock = StartupBlock::getBlock(); 1865 if (!newBlock) return NULL; 1866 } 1867 newBlock->next = (Block*)firstStartupBlock; 1868 if (firstStartupBlock) 1869 firstStartupBlock->previous = (Block*)newBlock; 1870 firstStartupBlock = newBlock; 1871 } else 1872 newBlockUnused = true; 1873 result = firstStartupBlock->bumpPtr; 1874 firstStartupBlock->allocatedCount++; 1875 firstStartupBlock->bumpPtr = 1876 (FreeObject *)((uintptr_t)firstStartupBlock->bumpPtr + reqSize); 1877 } 1878 if (newBlock && newBlockUnused) 1879 defaultMemPool->returnEmptyBlock(newBlock, /*poolTheBlock=*/false); 1880 1881 // keep object size at the negative offset 1882 *((size_t*)result) = size; 1883 return (FreeObject*)((size_t*)result+1); 1884 } 1885 1886 void StartupBlock::free(void *ptr) 1887 { 1888 Block* blockToRelease = NULL; 1889 { 1890 MallocMutex::scoped_lock scoped_cs(startupMallocLock); 1891 1892 MALLOC_ASSERT(firstStartupBlock, ASSERT_TEXT); 1893 MALLOC_ASSERT(startupAllocObjSizeMark==objectSize 1894 && allocatedCount>0, ASSERT_TEXT); 1895 MALLOC_ASSERT((uintptr_t)ptr>=(uintptr_t)this+sizeof(StartupBlock) 1896 && (uintptr_t)ptr+StartupBlock::msize(ptr)<=(uintptr_t)this+slabSize, 1897 ASSERT_TEXT); 1898 if (0 == --allocatedCount) { 1899 if (this == firstStartupBlock) 1900 firstStartupBlock = (StartupBlock*)firstStartupBlock->next; 1901 if (previous) 1902 previous->next = next; 1903 if (next) 1904 next->previous = previous; 1905 blockToRelease = this; 1906 } else if ((uintptr_t)ptr + StartupBlock::msize(ptr) == (uintptr_t)bumpPtr) { 1907 // last object in the block released 1908 FreeObject *newBump = (FreeObject*)((size_t*)ptr - 1); 1909 MALLOC_ASSERT((uintptr_t)newBump>(uintptr_t)this+sizeof(StartupBlock), 1910 ASSERT_TEXT); 1911 bumpPtr = newBump; 1912 } 1913 } 1914 if (blockToRelease) { 1915 blockToRelease->previous = blockToRelease->next = NULL; 1916 defaultMemPool->returnEmptyBlock(blockToRelease, /*poolTheBlock=*/false); 1917 } 1918 } 1919 1920 #endif /* MALLOC_CHECK_RECURSION */ 1921 1922 /********* End thread related code *************/ 1923 1924 /********* Library initialization *************/ 1925 1926 //! Value indicating the state of initialization. 1927 /* 0 = initialization not started. 1928 * 1 = initialization started but not finished. 1929 * 2 = initialization finished. 1930 * In theory, we only need values 0 and 2. But value 1 is nonetheless 1931 * useful for detecting errors in the double-check pattern. 1932 */ 1933 static std::atomic<intptr_t> mallocInitialized{0}; // implicitly initialized to 0 1934 static MallocMutex initMutex; 1935 1936 /** The leading "\0" is here so that applying "strings" to the binary 1937 delivers a clean result. */ 1938 static char VersionString[] = "\0" TBBMALLOC_VERSION_STRINGS; 1939 1940 #if USE_PTHREAD && (__TBB_SOURCE_DIRECTLY_INCLUDED || __TBB_USE_DLOPEN_REENTRANCY_WORKAROUND) 1941 1942 /* Decrease race interval between dynamic library unloading and pthread key 1943 destructor. Protect only Pthreads with supported unloading. 
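*/
// Illustrative, stand-alone sketch (not from the tbbmalloc sources) of the
// protocol implemented by ShutdownSync below; the names here are hypothetical.
// The counter holds the number of threads currently inside the TLS destructor;
// process shutdown adds a large negative sentinel so that destructors started
// afterwards bail out, and then waits until the in-flight ones have finished.
#if 0 // not compiled; illustration only
#include <atomic>
#include <cstdint>
#include <thread>

namespace toy_shutdown {
    std::atomic<intptr_t> flag{0};
    const intptr_t skipDtor = INTPTR_MIN/2;

    bool dtorStart() {
        if (flag.load(std::memory_order_acquire) < 0)
            return false;       // shutdown already in progress
        if (++flag <= 0) {      // lost the race against processExit()
            flag.fetch_sub(1);  // undo our increment
            return false;
        }
        return true;
    }
    void dtorDone() { flag.fetch_sub(1); }

    void processExit() {
        if (flag.fetch_add(skipDtor) != 0)            // some destructors are still running
            while (flag.load(std::memory_order_acquire) != skipDtor)
                std::this_thread::yield();            // wait for them to call dtorDone()
    }
}
#endif
/* (end of illustrative sketch)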
*/
1944 class ShutdownSync {
1945 /* flag is the number of threads in pthread key dtor body
1946    (i.e., between threadDtorStart() and threadDtorDone())
1947    or the signal to skip dtor, if flag < 0 */
1948     std::atomic<intptr_t> flag;
1949     static const intptr_t skipDtor = INTPTR_MIN/2;
1950 public:
1951     void init() { flag.store(0, std::memory_order_release); }
1952 /* Assume that fewer than 2*abs(skipDtor) threads ever call threadDtorStart()
1953    simultaneously, so flag never becomes negative because of that. */
1954     bool threadDtorStart() {
1955         if (flag.load(std::memory_order_acquire) < 0)
1956             return false;
1957         if (++flag <= 0) { // note that the new value is returned
1958             flag.fetch_sub(1); // we spoiled flag, so restore it
1959             return false;
1960         }
1961         return true;
1962     }
1963     void threadDtorDone() {
1964         flag.fetch_sub(1);
1965     }
1966     void processExit() {
1967         if (flag.fetch_add(skipDtor) != 0) {
1968             SpinWaitUntilEq(flag, skipDtor);
1969         }
1970     }
1971 };
1972
1973 #else
1974
1975 class ShutdownSync {
1976 public:
1977     void init() { }
1978     bool threadDtorStart() { return true; }
1979     void threadDtorDone() { }
1980     void processExit() { }
1981 };
1982
1983 #endif // USE_PTHREAD && (__TBB_SOURCE_DIRECTLY_INCLUDED || __TBB_USE_DLOPEN_REENTRANCY_WORKAROUND)
1984
1985 static ShutdownSync shutdownSync;
1986
1987 inline bool isMallocInitialized() {
1988     // Load must have acquire fence; otherwise a thread taking the "initialized" path
1989     // might perform textually later loads *before* mallocInitialized becomes 2.
1990     return 2 == mallocInitialized.load(std::memory_order_acquire);
1991 }
1992
1993 bool isMallocInitializedExt() {
1994     return isMallocInitialized();
1995 }
1996
1997 /* Caller is responsible for ensuring this routine is called exactly once. */
1998 extern "C" void MallocInitializeITT() {
1999 #if __TBB_USE_ITT_NOTIFY
2000     if (!usedBySrcIncluded)
2001         tbb::detail::r1::__TBB_load_ittnotify();
2002 #endif
2003 }
2004
2005 void MemoryPool::initDefaultPool() {
2006     hugePages.init();
2007 }
2008
2009 /*
2010  * Allocator initialization routine;
2011  * it is called lazily on the very first scalable_malloc call.
2012  */
2013 static bool initMemoryManager()
2014 {
2015     TRACEF(( "[ScalableMalloc trace] sizeof(Block) is %d (expected 128); sizeof(uintptr_t) is %d\n",
2016              sizeof(Block), sizeof(uintptr_t) ));
2017     MALLOC_ASSERT( 2*blockHeaderAlignment == sizeof(Block), ASSERT_TEXT );
2018     MALLOC_ASSERT( sizeof(FreeObject) == sizeof(void*), ASSERT_TEXT );
2019     MALLOC_ASSERT( isAligned(defaultMemPool, sizeof(intptr_t)),
2020                    "Memory pool must be void*-aligned for atomics to work over aligned arguments.");
2021
2022 #if USE_WINTHREAD
2023     const size_t granularity = 64*1024; // granularity of VirtualAlloc
2024 #else
2025     // POSIX.1-2001-compliant way to get page size
2026     const size_t granularity = sysconf(_SC_PAGESIZE);
2027 #endif
2028     if (!defaultMemPool) {
2029         // Do not rely on static constructors; do the assignment in case
2030         // the library's static data section is not yet initialized at this call.
2031 defaultMemPool = (MemoryPool*)defaultMemPool_space; 2032 } 2033 bool initOk = defaultMemPool-> 2034 extMemPool.init(0, NULL, NULL, granularity, 2035 /*keepAllMemory=*/false, /*fixedPool=*/false); 2036 // TODO: extMemPool.init() to not allocate memory 2037 if (!initOk || !initBackRefMaster(&defaultMemPool->extMemPool.backend) || !ThreadId::init()) 2038 return false; 2039 MemoryPool::initDefaultPool(); 2040 // init() is required iff initMemoryManager() is called 2041 // after mallocProcessShutdownNotification() 2042 shutdownSync.init(); 2043 #if COLLECT_STATISTICS 2044 initStatisticsCollection(); 2045 #endif 2046 return true; 2047 } 2048 2049 static bool GetBoolEnvironmentVariable(const char* name) { 2050 return tbb::detail::r1::GetBoolEnvironmentVariable(name); 2051 } 2052 2053 //! Ensures that initMemoryManager() is called once and only once. 2054 /** Does not return until initMemoryManager() has been completed by a thread. 2055 There is no need to call this routine if mallocInitialized==2 . */ 2056 static bool doInitialization() 2057 { 2058 MallocMutex::scoped_lock lock( initMutex ); 2059 if (mallocInitialized.load(std::memory_order_relaxed)!=2) { 2060 MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==0, ASSERT_TEXT ); 2061 mallocInitialized.store(1, std::memory_order_relaxed); 2062 RecursiveMallocCallProtector scoped; 2063 if (!initMemoryManager()) { 2064 mallocInitialized.store(0, std::memory_order_relaxed); // restore and out 2065 return false; 2066 } 2067 #ifdef MALLOC_EXTRA_INITIALIZATION 2068 MALLOC_EXTRA_INITIALIZATION; 2069 #endif 2070 #if MALLOC_CHECK_RECURSION 2071 RecursiveMallocCallProtector::detectNaiveOverload(); 2072 #endif 2073 MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==1, ASSERT_TEXT ); 2074 // Store must have release fence, otherwise mallocInitialized==2 2075 // might become remotely visible before side effects of 2076 // initMemoryManager() become remotely visible. 
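// Illustrative sketch (not from the tbbmalloc sources): the release store just
// below pairs with the acquire load in isMallocInitialized(). A minimal model of
// this double-checked initialization pattern, with hypothetical names:
#if 0 // not compiled; illustration only
#include <atomic>
#include <mutex>

std::atomic<int> initState{0};   // 0 = not started, 1 = in progress, 2 = done
std::mutex initLock;

void lazyInit() {
    if (initState.load(std::memory_order_acquire) == 2)
        return;                                        // fast path: already initialized
    std::lock_guard<std::mutex> guard(initLock);
    if (initState.load(std::memory_order_relaxed) != 2) {
        initState.store(1, std::memory_order_relaxed);
        // ... perform the real one-time initialization here ...
        initState.store(2, std::memory_order_release); // publish its side effects
    }
}
#endif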
2077 mallocInitialized.store(2, std::memory_order_release); 2078 if( GetBoolEnvironmentVariable("TBB_VERSION") ) { 2079 fputs(VersionString+1,stderr); 2080 hugePages.printStatus(); 2081 } 2082 } 2083 /* It can't be 0 or I would have initialized it */ 2084 MALLOC_ASSERT( mallocInitialized.load(std::memory_order_relaxed)==2, ASSERT_TEXT ); 2085 return true; 2086 } 2087 2088 /********* End library initialization *************/ 2089 2090 /********* The malloc show begins *************/ 2091 2092 2093 FreeObject *Block::allocateFromFreeList() 2094 { 2095 FreeObject *result; 2096 2097 if (!freeList) return NULL; 2098 2099 result = freeList; 2100 MALLOC_ASSERT( result, ASSERT_TEXT ); 2101 2102 freeList = result->next; 2103 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 2104 allocatedCount++; 2105 STAT_increment(getThreadId(), getIndex(objectSize), allocFreeListUsed); 2106 2107 return result; 2108 } 2109 2110 FreeObject *Block::allocateFromBumpPtr() 2111 { 2112 FreeObject *result = bumpPtr; 2113 if (result) { 2114 bumpPtr = (FreeObject *) ((uintptr_t) bumpPtr - objectSize); 2115 if ( (uintptr_t)bumpPtr < (uintptr_t)this+sizeof(Block) ) { 2116 bumpPtr = NULL; 2117 } 2118 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT ); 2119 allocatedCount++; 2120 STAT_increment(getThreadId(), getIndex(objectSize), allocBumpPtrUsed); 2121 } 2122 return result; 2123 } 2124 2125 inline FreeObject* Block::allocate() 2126 { 2127 MALLOC_ASSERT( isOwnedByCurrentThread(), ASSERT_TEXT ); 2128 2129 /* for better cache locality, first looking in the free list. */ 2130 if ( FreeObject *result = allocateFromFreeList() ) { 2131 return result; 2132 } 2133 MALLOC_ASSERT( !freeList, ASSERT_TEXT ); 2134 2135 /* if free list is empty, try thread local bump pointer allocation. */ 2136 if ( FreeObject *result = allocateFromBumpPtr() ) { 2137 return result; 2138 } 2139 MALLOC_ASSERT( !bumpPtr, ASSERT_TEXT ); 2140 2141 /* the block is considered full. */ 2142 isFull = true; 2143 return NULL; 2144 } 2145 2146 size_t Block::findObjectSize(void *object) const 2147 { 2148 size_t blSize = getSize(); 2149 #if MALLOC_CHECK_RECURSION 2150 // Currently, there is no aligned allocations from startup blocks, 2151 // so we can return just StartupBlock::msize(). 2152 // TODO: This must be extended if we add aligned allocation from startup blocks. 
2153 if (!blSize) 2154 return StartupBlock::msize(object); 2155 #endif 2156 // object can be aligned, so real size can be less than block's 2157 size_t size = 2158 blSize - ((uintptr_t)object - (uintptr_t)findObjectToFree(object)); 2159 MALLOC_ASSERT(size>0 && size<minLargeObjectSize, ASSERT_TEXT); 2160 return size; 2161 } 2162 2163 void Bin::moveBlockToFront(Block *block) 2164 { 2165 /* move the block to the front of the bin */ 2166 if (block == activeBlk) return; 2167 outofTLSBin(block); 2168 pushTLSBin(block); 2169 } 2170 2171 void Bin::processEmptyBlock(Block *block, bool poolTheBlock) 2172 { 2173 if (block != activeBlk) { 2174 /* We are not using this block; return it to the pool */ 2175 outofTLSBin(block); 2176 block->getMemPool()->returnEmptyBlock(block, poolTheBlock); 2177 } else { 2178 /* all objects are free - let's restore the bump pointer */ 2179 block->restoreBumpPtr(); 2180 } 2181 } 2182 2183 template<int LOW_MARK, int HIGH_MARK> 2184 bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool) 2185 { 2186 const size_t size = object->unalignedSize; 2187 // not spoil cache with too large object, that can cause its total cleanup 2188 if (size > MAX_TOTAL_SIZE) 2189 return false; 2190 LargeMemoryBlock *localHead = head.exchange(NULL); 2191 2192 object->prev = NULL; 2193 object->next = localHead; 2194 if (localHead) 2195 localHead->prev = object; 2196 else { 2197 // those might not be cleaned during local cache stealing, correct them 2198 totalSize = 0; 2199 numOfBlocks = 0; 2200 tail = object; 2201 } 2202 localHead = object; 2203 totalSize += size; 2204 numOfBlocks++; 2205 // must meet both size and number of cached objects constrains 2206 if (totalSize > MAX_TOTAL_SIZE || numOfBlocks >= HIGH_MARK) { 2207 // scanning from tail until meet conditions 2208 while (totalSize > MAX_TOTAL_SIZE || numOfBlocks > LOW_MARK) { 2209 totalSize -= tail->unalignedSize; 2210 numOfBlocks--; 2211 tail = tail->prev; 2212 } 2213 LargeMemoryBlock *headToRelease = tail->next; 2214 tail->next = NULL; 2215 2216 extMemPool->freeLargeObjectList(headToRelease); 2217 } 2218 2219 head.store(localHead, std::memory_order_release); 2220 return true; 2221 } 2222 2223 template<int LOW_MARK, int HIGH_MARK> 2224 LargeMemoryBlock *LocalLOCImpl<LOW_MARK, HIGH_MARK>::get(size_t size) 2225 { 2226 LargeMemoryBlock *localHead, *res = NULL; 2227 2228 if (size > MAX_TOTAL_SIZE) 2229 return NULL; 2230 2231 // TBB_REVAMP_TODO: review this line 2232 if (!head.load(std::memory_order_acquire) || (localHead = head.exchange(NULL)) == NULL) { 2233 // do not restore totalSize, numOfBlocks and tail at this point, 2234 // as they are used only in put(), where they must be restored 2235 return NULL; 2236 } 2237 2238 for (LargeMemoryBlock *curr = localHead; curr; curr=curr->next) { 2239 if (curr->unalignedSize == size) { 2240 res = curr; 2241 if (curr->next) 2242 curr->next->prev = curr->prev; 2243 else 2244 tail = curr->prev; 2245 if (curr != localHead) 2246 curr->prev->next = curr->next; 2247 else 2248 localHead = curr->next; 2249 totalSize -= size; 2250 numOfBlocks--; 2251 break; 2252 } 2253 } 2254 2255 head.store(localHead, std::memory_order_release); 2256 return res; 2257 } 2258 2259 template<int LOW_MARK, int HIGH_MARK> 2260 bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::externalCleanup(ExtMemoryPool *extMemPool) 2261 { 2262 if (LargeMemoryBlock *localHead = head.exchange(NULL)) { 2263 extMemPool->freeLargeObjectList(localHead); 2264 return true; 2265 } 2266 return false; 2267 } 2268 2269 void 
*MemoryPool::getFromLLOCache(TLSData* tls, size_t size, size_t alignment) 2270 { 2271 LargeMemoryBlock *lmb = NULL; 2272 2273 size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr); 2274 size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+alignment); 2275 if (allocationSize < size) // allocationSize is wrapped around after alignToBin 2276 return NULL; 2277 MALLOC_ASSERT(allocationSize >= alignment, "Overflow must be checked before."); 2278 2279 if (tls) { 2280 tls->markUsed(); 2281 lmb = tls->lloc.get(allocationSize); 2282 } 2283 if (!lmb) 2284 lmb = extMemPool.mallocLargeObject(this, allocationSize); 2285 2286 if (lmb) { 2287 // doing shuffle we suppose that alignment offset guarantees 2288 // that different cache lines are in use 2289 MALLOC_ASSERT(alignment >= estimatedCacheLineSize, ASSERT_TEXT); 2290 2291 void *alignedArea = (void*)alignUp((uintptr_t)lmb+headersSize, alignment); 2292 uintptr_t alignedRight = 2293 alignDown((uintptr_t)lmb+lmb->unalignedSize - size, alignment); 2294 // Has some room to shuffle object between cache lines? 2295 // Note that alignedRight and alignedArea are aligned at alignment. 2296 unsigned ptrDelta = alignedRight - (uintptr_t)alignedArea; 2297 if (ptrDelta && tls) { // !tls is cold path 2298 // for the hot path of alignment==estimatedCacheLineSize, 2299 // allow compilers to use shift for division 2300 // (since estimatedCacheLineSize is a power-of-2 constant) 2301 unsigned numOfPossibleOffsets = alignment == estimatedCacheLineSize? 2302 ptrDelta / estimatedCacheLineSize : 2303 ptrDelta / alignment; 2304 unsigned myCacheIdx = ++tls->currCacheIdx; 2305 unsigned offset = myCacheIdx % numOfPossibleOffsets; 2306 2307 // Move object to a cache line with an offset that is different from 2308 // previous allocation. This supposedly allows us to use cache 2309 // associativity more efficiently. 2310 alignedArea = (void*)((uintptr_t)alignedArea + offset*alignment); 2311 } 2312 MALLOC_ASSERT((uintptr_t)lmb+lmb->unalignedSize >= 2313 (uintptr_t)alignedArea+size, "Object doesn't fit the block."); 2314 LargeObjectHdr *header = (LargeObjectHdr*)alignedArea-1; 2315 header->memoryBlock = lmb; 2316 header->backRefIdx = lmb->backRefIdx; 2317 setBackRef(header->backRefIdx, header); 2318 2319 lmb->objectSize = size; 2320 2321 MALLOC_ASSERT( isLargeObject<unknownMem>(alignedArea), ASSERT_TEXT ); 2322 MALLOC_ASSERT( isAligned(alignedArea, alignment), ASSERT_TEXT ); 2323 2324 return alignedArea; 2325 } 2326 return NULL; 2327 } 2328 2329 void MemoryPool::putToLLOCache(TLSData *tls, void *object) 2330 { 2331 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2332 // overwrite backRefIdx to simplify double free detection 2333 header->backRefIdx = BackRefIdx(); 2334 2335 if (tls) { 2336 tls->markUsed(); 2337 if (tls->lloc.put(header->memoryBlock, &extMemPool)) 2338 return; 2339 } 2340 extMemPool.freeLargeObject(header->memoryBlock); 2341 } 2342 2343 /* 2344 * All aligned allocations fall into one of the following categories: 2345 * 1. if both request size and alignment are <= maxSegregatedObjectSize, 2346 * we just align the size up, and request this amount, because for every size 2347 * aligned to some power of 2, the allocated object is at least that aligned. 2348 * 2. for size<minLargeObjectSize, check if already guaranteed fittingAlignment is enough. 2349 * 3. if size+alignment<minLargeObjectSize, we take an object of fittingSizeN and align 2350 * its address up; given such pointer, scalable_free could find the real object. 
2351 * Wrapping of size+alignment is impossible because maximal allowed 2352 * alignment plus minLargeObjectSize can't lead to wrapping. 2353 * 4. otherwise, aligned large object is allocated. 2354 */ 2355 static void *allocateAligned(MemoryPool *memPool, size_t size, size_t alignment) 2356 { 2357 MALLOC_ASSERT( isPowerOfTwo(alignment), ASSERT_TEXT ); 2358 2359 if (!isMallocInitialized()) 2360 if (!doInitialization()) 2361 return NULL; 2362 2363 void *result; 2364 if (size<=maxSegregatedObjectSize && alignment<=maxSegregatedObjectSize) 2365 result = internalPoolMalloc(memPool, alignUp(size? size: sizeof(size_t), alignment)); 2366 else if (size<minLargeObjectSize) { 2367 if (alignment<=fittingAlignment) 2368 result = internalPoolMalloc(memPool, size); 2369 else if (size+alignment < minLargeObjectSize) { 2370 void *unaligned = internalPoolMalloc(memPool, size+alignment); 2371 if (!unaligned) return NULL; 2372 result = alignUp(unaligned, alignment); 2373 } else 2374 goto LargeObjAlloc; 2375 } else { 2376 LargeObjAlloc: 2377 TLSData *tls = memPool->getTLS(/*create=*/true); 2378 // take into account only alignment that are higher then natural 2379 result = 2380 memPool->getFromLLOCache(tls, size, largeObjectAlignment>alignment? 2381 largeObjectAlignment: alignment); 2382 } 2383 2384 MALLOC_ASSERT( isAligned(result, alignment), ASSERT_TEXT ); 2385 return result; 2386 } 2387 2388 static void *reallocAligned(MemoryPool *memPool, void *ptr, 2389 size_t newSize, size_t alignment = 0) 2390 { 2391 void *result; 2392 size_t copySize; 2393 2394 if (isLargeObject<ourMem>(ptr)) { 2395 LargeMemoryBlock* lmb = ((LargeObjectHdr *)ptr - 1)->memoryBlock; 2396 copySize = lmb->unalignedSize-((uintptr_t)ptr-(uintptr_t)lmb); 2397 2398 // Apply different strategies if size decreases 2399 if (newSize <= copySize && (0 == alignment || isAligned(ptr, alignment))) { 2400 2401 // For huge objects (that do not fit in backend cache), keep the same space unless 2402 // the new size is at least twice smaller 2403 bool isMemoryBlockHuge = copySize > memPool->extMemPool.backend.getMaxBinnedSize(); 2404 size_t threshold = isMemoryBlockHuge ? copySize / 2 : 0; 2405 if (newSize > threshold) { 2406 lmb->objectSize = newSize; 2407 return ptr; 2408 } 2409 // TODO: For large objects suitable for the backend cache, 2410 // split out the excessive part and put it to the backend. 2411 } 2412 // Reallocate for real 2413 copySize = lmb->objectSize; 2414 #if BACKEND_HAS_MREMAP 2415 if (void *r = memPool->extMemPool.remap(ptr, copySize, newSize, 2416 alignment < largeObjectAlignment ? largeObjectAlignment : alignment)) 2417 return r; 2418 #endif 2419 result = alignment ? allocateAligned(memPool, newSize, alignment) : 2420 internalPoolMalloc(memPool, newSize); 2421 2422 } else { 2423 Block* block = (Block *)alignDown(ptr, slabSize); 2424 copySize = block->findObjectSize(ptr); 2425 2426 // TODO: Move object to another bin if size decreases and the current bin is "empty enough". 2427 // Currently, in case of size decreasing, old pointer is returned 2428 if (newSize <= copySize && (0==alignment || isAligned(ptr, alignment))) { 2429 return ptr; 2430 } else { 2431 result = alignment ? allocateAligned(memPool, newSize, alignment) : 2432 internalPoolMalloc(memPool, newSize); 2433 } 2434 } 2435 if (result) { 2436 memcpy(result, ptr, copySize < newSize ? 
copySize : newSize); 2437 internalPoolFree(memPool, ptr, 0); 2438 } 2439 return result; 2440 } 2441 2442 /* A predicate checks if an object is properly placed inside its block */ 2443 inline bool Block::isProperlyPlaced(const void *object) const 2444 { 2445 return 0 == ((uintptr_t)this + slabSize - (uintptr_t)object) % objectSize; 2446 } 2447 2448 /* Finds the real object inside the block */ 2449 FreeObject *Block::findAllocatedObject(const void *address) const 2450 { 2451 // calculate offset from the end of the block space 2452 uint16_t offset = (uintptr_t)this + slabSize - (uintptr_t)address; 2453 MALLOC_ASSERT( offset<=slabSize-sizeof(Block), ASSERT_TEXT ); 2454 // find offset difference from a multiple of allocation size 2455 offset %= objectSize; 2456 // and move the address down to where the real object starts. 2457 return (FreeObject*)((uintptr_t)address - (offset? objectSize-offset: 0)); 2458 } 2459 2460 /* 2461 * Bad dereference caused by a foreign pointer is possible only here, not earlier in call chain. 2462 * Separate function isolates SEH code, as it has bad influence on compiler optimization. 2463 */ 2464 static inline BackRefIdx safer_dereference (const BackRefIdx *ptr) 2465 { 2466 BackRefIdx id; 2467 #if _MSC_VER 2468 __try { 2469 #endif 2470 id = *ptr; 2471 #if _MSC_VER 2472 } __except( GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION? 2473 EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ) { 2474 id = BackRefIdx(); 2475 } 2476 #endif 2477 return id; 2478 } 2479 2480 template<MemoryOrigin memOrigin> 2481 bool isLargeObject(void *object) 2482 { 2483 if (!isAligned(object, largeObjectAlignment)) 2484 return false; 2485 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2486 BackRefIdx idx = (memOrigin == unknownMem) ? 2487 safer_dereference(&header->backRefIdx) : header->backRefIdx; 2488 2489 return idx.isLargeObject() 2490 // in valid LargeObjectHdr memoryBlock is not NULL 2491 && header->memoryBlock 2492 // in valid LargeObjectHdr memoryBlock points somewhere before header 2493 // TODO: more strict check 2494 && (uintptr_t)header->memoryBlock < (uintptr_t)header 2495 && getBackRef(idx) == header; 2496 } 2497 2498 static inline bool isSmallObject (void *ptr) 2499 { 2500 Block* expectedBlock = (Block*)alignDown(ptr, slabSize); 2501 const BackRefIdx* idx = expectedBlock->getBackRefIdx(); 2502 2503 bool isSmall = expectedBlock == getBackRef(safer_dereference(idx)); 2504 if (isSmall) 2505 expectedBlock->checkFreePrecond(ptr); 2506 return isSmall; 2507 } 2508 2509 /**** Check if an object was allocated by scalable_malloc ****/ 2510 static inline bool isRecognized (void* ptr) 2511 { 2512 return defaultMemPool->extMemPool.backend.ptrCanBeValid(ptr) && 2513 (isLargeObject<unknownMem>(ptr) || isSmallObject(ptr)); 2514 } 2515 2516 static inline void freeSmallObject(void *object) 2517 { 2518 /* mask low bits to get the block */ 2519 Block *block = (Block *)alignDown(object, slabSize); 2520 block->checkFreePrecond(object); 2521 2522 #if MALLOC_CHECK_RECURSION 2523 if (block->isStartupAllocObject()) { 2524 ((StartupBlock *)block)->free(object); 2525 return; 2526 } 2527 #endif 2528 if (block->isOwnedByCurrentThread()) { 2529 block->freeOwnObject(object); 2530 } else { /* Slower path to add to the shared list, the allocatedCount is updated by the owner thread in malloc. 
*/ 2531 FreeObject *objectToFree = block->findObjectToFree(object); 2532 block->freePublicObject(objectToFree); 2533 } 2534 } 2535 2536 static void *internalPoolMalloc(MemoryPool* memPool, size_t size) 2537 { 2538 Bin* bin; 2539 Block * mallocBlock; 2540 2541 if (!memPool) return NULL; 2542 2543 if (!size) size = sizeof(size_t); 2544 2545 TLSData *tls = memPool->getTLS(/*create=*/true); 2546 2547 /* Allocate a large object */ 2548 if (size >= minLargeObjectSize) 2549 return memPool->getFromLLOCache(tls, size, largeObjectAlignment); 2550 2551 if (!tls) return NULL; 2552 2553 tls->markUsed(); 2554 /* 2555 * Get an element in thread-local array corresponding to the given size; 2556 * It keeps ptr to the active block for allocations of this size 2557 */ 2558 bin = tls->getAllocationBin(size); 2559 if ( !bin ) return NULL; 2560 2561 /* Get a block to try to allocate in. */ 2562 for( mallocBlock = bin->getActiveBlock(); mallocBlock; 2563 mallocBlock = bin->setPreviousBlockActive() ) // the previous block should be empty enough 2564 { 2565 if( FreeObject *result = mallocBlock->allocate() ) 2566 return result; 2567 } 2568 2569 /* 2570 * else privatize publicly freed objects in some block and allocate from it 2571 */ 2572 mallocBlock = bin->getPrivatizedFreeListBlock(); 2573 if (mallocBlock) { 2574 MALLOC_ASSERT( mallocBlock->freeListNonNull(), ASSERT_TEXT ); 2575 if ( FreeObject *result = mallocBlock->allocateFromFreeList() ) 2576 return result; 2577 /* Else something strange happened, need to retry from the beginning; */ 2578 TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in public free list; reentering.\n" )); 2579 return internalPoolMalloc(memPool, size); 2580 } 2581 2582 /* 2583 * no suitable own blocks, try to get a partial block that some other thread has discarded. 2584 */ 2585 mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size); 2586 while (mallocBlock) { 2587 bin->pushTLSBin(mallocBlock); 2588 bin->setActiveBlock(mallocBlock); // TODO: move under the below condition? 2589 if( FreeObject *result = mallocBlock->allocate() ) 2590 return result; 2591 mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size); 2592 } 2593 2594 /* 2595 * else try to get a new empty block 2596 */ 2597 mallocBlock = memPool->getEmptyBlock(size); 2598 if (mallocBlock) { 2599 bin->pushTLSBin(mallocBlock); 2600 bin->setActiveBlock(mallocBlock); 2601 if( FreeObject *result = mallocBlock->allocate() ) 2602 return result; 2603 /* Else something strange happened, need to retry from the beginning; */ 2604 TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in empty block; reentering.\n" )); 2605 return internalPoolMalloc(memPool, size); 2606 } 2607 /* 2608 * else nothing works so return NULL 2609 */ 2610 TRACEF(( "[ScalableMalloc trace] No memory found, returning NULL.\n" )); 2611 return NULL; 2612 } 2613 2614 // When size==0 (i.e. unknown), detect here whether the object is large. 2615 // For size is known and < minLargeObjectSize, we still need to check 2616 // if the actual object is large, because large objects might be used 2617 // for aligned small allocations. 2618 static bool internalPoolFree(MemoryPool *memPool, void *object, size_t size) 2619 { 2620 if (!memPool || !object) return false; 2621 2622 // The library is initialized at allocation call, so releasing while 2623 // not initialized means foreign object is releasing. 
2624 MALLOC_ASSERT(isMallocInitialized(), ASSERT_TEXT); 2625 MALLOC_ASSERT(memPool->extMemPool.userPool() || isRecognized(object), 2626 "Invalid pointer during object releasing is detected."); 2627 2628 if (size >= minLargeObjectSize || isLargeObject<ourMem>(object)) 2629 memPool->putToLLOCache(memPool->getTLS(/*create=*/false), object); 2630 else 2631 freeSmallObject(object); 2632 return true; 2633 } 2634 2635 static void *internalMalloc(size_t size) 2636 { 2637 if (!size) size = sizeof(size_t); 2638 2639 #if MALLOC_CHECK_RECURSION 2640 if (RecursiveMallocCallProtector::sameThreadActive()) 2641 return size<minLargeObjectSize? StartupBlock::allocate(size) : 2642 // nested allocation, so skip tls 2643 (FreeObject*)defaultMemPool->getFromLLOCache(NULL, size, slabSize); 2644 #endif 2645 2646 if (!isMallocInitialized()) 2647 if (!doInitialization()) 2648 return NULL; 2649 return internalPoolMalloc(defaultMemPool, size); 2650 } 2651 2652 static void internalFree(void *object) 2653 { 2654 internalPoolFree(defaultMemPool, object, 0); 2655 } 2656 2657 static size_t internalMsize(void* ptr) 2658 { 2659 MALLOC_ASSERT(ptr, "Invalid pointer passed to internalMsize"); 2660 if (isLargeObject<ourMem>(ptr)) { 2661 // TODO: return the maximum memory size, that can be written to this object 2662 LargeMemoryBlock* lmb = ((LargeObjectHdr*)ptr - 1)->memoryBlock; 2663 return lmb->objectSize; 2664 } else { 2665 Block *block = (Block*)alignDown(ptr, slabSize); 2666 return block->findObjectSize(ptr); 2667 } 2668 } 2669 2670 } // namespace internal 2671 2672 using namespace rml::internal; 2673 2674 // legacy entry point saved for compatibility with binaries complied 2675 // with pre-6003 versions of TBB 2676 rml::MemoryPool *pool_create(intptr_t pool_id, const MemPoolPolicy *policy) 2677 { 2678 rml::MemoryPool *pool; 2679 MemPoolPolicy pol(policy->pAlloc, policy->pFree, policy->granularity); 2680 2681 pool_create_v1(pool_id, &pol, &pool); 2682 return pool; 2683 } 2684 2685 rml::MemPoolError pool_create_v1(intptr_t pool_id, const MemPoolPolicy *policy, 2686 rml::MemoryPool **pool) 2687 { 2688 if ( !policy->pAlloc || policy->version<MemPoolPolicy::TBBMALLOC_POOL_VERSION 2689 // empty pFree allowed only for fixed pools 2690 || !(policy->fixedPool || policy->pFree)) { 2691 *pool = NULL; 2692 return INVALID_POLICY; 2693 } 2694 if ( policy->version>MemPoolPolicy::TBBMALLOC_POOL_VERSION // future versions are not supported 2695 // new flags can be added in place of reserved, but default 2696 // behaviour must be supported by this version 2697 || policy->reserved ) { 2698 *pool = NULL; 2699 return UNSUPPORTED_POLICY; 2700 } 2701 if (!isMallocInitialized()) 2702 if (!doInitialization()) { 2703 *pool = NULL; 2704 return NO_MEMORY; 2705 } 2706 rml::internal::MemoryPool *memPool = 2707 (rml::internal::MemoryPool*)internalMalloc((sizeof(rml::internal::MemoryPool))); 2708 if (!memPool) { 2709 *pool = NULL; 2710 return NO_MEMORY; 2711 } 2712 memset(memPool, 0, sizeof(rml::internal::MemoryPool)); 2713 if (!memPool->init(pool_id, policy)) { 2714 internalFree(memPool); 2715 *pool = NULL; 2716 return NO_MEMORY; 2717 } 2718 2719 *pool = (rml::MemoryPool*)memPool; 2720 return POOL_OK; 2721 } 2722 2723 bool pool_destroy(rml::MemoryPool* memPool) 2724 { 2725 if (!memPool) return false; 2726 bool ret = ((rml::internal::MemoryPool*)memPool)->destroy(); 2727 internalFree(memPool); 2728 2729 return ret; 2730 } 2731 2732 bool pool_reset(rml::MemoryPool* memPool) 2733 { 2734 if (!memPool) return false; 2735 2736 return 
((rml::internal::MemoryPool*)memPool)->reset(); 2737 } 2738 2739 void *pool_malloc(rml::MemoryPool* mPool, size_t size) 2740 { 2741 return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size); 2742 } 2743 2744 void *pool_realloc(rml::MemoryPool* mPool, void *object, size_t size) 2745 { 2746 if (!object) 2747 return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size); 2748 if (!size) { 2749 internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0); 2750 return NULL; 2751 } 2752 return reallocAligned((rml::internal::MemoryPool*)mPool, object, size, 0); 2753 } 2754 2755 void *pool_aligned_malloc(rml::MemoryPool* mPool, size_t size, size_t alignment) 2756 { 2757 if (!isPowerOfTwo(alignment) || 0==size) 2758 return NULL; 2759 2760 return allocateAligned((rml::internal::MemoryPool*)mPool, size, alignment); 2761 } 2762 2763 void *pool_aligned_realloc(rml::MemoryPool* memPool, void *ptr, size_t size, size_t alignment) 2764 { 2765 if (!isPowerOfTwo(alignment)) 2766 return NULL; 2767 rml::internal::MemoryPool *mPool = (rml::internal::MemoryPool*)memPool; 2768 void *tmp; 2769 2770 if (!ptr) 2771 tmp = allocateAligned(mPool, size, alignment); 2772 else if (!size) { 2773 internalPoolFree(mPool, ptr, 0); 2774 return NULL; 2775 } else 2776 tmp = reallocAligned(mPool, ptr, size, alignment); 2777 2778 return tmp; 2779 } 2780 2781 bool pool_free(rml::MemoryPool *mPool, void *object) 2782 { 2783 return internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0); 2784 } 2785 2786 rml::MemoryPool *pool_identify(void *object) 2787 { 2788 rml::internal::MemoryPool *pool; 2789 if (isLargeObject<ourMem>(object)) { 2790 LargeObjectHdr *header = (LargeObjectHdr*)object - 1; 2791 pool = header->memoryBlock->pool; 2792 } else { 2793 Block *block = (Block*)alignDown(object, slabSize); 2794 pool = block->getMemPool(); 2795 } 2796 // do not return defaultMemPool, as it can't be used in pool_free() etc 2797 __TBB_ASSERT_RELEASE(pool!=defaultMemPool, 2798 "rml::pool_identify() can't be used for scalable_malloc() etc results."); 2799 return (rml::MemoryPool*)pool; 2800 } 2801 2802 size_t pool_msize(rml::MemoryPool *mPool, void* object) 2803 { 2804 if (object) { 2805 // No assert for object recognition, cause objects allocated from non-default 2806 // memory pool do not participate in range checking and do not have valid backreferences for 2807 // small objects. Instead, check that an object belong to the certain memory pool. 2808 MALLOC_ASSERT_EX(mPool == pool_identify(object), "Object does not belong to the specified pool"); 2809 return internalMsize(object); 2810 } 2811 errno = EINVAL; 2812 // Unlike _msize, return 0 in case of parameter error. 2813 // Returning size_t(-1) looks more like the way to troubles. 2814 return 0; 2815 } 2816 2817 } // namespace rml 2818 2819 using namespace rml::internal; 2820 2821 #if MALLOC_TRACE 2822 static unsigned int threadGoingDownCount = 0; 2823 #endif 2824 2825 /* 2826 * When a thread is shutting down this routine should be called to remove all the thread ids 2827 * from the malloc blocks and replace them with a NULL thread id. 2828 * 2829 * For pthreads, the function is set as a callback in pthread_key_create for TLS bin. 2830 * It will be automatically called at thread exit with the key value as the argument, 2831 * unless that value is NULL. 2832 * For Windows, it is called from DllMain( DLL_THREAD_DETACH ). 
2833 * 2834 * However neither of the above is called for the main process thread, so the routine 2835 * also needs to be called during the process shutdown. 2836 * 2837 */ 2838 // TODO: Consider making this function part of class MemoryPool. 2839 void doThreadShutdownNotification(TLSData* tls, bool main_thread) 2840 { 2841 TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return start %d\n", 2842 getThreadId(), threadGoingDownCount++ )); 2843 2844 #if USE_PTHREAD 2845 if (tls) { 2846 if (!shutdownSync.threadDtorStart()) return; 2847 tls->getMemPool()->onThreadShutdown(tls); 2848 shutdownSync.threadDtorDone(); 2849 } else 2850 #endif 2851 { 2852 suppress_unused_warning(tls); // not used on Windows 2853 // The default pool is safe to use at this point: 2854 // on Linux, only the main thread can go here before destroying defaultMemPool; 2855 // on Windows, shutdown is synchronized via loader lock and isMallocInitialized(). 2856 // See also __TBB_mallocProcessShutdownNotification() 2857 defaultMemPool->onThreadShutdown(defaultMemPool->getTLS(/*create=*/false)); 2858 // Take lock to walk through other pools; but waiting might be dangerous at this point 2859 // (e.g. on Windows the main thread might deadlock) 2860 bool locked; 2861 MallocMutex::scoped_lock lock(MemoryPool::memPoolListLock, /*wait=*/!main_thread, &locked); 2862 if (locked) { // the list is safe to process 2863 for (MemoryPool *memPool = defaultMemPool->next; memPool; memPool = memPool->next) 2864 memPool->onThreadShutdown(memPool->getTLS(/*create=*/false)); 2865 } 2866 } 2867 2868 TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return end\n", getThreadId() )); 2869 } 2870 2871 #if USE_PTHREAD 2872 void mallocThreadShutdownNotification(void* arg) 2873 { 2874 // The routine is called for each pool (as TLS dtor) on each thread, except for the main thread 2875 if (!isMallocInitialized()) return; 2876 doThreadShutdownNotification((TLSData*)arg, false); 2877 } 2878 #else 2879 extern "C" void __TBB_mallocThreadShutdownNotification() 2880 { 2881 // The routine is called once per thread on Windows 2882 if (!isMallocInitialized()) return; 2883 doThreadShutdownNotification(NULL, false); 2884 } 2885 #endif 2886 2887 extern "C" void __TBB_mallocProcessShutdownNotification(bool windows_process_dying) 2888 { 2889 if (!isMallocInitialized()) return; 2890 2891 // Don't clean allocator internals if the entire process is exiting 2892 if (!windows_process_dying) { 2893 doThreadShutdownNotification(NULL, /*main_thread=*/true); 2894 } 2895 #if __TBB_MALLOC_LOCACHE_STAT 2896 printf("cache hit ratio %f, size hit %f\n", 2897 1.*cacheHits/mallocCalls, 1.*memHitKB/memAllocKB); 2898 defaultMemPool->extMemPool.loc.reportStat(stdout); 2899 #endif 2900 2901 shutdownSync.processExit(); 2902 #if __TBB_SOURCE_DIRECTLY_INCLUDED 2903 /* Pthread keys must be deleted as soon as possible to not call key dtor 2904 on thread termination when then the tbbmalloc code can be already unloaded. 2905 */ 2906 defaultMemPool->destroy(); 2907 destroyBackRefMaster(&defaultMemPool->extMemPool.backend); 2908 ThreadId::destroy(); // Delete key for thread id 2909 hugePages.reset(); 2910 // new total malloc initialization is possible after this point 2911 mallocInitialized.store(0, std::memory_order_release); 2912 #elif __TBB_USE_DLOPEN_REENTRANCY_WORKAROUND 2913 /* In most cases we prevent unloading tbbmalloc, and don't clean up memory 2914 on process shutdown. 
When impossible to prevent, library unload results 2915 in shutdown notification, and it makes sense to release unused memory 2916 at that point (we can't release all memory because it's possible that 2917 it will be accessed after this point). 2918 TODO: better support systems where we can't prevent unloading by removing 2919 pthread destructors and releasing caches. 2920 */ 2921 defaultMemPool->extMemPool.hardCachesCleanup(); 2922 #endif // __TBB_SOURCE_DIRECTLY_INCLUDED 2923 2924 #if COLLECT_STATISTICS 2925 unsigned nThreads = ThreadId::getMaxThreadId(); 2926 for( int i=1; i<=nThreads && i<MAX_THREADS; ++i ) 2927 STAT_print(i); 2928 #endif 2929 if (!usedBySrcIncluded) 2930 MALLOC_ITT_FINI_ITTLIB(); 2931 } 2932 2933 extern "C" void * scalable_malloc(size_t size) 2934 { 2935 void *ptr = internalMalloc(size); 2936 if (!ptr) errno = ENOMEM; 2937 return ptr; 2938 } 2939 2940 extern "C" void scalable_free(void *object) 2941 { 2942 internalFree(object); 2943 } 2944 2945 #if MALLOC_ZONE_OVERLOAD_ENABLED 2946 extern "C" void __TBB_malloc_free_definite_size(void *object, size_t size) 2947 { 2948 internalPoolFree(defaultMemPool, object, size); 2949 } 2950 #endif 2951 2952 /* 2953 * A variant that provides additional memory safety, by checking whether the given address 2954 * was obtained with this allocator, and if not redirecting to the provided alternative call. 2955 */ 2956 extern "C" void __TBB_malloc_safer_free(void *object, void (*original_free)(void*)) 2957 { 2958 if (!object) 2959 return; 2960 2961 // tbbmalloc can allocate object only when tbbmalloc has been initialized 2962 if (mallocInitialized.load(std::memory_order_acquire) && defaultMemPool->extMemPool.backend.ptrCanBeValid(object)) { 2963 if (isLargeObject<unknownMem>(object)) { 2964 // must check 1st for large object, because small object check touches 4 pages on left, 2965 // and it can be inaccessible 2966 TLSData *tls = defaultMemPool->getTLS(/*create=*/false); 2967 2968 defaultMemPool->putToLLOCache(tls, object); 2969 return; 2970 } else if (isSmallObject(object)) { 2971 freeSmallObject(object); 2972 return; 2973 } 2974 } 2975 if (original_free) 2976 original_free(object); 2977 } 2978 2979 /********* End the free code *************/ 2980 2981 /********* Code for scalable_realloc ***********/ 2982 2983 /* 2984 * From K&R 2985 * "realloc changes the size of the object pointed to by p to size. The contents will 2986 * be unchanged up to the minimum of the old and the new sizes. If the new size is larger, 2987 * the new space is uninitialized. realloc returns a pointer to the new space, or 2988 * NULL if the request cannot be satisfied, in which case *p is unchanged." 2989 * 2990 */ 2991 extern "C" void* scalable_realloc(void* ptr, size_t size) 2992 { 2993 void *tmp; 2994 2995 if (!ptr) 2996 tmp = internalMalloc(size); 2997 else if (!size) { 2998 internalFree(ptr); 2999 return NULL; 3000 } else 3001 tmp = reallocAligned(defaultMemPool, ptr, size, 0); 3002 3003 if (!tmp) errno = ENOMEM; 3004 return tmp; 3005 } 3006 3007 /* 3008 * A variant that provides additional memory safety, by checking whether the given address 3009 * was obtained with this allocator, and if not redirecting to the provided alternative call. 
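*/
// Illustrative sketch (not from the tbbmalloc sources): the "safer" entry points
// defined around here all follow the same dispatch pattern. A minimal model of it,
// where ourPointer()/ourFree() are hypothetical stand-ins for the isRecognized()
// and internal free paths used above:
#if 0 // not compiled; illustration only
bool ourPointer(void *p);   // e.g. range and back-reference checks
void ourFree(void *p);

void safer_free(void *p, void (*original_free)(void*)) {
    if (!p) return;
    if (ourPointer(p))
        ourFree(p);               // the object was allocated by this allocator
    else if (original_free)
        original_free(p);         // foreign object: hand it back to its owner
}
#endif
/* (end of illustrative sketch)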
3010 */ 3011 extern "C" void* __TBB_malloc_safer_realloc(void* ptr, size_t sz, void* original_realloc) 3012 { 3013 void *tmp; // TODO: fix warnings about uninitialized use of tmp 3014 3015 if (!ptr) { 3016 tmp = internalMalloc(sz); 3017 } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) { 3018 if (!sz) { 3019 internalFree(ptr); 3020 return NULL; 3021 } else { 3022 tmp = reallocAligned(defaultMemPool, ptr, sz, 0); 3023 } 3024 } 3025 #if USE_WINTHREAD 3026 else if (original_realloc && sz) { 3027 orig_ptrs *original_ptrs = static_cast<orig_ptrs*>(original_realloc); 3028 if ( original_ptrs->msize ){ 3029 size_t oldSize = original_ptrs->msize(ptr); 3030 tmp = internalMalloc(sz); 3031 if (tmp) { 3032 memcpy(tmp, ptr, sz<oldSize? sz : oldSize); 3033 if ( original_ptrs->free ){ 3034 original_ptrs->free( ptr ); 3035 } 3036 } 3037 } else 3038 tmp = NULL; 3039 } 3040 #else 3041 else if (original_realloc) { 3042 typedef void* (*realloc_ptr_t)(void*,size_t); 3043 realloc_ptr_t original_realloc_ptr; 3044 (void *&)original_realloc_ptr = original_realloc; 3045 tmp = original_realloc_ptr(ptr,sz); 3046 } 3047 #endif 3048 else tmp = NULL; 3049 3050 if (!tmp) errno = ENOMEM; 3051 return tmp; 3052 } 3053 3054 /********* End code for scalable_realloc ***********/ 3055 3056 /********* Code for scalable_calloc ***********/ 3057 3058 /* 3059 * From K&R 3060 * calloc returns a pointer to space for an array of nobj objects, 3061 * each of size size, or NULL if the request cannot be satisfied. 3062 * The space is initialized to zero bytes. 3063 * 3064 */ 3065 3066 extern "C" void * scalable_calloc(size_t nobj, size_t size) 3067 { 3068 // it's square root of maximal size_t value 3069 const size_t mult_not_overflow = size_t(1) << (sizeof(size_t)*CHAR_BIT/2); 3070 const size_t arraySize = nobj * size; 3071 3072 // check for overflow during multiplication: 3073 if (nobj>=mult_not_overflow || size>=mult_not_overflow) // 1) heuristic check 3074 if (nobj && arraySize / nobj != size) { // 2) exact check 3075 errno = ENOMEM; 3076 return NULL; 3077 } 3078 void* result = internalMalloc(arraySize); 3079 if (result) 3080 memset(result, 0, arraySize); 3081 else 3082 errno = ENOMEM; 3083 return result; 3084 } 3085 3086 /********* End code for scalable_calloc ***********/ 3087 3088 /********* Code for aligned allocation API **********/ 3089 3090 extern "C" int scalable_posix_memalign(void **memptr, size_t alignment, size_t size) 3091 { 3092 if ( !isPowerOfTwoAtLeast(alignment, sizeof(void*)) ) 3093 return EINVAL; 3094 void *result = allocateAligned(defaultMemPool, size, alignment); 3095 if (!result) 3096 return ENOMEM; 3097 *memptr = result; 3098 return 0; 3099 } 3100 3101 extern "C" void * scalable_aligned_malloc(size_t size, size_t alignment) 3102 { 3103 if (!isPowerOfTwo(alignment) || 0==size) { 3104 errno = EINVAL; 3105 return NULL; 3106 } 3107 void *tmp = allocateAligned(defaultMemPool, size, alignment); 3108 if (!tmp) errno = ENOMEM; 3109 return tmp; 3110 } 3111 3112 extern "C" void * scalable_aligned_realloc(void *ptr, size_t size, size_t alignment) 3113 { 3114 if (!isPowerOfTwo(alignment)) { 3115 errno = EINVAL; 3116 return NULL; 3117 } 3118 void *tmp; 3119 3120 if (!ptr) 3121 tmp = allocateAligned(defaultMemPool, size, alignment); 3122 else if (!size) { 3123 scalable_free(ptr); 3124 return NULL; 3125 } else 3126 tmp = reallocAligned(defaultMemPool, ptr, size, alignment); 3127 3128 if (!tmp) errno = ENOMEM; 3129 return tmp; 3130 } 3131 3132 extern "C" void * 
__TBB_malloc_safer_aligned_realloc(void *ptr, size_t size, size_t alignment, void* orig_function) 3133 { 3134 /* corner cases left out of reallocAligned to not deal with errno there */ 3135 if (!isPowerOfTwo(alignment)) { 3136 errno = EINVAL; 3137 return NULL; 3138 } 3139 void *tmp = NULL; 3140 3141 if (!ptr) { 3142 tmp = allocateAligned(defaultMemPool, size, alignment); 3143 } else if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(ptr)) { 3144 if (!size) { 3145 internalFree(ptr); 3146 return NULL; 3147 } else { 3148 tmp = reallocAligned(defaultMemPool, ptr, size, alignment); 3149 } 3150 } 3151 #if USE_WINTHREAD 3152 else { 3153 orig_aligned_ptrs *original_ptrs = static_cast<orig_aligned_ptrs*>(orig_function); 3154 if (size) { 3155 // Without orig_msize, we can't do anything with this. 3156 // Just keeping old pointer. 3157 if ( original_ptrs->aligned_msize ){ 3158 // set alignment and offset to have possibly correct oldSize 3159 size_t oldSize = original_ptrs->aligned_msize(ptr, sizeof(void*), 0); 3160 tmp = allocateAligned(defaultMemPool, size, alignment); 3161 if (tmp) { 3162 memcpy(tmp, ptr, size<oldSize? size : oldSize); 3163 if ( original_ptrs->aligned_free ){ 3164 original_ptrs->aligned_free( ptr ); 3165 } 3166 } 3167 } 3168 } else { 3169 if ( original_ptrs->aligned_free ){ 3170 original_ptrs->aligned_free( ptr ); 3171 } 3172 return NULL; 3173 } 3174 } 3175 #else 3176 // As original_realloc can't align result, and there is no way to find 3177 // size of reallocating object, we are giving up. 3178 suppress_unused_warning(orig_function); 3179 #endif 3180 if (!tmp) errno = ENOMEM; 3181 return tmp; 3182 } 3183 3184 extern "C" void scalable_aligned_free(void *ptr) 3185 { 3186 internalFree(ptr); 3187 } 3188 3189 /********* end code for aligned allocation API **********/ 3190 3191 /********* Code for scalable_msize ***********/ 3192 3193 /* 3194 * Returns the size of a memory block allocated in the heap. 3195 */ 3196 extern "C" size_t scalable_msize(void* ptr) 3197 { 3198 if (ptr) { 3199 MALLOC_ASSERT(isRecognized(ptr), "Invalid pointer in scalable_msize detected."); 3200 return internalMsize(ptr); 3201 } 3202 errno = EINVAL; 3203 // Unlike _msize, return 0 in case of parameter error. 3204 // Returning size_t(-1) looks more like the way to troubles. 3205 return 0; 3206 } 3207 3208 /* 3209 * A variant that provides additional memory safety, by checking whether the given address 3210 * was obtained with this allocator, and if not redirecting to the provided alternative call. 
3211 */ 3212 extern "C" size_t __TBB_malloc_safer_msize(void *object, size_t (*original_msize)(void*)) 3213 { 3214 if (object) { 3215 // Check if the memory was allocated by scalable_malloc 3216 if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object)) 3217 return internalMsize(object); 3218 else if (original_msize) 3219 return original_msize(object); 3220 } 3221 // object is NULL or unknown, or foreign and no original_msize 3222 #if USE_WINTHREAD 3223 errno = EINVAL; // errno expected to be set only on this platform 3224 #endif 3225 return 0; 3226 } 3227 3228 /* 3229 * The same as above but for _aligned_msize case 3230 */ 3231 extern "C" size_t __TBB_malloc_safer_aligned_msize(void *object, size_t alignment, size_t offset, size_t (*orig_aligned_msize)(void*,size_t,size_t)) 3232 { 3233 if (object) { 3234 // Check if the memory was allocated by scalable_malloc 3235 if (mallocInitialized.load(std::memory_order_acquire) && isRecognized(object)) 3236 return internalMsize(object); 3237 else if (orig_aligned_msize) 3238 return orig_aligned_msize(object,alignment,offset); 3239 } 3240 // object is NULL or unknown 3241 errno = EINVAL; 3242 return 0; 3243 } 3244 3245 /********* End code for scalable_msize ***********/ 3246 3247 extern "C" int scalable_allocation_mode(int param, intptr_t value) 3248 { 3249 if (param == TBBMALLOC_SET_SOFT_HEAP_LIMIT) { 3250 defaultMemPool->extMemPool.backend.setRecommendedMaxSize((size_t)value); 3251 return TBBMALLOC_OK; 3252 } else if (param == USE_HUGE_PAGES) { 3253 #if __linux__ 3254 switch (value) { 3255 case 0: 3256 case 1: 3257 hugePages.setMode(value); 3258 return TBBMALLOC_OK; 3259 default: 3260 return TBBMALLOC_INVALID_PARAM; 3261 } 3262 #else 3263 return TBBMALLOC_NO_EFFECT; 3264 #endif 3265 #if __TBB_SOURCE_DIRECTLY_INCLUDED 3266 } else if (param == TBBMALLOC_INTERNAL_SOURCE_INCLUDED) { 3267 switch (value) { 3268 case 0: // used by dynamic library 3269 case 1: // used by static library or directly included sources 3270 usedBySrcIncluded = value; 3271 return TBBMALLOC_OK; 3272 default: 3273 return TBBMALLOC_INVALID_PARAM; 3274 } 3275 #endif 3276 } else if (param == TBBMALLOC_SET_HUGE_SIZE_THRESHOLD) { 3277 defaultMemPool->extMemPool.loc.setHugeSizeThreshold((size_t)value); 3278 return TBBMALLOC_OK; 3279 } 3280 return TBBMALLOC_INVALID_PARAM; 3281 } 3282 3283 extern "C" int scalable_allocation_command(int cmd, void *param) 3284 { 3285 if (param) 3286 return TBBMALLOC_INVALID_PARAM; 3287 3288 bool released = false; 3289 switch(cmd) { 3290 case TBBMALLOC_CLEAN_THREAD_BUFFERS: 3291 if (TLSData *tls = defaultMemPool->getTLS(/*create=*/false)) 3292 released = tls->externalCleanup(/*cleanOnlyUnused*/false, /*cleanBins=*/true); 3293 break; 3294 case TBBMALLOC_CLEAN_ALL_BUFFERS: 3295 released = defaultMemPool->extMemPool.hardCachesCleanup(); 3296 break; 3297 default: 3298 return TBBMALLOC_INVALID_PARAM; 3299 } 3300 return released ? TBBMALLOC_OK : TBBMALLOC_NO_EFFECT; 3301 } 3302
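// Illustrative usage sketch (not from the tbbmalloc sources): a minimal example of
// the public entry points defined in this file, assuming it is compiled separately
// and linked against the TBB scalable allocator; the header path below is the one
// shipped with oneTBB.
#if 0 // not compiled; illustration only
#include <cstdio>
#include "oneapi/tbb/scalable_allocator.h"

int main() {
    void *p = scalable_malloc(100);                         // plain allocation
    std::printf("usable size: %zu\n", scalable_msize(p));   // at least 100 bytes
    p = scalable_realloc(p, 1000);                          // grow, contents preserved
    scalable_free(p);

    void *a = scalable_aligned_malloc(/*size=*/256, /*alignment=*/128);
    scalable_aligned_free(a);

    // Optional tuning knobs exposed by this file:
    scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 64*1024*1024);
    scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL);
    return 0;
}
#endif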