/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include <string.h>   /* for memset */
#include <errno.h>
#include "tbbmalloc_internal.h"

namespace rml {
namespace internal {

/*********** Code to acquire memory from the OS or other executive ****************/

/*
  A syscall or malloc can set a non-zero errno on failure,
  but the allocator might still be able to find memory to fulfill the request,
  and we do not want a successful scalable_malloc call to change errno.
  To support this, restore the old errno in (get|free)RawMemory, and set errno
  in the frontend just before returning to user code.
  Please note: every syscall/libc call used inside scalable_malloc that
  sets errno must be protected this way, not just memory allocation per se.
*/

#if USE_DEFAULT_MEMORY_MAPPING
#include "MapMemory.h"
#else
/* assume MapMemory and UnmapMemory are customized */
#endif

void* getRawMemory (size_t size, PageType pageType) {
    return MapMemory(size, pageType);
}

int freeRawMemory (void *object, size_t size) {
    return UnmapMemory(object, size);
}
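
/* Illustrative sketch (not part of the build) of the errno-preservation pattern
   described above, assuming a hypothetical mmap-based MapMemory implementation:

       int prevErrno = errno;
       void *p = mmap(nullptr, size, ...);  // may set errno on failure
       if (p == MAP_FAILED) {
           errno = prevErrno;               // the allocator may still satisfy the
           p = nullptr;                     // request from another memory source
       }

   The frontend is then responsible for setting errno just before returning to
   user code if the whole allocation finally fails. */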

#if CHECK_ALLOCATION_RANGE

void Backend::UsedAddressRange::registerAlloc(uintptr_t left, uintptr_t right)
{
    MallocMutex::scoped_lock lock(mutex);
    if (left < leftBound.load(std::memory_order_relaxed))
        leftBound.store(left, std::memory_order_relaxed);
    if (right > rightBound.load(std::memory_order_relaxed))
        rightBound.store(right, std::memory_order_relaxed);
    MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed), ASSERT_TEXT);
    MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
    MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) <= left && right <= rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
}

void Backend::UsedAddressRange::registerFree(uintptr_t left, uintptr_t right)
{
    MallocMutex::scoped_lock lock(mutex);
    if (leftBound.load(std::memory_order_relaxed) == left) {
        if (rightBound.load(std::memory_order_relaxed) == right) {
            leftBound.store(ADDRESS_UPPER_BOUND, std::memory_order_relaxed);
            rightBound.store(0, std::memory_order_relaxed);
        } else
            leftBound.store(right, std::memory_order_relaxed);
    } else if (rightBound.load(std::memory_order_relaxed) == right)
        rightBound.store(left, std::memory_order_relaxed);
    MALLOC_ASSERT((!rightBound.load(std::memory_order_relaxed) && leftBound.load(std::memory_order_relaxed) == ADDRESS_UPPER_BOUND)
                  || leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
}
#endif // CHECK_ALLOCATION_RANGE

// Initialized in frontend inside defaultMemPool
extern HugePagesStatus hugePages;

void *Backend::allocRawMem(size_t &size)
{
    void *res = nullptr;
    size_t allocSize = 0;

    if (extMemPool->userPool()) {
        if (extMemPool->fixedPool && bootsrapMemDone == bootsrapMemStatus.load(std::memory_order_acquire))
            return nullptr;
        MALLOC_ASSERT(bootsrapMemStatus != bootsrapMemNotDone,
                      "Backend::allocRawMem() called prematurely?");
        // TODO: support for raw mem not aligned at sizeof(uintptr_t)
        // memory from the fixed pool is requested once and only once
        allocSize = alignUpGeneric(size, extMemPool->granularity);
        res = (*extMemPool->rawAlloc)(extMemPool->poolId, allocSize);
    } else {
        // Align allocation on page size
        size_t pageSize = hugePages.isEnabled ? hugePages.getGranularity() : extMemPool->granularity;
        MALLOC_ASSERT(pageSize, "Page size cannot be zero.");
        allocSize = alignUpGeneric(size, pageSize);

        // If the user requested huge pages and they are available, try to use preallocated ones first.
        // If there are none, check transparent huge page support and use that instead.
        if (hugePages.isEnabled) {
            if (hugePages.isHPAvailable) {
                res = getRawMemory(allocSize, PREALLOCATED_HUGE_PAGE);
            }
            if (!res && hugePages.isTHPAvailable) {
                res = getRawMemory(allocSize, TRANSPARENT_HUGE_PAGE);
            }
        }

        if (!res) {
            res = getRawMemory(allocSize, REGULAR);
        }
    }

    if (res) {
        MALLOC_ASSERT(allocSize > 0, "Invalid size of an allocated region.");
        size = allocSize;
        if (!extMemPool->userPool())
            usedAddrRange.registerAlloc((uintptr_t)res, (uintptr_t)res+size);
#if MALLOC_DEBUG
        volatile size_t curTotalSize = totalMemSize; // to read global value once
        MALLOC_ASSERT(curTotalSize+size > curTotalSize, "Overflow allocation size.");
#endif
        totalMemSize.fetch_add(size);
    }

    return res;
}

bool Backend::freeRawMem(void *object, size_t size)
{
    bool fail;
#if MALLOC_DEBUG
    volatile size_t curTotalSize = totalMemSize; // to read global value once
    MALLOC_ASSERT(curTotalSize-size < curTotalSize, "Negative allocation size.");
#endif
    totalMemSize.fetch_sub(size);
    if (extMemPool->userPool()) {
        MALLOC_ASSERT(!extMemPool->fixedPool, "No free for fixed-size pools.");
        fail = (*extMemPool->rawFree)(extMemPool->poolId, object, size);
    } else {
        usedAddrRange.registerFree((uintptr_t)object, (uintptr_t)object + size);
        fail = freeRawMemory(object, size);
    }
    // TODO: use result in all freeRawMem() callers
    return !fail;
}

/********* End memory acquisition code ********************************/

// Protects the object size. After successful locking it returns the size of
// the locked block, and releasing requires setting the block size.
class GuardedSize : tbb::detail::no_copy {
    std::atomic<uintptr_t> value;
public:
    enum State {
        LOCKED,
        COAL_BLOCK,        // block is being coalesced now
        MAX_LOCKED_VAL = COAL_BLOCK,
        LAST_REGION_BLOCK, // used to mark the last block in a region
        // values after this are "normal" block sizes
        MAX_SPEC_VAL = LAST_REGION_BLOCK
    };

    void initLocked() { value.store(LOCKED, std::memory_order_release); } // TBB_REVAMP_TODO: was relaxed
    void makeCoalscing() {
        MALLOC_ASSERT(value.load(std::memory_order_relaxed) == LOCKED, ASSERT_TEXT);
        value.store(COAL_BLOCK, std::memory_order_release); // TBB_REVAMP_TODO: was relaxed
    }
    size_t tryLock(State state) {
        MALLOC_ASSERT(state <= MAX_LOCKED_VAL, ASSERT_TEXT);
        size_t sz = value.load(std::memory_order_acquire);
        for (;;) {
            if (sz <= MAX_LOCKED_VAL) {
                break;
            }
            if (value.compare_exchange_strong(sz, state)) {
                break;
            }
        }
        return sz;
    }
    void unlock(size_t size) {
        MALLOC_ASSERT(value.load(std::memory_order_relaxed) <= MAX_LOCKED_VAL, "The lock is not locked");
        MALLOC_ASSERT(size > MAX_LOCKED_VAL, ASSERT_TEXT);
        value.store(size, std::memory_order_release);
    }
    bool isLastRegionBlock() const { return value.load(std::memory_order_relaxed) == LAST_REGION_BLOCK; }
    friend void Backend::IndexedBins::verify();
};
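
// Note on the encoding above: the block size and the lock state share a single
// atomic word. Values up to MAX_LOCKED_VAL (LOCKED, COAL_BLOCK) mean the block is
// busy, LAST_REGION_BLOCK marks the region sentinel, and any larger value is a
// real block size. A typical round-trip therefore looks like
//     size_t sz = guard.tryLock(GuardedSize::LOCKED); // <= MAX_LOCKED_VAL means failure
//     ... use the block ...
//     guard.unlock(sz);                               // publish the size again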

struct MemRegion {
    MemRegion *next,    // keep all regions of a pool in a doubly-linked list to release
              *prev;    // them all on pool destruction and to release individual regions
    size_t     allocSz, // obtained from the pool callback
               blockSz; // initial and maximal inner block size
    MemRegionType type;
};

// this data must be unmodified while the block is in use, so keep it separate
class BlockMutexes {
protected:
    GuardedSize myL,   // lock for me
                leftL; // lock for the left neighbor
};

class FreeBlock : BlockMutexes {
public:
    static const size_t minBlockSize;
    friend void Backend::IndexedBins::verify();

    FreeBlock *prev,       // in doubly-linked list related to a bin
              *next,
              *nextToFree; // used to form a queue during coalescing
    // valid only while the block is being processed, i.e. it is neither free
    // nor used outside of the backend
    size_t     sizeTmp;
    int        myBin;      // bin that owns the block
    bool       slabAligned;
    bool       blockInBin; // this block is in myBin already

    FreeBlock *rightNeig(size_t sz) const {
        MALLOC_ASSERT(sz, ASSERT_TEXT);
        return (FreeBlock*)((uintptr_t)this+sz);
    }
    FreeBlock *leftNeig(size_t sz) const {
        MALLOC_ASSERT(sz, ASSERT_TEXT);
        return (FreeBlock*)((uintptr_t)this - sz);
    }

    void initHeader() { myL.initLocked(); leftL.initLocked(); }
    void setMeFree(size_t size) { myL.unlock(size); }
    size_t trySetMeUsed(GuardedSize::State s) { return myL.tryLock(s); }
    bool isLastRegionBlock() const { return myL.isLastRegionBlock(); }

    void setLeftFree(size_t sz) { leftL.unlock(sz); }
    size_t trySetLeftUsed(GuardedSize::State s) { return leftL.tryLock(s); }

    size_t tryLockBlock() {
        size_t rSz, sz = trySetMeUsed(GuardedSize::LOCKED);

        if (sz <= GuardedSize::MAX_LOCKED_VAL)
            return false;
        rSz = rightNeig(sz)->trySetLeftUsed(GuardedSize::LOCKED);
        if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
            setMeFree(sz);
            return false;
        }
        MALLOC_ASSERT(rSz == sz, ASSERT_TEXT);
        return sz;
    }
    void markCoalescing(size_t blockSz) {
        myL.makeCoalscing();
        rightNeig(blockSz)->leftL.makeCoalscing();
        sizeTmp = blockSz;
        nextToFree = nullptr;
    }
    void markUsed() {
        myL.initLocked();
        rightNeig(sizeTmp)->leftL.initLocked();
        nextToFree = nullptr;
    }
    static void markBlocks(FreeBlock *fBlock, int num, size_t size) {
        for (int i=1; i<num; i++) {
            fBlock = (FreeBlock*)((uintptr_t)fBlock + size);
            fBlock->initHeader();
        }
    }
};
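
// Note on the layout above: each FreeBlock keeps its own size in myL and a copy of
// the left neighbor's size in leftL. Knowing only a block's address and size,
// rightNeig()/leftNeig() can reach both neighbors, so coalescing can lock a block
// together with the adjacent headers without any global lock.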

// The last block in any region. Its "size" field is GuardedSize::LAST_REGION_BLOCK.
// Blocks of this kind are used to find the region header
// and thus make it possible to return the region back to the OS.
struct LastFreeBlock : public FreeBlock {
    MemRegion *memRegion;
};

const size_t FreeBlock::minBlockSize = sizeof(FreeBlock);

inline bool BackendSync::waitTillBlockReleased(intptr_t startModifiedCnt)
{
    AtomicBackoff backoff;
#if __TBB_MALLOC_BACKEND_STAT
    class ITT_Guard {
        void *ptr;
    public:
        ITT_Guard(void *p) : ptr(p) {
            MALLOC_ITT_SYNC_PREPARE(ptr);
        }
        ~ITT_Guard() {
            MALLOC_ITT_SYNC_ACQUIRED(ptr);
        }
    };
    ITT_Guard ittGuard(&inFlyBlocks);
#endif
    intptr_t myBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire);
    intptr_t myCoalescQInFlyBlocks = backend->blocksInCoalescing();
    while (true) {
        MALLOC_ASSERT(myBinsInFlyBlocks>=0 && myCoalescQInFlyBlocks>=0, nullptr);

        intptr_t currBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire);
        intptr_t currCoalescQInFlyBlocks = backend->blocksInCoalescing();
        WhiteboxTestingYield();
        // Stop waiting if:

        // 1) blocks were removed from processing, not added
        if (myBinsInFlyBlocks > currBinsInFlyBlocks
            // 2) blocks were released from the delayed coalescing queue
            || myCoalescQInFlyBlocks > currCoalescQInFlyBlocks)
            break;
        // 3) if there are blocks in coalescing and no progress in their processing,
        // try to scan the coalescing queue and stop waiting if changes were made
        // (if there are no changes and in-fly blocks exist, we continue
        // waiting to not increase the load on coalescQ)
        if (currCoalescQInFlyBlocks > 0 && backend->scanCoalescQ(/*forceCoalescQDrop=*/false))
            break;
        // 4) when there are no blocks
        if (!currBinsInFlyBlocks && !currCoalescQInFlyBlocks) {
            // a re-scan makes sense only if the bins were modified since the last scan
            auto pool = backend->extMemPool;
            if (pool->hardCachesCleanupInProgress.load(std::memory_order_acquire) ||
                pool->softCachesCleanupInProgress.load(std::memory_order_acquire)) {
                backoff.pause();
                continue;
            }

            return startModifiedCnt != getNumOfMods();
        }
        myBinsInFlyBlocks = currBinsInFlyBlocks;
        myCoalescQInFlyBlocks = currCoalescQInFlyBlocks;
        backoff.pause();
    }
    return true;
}

void CoalRequestQ::putBlock(FreeBlock *fBlock)
{
    MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, ASSERT_TEXT);
    fBlock->markUsed();
    // the block is in the queue, do not forget that it's here
    inFlyBlocks++;

    FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);
    for (;;) {
        fBlock->nextToFree = myBlToFree;
        if (blocksToFree.compare_exchange_strong(myBlToFree, fBlock)) {
            return;
        }
    }
}

FreeBlock *CoalRequestQ::getAll()
{
    for (;;) {
        FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);

        if (!myBlToFree) {
            return nullptr;
        } else {
            if (blocksToFree.compare_exchange_strong(myBlToFree, nullptr)) {
                return myBlToFree;
            } else {
                continue;
            }
        }
    }
}
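
// The queue above is a simple lock-free LIFO list linked through nextToFree:
// putBlock() pushes one block with a CAS loop, while getAll() detaches the whole
// list at once, so whoever drains the queue gets a private list it can process
// without further synchronization with the producers.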

inline void CoalRequestQ::blockWasProcessed()
{
    bkndSync->binsModified();
    int prev = inFlyBlocks.fetch_sub(1);
    tbb::detail::suppress_unused_warning(prev);
    MALLOC_ASSERT(prev > 0, ASSERT_TEXT);
}

// Try to get a block from a bin.
// If the remaining free space would stay in the same bin,
// split the block without removing it.
// If the free space should go to other bin(s), remove the block.
// alignedBin is true if all blocks in the bin have a slab-aligned right side.
FreeBlock *Backend::IndexedBins::getFromBin(int binIdx, BackendSync *sync, size_t size,
                                            bool needAlignedRes, bool alignedBin, bool wait, int *binLocked)
{
    Bin *b = &freeBins[binIdx];
try_next:
    FreeBlock *fBlock = nullptr;
    if (!b->empty()) {
        bool locked = false;
        MallocMutex::scoped_lock scopedLock(b->tLock, wait, &locked);

        if (!locked) {
            if (binLocked) (*binLocked)++;
            return nullptr;
        }

        for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; curr = curr->next) {
            size_t szBlock = curr->tryLockBlock();
            if (!szBlock) {
                // the block is locked; drop and re-take the bin lock, as there is no
                // point in spinning while the block is being coalesced
                goto try_next;
            }

            // GENERAL CASE
            if (alignedBin || !needAlignedRes) {
                size_t splitSz = szBlock - size;
                // If we get a block as a split result, it must have room for control structures.
                if (szBlock >= size && (splitSz >= FreeBlock::minBlockSize || !splitSz))
                    fBlock = curr;
            } else {
                // SPECIAL CASE: to get an aligned block from an unaligned bin we have to cut the middle of a block
                // and return the remaining left and right parts. Possible only in the fixed pool scenario;
                // the assert for this is inside the splitBlock() function.

                void *newB = alignUp(curr, slabSize);
                uintptr_t rightNew = (uintptr_t)newB + size;
                uintptr_t rightCurr = (uintptr_t)curr + szBlock;
                // Check that the block size is sufficient,
                // and that the left and right split results are either big enough or non-existent
                if (rightNew <= rightCurr
                    && (newB == curr || ((uintptr_t)newB - (uintptr_t)curr) >= FreeBlock::minBlockSize)
                    && (rightNew == rightCurr || (rightCurr - rightNew) >= FreeBlock::minBlockSize))
                    fBlock = curr;
            }

            if (fBlock) {
                // blockConsumed() must be called before the result of removing from a bin is visible externally.
                sync->blockConsumed();
                // TODO: think about cases when the block stays in the same bin
                b->removeBlock(fBlock);
                if (freeBins[binIdx].empty())
                    bitMask.set(binIdx, false);
                fBlock->sizeTmp = szBlock;
                break;
            } else { // the block size is not valid, search for the next block in the bin
                curr->setMeFree(szBlock);
                curr->rightNeig(szBlock)->setLeftFree(szBlock);
            }
        }
    }
    return fBlock;
}

bool Backend::IndexedBins::tryReleaseRegions(int binIdx, Backend *backend)
{
    Bin *b = &freeBins[binIdx];
    FreeBlock *fBlockList = nullptr;

    // take all blocks from the bin and re-do coalescing on them
    // to release single-block regions
try_next:
    if (!b->empty()) {
        MallocMutex::scoped_lock binLock(b->tLock);
        for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; ) {
            size_t szBlock = curr->tryLockBlock();
            if (!szBlock)
                goto try_next;

            FreeBlock *next = curr->next;

            b->removeBlock(curr);
            curr->sizeTmp = szBlock;
            curr->nextToFree = fBlockList;
            fBlockList = curr;
            curr = next;
        }
    }
    return backend->coalescAndPutList(fBlockList, /*forceCoalescQDrop=*/true,
                                      /*reportBlocksProcessed=*/false);
}

void Backend::Bin::removeBlock(FreeBlock *fBlock)
{
    MALLOC_ASSERT(fBlock->next||fBlock->prev||fBlock== head.load(std::memory_order_relaxed),
                  "Detected that a block is not in the bin.");
    if (head.load(std::memory_order_relaxed) == fBlock)
        head.store(fBlock->next, std::memory_order_relaxed);
    if (tail == fBlock)
        tail = fBlock->prev;
    if (fBlock->prev)
        fBlock->prev->next = fBlock->next;
    if (fBlock->next)
        fBlock->next->prev = fBlock->prev;
}

void Backend::IndexedBins::addBlock(int binIdx, FreeBlock *fBlock, size_t /* blockSz */, bool addToTail)
{
    Bin *b = &freeBins[binIdx];
    fBlock->myBin = binIdx;
    fBlock->next = fBlock->prev = nullptr;
    {
        MallocMutex::scoped_lock scopedLock(b->tLock);
        if (addToTail) {
            fBlock->prev = b->tail;
            b->tail = fBlock;
            if (fBlock->prev)
                fBlock->prev->next = fBlock;
            if (!b->head.load(std::memory_order_relaxed))
                b->head.store(fBlock, std::memory_order_relaxed);
        } else {
            fBlock->next = b->head.load(std::memory_order_relaxed);
            b->head.store(fBlock, std::memory_order_relaxed);
            if (fBlock->next)
                fBlock->next->prev = fBlock;
            if (!b->tail)
                b->tail = fBlock;
        }
    }
    bitMask.set(binIdx, true);
}

bool Backend::IndexedBins::tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail)
{
    bool locked = false;
    Bin *b = &freeBins[binIdx];
    fBlock->myBin = binIdx;
    if (addToTail) {
        fBlock->next = nullptr;
        {
            MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
            if (!locked)
                return false;
            fBlock->prev = b->tail;
            b->tail = fBlock;
            if (fBlock->prev)
                fBlock->prev->next = fBlock;
            if (!b->head.load(std::memory_order_relaxed))
                b->head.store(fBlock, std::memory_order_relaxed);
        }
    } else {
        fBlock->prev = nullptr;
        {
            MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
            if (!locked)
                return false;
            fBlock->next = b->head.load(std::memory_order_relaxed);
            b->head.store(fBlock, std::memory_order_relaxed);
            if (fBlock->next)
                fBlock->next->prev = fBlock;
            if (!b->tail)
                b->tail = fBlock;
        }
    }
    bitMask.set(binIdx, true);
    return true;
}
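
// addBlock() waits for the bin lock unconditionally, while tryAddBlock() gives up
// if the bin is busy; coalescAndPutList() relies on the latter to defer the block
// to the coalescing queue instead of blocking on a contended bin.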

void Backend::IndexedBins::reset()
{
    for (unsigned i=0; i<Backend::freeBinsNum; i++)
        freeBins[i].reset();
    bitMask.reset();
}

void Backend::IndexedBins::lockRemoveBlock(int binIdx, FreeBlock *fBlock)
{
    MallocMutex::scoped_lock scopedLock(freeBins[binIdx].tLock);
    freeBins[binIdx].removeBlock(fBlock);
    if (freeBins[binIdx].empty())
        bitMask.set(binIdx, false);
}

bool ExtMemoryPool::regionsAreReleaseable() const
{
    return !keepAllMemory && !delayRegsReleasing;
}

FreeBlock *Backend::splitBlock(FreeBlock *fBlock, int num, size_t size, bool blockIsAligned, bool needAlignedBlock)
{
    const size_t totalSize = num * size;

    // SPECIAL CASE: for an unaligned block we have to cut the middle of the block
    // and return the remaining left and right parts. Possible only in a fixed pool scenario.
    if (needAlignedBlock && !blockIsAligned) {
        MALLOC_ASSERT(extMemPool->fixedPool,
                      "Aligned block request from unaligned bin possible only in fixed pool scenario.");

        // Space to use is in the middle
        FreeBlock *newBlock = alignUp(fBlock, slabSize);
        FreeBlock *rightPart = (FreeBlock*)((uintptr_t)newBlock + totalSize);
        uintptr_t fBlockEnd = (uintptr_t)fBlock + fBlock->sizeTmp;

        // Return the free right part
        if ((uintptr_t)rightPart != fBlockEnd) {
            rightPart->initHeader(); // to prevent coalescing rightPart with fBlock
            size_t rightSize = fBlockEnd - (uintptr_t)rightPart;
            coalescAndPut(rightPart, rightSize, toAlignedBin(rightPart, rightSize));
        }
        // And the free left part
        if (newBlock != fBlock) {
            newBlock->initHeader(); // to prevent coalescing fBlock with newB
            size_t leftSize = (uintptr_t)newBlock - (uintptr_t)fBlock;
            coalescAndPut(fBlock, leftSize, toAlignedBin(fBlock, leftSize));
        }
        fBlock = newBlock;
    } else if (size_t splitSize = fBlock->sizeTmp - totalSize) { // need to split the block
        // GENERAL CASE: cut the left or right part of the block
        FreeBlock *splitBlock = nullptr;
        if (needAlignedBlock) {
            // For slab-aligned blocks cut the right side of the block
            // and return it to the requester; the original block returns to the backend
            splitBlock = fBlock;
            fBlock = (FreeBlock*)((uintptr_t)splitBlock + splitSize);
            fBlock->initHeader();
        } else {
            // For large object blocks cut the original block and put the free right part back to the backend
            splitBlock = (FreeBlock*)((uintptr_t)fBlock + totalSize);
            splitBlock->initHeader();
        }
        // Mark the free block like its parent only when the requested type (needAlignedBlock)
        // and the block returned from the bins/OS (blockIsAligned) are equal (XOR is used)
        bool markAligned = (blockIsAligned ^ needAlignedBlock) ? toAlignedBin(splitBlock, splitSize) : blockIsAligned;
        coalescAndPut(splitBlock, splitSize, markAligned);
    }
    MALLOC_ASSERT(!needAlignedBlock || isAligned(fBlock, slabSize), "Expect to get aligned block, if one was requested.");
    FreeBlock::markBlocks(fBlock, num, size);
    return fBlock;
}
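
// Illustration of the general case (sizes are made up): a 48 KB free block serving
// a 32 KB request leaves splitSize = 16 KB. For a slab-aligned request the top
// (right) 32 KB goes to the requester and the bottom 16 KB is coalesced back;
// for a large-object request the bottom 32 KB is returned to the requester and
// the top 16 KB goes back to the backend.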

size_t Backend::getMaxBinnedSize() const
{
    return hugePages.isEnabled && !inUserPool() ?
        maxBinned_HugePage : maxBinned_SmallPage;
}

inline bool Backend::MaxRequestComparator::operator()(size_t oldMaxReq, size_t requestSize) const
{
    return requestSize > oldMaxReq && requestSize < backend->getMaxBinnedSize();
}

// last chance to get memory
FreeBlock *Backend::releaseMemInCaches(intptr_t startModifiedCnt,
                                       int *lockedBinsThreshold, int numOfLockedBins)
{
    // something released from caches
    if (extMemPool->hardCachesCleanup(false))
        return (FreeBlock*)VALID_BLOCK_IN_BIN;

    if (bkndSync.waitTillBlockReleased(startModifiedCnt))
        return (FreeBlock*)VALID_BLOCK_IN_BIN;

    // OS can't give us more memory, but we have some in locked bins
    if (*lockedBinsThreshold && numOfLockedBins) {
        *lockedBinsThreshold = 0;
        return (FreeBlock*)VALID_BLOCK_IN_BIN;
    }
    return nullptr; // nothing found, give up
}
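
// The escalation order above: first try to drop the hard object caches, then wait
// for blocks that other threads are still processing, and only then relax the
// locked-bins threshold so that bins that were locked are retried instead of
// giving up.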

FreeBlock *Backend::askMemFromOS(size_t blockSize, intptr_t startModifiedCnt,
                                 int *lockedBinsThreshold, int numOfLockedBins,
                                 bool *splittableRet, bool needSlabRegion)
{
    FreeBlock *block;
    // The block sizes can be divided into 3 groups:
    //   1. "quite small": a popular object size, we are in bootstrap or something
    //      similar; request several regions.
    //   2. "quite large": we want to have several such blocks in the region,
    //      but do not want several pre-allocated regions.
    //   3. "huge": exact fit, we allocate only one block and do not allow
    //      any other allocations to be placed in the region.
    // By dividing the block sizes into these groups we are trying to balance between
    // too small regions (which lead to fragmentation) and too large ones (which
    // lead to excessive address space consumption). If a region is "too
    // large", allocate only one, to prevent fragmentation. It supposedly
    // doesn't hurt performance, because the object requested by the user is large.
    // Bounds for the groups are:
    const size_t maxBinned = getMaxBinnedSize();
    const size_t quiteSmall = maxBinned / 8;
    const size_t quiteLarge = maxBinned;

    if (blockSize >= quiteLarge) {
        // Do not interact with other threads via semaphores, as for an exact fit
        // we can't share regions with them; memory is requested individually.
        block = addNewRegion(blockSize, MEMREG_ONE_BLOCK, /*addToBin=*/false);
        if (!block)
            return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
        *splittableRet = false;
    } else {
        const size_t regSz_sizeBased = alignUp(4*maxRequestedSize, 1024*1024);
        // Another thread is modifying the backend while we can't get the block.
        // Wait until it leaves and re-do the scan
        // before trying other ways to extend the backend.
        if (bkndSync.waitTillBlockReleased(startModifiedCnt)
            // the semaphore protects adding more memory from the OS
            || memExtendingSema.wait())
            return (FreeBlock*)VALID_BLOCK_IN_BIN;

        if (startModifiedCnt != bkndSync.getNumOfMods()) {
            memExtendingSema.signal();
            return (FreeBlock*)VALID_BLOCK_IN_BIN;
        }

        if (blockSize < quiteSmall) {
            // For blocks of this size, add NUM_OF_REG "advance" regions to the bin,
            // and return one as a result.
            // TODO: add to the bin first, because other threads can use them right away.
            // This must be done carefully, because blocks in bins can be released
            // in releaseCachesToLimit().
            const unsigned NUM_OF_REG = 3;
            MemRegionType regType = needSlabRegion ? MEMREG_SLAB_BLOCKS : MEMREG_LARGE_BLOCKS;
            block = addNewRegion(regSz_sizeBased, regType, /*addToBin=*/false);
            if (block)
                for (unsigned idx=0; idx<NUM_OF_REG; idx++)
                    if (! addNewRegion(regSz_sizeBased, regType, /*addToBin=*/true))
                        break;
        } else {
            block = addNewRegion(regSz_sizeBased, MEMREG_LARGE_BLOCKS, /*addToBin=*/false);
        }
        memExtendingSema.signal();

        // no regions found, try to clean the cache
        if (!block || block == (FreeBlock*)VALID_BLOCK_IN_BIN)
            return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
        // Since a region can hold more than one block, it can be split.
        *splittableRet = true;
    }
    // after asking the OS for memory, release caches if we are above the memory limits
    releaseCachesToLimit();

    return block;
}

void Backend::releaseCachesToLimit()
{
    if (!memSoftLimit.load(std::memory_order_relaxed)
        || totalMemSize.load(std::memory_order_relaxed) <= memSoftLimit.load(std::memory_order_relaxed)) {
        return;
    }
    size_t locTotalMemSize, locMemSoftLimit;

    scanCoalescQ(/*forceCoalescQDrop=*/false);
    if (extMemPool->softCachesCleanup() &&
        (locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
        (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
        return;
    // clean the global large-object cache; if this is not enough, clean local caches.
    // Do this in several tries, because backend fragmentation can prevent
    // a region from being released.
    for (int cleanLocal = 0; cleanLocal<2; cleanLocal++)
        while (cleanLocal ?
               extMemPool->allLocalCaches.cleanup(/*cleanOnlyUnused=*/true) :
               extMemPool->loc.decreasingCleanup())
            if ((locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
                (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
                return;
    // last chance to match memSoftLimit
    extMemPool->hardCachesCleanup(true);
}

int Backend::IndexedBins::getMinNonemptyBin(unsigned startBin) const
{
    int p = bitMask.getMinTrue(startBin);
    return p == -1 ? Backend::freeBinsNum : p;
}

FreeBlock *Backend::IndexedBins::findBlock(int nativeBin, BackendSync *sync, size_t size,
                                           bool needAlignedBlock, bool alignedBin, int *numOfLockedBins)
{
    for (int i=getMinNonemptyBin(nativeBin); i<(int)freeBinsNum; i=getMinNonemptyBin(i+1))
        if (FreeBlock *block = getFromBin(i, sync, size, needAlignedBlock, alignedBin, /*wait=*/false, numOfLockedBins))
            return block;

    return nullptr;
}
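
// findBlock() walks only non-empty bins: the bit mask tracks which bins hold
// blocks, and getMinTrue() jumps straight to the next set bit at or above the
// requested bin, so empty bins are skipped without taking their locks.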

void Backend::requestBootstrapMem()
{
    if (bootsrapMemDone == bootsrapMemStatus.load(std::memory_order_acquire))
        return;
    MallocMutex::scoped_lock lock( bootsrapMemStatusMutex );
    if (bootsrapMemDone == bootsrapMemStatus)
        return;
    MALLOC_ASSERT(bootsrapMemNotDone == bootsrapMemStatus, ASSERT_TEXT);
    bootsrapMemStatus = bootsrapMemInitializing;
    // request a rather big region in advance during bootstrap;
    // it is OK to get nullptr here, as later we re-do the request with a more modest size
    addNewRegion(2*1024*1024, MEMREG_SLAB_BLOCKS, /*addToBin=*/true);
    bootsrapMemStatus = bootsrapMemDone;
}
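
// requestBootstrapMem() is a double-checked initialization: the status is read
// without the mutex first, then re-checked under bootsrapMemStatusMutex, so the
// bootstrap region is requested exactly once.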

// Try to allocate a block of num*size bytes from the available bins.
// needAlignedBlock is true if the result must be slab-aligned.
FreeBlock *Backend::genericGetBlock(int num, size_t size, bool needAlignedBlock)
{
    FreeBlock *block = nullptr;
    const size_t totalReqSize = num*size;
    // no splitting after requesting a new region; ask for the exact size
    const int nativeBin = sizeToBin(totalReqSize);

    requestBootstrapMem();
    // If we found 2 or fewer locked bins, it's time to ask the OS for more memory.
    // But nothing can be asked from a fixed pool. And we prefer to wait rather than
    // ask for more memory if the block is quite large.
    int lockedBinsThreshold = extMemPool->fixedPool || size>=maxBinned_SmallPage? 0 : 2;

    // Find the maximal requested size limited by getMaxBinnedSize()
    AtomicUpdate(maxRequestedSize, totalReqSize, MaxRequestComparator(this));
    scanCoalescQ(/*forceCoalescQDrop=*/false);

    bool splittable = true;
    for (;;) {
        const intptr_t startModifiedCnt = bkndSync.getNumOfMods();
        int numOfLockedBins;
        intptr_t cleanCnt;
        do {
            cleanCnt = backendCleanCnt.load(std::memory_order_acquire);
            numOfLockedBins = 0;
            if (needAlignedBlock) {
                block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
                                                      /*alignedBin=*/true, &numOfLockedBins);
                if (!block && extMemPool->fixedPool)
                    block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
                                                         /*alignedBin=*/false, &numOfLockedBins);
            } else {
                block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
                                                     /*alignedBin=*/false, &numOfLockedBins);
                if (!block && extMemPool->fixedPool)
                    block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
                                                          /*alignedBin=*/true, &numOfLockedBins);
            }
        } while (!block && (numOfLockedBins>lockedBinsThreshold || cleanCnt % 2 == 1 ||
                            cleanCnt != backendCleanCnt.load(std::memory_order_acquire)));

        if (block)
            break;

        bool retScanCoalescQ = scanCoalescQ(/*forceCoalescQDrop=*/true);
        bool retSoftCachesCleanup = extMemPool->softCachesCleanup();
        if (!(retScanCoalescQ || retSoftCachesCleanup)) {
            // the bins are not updated,
            // the only remaining possibility is to ask for more memory
            block = askMemFromOS(totalReqSize, startModifiedCnt, &lockedBinsThreshold,
                                 numOfLockedBins, &splittable, needAlignedBlock);
            if (!block)
                return nullptr;
            if (block != (FreeBlock*)VALID_BLOCK_IN_BIN) {
                // the size can be increased in askMemFromOS, that's why >=
                MALLOC_ASSERT(block->sizeTmp >= size, ASSERT_TEXT);
                break;
            }
            // a valid block is somewhere in the bins, let's find it
            block = nullptr;
        }
    }
    MALLOC_ASSERT(block, ASSERT_TEXT);
    if (splittable) {
        // At this point we have to be sure that the slabAligned attribute describes the right block state
        block = splitBlock(block, num, size, block->slabAligned, needAlignedBlock);
    }
    // matches blockConsumed() from startUseBlock()
    bkndSync.blockReleased();

    return block;
}

LargeMemoryBlock *Backend::getLargeBlock(size_t size)
{
    LargeMemoryBlock *lmb =
        (LargeMemoryBlock*)genericGetBlock(1, size, /*needAlignedRes=*/false);
    if (lmb) {
        lmb->unalignedSize = size;
        if (extMemPool->userPool())
            extMemPool->lmbList.add(lmb);
    }
    return lmb;
}

BlockI *Backend::getSlabBlock(int num) {
    BlockI *b = (BlockI*)genericGetBlock(num, slabSize, /*slabAligned=*/true);
    MALLOC_ASSERT(isAligned(b, slabSize), ASSERT_TEXT);
    return b;
}

void Backend::putSlabBlock(BlockI *block) {
    genericPutBlock((FreeBlock *)block, slabSize, /*slabAligned=*/true);
}

void *Backend::getBackRefSpace(size_t size, bool *rawMemUsed)
{
    // This block is released only at shutdown, so it could prevent
    // an entire region from being released when it is taken from the backend;
    // therefore prefer using getRawMemory.
    if (void *ret = getRawMemory(size, REGULAR)) {
        *rawMemUsed = true;
        return ret;
    }
    void *ret = genericGetBlock(1, size, /*needAlignedRes=*/false);
    if (ret) *rawMemUsed = false;
    return ret;
}

void Backend::putBackRefSpace(void *b, size_t size, bool rawMemUsed)
{
    if (rawMemUsed)
        freeRawMemory(b, size);
    // ignore non-raw memory, as it is released when its region is released
}

void Backend::removeBlockFromBin(FreeBlock *fBlock)
{
    if (fBlock->myBin != Backend::NO_BIN) {
        if (fBlock->slabAligned)
            freeSlabAlignedBins.lockRemoveBlock(fBlock->myBin, fBlock);
        else
            freeLargeBlockBins.lockRemoveBlock(fBlock->myBin, fBlock);
    }
}

void Backend::genericPutBlock(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
{
    bkndSync.blockConsumed();
    coalescAndPut(fBlock, blockSz, slabAligned);
    bkndSync.blockReleased();
}
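
// The blockConsumed()/blockReleased() pair above brackets the coalescing, keeping
// the backend's in-flight accounting non-zero while the block is being processed,
// so threads spinning in waitTillBlockReleased() can tell that free memory is
// still in flight rather than permanently gone.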

void AllLargeBlocksList::add(LargeMemoryBlock *lmb)
{
    MallocMutex::scoped_lock scoped_cs(largeObjLock);
    lmb->gPrev = nullptr;
    lmb->gNext = loHead;
    if (lmb->gNext)
        lmb->gNext->gPrev = lmb;
    loHead = lmb;
}

void AllLargeBlocksList::remove(LargeMemoryBlock *lmb)
{
    MallocMutex::scoped_lock scoped_cs(largeObjLock);
    if (loHead == lmb)
        loHead = lmb->gNext;
    if (lmb->gNext)
        lmb->gNext->gPrev = lmb->gPrev;
    if (lmb->gPrev)
        lmb->gPrev->gNext = lmb->gNext;
}

void Backend::putLargeBlock(LargeMemoryBlock *lmb)
{
    if (extMemPool->userPool())
        extMemPool->lmbList.remove(lmb);
    genericPutBlock((FreeBlock *)lmb, lmb->unalignedSize, false);
}

void Backend::returnLargeObject(LargeMemoryBlock *lmb)
{
    removeBackRef(lmb->backRefIdx);
    putLargeBlock(lmb);
    STAT_increment(getThreadId(), ThreadCommonCounters, freeLargeObj);
}
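
// The mremap-based reallocation below is attempted only for large objects that
// occupy a whole single-block region in an OS-backed (non-user) pool, and only
// when the requested alignment is no stricter than what the page granularity
// already guarantees; in every other case it falls back by returning nullptr.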

#if BACKEND_HAS_MREMAP
void *Backend::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
{
    // no remap for user pools and for objects small enough to live in bins
    if (inUserPool() || min(oldSize, newSize)<maxBinned_SmallPage
        // during remap we can't guarantee an alignment stricter than the current one
        // or stricter than page alignment
        || !isAligned(ptr, alignment) || alignment>extMemPool->granularity)
        return nullptr;
    const LargeMemoryBlock* lmbOld = ((LargeObjectHdr *)ptr - 1)->memoryBlock;
    const size_t oldUnalignedSize = lmbOld->unalignedSize;
    FreeBlock *oldFBlock = (FreeBlock *)lmbOld;
    FreeBlock *right = oldFBlock->rightNeig(oldUnalignedSize);
    // in every region only one block can have LAST_REGION_BLOCK on its right,
    // so no synchronization is needed
    if (!right->isLastRegionBlock())
        return nullptr;

    MemRegion *oldRegion = static_cast<LastFreeBlock*>(right)->memRegion;
    MALLOC_ASSERT( oldRegion < ptr, ASSERT_TEXT );
    const size_t oldRegionSize = oldRegion->allocSz;
    if (oldRegion->type != MEMREG_ONE_BLOCK)
        return nullptr; // we are not alone in the region
    const size_t userOffset = (uintptr_t)ptr - (uintptr_t)oldRegion;
    const size_t alignedSize = LargeObjectCache::alignToBin(newSize + userOffset);
    const size_t requestSize =
        alignUp(sizeof(MemRegion) + alignedSize + sizeof(LastFreeBlock), extMemPool->granularity);
    if (requestSize < alignedSize) // has it wrapped around?
        return nullptr;

    regionList.remove(oldRegion);

    // The deallocation should be registered in the address range before mremap to
    // prevent a race condition with an allocation on another thread
    // (the OS can reuse the memory, and registerAlloc on another thread would be missed).
    usedAddrRange.registerFree((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize);

    void *ret = mremap(oldRegion, oldRegion->allocSz, requestSize, MREMAP_MAYMOVE);
    if (MAP_FAILED == ret) { // can't remap, revert and leave
        regionList.add(oldRegion);
        usedAddrRange.registerAlloc((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize);
        return nullptr;
    }
    MemRegion *region = (MemRegion*)ret;
    MALLOC_ASSERT(region->type == MEMREG_ONE_BLOCK, ASSERT_TEXT);
    region->allocSz = requestSize;
    region->blockSz = alignedSize;

    FreeBlock *fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion),
                                             largeObjectAlignment);

    regionList.add(region);
    startUseBlock(region, fBlock, /*addToBin=*/false);
    MALLOC_ASSERT(fBlock->sizeTmp == region->blockSz, ASSERT_TEXT);
    // matches blockConsumed() in startUseBlock().
    // TODO: get rid of the useless blockConsumed()/blockReleased() pair
    bkndSync.blockReleased();

    // the object must start at the same offset from the region's start
    void *object = (void*)((uintptr_t)region + userOffset);
    MALLOC_ASSERT(isAligned(object, alignment), ASSERT_TEXT);
    LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
    setBackRef(header->backRefIdx, header);

    LargeMemoryBlock *lmb = (LargeMemoryBlock*)fBlock;
    lmb->unalignedSize = region->blockSz;
    lmb->objectSize = newSize;
    lmb->backRefIdx = header->backRefIdx;
    header->memoryBlock = lmb;
    MALLOC_ASSERT((uintptr_t)lmb + lmb->unalignedSize >=
                  (uintptr_t)object + lmb->objectSize, "An object must fit to the block.");

    usedAddrRange.registerAlloc((uintptr_t)region, (uintptr_t)region + requestSize);
    totalMemSize.fetch_add(region->allocSz - oldRegionSize);

    return object;
}
#endif /* BACKEND_HAS_MREMAP */

void Backend::releaseRegion(MemRegion *memRegion)
{
    regionList.remove(memRegion);
    freeRawMem(memRegion, memRegion->allocSz);
}

// coalesce fBlock with its neighbors
FreeBlock *Backend::doCoalesc(FreeBlock *fBlock, MemRegion **mRegion)
{
    FreeBlock *resBlock = fBlock;
    size_t resSize = fBlock->sizeTmp;
    MemRegion *memRegion = nullptr;

    fBlock->markCoalescing(resSize);
    resBlock->blockInBin = false;

    // coalescing with the left neighbor
    size_t leftSz = fBlock->trySetLeftUsed(GuardedSize::COAL_BLOCK);
    if (leftSz != GuardedSize::LOCKED) {
        if (leftSz == GuardedSize::COAL_BLOCK) {
            coalescQ.putBlock(fBlock);
            return nullptr;
        } else {
            FreeBlock *left = fBlock->leftNeig(leftSz);
            size_t lSz = left->trySetMeUsed(GuardedSize::COAL_BLOCK);
            if (lSz <= GuardedSize::MAX_LOCKED_VAL) {
                fBlock->setLeftFree(leftSz); // rollback
                coalescQ.putBlock(fBlock);
                return nullptr;
            } else {
                MALLOC_ASSERT(lSz == leftSz, "Invalid header");
                left->blockInBin = true;
                resBlock = left;
                resSize += leftSz;
                resBlock->sizeTmp = resSize;
            }
        }
    }
    // coalescing with the right neighbor
    FreeBlock *right = fBlock->rightNeig(fBlock->sizeTmp);
    size_t rightSz = right->trySetMeUsed(GuardedSize::COAL_BLOCK);
    if (rightSz != GuardedSize::LOCKED) {
        // LastFreeBlock is on the right side
        if (GuardedSize::LAST_REGION_BLOCK == rightSz) {
            right->setMeFree(GuardedSize::LAST_REGION_BLOCK);
            memRegion = static_cast<LastFreeBlock*>(right)->memRegion;
        } else if (GuardedSize::COAL_BLOCK == rightSz) {
            if (resBlock->blockInBin) {
                resBlock->blockInBin = false;
                removeBlockFromBin(resBlock);
            }
            coalescQ.putBlock(resBlock);
            return nullptr;
        } else {
            size_t rSz = right->rightNeig(rightSz)->
                trySetLeftUsed(GuardedSize::COAL_BLOCK);
            if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
                right->setMeFree(rightSz); // rollback
                if (resBlock->blockInBin) {
                    resBlock->blockInBin = false;
                    removeBlockFromBin(resBlock);
                }
                coalescQ.putBlock(resBlock);
                return nullptr;
            } else {
                MALLOC_ASSERT(rSz == rightSz, "Invalid header");
                removeBlockFromBin(right);
                resSize += rightSz;

                // Is LastFreeBlock on the right side of right?
                FreeBlock *nextRight = right->rightNeig(rightSz);
                size_t nextRightSz = nextRight->
                    trySetMeUsed(GuardedSize::COAL_BLOCK);
                if (nextRightSz > GuardedSize::MAX_LOCKED_VAL) {
                    if (nextRightSz == GuardedSize::LAST_REGION_BLOCK)
                        memRegion = static_cast<LastFreeBlock*>(nextRight)->memRegion;

                    nextRight->setMeFree(nextRightSz);
                }
            }
        }
    }
    if (memRegion) {
        MALLOC_ASSERT((uintptr_t)memRegion + memRegion->allocSz >=
                      (uintptr_t)right + sizeof(LastFreeBlock), ASSERT_TEXT);
        MALLOC_ASSERT((uintptr_t)memRegion < (uintptr_t)resBlock, ASSERT_TEXT);
        *mRegion = memRegion;
    } else
        *mRegion = nullptr;
    resBlock->sizeTmp = resSize;
    return resBlock;
}
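
// Summary of the protocol above: the (already locked) block is marked as being
// coalesced, then its left and right neighbors are locked through the shared
// GuardedSize headers. If a neighbor is busy or is itself being coalesced, the
// changes are rolled back and the block is re-queued to coalescQ for deferred
// processing. Reaching the LAST_REGION_BLOCK sentinel on the right reveals the
// owning MemRegion, letting the caller detect that the whole region became free.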

bool Backend::coalescAndPutList(FreeBlock *list, bool forceCoalescQDrop, bool reportBlocksProcessed)
{
    bool regionReleased = false;

    for (FreeBlock *helper; list;
         list = helper,
         // matches the block enqueue in CoalRequestQ::putBlock()
         reportBlocksProcessed? coalescQ.blockWasProcessed() : (void)0) {
        MemRegion *memRegion;
        bool addToTail = false;

        helper = list->nextToFree;
        FreeBlock *toRet = doCoalesc(list, &memRegion);
        if (!toRet)
            continue;

        if (memRegion && memRegion->blockSz == toRet->sizeTmp
            && !extMemPool->fixedPool) {
            if (extMemPool->regionsAreReleaseable()) {
                // release the region, because there are no used blocks in it
                if (toRet->blockInBin)
                    removeBlockFromBin(toRet);
                releaseRegion(memRegion);
                regionReleased = true;
                continue;
            } else                // add the block from the empty region to the end of the bin,
                addToTail = true; // preserving it for an exact fit
        }
        size_t currSz = toRet->sizeTmp;
        int bin = sizeToBin(currSz);
        bool toAligned = extMemPool->fixedPool ? toAlignedBin(toRet, currSz) : toRet->slabAligned;
        bool needAddToBin = true;

        if (toRet->blockInBin) {
            // Does it stay in the same bin?
            if (toRet->myBin == bin && toRet->slabAligned == toAligned)
                needAddToBin = false;
            else {
                toRet->blockInBin = false;
                removeBlockFromBin(toRet);
            }
        }

        // Does not stay in the same bin, or is bin-less; add it
        if (needAddToBin) {
            toRet->prev = toRet->next = toRet->nextToFree = nullptr;
            toRet->myBin = NO_BIN;
            toRet->slabAligned = toAligned;

            // If the block is too small to fit in any bin, keep it bin-less.
            // It's not a leak because the block can later be coalesced.
            if (currSz >= minBinnedSize) {
                toRet->sizeTmp = currSz;
                IndexedBins *target = toRet->slabAligned ? &freeSlabAlignedBins : &freeLargeBlockBins;
                if (forceCoalescQDrop) {
                    target->addBlock(bin, toRet, toRet->sizeTmp, addToTail);
                } else if (!target->tryAddBlock(bin, toRet, addToTail)) {
                    coalescQ.putBlock(toRet);
                    continue;
                }
            }
            toRet->sizeTmp = 0;
        }
        // Free the (possibly coalesced) free block.
        // Adding to a bin must be done before this point,
        // because after the block is free it can be coalesced, and
        // using its pointer becomes unsafe.
        // Remember that coalescing is not done under any global lock.
        toRet->setMeFree(currSz);
        toRet->rightNeig(currSz)->setLeftFree(currSz);
    }
    return regionReleased;
}

// Coalesce fBlock and add it back to a bin,
// processing delayed coalescing requests along the way.
void Backend::coalescAndPut(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
{
    fBlock->sizeTmp = blockSz;
    fBlock->nextToFree = nullptr;
    fBlock->slabAligned = slabAligned;

    coalescAndPutList(fBlock, /*forceCoalescQDrop=*/false, /*reportBlocksProcessed=*/false);
}

bool Backend::scanCoalescQ(bool forceCoalescQDrop)
{
    FreeBlock *currCoalescList = coalescQ.getAll();

    if (currCoalescList)
        // reportBlocksProcessed=true informs that the blocks leave coalescQ,
        // matches blockConsumed() from CoalRequestQ::putBlock()
        coalescAndPutList(currCoalescList, forceCoalescQDrop,
                          /*reportBlocksProcessed=*/true);
    // returns the status of coalescQ.getAll(), as an indication of possible changes in the backend
    // TODO: coalescAndPutList() may report whether some new free blocks became available or not
    return currCoalescList;
}

FreeBlock *Backend::findBlockInRegion(MemRegion *region, size_t exactBlockSize)
{
    FreeBlock *fBlock;
    size_t blockSz;
    uintptr_t fBlockEnd,
        lastFreeBlock = (uintptr_t)region + region->allocSz - sizeof(LastFreeBlock);

    static_assert(sizeof(LastFreeBlock) % sizeof(uintptr_t) == 0,
                  "Atomic applied on LastFreeBlock, and we put it at the end of region, that"
                  " is uintptr_t-aligned, so no unaligned atomic operations are possible.");
    // the right bound is slab-aligned, keep LastFreeBlock after it
    if (region->type == MEMREG_SLAB_BLOCKS) {
        fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), sizeof(uintptr_t));
        fBlockEnd = alignDown(lastFreeBlock, slabSize);
    } else {
        fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), largeObjectAlignment);
        fBlockEnd = (uintptr_t)fBlock + exactBlockSize;
        MALLOC_ASSERT(fBlockEnd <= lastFreeBlock, ASSERT_TEXT);
    }
    if (fBlockEnd <= (uintptr_t)fBlock)
        return nullptr; // allocSz is too small
    blockSz = fBlockEnd - (uintptr_t)fBlock;
    // TODO: extend getSlabBlock to support degradation, i.e. getting fewer blocks
    // than requested, and then relax this check
    // (for now all-or-nothing is implemented, and the check follows that)
    if (blockSz < numOfSlabAllocOnMiss*slabSize)
        return nullptr;

    region->blockSz = blockSz;
    return fBlock;
}
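
// Resulting region layout, as computed above and set up by startUseBlock():
//
//   [MemRegion header][alignment padding][first FreeBlock ... blockSz bytes][LastFreeBlock sentinel]
//
// For MEMREG_SLAB_BLOCKS regions the usable part is additionally trimmed so that
// its right edge is slab-aligned.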

// startUseBlock may add the free block to a bin; the block can be used and
// even released after this, so the region must already be added to regionList
void Backend::startUseBlock(MemRegion *region, FreeBlock *fBlock, bool addToBin)
{
    size_t blockSz = region->blockSz;
    fBlock->initHeader();
    fBlock->setMeFree(blockSz);

    LastFreeBlock *lastBl = static_cast<LastFreeBlock*>(fBlock->rightNeig(blockSz));
    // to avoid unaligned atomics during LastFreeBlock access
    MALLOC_ASSERT(isAligned(lastBl, sizeof(uintptr_t)), nullptr);
    lastBl->initHeader();
    lastBl->setMeFree(GuardedSize::LAST_REGION_BLOCK);
    lastBl->setLeftFree(blockSz);
    lastBl->myBin = NO_BIN;
    lastBl->memRegion = region;

    if (addToBin) {
        unsigned targetBin = sizeToBin(blockSz);
        // when adding advance regions, register the bin for the largest block in the region
        advRegBins.registerBin(targetBin);
        if (region->type == MEMREG_SLAB_BLOCKS) {
            fBlock->slabAligned = true;
            freeSlabAlignedBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
        } else {
            fBlock->slabAligned = false;
            freeLargeBlockBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
        }
    } else {
        // to match blockReleased() in genericGetBlock
        bkndSync.blockConsumed();
        // Determine our alignment for correct splitBlock operation
        fBlock->slabAligned = region->type == MEMREG_SLAB_BLOCKS ? true : false;
        fBlock->sizeTmp = fBlock->tryLockBlock();
        MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, "Locking must be successful");
    }
}

void MemRegionList::add(MemRegion *r)
{
    r->prev = nullptr;
    MallocMutex::scoped_lock lock(regionListLock);
    r->next = head;
    head = r;
    if (head->next)
        head->next->prev = head;
}

void MemRegionList::remove(MemRegion *r)
{
    MallocMutex::scoped_lock lock(regionListLock);
    if (head == r)
        head = head->next;
    if (r->next)
        r->next->prev = r->prev;
    if (r->prev)
        r->prev->next = r->next;
}

#if __TBB_MALLOC_BACKEND_STAT
int MemRegionList::reportStat(FILE *f)
{
    int regNum = 0;
    MallocMutex::scoped_lock lock(regionListLock);
    for (MemRegion *curr = head; curr; curr = curr->next) {
        fprintf(f, "%p: max block %lu B, ", curr, curr->blockSz);
        regNum++;
    }
    return regNum;
}
#endif

FreeBlock *Backend::addNewRegion(size_t size, MemRegionType memRegType, bool addToBin)
{
    static_assert(sizeof(BlockMutexes) <= sizeof(BlockI), "Header must be not overwritten in used blocks");
    MALLOC_ASSERT(FreeBlock::minBlockSize > GuardedSize::MAX_SPEC_VAL,
                  "Block length must not conflict with special values of GuardedSize");
    // If the region is not "for slabs" we should reserve some space for
    // a region header, the worst case alignment and the last block mark.
    const size_t requestSize = memRegType == MEMREG_SLAB_BLOCKS ? size :
        size + sizeof(MemRegion) + largeObjectAlignment
        + FreeBlock::minBlockSize + sizeof(LastFreeBlock);

    size_t rawSize = requestSize;
    MemRegion *region = (MemRegion*)allocRawMem(rawSize);
    if (!region) {
        MALLOC_ASSERT(rawSize==requestSize, "getRawMem has not allocated memory but changed the allocated size.");
        return nullptr;
    }
    if (rawSize < sizeof(MemRegion)) {
        if (!extMemPool->fixedPool)
            freeRawMem(region, rawSize);
        return nullptr;
    }

    region->type = memRegType;
    region->allocSz = rawSize;
    FreeBlock *fBlock = findBlockInRegion(region, size);
    if (!fBlock) {
        if (!extMemPool->fixedPool)
            freeRawMem(region, rawSize);
        return nullptr;
    }
    regionList.add(region);
    startUseBlock(region, fBlock, addToBin);
    bkndSync.binsModified();
    return addToBin? (FreeBlock*)VALID_BLOCK_IN_BIN : fBlock;
}
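
// Note that with addToBin=true the caller gets the VALID_BLOCK_IN_BIN marker rather
// than a block pointer: the new block was published directly into a bin, so
// genericGetBlock() and askMemFromOS() treat the marker as "rescan the bins".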

void Backend::init(ExtMemoryPool *extMemoryPool)
{
    extMemPool = extMemoryPool;
    usedAddrRange.init();
    coalescQ.init(&bkndSync);
    bkndSync.init(this);
}

void Backend::reset()
{
    MALLOC_ASSERT(extMemPool->userPool(), "Only user pool can be reset.");
    // no active threads are allowed in the backend while reset() is called
    verify();

    freeLargeBlockBins.reset();
    freeSlabAlignedBins.reset();
    advRegBins.reset();

    for (MemRegion *curr = regionList.head; curr; curr = curr->next) {
        FreeBlock *fBlock = findBlockInRegion(curr, curr->blockSz);
        MALLOC_ASSERT(fBlock, "A memory region unexpectedly got smaller");
        startUseBlock(curr, fBlock, /*addToBin=*/true);
    }
}

bool Backend::destroy()
{
    bool noError = true;
    // no active threads are allowed in the backend while destroy() is called
    verify();
    if (!inUserPool()) {
        freeLargeBlockBins.reset();
        freeSlabAlignedBins.reset();
    }
    while (regionList.head) {
        MemRegion *helper = regionList.head->next;
        noError &= freeRawMem(regionList.head, regionList.head->allocSz);
        regionList.head = helper;
    }
    return noError;
}

bool Backend::clean()
{
    scanCoalescQ(/*forceCoalescQDrop=*/false);
    // Backend::clean is always called under synchronization, so only one thread can
    // enter this method at a time.
    // backendCleanCnt % 2 == 1 means that a clean operation is in progress
    backendCleanCnt.fetch_add(1, std::memory_order_acq_rel);
    bool res = false;
    // We can have several blocks occupying a whole region,
    // because such regions are added in advance (see askMemFromOS() and reset())
    // and never used. Release them all.
    for (int i = advRegBins.getMinUsedBin(0); i != -1; i = advRegBins.getMinUsedBin(i+1)) {
        if (i == freeSlabAlignedBins.getMinNonemptyBin(i))
            res |= freeSlabAlignedBins.tryReleaseRegions(i, this);
        if (i == freeLargeBlockBins.getMinNonemptyBin(i))
            res |= freeLargeBlockBins.tryReleaseRegions(i, this);
    }
    backendCleanCnt.fetch_add(1, std::memory_order_acq_rel);
    return res;
}

void Backend::IndexedBins::verify()
{
#if MALLOC_DEBUG
    for (int i=0; i<(int)freeBinsNum; i++) {
        for (FreeBlock *fb = freeBins[i].head.load(std::memory_order_relaxed); fb; fb=fb->next) {
            uintptr_t mySz = fb->myL.value;
            MALLOC_ASSERT(mySz>GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
            FreeBlock *right = (FreeBlock*)((uintptr_t)fb + mySz);
            suppress_unused_warning(right);
            MALLOC_ASSERT(right->myL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
            MALLOC_ASSERT(right->leftL.value==mySz, ASSERT_TEXT);
            MALLOC_ASSERT(fb->leftL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
        }
    }
#endif
}

// For correct operation, this must be called when no other thread
// is changing the backend.
void Backend::verify()
{
#if MALLOC_DEBUG
    scanCoalescQ(/*forceCoalescQDrop=*/false);
#endif // MALLOC_DEBUG

    freeLargeBlockBins.verify();
    freeSlabAlignedBins.verify();
}

#if __TBB_MALLOC_BACKEND_STAT
size_t Backend::Bin::countFreeBlocks()
{
    size_t cnt = 0;
    {
        MallocMutex::scoped_lock lock(tLock);
        for (FreeBlock *fb = head; fb; fb = fb->next)
            cnt++;
    }
    return cnt;
}

size_t Backend::Bin::reportFreeBlocks(FILE *f)
{
    size_t totalSz = 0;
    MallocMutex::scoped_lock lock(tLock);
    for (FreeBlock *fb = head; fb; fb = fb->next) {
        size_t sz = fb->tryLockBlock();
        fb->setMeFree(sz);
        fb->rightNeig(sz)->setLeftFree(sz);
        fprintf(f, " [%p;%p]", fb, (void*)((uintptr_t)fb+sz));
        totalSz += sz;
    }
    return totalSz;
}

void Backend::IndexedBins::reportStat(FILE *f)
{
    size_t totalSize = 0;

    for (int i=0; i<Backend::freeBinsNum; i++)
        if (size_t cnt = freeBins[i].countFreeBlocks()) {
            totalSize += freeBins[i].reportFreeBlocks(f);
            fprintf(f, " %d:%lu, ", i, cnt);
        }
    fprintf(f, "\ttotal size %lu KB", totalSize/1024);
}

void Backend::reportStat(FILE *f)
{
    scanCoalescQ(/*forceCoalescQDrop=*/false);

    fprintf(f, "\n regions:\n");
    int regNum = regionList.reportStat(f);
    fprintf(f, "\n%d regions, %lu KB in all regions\n free bins:\nlarge bins: ",
            regNum, totalMemSize/1024);
    freeLargeBlockBins.reportStat(f);
    fprintf(f, "\naligned bins: ");
    freeSlabAlignedBins.reportStat(f);
    fprintf(f, "\n");
}
#endif // __TBB_MALLOC_BACKEND_STAT

} } // namespaces