/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include <string.h>   /* for memset */
#include <errno.h>
#include "tbbmalloc_internal.h"

namespace rml {
namespace internal {

/*********** Code to acquire memory from the OS or other executive ****************/

/*
  A syscall or malloc can set a non-zero errno on failure, yet the allocator
  might still be able to find memory to fulfill the request. Also, a successful
  scalable_malloc call must not change errno. To support this, the old errno is
  restored in (get|free)RawMemory, and errno is set in the frontend just before
  returning to user code.
  Please note: every syscall/libc call used inside scalable_malloc that
  sets errno must be protected this way, not just memory allocation per se.
*/

#if USE_DEFAULT_MEMORY_MAPPING
#include "MapMemory.h"
#else
/* assume MapMemory and UnmapMemory are customized */
#endif

void* getRawMemory(size_t size, PageType pageType) {
    return MapMemory(size, pageType);
}

int freeRawMemory(void *object, size_t size) {
    return UnmapMemory(object, size);
}

#if CHECK_ALLOCATION_RANGE

void Backend::UsedAddressRange::registerAlloc(uintptr_t left, uintptr_t right)
{
    MallocMutex::scoped_lock lock(mutex);
    if (left < leftBound.load(std::memory_order_relaxed))
        leftBound.store(left, std::memory_order_relaxed);
    if (right > rightBound.load(std::memory_order_relaxed))
        rightBound.store(right, std::memory_order_relaxed);
    MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed), ASSERT_TEXT);
    MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
    MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) <= left && right <= rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
}

void Backend::UsedAddressRange::registerFree(uintptr_t left, uintptr_t right)
{
    MallocMutex::scoped_lock lock(mutex);
    if (leftBound.load(std::memory_order_relaxed) == left) {
        if (rightBound.load(std::memory_order_relaxed) == right) {
            leftBound.store(ADDRESS_UPPER_BOUND, std::memory_order_relaxed);
            rightBound.store(0, std::memory_order_relaxed);
        } else
            leftBound.store(right, std::memory_order_relaxed);
    } else if (rightBound.load(std::memory_order_relaxed) == right)
        rightBound.store(left, std::memory_order_relaxed);
    MALLOC_ASSERT((!rightBound.load(std::memory_order_relaxed) && leftBound.load(std::memory_order_relaxed) == ADDRESS_UPPER_BOUND)
                  || leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
}
#endif // CHECK_ALLOCATION_RANGE

// Initialized in frontend inside defaultMemPool
extern HugePagesStatus hugePages;

void *Backend::allocRawMem(size_t &size)
{
    void *res = nullptr;
    size_t allocSize = 0;

    if (extMemPool->userPool()) {
        if (extMemPool->fixedPool && bootsrapMemDone ==
            bootsrapMemStatus.load(std::memory_order_acquire))
            return nullptr;
        MALLOC_ASSERT(bootsrapMemStatus != bootsrapMemNotDone,
                      "Backend::allocRawMem() called prematurely?");
        // TODO: support for raw mem not aligned at sizeof(uintptr_t)
        // memory from the fixed pool is asked for once and only once
        allocSize = alignUpGeneric(size, extMemPool->granularity);
        res = (*extMemPool->rawAlloc)(extMemPool->poolId, allocSize);
    } else {
        // Align allocation on page size
        size_t pageSize = hugePages.isEnabled ? hugePages.getGranularity() : extMemPool->granularity;
        MALLOC_ASSERT(pageSize, "Page size cannot be zero.");
        allocSize = alignUpGeneric(size, pageSize);

        // If the user requested huge pages and they are available, try preallocated ones first.
        // If there are none, check transparent huge page support and use that instead.
        if (hugePages.isEnabled) {
            if (hugePages.isHPAvailable) {
                res = getRawMemory(allocSize, PREALLOCATED_HUGE_PAGE);
            }
            if (!res && hugePages.isTHPAvailable) {
                res = getRawMemory(allocSize, TRANSPARENT_HUGE_PAGE);
            }
        }

        if (!res) {
            res = getRawMemory(allocSize, REGULAR);
        }
    }

    if (res) {
        MALLOC_ASSERT(allocSize > 0, "Invalid size of an allocated region.");
        size = allocSize;
        if (!extMemPool->userPool())
            usedAddrRange.registerAlloc((uintptr_t)res, (uintptr_t)res+size);
#if MALLOC_DEBUG
        volatile size_t curTotalSize = totalMemSize; // to read the global value once
        MALLOC_ASSERT(curTotalSize+size > curTotalSize, "Overflow allocation size.");
#endif
        totalMemSize.fetch_add(size);
    }

    return res;
}

bool Backend::freeRawMem(void *object, size_t size)
{
    bool fail;
#if MALLOC_DEBUG
    volatile size_t curTotalSize = totalMemSize; // to read the global value once
    MALLOC_ASSERT(curTotalSize-size < curTotalSize, "Negative allocation size.");
#endif
    totalMemSize.fetch_sub(size);
    if (extMemPool->userPool()) {
        MALLOC_ASSERT(!extMemPool->fixedPool, "No free for fixed-size pools.");
        fail = (*extMemPool->rawFree)(extMemPool->poolId, object, size);
    } else {
        usedAddrRange.registerFree((uintptr_t)object, (uintptr_t)object + size);
        fail = freeRawMemory(object, size);
    }
    // TODO: use the result in all freeRawMem() callers
    return !fail;
}

/********* End memory acquisition code ********************************/

// Protects the object size. After successful locking it returns the size of the locked block,
// and releasing requires setting the block size.
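// Illustrative locking sketch (not a public API; "guard" is a hypothetical
// GuardedSize instance, names follow the class below):
//     size_t sz = guard.tryLock(GuardedSize::LOCKED); // returns the stored size, or a
//                                                     // value <= MAX_LOCKED_VAL if the
//                                                     // block is already locked/special
//     if (sz > GuardedSize::MAX_LOCKED_VAL) {
//         /* ... the block of size sz is now locked ... */
//         guard.unlock(sz);                           // publish the size again
//     }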
class GuardedSize : tbb::detail::no_copy {
    std::atomic<uintptr_t> value;
public:
    enum State {
        LOCKED,
        COAL_BLOCK,        // block is coalescing now
        MAX_LOCKED_VAL = COAL_BLOCK,
        LAST_REGION_BLOCK, // used to mark the last block in a region
        // values after this are "normal" block sizes
        MAX_SPEC_VAL = LAST_REGION_BLOCK
    };

    void initLocked() { value.store(LOCKED, std::memory_order_release); } // TBB_REVAMP_TODO: was relaxed
    void makeCoalscing() {
        MALLOC_ASSERT(value.load(std::memory_order_relaxed) == LOCKED, ASSERT_TEXT);
        value.store(COAL_BLOCK, std::memory_order_release); // TBB_REVAMP_TODO: was relaxed
    }
    size_t tryLock(State state) {
        MALLOC_ASSERT(state <= MAX_LOCKED_VAL, ASSERT_TEXT);
        size_t sz = value.load(std::memory_order_acquire);
        for (;;) {
            if (sz <= MAX_LOCKED_VAL) {
                break;
            }
            if (value.compare_exchange_strong(sz, state)) {
                break;
            }
        }
        return sz;
    }
    void unlock(size_t size) {
        MALLOC_ASSERT(value.load(std::memory_order_relaxed) <= MAX_LOCKED_VAL, "The lock is not locked");
        MALLOC_ASSERT(size > MAX_LOCKED_VAL, ASSERT_TEXT);
        value.store(size, std::memory_order_release);
    }
    bool isLastRegionBlock() const { return value.load(std::memory_order_relaxed) == LAST_REGION_BLOCK; }
    friend void Backend::IndexedBins::verify();
};

struct MemRegion {
    MemRegion *next,    // keep all regions of a pool listed, to release them all on
              *prev;    // pool destruction; a 2-linked list allows releasing individual
                        // regions as well.
    size_t     allocSz, // obtained from the pool callback
               blockSz; // initial and maximal inner block size
    MemRegionType type;
};

// this data must stay unmodified while the block is in use, so keep it separate
class BlockMutexes {
protected:
    GuardedSize myL,   // lock for me
                leftL; // lock for the left neighbor
};

class FreeBlock : BlockMutexes {
public:
    static const size_t minBlockSize;
    friend void Backend::IndexedBins::verify();

    FreeBlock *prev,       // in a 2-linked list related to a bin
              *next,
              *nextToFree; // used to form a queue during coalescing
    // valid only while the block is being processed, i.e.
    // the block is neither free nor already sitting in a bin
    size_t     sizeTmp;      // used outside of the backend
    int        myBin;        // the bin that owns the block
    bool       slabAligned;
    bool       blockInBin;   // this block is in myBin already

    FreeBlock *rightNeig(size_t sz) const {
        MALLOC_ASSERT(sz, ASSERT_TEXT);
        return (FreeBlock*)((uintptr_t)this + sz);
    }
    FreeBlock *leftNeig(size_t sz) const {
        MALLOC_ASSERT(sz, ASSERT_TEXT);
        return (FreeBlock*)((uintptr_t)this - sz);
    }

    void initHeader() { myL.initLocked(); leftL.initLocked(); }
    void setMeFree(size_t size) { myL.unlock(size); }
    size_t trySetMeUsed(GuardedSize::State s) { return myL.tryLock(s); }
    bool isLastRegionBlock() const { return myL.isLastRegionBlock(); }

    void setLeftFree(size_t sz) { leftL.unlock(sz); }
    size_t trySetLeftUsed(GuardedSize::State s) { return leftL.tryLock(s); }

    size_t tryLockBlock() {
        size_t rSz, sz = trySetMeUsed(GuardedSize::LOCKED);

        if (sz <= GuardedSize::MAX_LOCKED_VAL)
            return false;
        rSz = rightNeig(sz)->trySetLeftUsed(GuardedSize::LOCKED);
        if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
            setMeFree(sz);
            return false;
        }
        MALLOC_ASSERT(rSz == sz, ASSERT_TEXT);
        return sz;
    }
    void markCoalescing(size_t blockSz) {
        myL.makeCoalscing();
        rightNeig(blockSz)->leftL.makeCoalscing();
        sizeTmp = blockSz;
        nextToFree = nullptr;
    }
    void markUsed() {
        myL.initLocked();
        rightNeig(sizeTmp)->leftL.initLocked();
        nextToFree = nullptr;
    }
    static void markBlocks(FreeBlock *fBlock, int num, size_t size) {
        for (int i=1; i<num; i++) {
            fBlock = (FreeBlock*)((uintptr_t)fBlock + size);
            fBlock->initHeader();
        }
    }
};

// The last block in any region. Its "size" field is GuardedSize::LAST_REGION_BLOCK.
// Blocks of this kind are used to find the region header
// and make it possible to return the region back to the OS.
struct LastFreeBlock : public FreeBlock {
    MemRegion *memRegion;
};

const size_t FreeBlock::minBlockSize = sizeof(FreeBlock);

inline bool BackendSync::waitTillBlockReleased(intptr_t startModifiedCnt)
{
    AtomicBackoff backoff;
#if __TBB_MALLOC_BACKEND_STAT
    class ITT_Guard {
        void *ptr;
    public:
        ITT_Guard(void *p) : ptr(p) {
            MALLOC_ITT_SYNC_PREPARE(ptr);
        }
        ~ITT_Guard() {
            MALLOC_ITT_SYNC_ACQUIRED(ptr);
        }
    };
    ITT_Guard ittGuard(&inFlyBlocks);
#endif
    for (intptr_t myBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire),
         myCoalescQInFlyBlocks = backend->blocksInCoalescing(); ; backoff.pause()) {
        MALLOC_ASSERT(myBinsInFlyBlocks>=0 && myCoalescQInFlyBlocks>=0, nullptr);
        intptr_t currBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire),
                 currCoalescQInFlyBlocks = backend->blocksInCoalescing();
        WhiteboxTestingYield();
        // Stop waiting iff:

        // 1) blocks were removed from processing, not added
        if (myBinsInFlyBlocks > currBinsInFlyBlocks
            // 2) blocks were released from the delayed coalescing queue
            || myCoalescQInFlyBlocks > currCoalescQInFlyBlocks)
            break;
        // 3) if there are blocks in coalescing and no progress in their processing,
        //    try to scan the coalescing queue and stop waiting if changes were made
        //    (if there are no changes and in-fly blocks exist, we continue
        //    waiting to not increase the load on coalescQ)
        if (currCoalescQInFlyBlocks > 0 && backend->scanCoalescQ(/*forceCoalescQDrop=*/false))
            break;
        // 4) when there are no blocks
        if (!currBinsInFlyBlocks
            && !currCoalescQInFlyBlocks)
            // a re-scan makes sense only if the bins were modified since they were scanned
            return startModifiedCnt != getNumOfMods();
        myBinsInFlyBlocks = currBinsInFlyBlocks;
        myCoalescQInFlyBlocks = currCoalescQInFlyBlocks;
    }
    return true;
}

void CoalRequestQ::putBlock(FreeBlock *fBlock)
{
    MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, ASSERT_TEXT);
    fBlock->markUsed();
    // the block is in the queue, do not forget that it's here
    inFlyBlocks++;

    FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);
    for (;;) {
        fBlock->nextToFree = myBlToFree;
        if (blocksToFree.compare_exchange_strong(myBlToFree, fBlock)) {
            return;
        }
    }
}

FreeBlock *CoalRequestQ::getAll()
{
    for (;;) {
        FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);

        if (!myBlToFree) {
            return nullptr;
        } else {
            if (blocksToFree.compare_exchange_strong(myBlToFree, nullptr)) {
                return myBlToFree;
            } else {
                continue;
            }
        }
    }
}

inline void CoalRequestQ::blockWasProcessed()
{
    bkndSync->binsModified();
    int prev = inFlyBlocks.fetch_sub(1);
    tbb::detail::suppress_unused_warning(prev);
    MALLOC_ASSERT(prev > 0, ASSERT_TEXT);
}

// Try to get a block from a bin.
// If the remaining free space would stay in the same bin,
// split the block without removing it.
// If the free space should go to other bin(s), remove the block.
// alignedBin is true if all blocks in the bin have a slab-aligned right side.
FreeBlock *Backend::IndexedBins::getFromBin(int binIdx, BackendSync *sync, size_t size,
                                            bool needAlignedRes, bool alignedBin, bool wait, int *binLocked)
{
    Bin *b = &freeBins[binIdx];
try_next:
    FreeBlock *fBlock = nullptr;
    if (!b->empty()) {
        bool locked = false;
        MallocMutex::scoped_lock scopedLock(b->tLock, wait, &locked);

        if (!locked) {
            if (binLocked) (*binLocked)++;
            return nullptr;
        }

        for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; curr = curr->next) {
            size_t szBlock = curr->tryLockBlock();
            if (!szBlock) {
                // the block is locked; re-acquire the bin lock, as there is no place to spin
                // while the block is coalescing
                goto try_next;
            }

            // GENERAL CASE
            if (alignedBin || !needAlignedRes) {
                size_t splitSz = szBlock - size;
                // If we get a block as a split result, it must have room for the control structures.
                if (szBlock >= size && (splitSz >= FreeBlock::minBlockSize || !splitSz))
                    fBlock = curr;
            } else {
                // SPECIAL CASE: to get an aligned block from an unaligned bin we have to cut the middle
                // of a block and return the remaining left and right parts. Possible only in the fixed
                // pool scenario; the assert for this is inside the splitBlock() function.

                void *newB = alignUp(curr, slabSize);
                uintptr_t rightNew = (uintptr_t)newB + size;
                uintptr_t rightCurr = (uintptr_t)curr + szBlock;
                // Check that the block size is sufficient,
                // and that the left and right split results are either big enough or non-existent
                if (rightNew <= rightCurr
                    && (newB == curr || ((uintptr_t)newB - (uintptr_t)curr) >= FreeBlock::minBlockSize)
                    && (rightNew == rightCurr || (rightCurr - rightNew) >= FreeBlock::minBlockSize))
                    fBlock = curr;
            }

            if (fBlock) {
                // blockConsumed() must be called before the result of removing from a bin is visible externally.
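                // (blockConsumed() increments the in-fly counter observed by
                //  BackendSync::waitTillBlockReleased(); the matching blockReleased()
                //  is issued once the block finally leaves the backend, see genericGetBlock().)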
                sync->blockConsumed();
                // TODO: think about cases when the block stays in the same bin
                b->removeBlock(fBlock);
                if (freeBins[binIdx].empty())
                    bitMask.set(binIdx, false);
                fBlock->sizeTmp = szBlock;
                break;
            } else { // the block size is not suitable, search for the next block in the bin
                curr->setMeFree(szBlock);
                curr->rightNeig(szBlock)->setLeftFree(szBlock);
            }
        }
    }
    return fBlock;
}

bool Backend::IndexedBins::tryReleaseRegions(int binIdx, Backend *backend)
{
    Bin *b = &freeBins[binIdx];
    FreeBlock *fBlockList = nullptr;

    // get all blocks from the bin and re-do coalescing on them
    // to release single-block regions
try_next:
    if (!b->empty()) {
        MallocMutex::scoped_lock binLock(b->tLock);
        for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; ) {
            size_t szBlock = curr->tryLockBlock();
            if (!szBlock)
                goto try_next;

            FreeBlock *next = curr->next;

            b->removeBlock(curr);
            curr->sizeTmp = szBlock;
            curr->nextToFree = fBlockList;
            fBlockList = curr;
            curr = next;
        }
    }
    return backend->coalescAndPutList(fBlockList, /*forceCoalescQDrop=*/true,
                                      /*reportBlocksProcessed=*/false);
}

void Backend::Bin::removeBlock(FreeBlock *fBlock)
{
    MALLOC_ASSERT(fBlock->next || fBlock->prev || fBlock == head.load(std::memory_order_relaxed),
                  "Detected that a block is not in the bin.");
    if (head.load(std::memory_order_relaxed) == fBlock)
        head.store(fBlock->next, std::memory_order_relaxed);
    if (tail == fBlock)
        tail = fBlock->prev;
    if (fBlock->prev)
        fBlock->prev->next = fBlock->next;
    if (fBlock->next)
        fBlock->next->prev = fBlock->prev;
}

void Backend::IndexedBins::addBlock(int binIdx, FreeBlock *fBlock, size_t /* blockSz */, bool addToTail)
{
    Bin *b = &freeBins[binIdx];
    fBlock->myBin = binIdx;
    fBlock->next = fBlock->prev = nullptr;
    {
        MallocMutex::scoped_lock scopedLock(b->tLock);
        if (addToTail) {
            fBlock->prev = b->tail;
            b->tail = fBlock;
            if (fBlock->prev)
                fBlock->prev->next = fBlock;
            if (!b->head.load(std::memory_order_relaxed))
                b->head.store(fBlock, std::memory_order_relaxed);
        } else {
            fBlock->next = b->head.load(std::memory_order_relaxed);
            b->head.store(fBlock, std::memory_order_relaxed);
            if (fBlock->next)
                fBlock->next->prev = fBlock;
            if (!b->tail)
                b->tail = fBlock;
        }
    }
    bitMask.set(binIdx, true);
}

bool Backend::IndexedBins::tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail)
{
    bool locked = false;
    Bin *b = &freeBins[binIdx];
    fBlock->myBin = binIdx;
    if (addToTail) {
        fBlock->next = nullptr;
        {
            MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
            if (!locked)
                return false;
            fBlock->prev = b->tail;
            b->tail = fBlock;
            if (fBlock->prev)
                fBlock->prev->next = fBlock;
            if (!b->head.load(std::memory_order_relaxed))
                b->head.store(fBlock, std::memory_order_relaxed);
        }
    } else {
        fBlock->prev = nullptr;
        {
            MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
            if (!locked)
                return false;
            fBlock->next = b->head.load(std::memory_order_relaxed);
            b->head.store(fBlock, std::memory_order_relaxed);
            if (fBlock->next)
                fBlock->next->prev = fBlock;
            if (!b->tail)
                b->tail = fBlock;
        }
    }
    bitMask.set(binIdx, true);
    return true;
}

void
Backend::IndexedBins::reset()
{
    for (unsigned i=0; i<Backend::freeBinsNum; i++)
        freeBins[i].reset();
    bitMask.reset();
}

void Backend::IndexedBins::lockRemoveBlock(int binIdx, FreeBlock *fBlock)
{
    MallocMutex::scoped_lock scopedLock(freeBins[binIdx].tLock);
    freeBins[binIdx].removeBlock(fBlock);
    if (freeBins[binIdx].empty())
        bitMask.set(binIdx, false);
}

bool ExtMemoryPool::regionsAreReleaseable() const
{
    return !keepAllMemory && !delayRegsReleasing;
}

FreeBlock *Backend::splitBlock(FreeBlock *fBlock, int num, size_t size, bool blockIsAligned, bool needAlignedBlock)
{
    const size_t totalSize = num * size;

    // SPECIAL CASE: for an unaligned block we have to cut the middle of the block
    // and return the remaining left and right parts. Possible only in a fixed pool scenario.
    if (needAlignedBlock && !blockIsAligned) {
        MALLOC_ASSERT(extMemPool->fixedPool,
                      "Aligned block request from unaligned bin possible only in fixed pool scenario.");

        // The space to use is in the middle
        FreeBlock *newBlock = alignUp(fBlock, slabSize);
        FreeBlock *rightPart = (FreeBlock*)((uintptr_t)newBlock + totalSize);
        uintptr_t fBlockEnd = (uintptr_t)fBlock + fBlock->sizeTmp;

        // Return the free right part
        if ((uintptr_t)rightPart != fBlockEnd) {
            rightPart->initHeader(); // to prevent coalescing rightPart with fBlock
            size_t rightSize = fBlockEnd - (uintptr_t)rightPart;
            coalescAndPut(rightPart, rightSize, toAlignedBin(rightPart, rightSize));
        }
        // And free the left part
        if (newBlock != fBlock) {
            newBlock->initHeader(); // to prevent coalescing fBlock with newBlock
            size_t leftSize = (uintptr_t)newBlock - (uintptr_t)fBlock;
            coalescAndPut(fBlock, leftSize, toAlignedBin(fBlock, leftSize));
        }
        fBlock = newBlock;
    } else if (size_t splitSize = fBlock->sizeTmp - totalSize) { // need to split the block
        // GENERAL CASE: cut the left or right part of the block
        FreeBlock *splitBlock = nullptr;
        if (needAlignedBlock) {
            // For slab-aligned blocks cut the right side of the block
            // and return it to the requester; the original block returns to the backend
            splitBlock = fBlock;
            fBlock = (FreeBlock*)((uintptr_t)splitBlock + splitSize);
            fBlock->initHeader();
        } else {
            // For large object blocks cut the original block and put the free right part to the backend
            splitBlock = (FreeBlock*)((uintptr_t)fBlock + totalSize);
            splitBlock->initHeader();
        }
        // Mark the free block like its parent only when the requested type (needAlignedBlock)
        // and the type of the block returned from the bins/OS (blockIsAligned) are equal
        // (XOR is used to detect a mismatch)
        bool markAligned = (blockIsAligned ^ needAlignedBlock) ? toAlignedBin(splitBlock, splitSize) : blockIsAligned;
        coalescAndPut(splitBlock, splitSize, markAligned);
    }
    MALLOC_ASSERT(!needAlignedBlock || isAligned(fBlock, slabSize), "Expect to get an aligned block, if one was requested.");
    FreeBlock::markBlocks(fBlock, num, size);
    return fBlock;
}

size_t Backend::getMaxBinnedSize() const
{
    return hugePages.isEnabled && !inUserPool() ?
        maxBinned_HugePage : maxBinned_SmallPage;
}

inline bool Backend::MaxRequestComparator::operator()(size_t oldMaxReq, size_t requestSize) const
{
    return requestSize > oldMaxReq && requestSize < backend->getMaxBinnedSize();
}

// last chance to get memory
FreeBlock *Backend::releaseMemInCaches(intptr_t startModifiedCnt,
                                       int *lockedBinsThreshold, int numOfLockedBins)
{
    // something was released from the caches
    if (extMemPool->hardCachesCleanup()
        // ..or we can use blocks that are being processed now
        || bkndSync.waitTillBlockReleased(startModifiedCnt))
        return (FreeBlock*)VALID_BLOCK_IN_BIN;
    // The OS can't give us more memory, but we have some in locked bins
    if (*lockedBinsThreshold && numOfLockedBins) {
        *lockedBinsThreshold = 0;
        return (FreeBlock*)VALID_BLOCK_IN_BIN;
    }
    return nullptr; // nothing found, give up
}

FreeBlock *Backend::askMemFromOS(size_t blockSize, intptr_t startModifiedCnt,
                                 int *lockedBinsThreshold, int numOfLockedBins,
                                 bool *splittableRet, bool needSlabRegion)
{
    FreeBlock *block;
    // The block sizes can be divided into 3 groups:
    //   1. "quite small": a popular object size, we are in bootstrap or something
    //      similar; request several regions.
    //   2. "quite large": we want to have several such blocks in a region,
    //      but do not want several pre-allocated regions.
    //   3. "huge": exact fit, we allocate only one block and do not allow
    //      any other allocations to be placed in the region.
    // By dividing the block sizes into these groups we are trying to balance between
    // too small regions (which lead to fragmentation) and too large ones (which
    // lead to excessive address space consumption). If a region is "too
    // large", allocate only one, to prevent fragmentation. It supposedly
    // doesn't hurt performance, because the object requested by the user is large.
    // Bounds for the groups are:
    const size_t maxBinned = getMaxBinnedSize();
    const size_t quiteSmall = maxBinned / 8;
    const size_t quiteLarge = maxBinned;

    if (blockSize >= quiteLarge) {
        // Do not interact with other threads via semaphores, as for an exact fit
        // we can't share regions with them; memory requesting is individual.
        block = addNewRegion(blockSize, MEMREG_ONE_BLOCK, /*addToBin=*/false);
        if (!block)
            return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
        *splittableRet = false;
    } else {
        const size_t regSz_sizeBased = alignUp(4*maxRequestedSize, 1024*1024);
        // Another thread is modifying the backend while we can't get the block.
        // Wait till it leaves and re-do the scan
        // before trying other ways to extend the backend.
        if (bkndSync.waitTillBlockReleased(startModifiedCnt)
            // the semaphore protects adding more memory from the OS
            || memExtendingSema.wait())
            return (FreeBlock*)VALID_BLOCK_IN_BIN;

        if (startModifiedCnt != bkndSync.getNumOfMods()) {
            memExtendingSema.signal();
            return (FreeBlock*)VALID_BLOCK_IN_BIN;
        }

        if (blockSize < quiteSmall) {
            // For blocks of this size, add NUM_OF_REG "advance" regions to the bin,
            // and return one as the result.
            // TODO: add to the bin first, because other threads can use them right away.
            // This must be done carefully, because blocks in bins can be released
            // in releaseCachesToLimit().
            const unsigned NUM_OF_REG = 3;
            MemRegionType regType = needSlabRegion ?
                MEMREG_SLAB_BLOCKS : MEMREG_LARGE_BLOCKS;
            block = addNewRegion(regSz_sizeBased, regType, /*addToBin=*/false);
            if (block)
                for (unsigned idx=0; idx<NUM_OF_REG; idx++)
                    if (! addNewRegion(regSz_sizeBased, regType, /*addToBin=*/true))
                        break;
        } else {
            block = addNewRegion(regSz_sizeBased, MEMREG_LARGE_BLOCKS, /*addToBin=*/false);
        }
        memExtendingSema.signal();

        // no regions found, try to clean the caches
        if (!block || block == (FreeBlock*)VALID_BLOCK_IN_BIN)
            return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
        // Since a region can hold more than one block it can be split.
        *splittableRet = true;
    }
    // after asking the OS for memory, release caches if we are above the memory limits
    releaseCachesToLimit();

    return block;
}

void Backend::releaseCachesToLimit()
{
    if (!memSoftLimit.load(std::memory_order_relaxed)
        || totalMemSize.load(std::memory_order_relaxed) <= memSoftLimit.load(std::memory_order_relaxed)) {
        return;
    }
    size_t locTotalMemSize, locMemSoftLimit;

    scanCoalescQ(/*forceCoalescQDrop=*/false);
    if (extMemPool->softCachesCleanup() &&
        (locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
        (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
        return;
    // clean the global large-object cache; if this is not enough, clean local caches.
    // Do this in several tries, because backend fragmentation can prevent
    // a region from being released.
    for (int cleanLocal = 0; cleanLocal<2; cleanLocal++)
        while (cleanLocal ?
               extMemPool->allLocalCaches.cleanup(/*cleanOnlyUnused=*/true) :
               extMemPool->loc.decreasingCleanup())
            if ((locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
                (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
                return;
    // last chance to meet memSoftLimit
    extMemPool->hardCachesCleanup();
}

int Backend::IndexedBins::getMinNonemptyBin(unsigned startBin) const
{
    int p = bitMask.getMinTrue(startBin);
    return p == -1 ?
        Backend::freeBinsNum : p;
}

FreeBlock *Backend::IndexedBins::findBlock(int nativeBin, BackendSync *sync, size_t size,
                                           bool needAlignedBlock, bool alignedBin, int *numOfLockedBins)
{
    for (int i=getMinNonemptyBin(nativeBin); i<(int)freeBinsNum; i=getMinNonemptyBin(i+1))
        if (FreeBlock *block = getFromBin(i, sync, size, needAlignedBlock, alignedBin, /*wait=*/false, numOfLockedBins))
            return block;

    return nullptr;
}

void Backend::requestBootstrapMem()
{
    if (bootsrapMemDone == bootsrapMemStatus.load(std::memory_order_acquire))
        return;
    MallocMutex::scoped_lock lock( bootsrapMemStatusMutex );
    if (bootsrapMemDone == bootsrapMemStatus)
        return;
    MALLOC_ASSERT(bootsrapMemNotDone == bootsrapMemStatus, ASSERT_TEXT);
    bootsrapMemStatus = bootsrapMemInitializing;
    // request a rather big region in advance during bootstrap;
    // it is ok to get nullptr here, as later we re-do the request with a more modest size
    addNewRegion(2*1024*1024, MEMREG_SLAB_BLOCKS, /*addToBin=*/true);
    bootsrapMemStatus = bootsrapMemDone;
}

// try to allocate a block of size bytes from the available bins
// needAlignedBlock is true if the result must be slab-aligned
FreeBlock *Backend::genericGetBlock(int num, size_t size, bool needAlignedBlock)
{
    FreeBlock *block = nullptr;
    const size_t totalReqSize = num*size;
    // no splitting after requesting a new region; ask for the exact size
    const int nativeBin = sizeToBin(totalReqSize);

    requestBootstrapMem();
    // If we found 2 or fewer locked bins, it's time to ask the OS for more memory.
    // But nothing can be asked from a fixed pool. And we prefer to wait rather than
    // ask for more memory if the block is quite large.
    int lockedBinsThreshold = extMemPool->fixedPool || size>=maxBinned_SmallPage?
        0 : 2;

    // Find the maximal requested size limited by getMaxBinnedSize()
    AtomicUpdate(maxRequestedSize, totalReqSize, MaxRequestComparator(this));
    scanCoalescQ(/*forceCoalescQDrop=*/false);

    bool splittable = true;
    for (;;) {
        const intptr_t startModifiedCnt = bkndSync.getNumOfMods();
        int numOfLockedBins;

        do {
            numOfLockedBins = 0;
            if (needAlignedBlock) {
                block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
                                                      /*alignedBin=*/true, &numOfLockedBins);
                if (!block && extMemPool->fixedPool)
                    block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
                                                         /*alignedBin=*/false, &numOfLockedBins);
            } else {
                block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
                                                     /*alignedBin=*/false, &numOfLockedBins);
                if (!block && extMemPool->fixedPool)
                    block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
                                                          /*alignedBin=*/true, &numOfLockedBins);
            }
        } while (!block && numOfLockedBins>lockedBinsThreshold);

        if (block)
            break;

        bool retScanCoalescQ = scanCoalescQ(/*forceCoalescQDrop=*/true);
        bool retSoftCachesCleanup = extMemPool->softCachesCleanup();
        if (!(retScanCoalescQ || retSoftCachesCleanup)) {
            // the bins are not updated,
            // the only remaining possibility is to ask for more memory
            block = askMemFromOS(totalReqSize, startModifiedCnt, &lockedBinsThreshold,
                                 numOfLockedBins, &splittable, needAlignedBlock);
            if (!block)
                return nullptr;
            if (block != (FreeBlock*)VALID_BLOCK_IN_BIN) {
                // the size can be increased in askMemFromOS, that's why >=
                MALLOC_ASSERT(block->sizeTmp >= size, ASSERT_TEXT);
                break;
            }
            // a valid block is somewhere in the bins, let's find it
            block = nullptr;
        }
    }
    MALLOC_ASSERT(block, ASSERT_TEXT);
    if (splittable) {
        // At this point we have to be sure that the slabAligned attribute describes the right block state
        block = splitBlock(block, num, size, block->slabAligned, needAlignedBlock);
    }
    // matches blockConsumed() from startUseBlock()
    bkndSync.blockReleased();

    return block;
}

LargeMemoryBlock *Backend::getLargeBlock(size_t size)
{
    LargeMemoryBlock *lmb =
        (LargeMemoryBlock*)genericGetBlock(1, size, /*needAlignedBlock=*/false);
    if (lmb) {
        lmb->unalignedSize = size;
        if (extMemPool->userPool())
            extMemPool->lmbList.add(lmb);
    }
    return lmb;
}

BlockI *Backend::getSlabBlock(int num) {
    BlockI *b = (BlockI*)genericGetBlock(num, slabSize, /*needAlignedBlock=*/true);
    MALLOC_ASSERT(isAligned(b, slabSize), ASSERT_TEXT);
    return b;
}

void Backend::putSlabBlock(BlockI *block) {
    genericPutBlock((FreeBlock *)block, slabSize, /*slabAligned=*/true);
}

void *Backend::getBackRefSpace(size_t size, bool *rawMemUsed)
{
    // This block is released only at shutdown, so if it comes from the backend
    // it could prevent an entire region from being released;
    // therefore prefer using getRawMemory.
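    // (Assumed order of preference: a raw OS mapping is best because putBackRefSpace()
    //  can free it directly at shutdown; a backend block is the fallback and is only
    //  reclaimed together with the region that contains it.)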
    if (void *ret = getRawMemory(size, REGULAR)) {
        *rawMemUsed = true;
        return ret;
    }
    void *ret = genericGetBlock(1, size, /*needAlignedBlock=*/false);
    if (ret) *rawMemUsed = false;
    return ret;
}

void Backend::putBackRefSpace(void *b, size_t size, bool rawMemUsed)
{
    if (rawMemUsed)
        freeRawMemory(b, size);
    // ignore non-raw memory, as it is released when its region is released
}

void Backend::removeBlockFromBin(FreeBlock *fBlock)
{
    if (fBlock->myBin != Backend::NO_BIN) {
        if (fBlock->slabAligned)
            freeSlabAlignedBins.lockRemoveBlock(fBlock->myBin, fBlock);
        else
            freeLargeBlockBins.lockRemoveBlock(fBlock->myBin, fBlock);
    }
}

void Backend::genericPutBlock(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
{
    bkndSync.blockConsumed();
    coalescAndPut(fBlock, blockSz, slabAligned);
    bkndSync.blockReleased();
}

void AllLargeBlocksList::add(LargeMemoryBlock *lmb)
{
    MallocMutex::scoped_lock scoped_cs(largeObjLock);
    lmb->gPrev = nullptr;
    lmb->gNext = loHead;
    if (lmb->gNext)
        lmb->gNext->gPrev = lmb;
    loHead = lmb;
}

void AllLargeBlocksList::remove(LargeMemoryBlock *lmb)
{
    MallocMutex::scoped_lock scoped_cs(largeObjLock);
    if (loHead == lmb)
        loHead = lmb->gNext;
    if (lmb->gNext)
        lmb->gNext->gPrev = lmb->gPrev;
    if (lmb->gPrev)
        lmb->gPrev->gNext = lmb->gNext;
}

void Backend::putLargeBlock(LargeMemoryBlock *lmb)
{
    if (extMemPool->userPool())
        extMemPool->lmbList.remove(lmb);
    genericPutBlock((FreeBlock *)lmb, lmb->unalignedSize, false);
}

void Backend::returnLargeObject(LargeMemoryBlock *lmb)
{
    removeBackRef(lmb->backRefIdx);
    putLargeBlock(lmb);
    STAT_increment(getThreadId(), ThreadCommonCounters, freeLargeObj);
}

#if BACKEND_HAS_MREMAP
void *Backend::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
{
    // no remap for user pools and for objects small enough to live in bins
    if (inUserPool() || min(oldSize, newSize)<maxBinned_SmallPage
        // during remap, we can't guarantee an alignment stricter than the current one or
        // stricter than page alignment
        || !isAligned(ptr, alignment) || alignment>extMemPool->granularity)
        return nullptr;
    const LargeMemoryBlock* lmbOld = ((LargeObjectHdr *)ptr - 1)->memoryBlock;
    const size_t oldUnalignedSize = lmbOld->unalignedSize;
    FreeBlock *oldFBlock = (FreeBlock *)lmbOld;
    FreeBlock *right = oldFBlock->rightNeig(oldUnalignedSize);
    // in every region only one block can have LAST_REGION_BLOCK on the right,
    // so no synchronization is needed
    if (!right->isLastRegionBlock())
        return nullptr;

    MemRegion *oldRegion = static_cast<LastFreeBlock*>(right)->memRegion;
    MALLOC_ASSERT( oldRegion < ptr, ASSERT_TEXT );
    const size_t oldRegionSize = oldRegion->allocSz;
    if (oldRegion->type != MEMREG_ONE_BLOCK)
        return nullptr; // we are not alone in the region
    const size_t userOffset = (uintptr_t)ptr - (uintptr_t)oldRegion;
    const size_t alignedSize = LargeObjectCache::alignToBin(newSize + userOffset);
    const size_t requestSize =
        alignUp(sizeof(MemRegion) + alignedSize + sizeof(LastFreeBlock), extMemPool->granularity);
    if (requestSize < alignedSize) // did it wrap around?
        return nullptr;
    regionList.remove(oldRegion);

    // The deallocation should be registered in the address range before mremap to
    // prevent a race condition with an allocation on another thread
    // (the OS can reuse the memory, and registerAlloc would be missed on the other thread).
    usedAddrRange.registerFree((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize);

    void *ret = mremap(oldRegion, oldRegion->allocSz, requestSize, MREMAP_MAYMOVE);
    if (MAP_FAILED == ret) { // can't remap, revert and leave
        regionList.add(oldRegion);
        usedAddrRange.registerAlloc((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize);
        return nullptr;
    }
    MemRegion *region = (MemRegion*)ret;
    MALLOC_ASSERT(region->type == MEMREG_ONE_BLOCK, ASSERT_TEXT);
    region->allocSz = requestSize;
    region->blockSz = alignedSize;

    FreeBlock *fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion),
                                             largeObjectAlignment);

    regionList.add(region);
    startUseBlock(region, fBlock, /*addToBin=*/false);
    MALLOC_ASSERT(fBlock->sizeTmp == region->blockSz, ASSERT_TEXT);
    // matches blockConsumed() in startUseBlock().
    // TODO: get rid of the useless blockConsumed()/blockReleased() pair
    bkndSync.blockReleased();

    // the object must start at the same offset from the region's start
    void *object = (void*)((uintptr_t)region + userOffset);
    MALLOC_ASSERT(isAligned(object, alignment), ASSERT_TEXT);
    LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
    setBackRef(header->backRefIdx, header);

    LargeMemoryBlock *lmb = (LargeMemoryBlock*)fBlock;
    lmb->unalignedSize = region->blockSz;
    lmb->objectSize = newSize;
    lmb->backRefIdx = header->backRefIdx;
    header->memoryBlock = lmb;
    MALLOC_ASSERT((uintptr_t)lmb + lmb->unalignedSize >=
                  (uintptr_t)object + lmb->objectSize, "An object must fit to the block.");

    usedAddrRange.registerAlloc((uintptr_t)region, (uintptr_t)region + requestSize);
    totalMemSize.fetch_add(region->allocSz - oldRegionSize);

    return object;
}
#endif /* BACKEND_HAS_MREMAP */

void Backend::releaseRegion(MemRegion *memRegion)
{
    regionList.remove(memRegion);
    freeRawMem(memRegion, memRegion->allocSz);
}

// coalesce fBlock with its neighbors
FreeBlock *Backend::doCoalesc(FreeBlock *fBlock, MemRegion **mRegion)
{
    FreeBlock *resBlock = fBlock;
    size_t resSize = fBlock->sizeTmp;
    MemRegion *memRegion = nullptr;

    fBlock->markCoalescing(resSize);
    resBlock->blockInBin = false;

    // coalescing with the left neighbor
    size_t leftSz = fBlock->trySetLeftUsed(GuardedSize::COAL_BLOCK);
    if (leftSz != GuardedSize::LOCKED) {
        if (leftSz == GuardedSize::COAL_BLOCK) {
            coalescQ.putBlock(fBlock);
            return nullptr;
        } else {
            FreeBlock *left = fBlock->leftNeig(leftSz);
            size_t lSz = left->trySetMeUsed(GuardedSize::COAL_BLOCK);
            if (lSz <= GuardedSize::MAX_LOCKED_VAL) {
                fBlock->setLeftFree(leftSz); // rollback
                coalescQ.putBlock(fBlock);
                return nullptr;
            } else {
                MALLOC_ASSERT(lSz == leftSz, "Invalid header");
                left->blockInBin = true;
                resBlock = left;
                resSize += leftSz;
                resBlock->sizeTmp = resSize;
            }
        }
    }
    // coalescing with the right neighbor
    FreeBlock *right = fBlock->rightNeig(fBlock->sizeTmp);
    size_t rightSz = right->trySetMeUsed(GuardedSize::COAL_BLOCK);
    if (rightSz != GuardedSize::LOCKED) {
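        // Three possible right neighbors are handled below:
        //  - LAST_REGION_BLOCK: the region end is reached, remember the region for a possible release;
        //  - COAL_BLOCK: the neighbor is being coalesced by another thread, defer via coalescQ;
        //  - a normal free block: absorb it, then peek one block further for LAST_REGION_BLOCK.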
        // LastFreeBlock is on the right side
        if (GuardedSize::LAST_REGION_BLOCK == rightSz) {
            right->setMeFree(GuardedSize::LAST_REGION_BLOCK);
            memRegion = static_cast<LastFreeBlock*>(right)->memRegion;
        } else if (GuardedSize::COAL_BLOCK == rightSz) {
            if (resBlock->blockInBin) {
                resBlock->blockInBin = false;
                removeBlockFromBin(resBlock);
            }
            coalescQ.putBlock(resBlock);
            return nullptr;
        } else {
            size_t rSz = right->rightNeig(rightSz)->
                trySetLeftUsed(GuardedSize::COAL_BLOCK);
            if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
                right->setMeFree(rightSz); // rollback
                if (resBlock->blockInBin) {
                    resBlock->blockInBin = false;
                    removeBlockFromBin(resBlock);
                }
                coalescQ.putBlock(resBlock);
                return nullptr;
            } else {
                MALLOC_ASSERT(rSz == rightSz, "Invalid header");
                removeBlockFromBin(right);
                resSize += rightSz;

                // Is LastFreeBlock on the right side of right?
                FreeBlock *nextRight = right->rightNeig(rightSz);
                size_t nextRightSz = nextRight->
                    trySetMeUsed(GuardedSize::COAL_BLOCK);
                if (nextRightSz > GuardedSize::MAX_LOCKED_VAL) {
                    if (nextRightSz == GuardedSize::LAST_REGION_BLOCK)
                        memRegion = static_cast<LastFreeBlock*>(nextRight)->memRegion;

                    nextRight->setMeFree(nextRightSz);
                }
            }
        }
    }
    if (memRegion) {
        MALLOC_ASSERT((uintptr_t)memRegion + memRegion->allocSz >=
                      (uintptr_t)right + sizeof(LastFreeBlock), ASSERT_TEXT);
        MALLOC_ASSERT((uintptr_t)memRegion < (uintptr_t)resBlock, ASSERT_TEXT);
        *mRegion = memRegion;
    } else
        *mRegion = nullptr;
    resBlock->sizeTmp = resSize;
    return resBlock;
}

bool Backend::coalescAndPutList(FreeBlock *list, bool forceCoalescQDrop, bool reportBlocksProcessed)
{
    bool regionReleased = false;

    for (FreeBlock *helper; list;
         list = helper,
         // matches the block enqueue in CoalRequestQ::putBlock()
         reportBlocksProcessed? coalescQ.blockWasProcessed() : (void)0) {
        MemRegion *memRegion;
        bool addToTail = false;

        helper = list->nextToFree;
        FreeBlock *toRet = doCoalesc(list, &memRegion);
        if (!toRet)
            continue;

        if (memRegion && memRegion->blockSz == toRet->sizeTmp
            && !extMemPool->fixedPool) {
            if (extMemPool->regionsAreReleaseable()) {
                // release the region, because there are no used blocks in it
                if (toRet->blockInBin)
                    removeBlockFromBin(toRet);
                releaseRegion(memRegion);
                regionReleased = true;
                continue;
            } else // add a block from an empty region to the end of the bin,
                addToTail = true; // preserving it for an exact fit
        }
        size_t currSz = toRet->sizeTmp;
        int bin = sizeToBin(currSz);
        bool toAligned = extMemPool->fixedPool ? toAlignedBin(toRet, currSz) : toRet->slabAligned;
        bool needAddToBin = true;

        if (toRet->blockInBin) {
            // Does it stay in the same bin?
            if (toRet->myBin == bin && toRet->slabAligned == toAligned)
                needAddToBin = false;
            else {
                toRet->blockInBin = false;
                removeBlockFromBin(toRet);
            }
        }

        // It does not stay in the same bin, or is bin-less; add it
        if (needAddToBin) {
            toRet->prev = toRet->next = toRet->nextToFree = nullptr;
            toRet->myBin = NO_BIN;
            toRet->slabAligned = toAligned;

            // If the block is too small to fit in any bin, keep it bin-less.
            // It's not a leak because the block can later be coalesced.
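            // (A bin-less leftover still publishes its size via the GuardedSize headers
            //  set right below, so a later free of a neighbor can absorb it during coalescing.)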
            if (currSz >= minBinnedSize) {
                toRet->sizeTmp = currSz;
                IndexedBins *target = toRet->slabAligned ? &freeSlabAlignedBins : &freeLargeBlockBins;
                if (forceCoalescQDrop) {
                    target->addBlock(bin, toRet, toRet->sizeTmp, addToTail);
                } else if (!target->tryAddBlock(bin, toRet, addToTail)) {
                    coalescQ.putBlock(toRet);
                    continue;
                }
            }
            toRet->sizeTmp = 0;
        }
        // Free the (possibly coalesced) free block.
        // Adding to a bin must be done before this point,
        // because after a block is free it can be coalesced, and
        // using its pointer becomes unsafe.
        // Remember that coalescing is not done under any global lock.
        toRet->setMeFree(currSz);
        toRet->rightNeig(currSz)->setLeftFree(currSz);
    }
    return regionReleased;
}

// Coalesce fBlock and add it back to a bin,
// processing delayed coalescing requests.
void Backend::coalescAndPut(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
{
    fBlock->sizeTmp = blockSz;
    fBlock->nextToFree = nullptr;
    fBlock->slabAligned = slabAligned;

    coalescAndPutList(fBlock, /*forceCoalescQDrop=*/false, /*reportBlocksProcessed=*/false);
}

bool Backend::scanCoalescQ(bool forceCoalescQDrop)
{
    FreeBlock *currCoalescList = coalescQ.getAll();

    if (currCoalescList)
        // reportBlocksProcessed=true informs that the blocks leave coalescQ;
        // matches blockConsumed() from CoalRequestQ::putBlock()
        coalescAndPutList(currCoalescList, forceCoalescQDrop,
                          /*reportBlocksProcessed=*/true);
    // returns the status of coalescQ.getAll(), as an indication of possible changes in the backend
    // TODO: coalescAndPutList() may report whether some new free blocks became available or not
    return currCoalescList;
}

FreeBlock *Backend::findBlockInRegion(MemRegion *region, size_t exactBlockSize)
{
    FreeBlock *fBlock;
    size_t blockSz;
    uintptr_t fBlockEnd,
        lastFreeBlock = (uintptr_t)region + region->allocSz - sizeof(LastFreeBlock);

    static_assert(sizeof(LastFreeBlock) % sizeof(uintptr_t) == 0,
                  "Atomic applied on LastFreeBlock, and we put it at the end of region, that"
                  " is uintptr_t-aligned, so no unaligned atomic operations are possible.");
    // the right bound is slab-aligned, keep LastFreeBlock after it
    if (region->type == MEMREG_SLAB_BLOCKS) {
        fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), sizeof(uintptr_t));
        fBlockEnd = alignDown(lastFreeBlock, slabSize);
    } else {
        fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), largeObjectAlignment);
        fBlockEnd = (uintptr_t)fBlock + exactBlockSize;
        MALLOC_ASSERT(fBlockEnd <= lastFreeBlock, ASSERT_TEXT);
    }
    if (fBlockEnd <= (uintptr_t)fBlock)
        return nullptr; // allocSz is too small
    blockSz = fBlockEnd - (uintptr_t)fBlock;
    // TODO: extend getSlabBlock to support degradation, i.e.
    // getting fewer blocks than requested, and then relax this check
    // (currently it is all-or-nothing, and the check reflects that)
    if (blockSz < numOfSlabAllocOnMiss*slabSize)
        return nullptr;

    region->blockSz = blockSz;
    return fBlock;
}

// startUseBlock may add the free block to a bin; the block can be used and
// even released after this, so the region must already be added to regionList
void Backend::startUseBlock(MemRegion *region, FreeBlock *fBlock, bool addToBin)
{
    size_t blockSz = region->blockSz;
    fBlock->initHeader();
    fBlock->setMeFree(blockSz);

    LastFreeBlock *lastBl = static_cast<LastFreeBlock*>(fBlock->rightNeig(blockSz));
    // to avoid unaligned atomics during LastFreeBlock access
    MALLOC_ASSERT(isAligned(lastBl, sizeof(uintptr_t)), nullptr);
    lastBl->initHeader();
    lastBl->setMeFree(GuardedSize::LAST_REGION_BLOCK);
    lastBl->setLeftFree(blockSz);
    lastBl->myBin = NO_BIN;
    lastBl->memRegion = region;

    if (addToBin) {
        unsigned targetBin = sizeToBin(blockSz);
        // while adding advance regions, register the bin for the largest block in the region
        advRegBins.registerBin(targetBin);
        if (region->type == MEMREG_SLAB_BLOCKS) {
            fBlock->slabAligned = true;
            freeSlabAlignedBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
        } else {
            fBlock->slabAligned = false;
            freeLargeBlockBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
        }
    } else {
        // to match blockReleased() in genericGetBlock
        bkndSync.blockConsumed();
        // Determine our alignment for a correct splitBlock operation
        fBlock->slabAligned = region->type == MEMREG_SLAB_BLOCKS ? true : false;
        fBlock->sizeTmp = fBlock->tryLockBlock();
        MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, "Locking must be successful");
    }
}

void MemRegionList::add(MemRegion *r)
{
    r->prev = nullptr;
    MallocMutex::scoped_lock lock(regionListLock);
    r->next = head;
    head = r;
    if (head->next)
        head->next->prev = head;
}

void MemRegionList::remove(MemRegion *r)
{
    MallocMutex::scoped_lock lock(regionListLock);
    if (head == r)
        head = head->next;
    if (r->next)
        r->next->prev = r->prev;
    if (r->prev)
        r->prev->next = r->next;
}

#if __TBB_MALLOC_BACKEND_STAT
int MemRegionList::reportStat(FILE *f)
{
    int regNum = 0;
    MallocMutex::scoped_lock lock(regionListLock);
    for (MemRegion *curr = head; curr; curr = curr->next) {
        fprintf(f, "%p: max block %lu B, ", curr, curr->blockSz);
        regNum++;
    }
    return regNum;
}
#endif

FreeBlock *Backend::addNewRegion(size_t size, MemRegionType memRegType, bool addToBin)
{
    static_assert(sizeof(BlockMutexes) <= sizeof(BlockI), "Header must not be overwritten in used blocks");
    MALLOC_ASSERT(FreeBlock::minBlockSize > GuardedSize::MAX_SPEC_VAL,
                  "Block length must not conflict with special values of GuardedSize");
    // If the region is not "for slabs" we should reserve some space for
    // the region header, the worst-case alignment and the last block mark.
    const size_t requestSize = memRegType == MEMREG_SLAB_BLOCKS ?
        size :
        size + sizeof(MemRegion) + largeObjectAlignment
             + FreeBlock::minBlockSize + sizeof(LastFreeBlock);

    size_t rawSize = requestSize;
    MemRegion *region = (MemRegion*)allocRawMem(rawSize);
    if (!region) {
        MALLOC_ASSERT(rawSize==requestSize, "allocRawMem has not allocated memory but changed the allocated size.");
        return nullptr;
    }
    if (rawSize < sizeof(MemRegion)) {
        if (!extMemPool->fixedPool)
            freeRawMem(region, rawSize);
        return nullptr;
    }

    region->type = memRegType;
    region->allocSz = rawSize;
    FreeBlock *fBlock = findBlockInRegion(region, size);
    if (!fBlock) {
        if (!extMemPool->fixedPool)
            freeRawMem(region, rawSize);
        return nullptr;
    }
    regionList.add(region);
    startUseBlock(region, fBlock, addToBin);
    bkndSync.binsModified();
    return addToBin? (FreeBlock*)VALID_BLOCK_IN_BIN : fBlock;
}

void Backend::init(ExtMemoryPool *extMemoryPool)
{
    extMemPool = extMemoryPool;
    usedAddrRange.init();
    coalescQ.init(&bkndSync);
    bkndSync.init(this);
}

void Backend::reset()
{
    MALLOC_ASSERT(extMemPool->userPool(), "Only user pool can be reset.");
    // no active threads are allowed in the backend while reset() is called
    verify();

    freeLargeBlockBins.reset();
    freeSlabAlignedBins.reset();
    advRegBins.reset();

    for (MemRegion *curr = regionList.head; curr; curr = curr->next) {
        FreeBlock *fBlock = findBlockInRegion(curr, curr->blockSz);
        MALLOC_ASSERT(fBlock, "A memory region unexpectedly got smaller");
        startUseBlock(curr, fBlock, /*addToBin=*/true);
    }
}

bool Backend::destroy()
{
    bool noError = true;
    // no active threads are allowed in the backend while destroy() is called
    verify();
    if (!inUserPool()) {
        freeLargeBlockBins.reset();
        freeSlabAlignedBins.reset();
    }
    while (regionList.head) {
        MemRegion *helper = regionList.head->next;
        noError &= freeRawMem(regionList.head, regionList.head->allocSz);
        regionList.head = helper;
    }
    return noError;
}

bool Backend::clean()
{
    scanCoalescQ(/*forceCoalescQDrop=*/false);

    bool res = false;
    // We can have several blocks occupying a whole region,
    // because such regions are added in advance (see askMemFromOS() and reset())
    // and never used. Release them all.
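    // (advRegBins records the bins that received such advance regions in startUseBlock();
    //  if a bin's first non-empty index still matches, the whole region is likely untouched
    //  and tryReleaseRegions() may return it to the OS.)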
    for (int i = advRegBins.getMinUsedBin(0); i != -1; i = advRegBins.getMinUsedBin(i+1)) {
        if (i == freeSlabAlignedBins.getMinNonemptyBin(i))
            res |= freeSlabAlignedBins.tryReleaseRegions(i, this);
        if (i == freeLargeBlockBins.getMinNonemptyBin(i))
            res |= freeLargeBlockBins.tryReleaseRegions(i, this);
    }

    return res;
}

void Backend::IndexedBins::verify()
{
#if MALLOC_DEBUG
    for (int i=0; i<(int)freeBinsNum; i++) {
        for (FreeBlock *fb = freeBins[i].head.load(std::memory_order_relaxed); fb; fb=fb->next) {
            uintptr_t mySz = fb->myL.value;
            MALLOC_ASSERT(mySz>GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
            FreeBlock *right = (FreeBlock*)((uintptr_t)fb + mySz);
            suppress_unused_warning(right);
            MALLOC_ASSERT(right->myL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
            MALLOC_ASSERT(right->leftL.value==mySz, ASSERT_TEXT);
            MALLOC_ASSERT(fb->leftL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
        }
    }
#endif
}

// For correct operation, it must be called when no other thread
// is changing the backend.
void Backend::verify()
{
#if MALLOC_DEBUG
    scanCoalescQ(/*forceCoalescQDrop=*/false);
#endif // MALLOC_DEBUG

    freeLargeBlockBins.verify();
    freeSlabAlignedBins.verify();
}

#if __TBB_MALLOC_BACKEND_STAT
size_t Backend::Bin::countFreeBlocks()
{
    size_t cnt = 0;
    {
        MallocMutex::scoped_lock lock(tLock);
        for (FreeBlock *fb = head; fb; fb = fb->next)
            cnt++;
    }
    return cnt;
}

size_t Backend::Bin::reportFreeBlocks(FILE *f)
{
    size_t totalSz = 0;
    MallocMutex::scoped_lock lock(tLock);
    for (FreeBlock *fb = head; fb; fb = fb->next) {
        size_t sz = fb->tryLockBlock();
        fb->setMeFree(sz);
        fprintf(f, " [%p;%p]", fb, (void*)((uintptr_t)fb+sz));
        totalSz += sz;
    }
    return totalSz;
}

void Backend::IndexedBins::reportStat(FILE *f)
{
    size_t totalSize = 0;

    for (int i=0; i<Backend::freeBinsNum; i++)
        if (size_t cnt = freeBins[i].countFreeBlocks()) {
            totalSize += freeBins[i].reportFreeBlocks(f);
            fprintf(f, " %d:%lu, ", i, cnt);
        }
    fprintf(f, "\ttotal size %lu KB", totalSize/1024);
}

void Backend::reportStat(FILE *f)
{
    scanCoalescQ(/*forceCoalescQDrop=*/false);

    fprintf(f, "\n  regions:\n");
    int regNum = regionList.reportStat(f);
    fprintf(f, "\n%d regions, %lu KB in all regions\n  free bins:\nlarge bins: ",
            regNum, totalMemSize/1024);
    freeLargeBlockBins.reportStat(f);
    fprintf(f, "\naligned bins: ");
    freeSlabAlignedBins.reportStat(f);
    fprintf(f, "\n");
}
#endif // __TBB_MALLOC_BACKEND_STAT

} } // namespaces