/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include <string.h>   /* for memset */
#include <errno.h>
#include "tbbmalloc_internal.h"

namespace rml {
namespace internal {

/*********** Code to acquire memory from the OS or other executive ****************/

/*
  A syscall/malloc can set a non-zero errno on failure, but the allocator might
  still be able to find memory to fulfill the request later, and a successful
  scalable_malloc call must not change errno. To support this, restore the old
  errno in (get|free)RawMemory, and set errno in the frontend just before
  returning to user code.
  Please note: every syscall/libc call used inside scalable_malloc that
  sets errno must be protected this way, not just memory allocation per se.
*/

#if USE_DEFAULT_MEMORY_MAPPING
#include "MapMemory.h"
#else
/* assume MapMemory and UnmapMemory are customized */
#endif

void* getRawMemory (size_t size, PageType pageType) {
    return MapMemory(size, pageType);
}

int freeRawMemory (void *object, size_t size) {
    return UnmapMemory(object, size);
}

#if CHECK_ALLOCATION_RANGE

void Backend::UsedAddressRange::registerAlloc(uintptr_t left, uintptr_t right)
{
    MallocMutex::scoped_lock lock(mutex);
    if (left < leftBound.load(std::memory_order_relaxed))
        leftBound.store(left, std::memory_order_relaxed);
    if (right > rightBound.load(std::memory_order_relaxed))
        rightBound.store(right, std::memory_order_relaxed);
    MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed), ASSERT_TEXT);
    MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
    MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) <= left && right <= rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
}

void Backend::UsedAddressRange::registerFree(uintptr_t left, uintptr_t right)
{
    MallocMutex::scoped_lock lock(mutex);
    if (leftBound.load(std::memory_order_relaxed) == left) {
        if (rightBound.load(std::memory_order_relaxed) == right) {
            leftBound.store(ADDRESS_UPPER_BOUND, std::memory_order_relaxed);
            rightBound.store(0, std::memory_order_relaxed);
        } else
            leftBound.store(right, std::memory_order_relaxed);
    } else if (rightBound.load(std::memory_order_relaxed) == right)
        rightBound.store(left, std::memory_order_relaxed);
    MALLOC_ASSERT((!rightBound.load(std::memory_order_relaxed) && leftBound.load(std::memory_order_relaxed) == ADDRESS_UPPER_BOUND)
                  || leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
}
#endif // CHECK_ALLOCATION_RANGE

// Initialized in frontend inside defaultMemPool
extern HugePagesStatus hugePages;
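// Raw memory acquisition for the backend: a user-defined pool is served through
// its rawAlloc callback (a fixed pool is asked only once, during bootstrap);
// otherwise memory is mapped from the OS, preferring preallocated huge pages,
// then transparent huge pages, then regular pages. The requested size is rounded
// up to the page/granularity size and the actual size is returned through the
// size reference parameter.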
void *Backend::allocRawMem(size_t &size)
{
    void *res = nullptr;
    size_t allocSize = 0;

    if (extMemPool->userPool()) {
        if (extMemPool->fixedPool && bootsrapMemDone == bootsrapMemStatus.load(std::memory_order_acquire))
            return nullptr;
        MALLOC_ASSERT(bootsrapMemStatus != bootsrapMemNotDone,
                      "Backend::allocRawMem() called prematurely?");
        // TODO: support for raw mem not aligned at sizeof(uintptr_t)
        // memory from a fixed pool is asked once and only once
        allocSize = alignUpGeneric(size, extMemPool->granularity);
        res = (*extMemPool->rawAlloc)(extMemPool->poolId, allocSize);
    } else {
        // Align the allocation on the page size
        size_t pageSize = hugePages.isEnabled ? hugePages.getGranularity() : extMemPool->granularity;
        MALLOC_ASSERT(pageSize, "Page size cannot be zero.");
        allocSize = alignUpGeneric(size, pageSize);

        // If the user requested huge pages and they are available, try to use preallocated ones first.
        // If there are none, check transparent huge page support and use them instead.
        if (hugePages.isEnabled) {
            if (hugePages.isHPAvailable) {
                res = getRawMemory(allocSize, PREALLOCATED_HUGE_PAGE);
            }
            if (!res && hugePages.isTHPAvailable) {
                res = getRawMemory(allocSize, TRANSPARENT_HUGE_PAGE);
            }
        }

        if (!res) {
            res = getRawMemory(allocSize, REGULAR);
        }
    }

    if (res) {
        MALLOC_ASSERT(allocSize > 0, "Invalid size of an allocated region.");
        size = allocSize;
        if (!extMemPool->userPool())
            usedAddrRange.registerAlloc((uintptr_t)res, (uintptr_t)res+size);
#if MALLOC_DEBUG
        volatile size_t curTotalSize = totalMemSize; // to read global value once
        MALLOC_ASSERT(curTotalSize+size > curTotalSize, "Overflow allocation size.");
#endif
        totalMemSize.fetch_add(size);
    }

    return res;
}

bool Backend::freeRawMem(void *object, size_t size)
{
    bool fail;
#if MALLOC_DEBUG
    volatile size_t curTotalSize = totalMemSize; // to read global value once
    MALLOC_ASSERT(curTotalSize-size < curTotalSize, "Negative allocation size.");
#endif
    totalMemSize.fetch_sub(size);
    if (extMemPool->userPool()) {
        MALLOC_ASSERT(!extMemPool->fixedPool, "No free for fixed-size pools.");
        fail = (*extMemPool->rawFree)(extMemPool->poolId, object, size);
    } else {
        usedAddrRange.registerFree((uintptr_t)object, (uintptr_t)object + size);
        fail = freeRawMemory(object, size);
    }
    // TODO: use the result in all freeRawMem() callers
    return !fail;
}

/********* End memory acquisition code ********************************/

// Protected object size. After successful locking it returns the size of the
// locked block, and releasing requires setting the block size.
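// Values not exceeding MAX_SPEC_VAL are reserved as markers (LOCKED, COAL_BLOCK,
// LAST_REGION_BLOCK), so the same atomic word encodes both a block size and its
// lock state; addNewRegion() asserts that FreeBlock::minBlockSize stays above them.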
class GuardedSize : tbb::detail::no_copy {
    std::atomic<uintptr_t> value;
public:
    enum State {
        LOCKED,
        COAL_BLOCK,        // block is coalescing now
        MAX_LOCKED_VAL = COAL_BLOCK,
        LAST_REGION_BLOCK, // used to mark the last block in a region
        // values after this are "normal" block sizes
        MAX_SPEC_VAL = LAST_REGION_BLOCK
    };

    void initLocked() { value.store(LOCKED, std::memory_order_release); } // TBB_REVAMP_TODO: was relaxed
    void makeCoalscing() {
        MALLOC_ASSERT(value.load(std::memory_order_relaxed) == LOCKED, ASSERT_TEXT);
        value.store(COAL_BLOCK, std::memory_order_release); // TBB_REVAMP_TODO: was relaxed
    }
    size_t tryLock(State state) {
        MALLOC_ASSERT(state <= MAX_LOCKED_VAL, ASSERT_TEXT);
        size_t sz = value.load(std::memory_order_acquire);
        for (;;) {
            if (sz <= MAX_LOCKED_VAL) {
                break;
            }
            if (value.compare_exchange_strong(sz, state)) {
                break;
            }
        }
        return sz;
    }
    void unlock(size_t size) {
        MALLOC_ASSERT(value.load(std::memory_order_relaxed) <= MAX_LOCKED_VAL, "The lock is not locked");
        MALLOC_ASSERT(size > MAX_LOCKED_VAL, ASSERT_TEXT);
        value.store(size, std::memory_order_release);
    }
    bool isLastRegionBlock() const { return value.load(std::memory_order_relaxed) == LAST_REGION_BLOCK; }
    friend void Backend::IndexedBins::verify();
};

struct MemRegion {
    MemRegion    *next,    // all regions of a pool are kept in a doubly linked list,
                 *prev;    // to release them all on pool destruction and to be able
                           // to release individual regions
    size_t        allocSz, // got from the pool callback
                  blockSz; // initial and maximal inner block size
    MemRegionType type;
};

// this data must stay unmodified while the block is in use, so keep it separate
class BlockMutexes {
protected:
    GuardedSize myL,   // lock for me
                leftL; // lock for the left neighbor
};

class FreeBlock : BlockMutexes {
public:
    static const size_t minBlockSize;
    friend void Backend::IndexedBins::verify();

    FreeBlock *prev,       // in a doubly linked list related to a bin
              *next,
              *nextToFree; // used to form a queue during coalescing
    // valid only while the block is in processing,
    // i.e. it is neither free nor used outside of the backend
    size_t      sizeTmp;
    int         myBin;      // bin that owns the block
    bool        slabAligned;
    bool        blockInBin; // the block is already in myBin

    FreeBlock *rightNeig(size_t sz) const {
        MALLOC_ASSERT(sz, ASSERT_TEXT);
        return (FreeBlock*)((uintptr_t)this+sz);
    }
    FreeBlock *leftNeig(size_t sz) const {
        MALLOC_ASSERT(sz, ASSERT_TEXT);
        return (FreeBlock*)((uintptr_t)this - sz);
    }

    void initHeader() { myL.initLocked(); leftL.initLocked(); }
    void setMeFree(size_t size) { myL.unlock(size); }
    size_t trySetMeUsed(GuardedSize::State s) { return myL.tryLock(s); }
    bool isLastRegionBlock() const { return myL.isLastRegionBlock(); }

    void setLeftFree(size_t sz) { leftL.unlock(sz); }
    size_t trySetLeftUsed(GuardedSize::State s) { return leftL.tryLock(s); }

    size_t tryLockBlock() {
        size_t rSz, sz = trySetMeUsed(GuardedSize::LOCKED);

        if (sz <= GuardedSize::MAX_LOCKED_VAL)
            return false;
        rSz = rightNeig(sz)->trySetLeftUsed(GuardedSize::LOCKED);
        if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
            setMeFree(sz);
            return false;
        }
        MALLOC_ASSERT(rSz == sz, ASSERT_TEXT);
        return sz;
    }
    void markCoalescing(size_t blockSz) {
        myL.makeCoalscing();
        rightNeig(blockSz)->leftL.makeCoalscing();
        sizeTmp = blockSz;
        nextToFree = nullptr;
    }
    void markUsed() {
        myL.initLocked();
        rightNeig(sizeTmp)->leftL.initLocked();
        nextToFree = nullptr;
    }
    static void markBlocks(FreeBlock *fBlock, int num, size_t size) {
        for (int i=1; i<num; i++) {
            fBlock = (FreeBlock*)((uintptr_t)fBlock + size);
            fBlock->initHeader();
        }
    }
};

// Last block in any region. Its "size" field is GuardedSize::LAST_REGION_BLOCK.
// Blocks of this kind are used to find the region header, which makes it
// possible to return the region back to the OS.
struct LastFreeBlock : public FreeBlock {
    MemRegion *memRegion;
};

const size_t FreeBlock::minBlockSize = sizeof(FreeBlock);

inline bool BackendSync::waitTillBlockReleased(intptr_t startModifiedCnt)
{
    AtomicBackoff backoff;
#if __TBB_MALLOC_BACKEND_STAT
    class ITT_Guard {
        void *ptr;
    public:
        ITT_Guard(void *p) : ptr(p) {
            MALLOC_ITT_SYNC_PREPARE(ptr);
        }
        ~ITT_Guard() {
            MALLOC_ITT_SYNC_ACQUIRED(ptr);
        }
    };
    ITT_Guard ittGuard(&inFlyBlocks);
#endif
    intptr_t myBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire);
    intptr_t myCoalescQInFlyBlocks = backend->blocksInCoalescing();
    while (true) {
        MALLOC_ASSERT(myBinsInFlyBlocks>=0 && myCoalescQInFlyBlocks>=0, nullptr);

        intptr_t currBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire);
        intptr_t currCoalescQInFlyBlocks = backend->blocksInCoalescing();
        WhiteboxTestingYield();
        // Stop waiting iff:

        // 1) blocks were removed from processing, not added
        if (myBinsInFlyBlocks > currBinsInFlyBlocks
            // 2) a block was released from the delayed coalescing queue
            || myCoalescQInFlyBlocks > currCoalescQInFlyBlocks)
            break;
        // 3) if there are blocks in coalescing and no progress in their processing,
        //    try to scan the coalescing queue and stop waiting if changes were made
        //    (if there are no changes and in-fly blocks exist, we continue
        //    waiting to not increase load on coalescQ)
        if (currCoalescQInFlyBlocks > 0 && backend->scanCoalescQ(/*forceCoalescQDrop=*/false))
            break;
        // 4) when there are no blocks
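        //    in flight at all, neither in the bins nor in the delayed coalescing queue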
        if (!currBinsInFlyBlocks && !currCoalescQInFlyBlocks)
            // a re-scan makes sense only if the bins were modified since the last scan
            return startModifiedCnt != getNumOfMods();
        myBinsInFlyBlocks = currBinsInFlyBlocks;
        myCoalescQInFlyBlocks = currCoalescQInFlyBlocks;
        backoff.pause();
    }
    return true;
}

void CoalRequestQ::putBlock(FreeBlock *fBlock)
{
    MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, ASSERT_TEXT);
    fBlock->markUsed();
    // the block is in the queue, do not forget that it's here
    inFlyBlocks++;

    FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);
    for (;;) {
        fBlock->nextToFree = myBlToFree;
        if (blocksToFree.compare_exchange_strong(myBlToFree, fBlock)) {
            return;
        }
    }
}

FreeBlock *CoalRequestQ::getAll()
{
    for (;;) {
        FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);

        if (!myBlToFree) {
            return nullptr;
        } else {
            if (blocksToFree.compare_exchange_strong(myBlToFree, nullptr)) {
                return myBlToFree;
            } else {
                continue;
            }
        }
    }
}

inline void CoalRequestQ::blockWasProcessed()
{
    bkndSync->binsModified();
    int prev = inFlyBlocks.fetch_sub(1);
    tbb::detail::suppress_unused_warning(prev);
    MALLOC_ASSERT(prev > 0, ASSERT_TEXT);
}

// Try to get a block from a bin.
// If the remaining free space would stay in the same bin,
//     split the block without removing it.
// If the free space should go to other bin(s), remove the block.
// alignedBin is true if all blocks in the bin have a slab-aligned right side.
FreeBlock *Backend::IndexedBins::getFromBin(int binIdx, BackendSync *sync, size_t size,
        bool needAlignedRes, bool alignedBin, bool wait, int *binLocked)
{
    Bin *b = &freeBins[binIdx];
try_next:
    FreeBlock *fBlock = nullptr;
    if (!b->empty()) {
        bool locked = false;
        MallocMutex::scoped_lock scopedLock(b->tLock, wait, &locked);

        if (!locked) {
            if (binLocked) (*binLocked)++;
            return nullptr;
        }

        for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; curr = curr->next) {
            size_t szBlock = curr->tryLockBlock();
            if (!szBlock) {
                // the block is locked; re-do the bin lock, as there is no place to spin
                // while the block is coalescing
                goto try_next;
            }

            // GENERAL CASE
            if (alignedBin || !needAlignedRes) {
                size_t splitSz = szBlock - size;
                // If we get a block as a split result, it must have room for the control structures.
                if (szBlock >= size && (splitSz >= FreeBlock::minBlockSize || !splitSz))
                    fBlock = curr;
            } else {
                // SPECIAL CASE: to get an aligned block from an unaligned bin, we have to cut the middle
                // of a block and return the remaining left and right parts. Possible only in the fixed
                // pool scenario; the assert for this is inside the splitBlock() function.

                void *newB = alignUp(curr, slabSize);
                uintptr_t rightNew = (uintptr_t)newB + size;
                uintptr_t rightCurr = (uintptr_t)curr + szBlock;
                // Check that the block size is sufficient,
                // and that the left and right split results are either big enough or non-existent
                if (rightNew <= rightCurr
                    && (newB == curr || ((uintptr_t)newB - (uintptr_t)curr) >= FreeBlock::minBlockSize)
                    && (rightNew == rightCurr || (rightCurr - rightNew) >= FreeBlock::minBlockSize))
                    fBlock = curr;
            }

            if (fBlock) {
                // consume must be called before the result of removing from a bin is visible externally.
                sync->blockConsumed();
                // TODO: think about cases when the block stays in the same bin
                b->removeBlock(fBlock);
                if (freeBins[binIdx].empty())
                    bitMask.set(binIdx, false);
                fBlock->sizeTmp = szBlock;
                break;
            } else { // the block size is not valid, search for the next block in the bin
                curr->setMeFree(szBlock);
                curr->rightNeig(szBlock)->setLeftFree(szBlock);
            }
        }
    }
    return fBlock;
}

bool Backend::IndexedBins::tryReleaseRegions(int binIdx, Backend *backend)
{
    Bin *b = &freeBins[binIdx];
    FreeBlock *fBlockList = nullptr;

    // get all blocks from the bin and re-coalesce them
    // to release single-block regions
try_next:
    if (!b->empty()) {
        MallocMutex::scoped_lock binLock(b->tLock);
        for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; ) {
            size_t szBlock = curr->tryLockBlock();
            if (!szBlock)
                goto try_next;

            FreeBlock *next = curr->next;

            b->removeBlock(curr);
            curr->sizeTmp = szBlock;
            curr->nextToFree = fBlockList;
            fBlockList = curr;
            curr = next;
        }
    }
    return backend->coalescAndPutList(fBlockList, /*forceCoalescQDrop=*/true,
                                      /*reportBlocksProcessed=*/false);
}

void Backend::Bin::removeBlock(FreeBlock *fBlock)
{
    MALLOC_ASSERT(fBlock->next||fBlock->prev||fBlock==head.load(std::memory_order_relaxed),
                  "Detected that a block is not in the bin.");
    if (head.load(std::memory_order_relaxed) == fBlock)
        head.store(fBlock->next, std::memory_order_relaxed);
    if (tail == fBlock)
        tail = fBlock->prev;
    if (fBlock->prev)
        fBlock->prev->next = fBlock->next;
    if (fBlock->next)
        fBlock->next->prev = fBlock->prev;
}

void Backend::IndexedBins::addBlock(int binIdx, FreeBlock *fBlock, size_t /* blockSz */, bool addToTail)
{
    Bin *b = &freeBins[binIdx];
    fBlock->myBin = binIdx;
    fBlock->next = fBlock->prev = nullptr;
    {
        MallocMutex::scoped_lock scopedLock(b->tLock);
        if (addToTail) {
            fBlock->prev = b->tail;
            b->tail = fBlock;
            if (fBlock->prev)
                fBlock->prev->next = fBlock;
            if (!b->head.load(std::memory_order_relaxed))
                b->head.store(fBlock, std::memory_order_relaxed);
        } else {
            fBlock->next = b->head.load(std::memory_order_relaxed);
            b->head.store(fBlock, std::memory_order_relaxed);
            if (fBlock->next)
                fBlock->next->prev = fBlock;
            if (!b->tail)
                b->tail = fBlock;
        }
    }
    bitMask.set(binIdx, true);
}

bool Backend::IndexedBins::tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail)
{
    bool locked = false;
    Bin *b = &freeBins[binIdx];
    fBlock->myBin = binIdx;
    if (addToTail) {
        fBlock->next = nullptr;
        {
            MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
            if (!locked)
                return false;
            fBlock->prev = b->tail;
            b->tail = fBlock;
            if (fBlock->prev)
                fBlock->prev->next = fBlock;
            if (!b->head.load(std::memory_order_relaxed))
                b->head.store(fBlock, std::memory_order_relaxed);
        }
    } else {
        fBlock->prev = nullptr;
        {
            MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
            if (!locked)
                return false;
            fBlock->next = b->head.load(std::memory_order_relaxed);
            b->head.store(fBlock, std::memory_order_relaxed);
            if (fBlock->next)
                fBlock->next->prev = fBlock;
            if (!b->tail)
                b->tail = fBlock;
        }
    }
    bitMask.set(binIdx, true);
    return true;
}
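// Bin housekeeping: reset() drops every bin together with the occupancy bitmask
// (used by Backend::reset()/destroy()); lockRemoveBlock() removes a block under
// the bin lock and clears the bin's bit when the bin becomes empty.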
void Backend::IndexedBins::reset()
{
    for (unsigned i=0; i<Backend::freeBinsNum; i++)
        freeBins[i].reset();
    bitMask.reset();
}

void Backend::IndexedBins::lockRemoveBlock(int binIdx, FreeBlock *fBlock)
{
    MallocMutex::scoped_lock scopedLock(freeBins[binIdx].tLock);
    freeBins[binIdx].removeBlock(fBlock);
    if (freeBins[binIdx].empty())
        bitMask.set(binIdx, false);
}

bool ExtMemoryPool::regionsAreReleaseable() const
{
    return !keepAllMemory && !delayRegsReleasing;
}

FreeBlock *Backend::splitBlock(FreeBlock *fBlock, int num, size_t size, bool blockIsAligned, bool needAlignedBlock)
{
    const size_t totalSize = num * size;

    // SPECIAL CASE: for an unaligned block we have to cut the middle of the block
    // and return the remaining left and right parts. Possible only in a fixed pool scenario.
    if (needAlignedBlock && !blockIsAligned) {
        MALLOC_ASSERT(extMemPool->fixedPool,
                      "Aligned block request from unaligned bin possible only in fixed pool scenario.");

        // Space to use is in the middle
        FreeBlock *newBlock = alignUp(fBlock, slabSize);
        FreeBlock *rightPart = (FreeBlock*)((uintptr_t)newBlock + totalSize);
        uintptr_t fBlockEnd = (uintptr_t)fBlock + fBlock->sizeTmp;

        // Return the free right part
        if ((uintptr_t)rightPart != fBlockEnd) {
            rightPart->initHeader(); // to prevent coalescing rightPart with fBlock
            size_t rightSize = fBlockEnd - (uintptr_t)rightPart;
            coalescAndPut(rightPart, rightSize, toAlignedBin(rightPart, rightSize));
        }
        // And the free left part
        if (newBlock != fBlock) {
            newBlock->initHeader(); // to prevent coalescing fBlock with newBlock
            size_t leftSize = (uintptr_t)newBlock - (uintptr_t)fBlock;
            coalescAndPut(fBlock, leftSize, toAlignedBin(fBlock, leftSize));
        }
        fBlock = newBlock;
    } else if (size_t splitSize = fBlock->sizeTmp - totalSize) { // need to split the block
        // GENERAL CASE: cut the left or right part of the block
        FreeBlock *splitBlock = nullptr;
        if (needAlignedBlock) {
            // For slab-aligned blocks, cut the right side of the block
            // and return it to the requester; the original block goes back to the backend
            splitBlock = fBlock;
            fBlock = (FreeBlock*)((uintptr_t)splitBlock + splitSize);
            fBlock->initHeader();
        } else {
            // For large object blocks, cut the original block and put the free right part to the backend
            splitBlock = (FreeBlock*)((uintptr_t)fBlock + totalSize);
            splitBlock->initHeader();
        }
        // Mark the free block like its parent only when the requested type (needAlignedBlock)
        // and the type of the block returned from bins/OS (blockIsAligned) are equal (XOR is used)
        bool markAligned = (blockIsAligned ^ needAlignedBlock) ? toAlignedBin(splitBlock, splitSize) : blockIsAligned;
        coalescAndPut(splitBlock, splitSize, markAligned);
    }
    MALLOC_ASSERT(!needAlignedBlock || isAligned(fBlock, slabSize), "Expect to get aligned block, if one was requested.");
    FreeBlock::markBlocks(fBlock, num, size);
    return fBlock;
}
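// Upper bound for block sizes kept in the backend bins; requests at or above this
// bound are served from dedicated exact-fit regions (see askMemFromOS()).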
size_t Backend::getMaxBinnedSize() const
{
    return hugePages.isEnabled && !inUserPool() ?
        maxBinned_HugePage : maxBinned_SmallPage;
}

inline bool Backend::MaxRequestComparator::operator()(size_t oldMaxReq, size_t requestSize) const
{
    return requestSize > oldMaxReq && requestSize < backend->getMaxBinnedSize();
}

// last chance to get memory
FreeBlock *Backend::releaseMemInCaches(intptr_t startModifiedCnt,
                                       int *lockedBinsThreshold, int numOfLockedBins)
{
    // something was released from the caches
    if (extMemPool->hardCachesCleanup()
        // ..or we can use blocks that are in processing now
        || bkndSync.waitTillBlockReleased(startModifiedCnt))
        return (FreeBlock*)VALID_BLOCK_IN_BIN;
    // the OS can't give us more memory, but we have some in locked bins
    if (*lockedBinsThreshold && numOfLockedBins) {
        *lockedBinsThreshold = 0;
        return (FreeBlock*)VALID_BLOCK_IN_BIN;
    }
    return nullptr; // nothing found, give up
}

FreeBlock *Backend::askMemFromOS(size_t blockSize, intptr_t startModifiedCnt,
                                 int *lockedBinsThreshold, int numOfLockedBins,
                                 bool *splittableRet, bool needSlabRegion)
{
    FreeBlock *block;
    // The block sizes can be divided into 3 groups:
    //   1. "quite small": a popular object size, we are in bootstrap or something
    //      like that; request several regions.
    //   2. "quite large": we want to have several such blocks in the region,
    //      but do not want several pre-allocated regions.
    //   3. "huge": exact fit, we allocate only one block and do not allow
    //      any other allocations to be placed in the region.
    // By dividing the block sizes into these groups we try to balance between
    // too small regions (which lead to fragmentation) and too large ones (which
    // lead to excessive address space consumption). If a region is "too
    // large", allocate only one, to prevent fragmentation. It supposedly
    // doesn't hurt performance, because the object requested by the user is large.
    // Bounds for the groups are:
    const size_t maxBinned = getMaxBinnedSize();
    const size_t quiteSmall = maxBinned / 8;
    const size_t quiteLarge = maxBinned;

    if (blockSize >= quiteLarge) {
        // Do not interact with other threads via semaphores, as for an exact fit
        // we can't share regions with them; memory requesting is individual.
        block = addNewRegion(blockSize, MEMREG_ONE_BLOCK, /*addToBin=*/false);
        if (!block)
            return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
        *splittableRet = false;
    } else {
        const size_t regSz_sizeBased = alignUp(4*maxRequestedSize, 1024*1024);
        // Another thread is modifying the backend while we can't get the block.
        // Wait until it leaves and re-do the scan
        // before trying other ways to extend the backend.
        if (bkndSync.waitTillBlockReleased(startModifiedCnt)
            // the semaphore protects adding more memory from the OS
            || memExtendingSema.wait())
            return (FreeBlock*)VALID_BLOCK_IN_BIN;

        if (startModifiedCnt != bkndSync.getNumOfMods()) {
            memExtendingSema.signal();
            return (FreeBlock*)VALID_BLOCK_IN_BIN;
        }

        if (blockSize < quiteSmall) {
            // For blocks of this size, add NUM_OF_REG "advance" regions to the bin,
            // and return one as the result.
            // TODO: add to the bin first, because other threads can use them right away.
            // This must be done carefully, because blocks in bins can be released
            // in releaseCachesToLimit().
            const unsigned NUM_OF_REG = 3;
            MemRegionType regType = needSlabRegion ?
                MEMREG_SLAB_BLOCKS : MEMREG_LARGE_BLOCKS;
            block = addNewRegion(regSz_sizeBased, regType, /*addToBin=*/false);
            if (block)
                for (unsigned idx=0; idx<NUM_OF_REG; idx++)
                    if (! addNewRegion(regSz_sizeBased, regType, /*addToBin=*/true))
                        break;
        } else {
            block = addNewRegion(regSz_sizeBased, MEMREG_LARGE_BLOCKS, /*addToBin=*/false);
        }
        memExtendingSema.signal();

        // no regions found, try to clean the caches
        if (!block || block == (FreeBlock*)VALID_BLOCK_IN_BIN)
            return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
        // Since a region can hold more than one block, it can be split.
        *splittableRet = true;
    }
    // after asking the OS for memory, release caches if we are above the memory limits
    releaseCachesToLimit();

    return block;
}

void Backend::releaseCachesToLimit()
{
    if (!memSoftLimit.load(std::memory_order_relaxed)
        || totalMemSize.load(std::memory_order_relaxed) <= memSoftLimit.load(std::memory_order_relaxed)) {
        return;
    }
    size_t locTotalMemSize, locMemSoftLimit;

    scanCoalescQ(/*forceCoalescQDrop=*/false);
    if (extMemPool->softCachesCleanup() &&
        (locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
        (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
        return;
    // clean the global large-object cache; if this is not enough, clean local caches.
    // Do this in several tries, because backend fragmentation can prevent
    // a region from being released.
    for (int cleanLocal = 0; cleanLocal<2; cleanLocal++)
        while (cleanLocal ?
               extMemPool->allLocalCaches.cleanup(/*cleanOnlyUnused=*/true) :
               extMemPool->loc.decreasingCleanup())
            if ((locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
                (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
                return;
    // last chance to match memSoftLimit
    extMemPool->hardCachesCleanup();
}
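// Bin lookup helpers: getMinNonemptyBin() returns the first non-empty bin at or
// after startBin (or freeBinsNum if there is none), using the occupancy bitmask;
// findBlock() walks those bins and tries to take a suitable block without waiting
// on bin locks.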
int Backend::IndexedBins::getMinNonemptyBin(unsigned startBin) const
{
    int p = bitMask.getMinTrue(startBin);
    return p == -1 ? Backend::freeBinsNum : p;
}

FreeBlock *Backend::IndexedBins::findBlock(int nativeBin, BackendSync *sync, size_t size,
                                           bool needAlignedBlock, bool alignedBin, int *numOfLockedBins)
{
    for (int i=getMinNonemptyBin(nativeBin); i<(int)freeBinsNum; i=getMinNonemptyBin(i+1))
        if (FreeBlock *block = getFromBin(i, sync, size, needAlignedBlock, alignedBin, /*wait=*/false, numOfLockedBins))
            return block;

    return nullptr;
}

void Backend::requestBootstrapMem()
{
    if (bootsrapMemDone == bootsrapMemStatus.load(std::memory_order_acquire))
        return;
    MallocMutex::scoped_lock lock( bootsrapMemStatusMutex );
    if (bootsrapMemDone == bootsrapMemStatus)
        return;
    MALLOC_ASSERT(bootsrapMemNotDone == bootsrapMemStatus, ASSERT_TEXT);
    bootsrapMemStatus = bootsrapMemInitializing;
    // request some rather big region in advance during bootstrap;
    // it is OK to get nullptr here, as later we re-do the request with a more modest size
    addNewRegion(2*1024*1024, MEMREG_SLAB_BLOCKS, /*addToBin=*/true);
    bootsrapMemStatus = bootsrapMemDone;
}
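// Central block allocation routine. The strategy: scan the suitable bins first;
// if nothing is found, drain the delayed coalescing queue and clean the soft caches;
// only when that changes nothing, ask the OS (or the user pool) for a new region.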
// try to allocate a block of num*size bytes in the available bins
// needAlignedBlock is true if the result must be slab-aligned
FreeBlock *Backend::genericGetBlock(int num, size_t size, bool needAlignedBlock)
{
    FreeBlock *block = nullptr;
    const size_t totalReqSize = num*size;
    // no splitting after requesting a new region: ask for the exact size
    const int nativeBin = sizeToBin(totalReqSize);

    requestBootstrapMem();
    // If we found 2 or fewer locked bins, it's time to ask the OS for more memory.
    // But nothing can be asked from a fixed pool. And we prefer to wait, not to ask
    // for more memory, if the block is quite large.
    int lockedBinsThreshold = extMemPool->fixedPool || size>=maxBinned_SmallPage ? 0 : 2;

    // Find the maximal requested size limited by getMaxBinnedSize()
    AtomicUpdate(maxRequestedSize, totalReqSize, MaxRequestComparator(this));
    scanCoalescQ(/*forceCoalescQDrop=*/false);

    bool splittable = true;
    for (;;) {
        const intptr_t startModifiedCnt = bkndSync.getNumOfMods();
        int numOfLockedBins;

        do {
            numOfLockedBins = 0;
            if (needAlignedBlock) {
                block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
                                                      /*alignedBin=*/true, &numOfLockedBins);
                if (!block && extMemPool->fixedPool)
                    block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
                                                         /*alignedBin=*/false, &numOfLockedBins);
            } else {
                block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
                                                     /*alignedBin=*/false, &numOfLockedBins);
                if (!block && extMemPool->fixedPool)
                    block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
                                                          /*alignedBin=*/true, &numOfLockedBins);
            }
        } while (!block && numOfLockedBins>lockedBinsThreshold);

        if (block)
            break;

        bool retScanCoalescQ = scanCoalescQ(/*forceCoalescQDrop=*/true);
        bool retSoftCachesCleanup = extMemPool->softCachesCleanup();
        if (!(retScanCoalescQ || retSoftCachesCleanup)) {
            // the bins were not updated,
            // so the only remaining possibility is to ask for more memory
            block = askMemFromOS(totalReqSize, startModifiedCnt, &lockedBinsThreshold,
                                 numOfLockedBins, &splittable, needAlignedBlock);
            if (!block)
                return nullptr;
            if (block != (FreeBlock*)VALID_BLOCK_IN_BIN) {
                // the size can be increased in askMemFromOS, that's why >=
                MALLOC_ASSERT(block->sizeTmp >= size, ASSERT_TEXT);
                break;
            }
            // a valid block is somewhere in the bins, let's find it
            block = nullptr;
        }
    }
    MALLOC_ASSERT(block, ASSERT_TEXT);
    if (splittable) {
        // At this point we have to be sure that the slabAligned attribute describes the right block state
        block = splitBlock(block, num, size, block->slabAligned, needAlignedBlock);
    }
    // matched blockConsumed() from startUseBlock()
    bkndSync.blockReleased();

    return block;
}

LargeMemoryBlock *Backend::getLargeBlock(size_t size)
{
    LargeMemoryBlock *lmb =
        (LargeMemoryBlock*)genericGetBlock(1, size, /*needAlignedRes=*/false);
    if (lmb) {
        lmb->unalignedSize = size;
        if (extMemPool->userPool())
            extMemPool->lmbList.add(lmb);
    }
    return lmb;
}

BlockI *Backend::getSlabBlock(int num) {
    BlockI *b = (BlockI*)genericGetBlock(num, slabSize, /*slabAligned=*/true);
    MALLOC_ASSERT(isAligned(b, slabSize), ASSERT_TEXT);
    return b;
}

void Backend::putSlabBlock(BlockI *block) {
    genericPutBlock((FreeBlock *)block, slabSize, /*slabAligned=*/true);
}

void *Backend::getBackRefSpace(size_t size, bool *rawMemUsed)
{
    // This block is released only at shutdown, so it could prevent an entire
    // region from being released if it were taken from the backend;
    // therefore prefer getRawMemory.
    if (void *ret = getRawMemory(size, REGULAR)) {
        *rawMemUsed = true;
        return ret;
    }
    void *ret = genericGetBlock(1, size, /*needAlignedRes=*/false);
    if (ret) *rawMemUsed = false;
    return ret;
}

void Backend::putBackRefSpace(void *b, size_t size, bool rawMemUsed)
{
    if (rawMemUsed)
        freeRawMemory(b, size);
    // ignore non-raw memory, as it is released when its region is released
}

void Backend::removeBlockFromBin(FreeBlock *fBlock)
{
    if (fBlock->myBin != Backend::NO_BIN) {
        if (fBlock->slabAligned)
            freeSlabAlignedBins.lockRemoveBlock(fBlock->myBin, fBlock);
        else
            freeLargeBlockBins.lockRemoveBlock(fBlock->myBin, fBlock);
    }
}

void Backend::genericPutBlock(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
{
    bkndSync.blockConsumed();
    coalescAndPut(fBlock, blockSz, slabAligned);
    bkndSync.blockReleased();
}

void AllLargeBlocksList::add(LargeMemoryBlock *lmb)
{
    MallocMutex::scoped_lock scoped_cs(largeObjLock);
    lmb->gPrev = nullptr;
    lmb->gNext = loHead;
    if (lmb->gNext)
        lmb->gNext->gPrev = lmb;
    loHead = lmb;
}

void AllLargeBlocksList::remove(LargeMemoryBlock *lmb)
{
    MallocMutex::scoped_lock scoped_cs(largeObjLock);
    if (loHead == lmb)
        loHead = lmb->gNext;
    if (lmb->gNext)
        lmb->gNext->gPrev = lmb->gPrev;
    if (lmb->gPrev)
        lmb->gPrev->gNext = lmb->gNext;
}

void Backend::putLargeBlock(LargeMemoryBlock *lmb)
{
    if (extMemPool->userPool())
        extMemPool->lmbList.remove(lmb);
    genericPutBlock((FreeBlock *)lmb, lmb->unalignedSize, false);
}

void Backend::returnLargeObject(LargeMemoryBlock *lmb)
{
    removeBackRef(lmb->backRefIdx);
    putLargeBlock(lmb);
    STAT_increment(getThreadId(), ThreadCommonCounters, freeLargeObj);
}

#if BACKEND_HAS_MREMAP
void *Backend::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
{
    // no remap for user pools and for objects small enough to live in bins
    if (inUserPool() || min(oldSize, newSize)<maxBinned_SmallPage
        // during remap, can't guarantee alignment stricter than the current one
        // or stricter than page alignment
        || !isAligned(ptr, alignment) || alignment>extMemPool->granularity)
        return nullptr;
    const LargeMemoryBlock* lmbOld = ((LargeObjectHdr *)ptr - 1)->memoryBlock;
    const size_t oldUnalignedSize = lmbOld->unalignedSize;
    FreeBlock *oldFBlock = (FreeBlock *)lmbOld;
    FreeBlock *right = oldFBlock->rightNeig(oldUnalignedSize);
    // in every region only one block can have LAST_REGION_BLOCK on its right,
    // so no synchronization is needed
    if (!right->isLastRegionBlock())
        return nullptr;

    MemRegion *oldRegion = static_cast<LastFreeBlock*>(right)->memRegion;
    MALLOC_ASSERT( oldRegion < ptr, ASSERT_TEXT );
    const size_t oldRegionSize = oldRegion->allocSz;
    if (oldRegion->type != MEMREG_ONE_BLOCK)
        return nullptr; // we are not alone in the region
    const size_t userOffset = (uintptr_t)ptr - (uintptr_t)oldRegion;
    const size_t alignedSize = LargeObjectCache::alignToBin(newSize + userOffset);
    const size_t requestSize =
        alignUp(sizeof(MemRegion) + alignedSize + sizeof(LastFreeBlock), extMemPool->granularity);
    if (requestSize < alignedSize) // wrapped around?
        return nullptr;
    regionList.remove(oldRegion);

    // The deallocation should be registered in the address range before mremap to
    // prevent a race condition with an allocation on another thread
    // (the OS can reuse the memory, and registerAlloc would be missed on the other thread).
    usedAddrRange.registerFree((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize);

    void *ret = mremap(oldRegion, oldRegion->allocSz, requestSize, MREMAP_MAYMOVE);
    if (MAP_FAILED == ret) { // can't remap, revert and leave
        regionList.add(oldRegion);
        usedAddrRange.registerAlloc((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize);
        return nullptr;
    }
    MemRegion *region = (MemRegion*)ret;
    MALLOC_ASSERT(region->type == MEMREG_ONE_BLOCK, ASSERT_TEXT);
    region->allocSz = requestSize;
    region->blockSz = alignedSize;

    FreeBlock *fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion),
                                             largeObjectAlignment);

    regionList.add(region);
    startUseBlock(region, fBlock, /*addToBin=*/false);
    MALLOC_ASSERT(fBlock->sizeTmp == region->blockSz, ASSERT_TEXT);
    // matched blockConsumed() in startUseBlock().
    // TODO: get rid of the useless blockConsumed()/blockReleased() pair
    bkndSync.blockReleased();

    // the object must start at the same offset from the region's start
    void *object = (void*)((uintptr_t)region + userOffset);
    MALLOC_ASSERT(isAligned(object, alignment), ASSERT_TEXT);
    LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
    setBackRef(header->backRefIdx, header);

    LargeMemoryBlock *lmb = (LargeMemoryBlock*)fBlock;
    lmb->unalignedSize = region->blockSz;
    lmb->objectSize = newSize;
    lmb->backRefIdx = header->backRefIdx;
    header->memoryBlock = lmb;
    MALLOC_ASSERT((uintptr_t)lmb + lmb->unalignedSize >=
                  (uintptr_t)object + lmb->objectSize, "An object must fit to the block.");

    usedAddrRange.registerAlloc((uintptr_t)region, (uintptr_t)region + requestSize);
    totalMemSize.fetch_add(region->allocSz - oldRegionSize);

    return object;
}
#endif /* BACKEND_HAS_MREMAP */

void Backend::releaseRegion(MemRegion *memRegion)
{
    regionList.remove(memRegion);
    freeRawMem(memRegion, memRegion->allocSz);
}

// coalesce fBlock with its neighbors
FreeBlock *Backend::doCoalesc(FreeBlock *fBlock, MemRegion **mRegion)
{
    FreeBlock *resBlock = fBlock;
    size_t resSize = fBlock->sizeTmp;
    MemRegion *memRegion = nullptr;

    fBlock->markCoalescing(resSize);
    resBlock->blockInBin = false;

    // coalescing with the left neighbor
    size_t leftSz = fBlock->trySetLeftUsed(GuardedSize::COAL_BLOCK);
    if (leftSz != GuardedSize::LOCKED) {
        if (leftSz == GuardedSize::COAL_BLOCK) {
            coalescQ.putBlock(fBlock);
            return nullptr;
        } else {
            FreeBlock *left = fBlock->leftNeig(leftSz);
            size_t lSz = left->trySetMeUsed(GuardedSize::COAL_BLOCK);
            if (lSz <= GuardedSize::MAX_LOCKED_VAL) {
                fBlock->setLeftFree(leftSz); // rollback
                coalescQ.putBlock(fBlock);
                return nullptr;
            } else {
                MALLOC_ASSERT(lSz == leftSz, "Invalid header");
                left->blockInBin = true;
                resBlock = left;
                resSize += leftSz;
                resBlock->sizeTmp = resSize;
            }
        }
    }
    // coalescing with the right neighbor
    FreeBlock *right = fBlock->rightNeig(fBlock->sizeTmp);
    size_t rightSz = right->trySetMeUsed(GuardedSize::COAL_BLOCK);
    if (rightSz != GuardedSize::LOCKED) {
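        // The right neighbor is either the region's terminating LastFreeBlock,
        // a block already being coalesced by another thread, or a free block
        // that can be merged; the branches below handle these three cases.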
        // LastFreeBlock is on the right side
        if (GuardedSize::LAST_REGION_BLOCK == rightSz) {
            right->setMeFree(GuardedSize::LAST_REGION_BLOCK);
            memRegion = static_cast<LastFreeBlock*>(right)->memRegion;
        } else if (GuardedSize::COAL_BLOCK == rightSz) {
            if (resBlock->blockInBin) {
                resBlock->blockInBin = false;
                removeBlockFromBin(resBlock);
            }
            coalescQ.putBlock(resBlock);
            return nullptr;
        } else {
            size_t rSz = right->rightNeig(rightSz)->
                trySetLeftUsed(GuardedSize::COAL_BLOCK);
            if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
                right->setMeFree(rightSz); // rollback
                if (resBlock->blockInBin) {
                    resBlock->blockInBin = false;
                    removeBlockFromBin(resBlock);
                }
                coalescQ.putBlock(resBlock);
                return nullptr;
            } else {
                MALLOC_ASSERT(rSz == rightSz, "Invalid header");
                removeBlockFromBin(right);
                resSize += rightSz;

                // Is LastFreeBlock on the right side of right?
                FreeBlock *nextRight = right->rightNeig(rightSz);
                size_t nextRightSz = nextRight->
                    trySetMeUsed(GuardedSize::COAL_BLOCK);
                if (nextRightSz > GuardedSize::MAX_LOCKED_VAL) {
                    if (nextRightSz == GuardedSize::LAST_REGION_BLOCK)
                        memRegion = static_cast<LastFreeBlock*>(nextRight)->memRegion;

                    nextRight->setMeFree(nextRightSz);
                }
            }
        }
    }
    if (memRegion) {
        MALLOC_ASSERT((uintptr_t)memRegion + memRegion->allocSz >=
                      (uintptr_t)right + sizeof(LastFreeBlock), ASSERT_TEXT);
        MALLOC_ASSERT((uintptr_t)memRegion < (uintptr_t)resBlock, ASSERT_TEXT);
        *mRegion = memRegion;
    } else
        *mRegion = nullptr;
    resBlock->sizeTmp = resSize;
    return resBlock;
}

bool Backend::coalescAndPutList(FreeBlock *list, bool forceCoalescQDrop, bool reportBlocksProcessed)
{
    bool regionReleased = false;

    for (FreeBlock *helper; list;
         list = helper,
         // matches the block enqueue in CoalRequestQ::putBlock()
         reportBlocksProcessed? coalescQ.blockWasProcessed() : (void)0) {
        MemRegion *memRegion;
        bool addToTail = false;

        helper = list->nextToFree;
        FreeBlock *toRet = doCoalesc(list, &memRegion);
        if (!toRet)
            continue;

        if (memRegion && memRegion->blockSz == toRet->sizeTmp
            && !extMemPool->fixedPool) {
            if (extMemPool->regionsAreReleaseable()) {
                // release the region, because there are no used blocks in it
                if (toRet->blockInBin)
                    removeBlockFromBin(toRet);
                releaseRegion(memRegion);
                regionReleased = true;
                continue;
            } else // add the block from the empty region to the end of the bin,
                addToTail = true; // preserving it for exact fit
        }
        size_t currSz = toRet->sizeTmp;
        int bin = sizeToBin(currSz);
        bool toAligned = extMemPool->fixedPool ? toAlignedBin(toRet, currSz) : toRet->slabAligned;
        bool needAddToBin = true;

        if (toRet->blockInBin) {
            // Does it stay in the same bin?
            if (toRet->myBin == bin && toRet->slabAligned == toAligned)
                needAddToBin = false;
            else {
                toRet->blockInBin = false;
                removeBlockFromBin(toRet);
            }
        }

        // It does not stay in the same bin, or is bin-less; add it
        if (needAddToBin) {
            toRet->prev = toRet->next = toRet->nextToFree = nullptr;
            toRet->myBin = NO_BIN;
            toRet->slabAligned = toAligned;

            // If the block is too small to fit in any bin, keep it bin-less.
            // It's not a leak, because the block can be coalesced later.
            if (currSz >= minBinnedSize) {
                toRet->sizeTmp = currSz;
                IndexedBins *target = toRet->slabAligned ? &freeSlabAlignedBins : &freeLargeBlockBins;
                if (forceCoalescQDrop) {
                    target->addBlock(bin, toRet, toRet->sizeTmp, addToTail);
                } else if (!target->tryAddBlock(bin, toRet, addToTail)) {
                    coalescQ.putBlock(toRet);
                    continue;
                }
            }
            toRet->sizeTmp = 0;
        }
        // Free the (possibly coalesced) block.
        // Adding it to a bin must be done before this point,
        // because once the block is free it can be coalesced, and
        // using its pointer becomes unsafe.
        // Remember that coalescing is not done under any global lock.
        toRet->setMeFree(currSz);
        toRet->rightNeig(currSz)->setLeftFree(currSz);
    }
    return regionReleased;
}

// Coalesce fBlock and add it back to a bin;
// also process delayed coalescing requests.
void Backend::coalescAndPut(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
{
    fBlock->sizeTmp = blockSz;
    fBlock->nextToFree = nullptr;
    fBlock->slabAligned = slabAligned;

    coalescAndPutList(fBlock, /*forceCoalescQDrop=*/false, /*reportBlocksProcessed=*/false);
}

bool Backend::scanCoalescQ(bool forceCoalescQDrop)
{
    FreeBlock *currCoalescList = coalescQ.getAll();

    if (currCoalescList)
        // reportBlocksProcessed=true informs that the blocks leave coalescQ,
        // matches blockConsumed() from CoalRequestQ::putBlock()
        coalescAndPutList(currCoalescList, forceCoalescQDrop,
                          /*reportBlocksProcessed=*/true);
    // returns the status of coalescQ.getAll() as an indication of possible changes in the backend
    // TODO: coalescAndPutList() could report whether new free blocks became available
    return currCoalescList;
}

FreeBlock *Backend::findBlockInRegion(MemRegion *region, size_t exactBlockSize)
{
    FreeBlock *fBlock;
    size_t blockSz;
    uintptr_t fBlockEnd,
              lastFreeBlock = (uintptr_t)region + region->allocSz - sizeof(LastFreeBlock);

    static_assert(sizeof(LastFreeBlock) % sizeof(uintptr_t) == 0,
                  "Atomic applied on LastFreeBlock, and we put it at the end of region, that"
                  " is uintptr_t-aligned, so no unaligned atomic operations are possible.");
    // the right bound is slab-aligned, keep LastFreeBlock after it
    if (region->type == MEMREG_SLAB_BLOCKS) {
        fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), sizeof(uintptr_t));
        fBlockEnd = alignDown(lastFreeBlock, slabSize);
    } else {
        fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), largeObjectAlignment);
        fBlockEnd = (uintptr_t)fBlock + exactBlockSize;
        MALLOC_ASSERT(fBlockEnd <= lastFreeBlock, ASSERT_TEXT);
    }
    if (fBlockEnd <= (uintptr_t)fBlock)
        return nullptr; // allocSz is too small
    blockSz = fBlockEnd - (uintptr_t)fBlock;
    // TODO: extend getSlabBlock to support degradation, i.e. getting fewer blocks
    // than requested, and then relax this check
    // (now all-or-nothing is implemented; the check is done accordingly)
    if (blockSz < numOfSlabAllocOnMiss*slabSize)
        return nullptr;

    region->blockSz = blockSz;
    return fBlock;
}

// startUseBlock may add the free block to a bin; the block can be used and
// even released after that, so the region must already be added to regionList
void Backend::startUseBlock(MemRegion *region, FreeBlock *fBlock, bool addToBin)
{
    size_t blockSz = region->blockSz;
    fBlock->initHeader();
    fBlock->setMeFree(blockSz);

    LastFreeBlock *lastBl = static_cast<LastFreeBlock*>(fBlock->rightNeig(blockSz));
    // to not get unaligned atomics during LastFreeBlock access
    MALLOC_ASSERT(isAligned(lastBl, sizeof(uintptr_t)), nullptr);
    lastBl->initHeader();
    lastBl->setMeFree(GuardedSize::LAST_REGION_BLOCK);
    lastBl->setLeftFree(blockSz);
    lastBl->myBin = NO_BIN;
    lastBl->memRegion = region;

    if (addToBin) {
        unsigned targetBin = sizeToBin(blockSz);
        // when adding advance regions, register the bin for the largest block in the region
        advRegBins.registerBin(targetBin);
        if (region->type == MEMREG_SLAB_BLOCKS) {
            fBlock->slabAligned = true;
            freeSlabAlignedBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
        } else {
            fBlock->slabAligned = false;
            freeLargeBlockBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
        }
    } else {
        // to match blockReleased() in genericGetBlock
        bkndSync.blockConsumed();
        // determine our alignment for correct splitBlock operation
        fBlock->slabAligned = region->type == MEMREG_SLAB_BLOCKS ? true : false;
        fBlock->sizeTmp = fBlock->tryLockBlock();
        MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, "Locking must be successful");
    }
}

void MemRegionList::add(MemRegion *r)
{
    r->prev = nullptr;
    MallocMutex::scoped_lock lock(regionListLock);
    r->next = head;
    head = r;
    if (head->next)
        head->next->prev = head;
}

void MemRegionList::remove(MemRegion *r)
{
    MallocMutex::scoped_lock lock(regionListLock);
    if (head == r)
        head = head->next;
    if (r->next)
        r->next->prev = r->prev;
    if (r->prev)
        r->prev->next = r->next;
}

#if __TBB_MALLOC_BACKEND_STAT
int MemRegionList::reportStat(FILE *f)
{
    int regNum = 0;
    MallocMutex::scoped_lock lock(regionListLock);
    for (MemRegion *curr = head; curr; curr = curr->next) {
        fprintf(f, "%p: max block %lu B, ", curr, curr->blockSz);
        regNum++;
    }
    return regNum;
}
#endif
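// Allocate a new memory region from the OS (or from the user-provided pool), carve
// the usable free block out of it, and register the region in regionList. When
// addToBin is set, the block is published in a bin and VALID_BLOCK_IN_BIN is
// returned; otherwise the locked block itself is returned to the caller.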
FreeBlock *Backend::addNewRegion(size_t size, MemRegionType memRegType, bool addToBin)
{
    static_assert(sizeof(BlockMutexes) <= sizeof(BlockI), "Header must not be overwritten in used blocks");
    MALLOC_ASSERT(FreeBlock::minBlockSize > GuardedSize::MAX_SPEC_VAL,
                  "Block length must not conflict with special values of GuardedSize");
    // If the region is not "for slabs", we should reserve some space for
    // the region header, the worst-case alignment, and the last block mark.
    const size_t requestSize = memRegType == MEMREG_SLAB_BLOCKS ? size :
        size + sizeof(MemRegion) + largeObjectAlignment
             + FreeBlock::minBlockSize + sizeof(LastFreeBlock);

    size_t rawSize = requestSize;
    MemRegion *region = (MemRegion*)allocRawMem(rawSize);
    if (!region) {
        MALLOC_ASSERT(rawSize==requestSize, "getRawMem has not allocated memory but changed the allocated size.");
        return nullptr;
    }
    if (rawSize < sizeof(MemRegion)) {
        if (!extMemPool->fixedPool)
            freeRawMem(region, rawSize);
        return nullptr;
    }

    region->type = memRegType;
    region->allocSz = rawSize;
    FreeBlock *fBlock = findBlockInRegion(region, size);
    if (!fBlock) {
        if (!extMemPool->fixedPool)
            freeRawMem(region, rawSize);
        return nullptr;
    }
    regionList.add(region);
    startUseBlock(region, fBlock, addToBin);
    bkndSync.binsModified();
    return addToBin? (FreeBlock*)VALID_BLOCK_IN_BIN : fBlock;
}

void Backend::init(ExtMemoryPool *extMemoryPool)
{
    extMemPool = extMemoryPool;
    usedAddrRange.init();
    coalescQ.init(&bkndSync);
    bkndSync.init(this);
}

void Backend::reset()
{
    MALLOC_ASSERT(extMemPool->userPool(), "Only user pool can be reset.");
    // no active threads are allowed in the backend while reset() is called
    verify();

    freeLargeBlockBins.reset();
    freeSlabAlignedBins.reset();
    advRegBins.reset();

    for (MemRegion *curr = regionList.head; curr; curr = curr->next) {
        FreeBlock *fBlock = findBlockInRegion(curr, curr->blockSz);
        MALLOC_ASSERT(fBlock, "A memory region unexpectedly got smaller");
        startUseBlock(curr, fBlock, /*addToBin=*/true);
    }
}

bool Backend::destroy()
{
    bool noError = true;
    // no active threads are allowed in the backend while destroy() is called
    verify();
    if (!inUserPool()) {
        freeLargeBlockBins.reset();
        freeSlabAlignedBins.reset();
    }
    while (regionList.head) {
        MemRegion *helper = regionList.head->next;
        noError &= freeRawMem(regionList.head, regionList.head->allocSz);
        regionList.head = helper;
    }
    return noError;
}

bool Backend::clean()
{
    scanCoalescQ(/*forceCoalescQDrop=*/false);

    bool res = false;
    // We can have several blocks occupying a whole region,
    // because such regions are added in advance (see askMemFromOS() and reset())
    // and never used. Release them all.
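    // Only the bins registered when advance regions were added (advRegBins) need to
    // be scanned; for each non-empty one, re-coalesce its blocks so that regions
    // consisting of a single free block can be returned to the OS.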
    for (int i = advRegBins.getMinUsedBin(0); i != -1; i = advRegBins.getMinUsedBin(i+1)) {
        if (i == freeSlabAlignedBins.getMinNonemptyBin(i))
            res |= freeSlabAlignedBins.tryReleaseRegions(i, this);
        if (i == freeLargeBlockBins.getMinNonemptyBin(i))
            res |= freeLargeBlockBins.tryReleaseRegions(i, this);
    }

    return res;
}

void Backend::IndexedBins::verify()
{
#if MALLOC_DEBUG
    for (int i=0; i<(int)freeBinsNum; i++) {
        for (FreeBlock *fb = freeBins[i].head.load(std::memory_order_relaxed); fb; fb=fb->next) {
            uintptr_t mySz = fb->myL.value;
            MALLOC_ASSERT(mySz>GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
            FreeBlock *right = (FreeBlock*)((uintptr_t)fb + mySz);
            suppress_unused_warning(right);
            MALLOC_ASSERT(right->myL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
            MALLOC_ASSERT(right->leftL.value==mySz, ASSERT_TEXT);
            MALLOC_ASSERT(fb->leftL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
        }
    }
#endif
}

// For correct operation, it must be called when no other thread
// is changing the backend.
void Backend::verify()
{
#if MALLOC_DEBUG
    scanCoalescQ(/*forceCoalescQDrop=*/false);
#endif // MALLOC_DEBUG

    freeLargeBlockBins.verify();
    freeSlabAlignedBins.verify();
}

#if __TBB_MALLOC_BACKEND_STAT
size_t Backend::Bin::countFreeBlocks()
{
    size_t cnt = 0;
    {
        MallocMutex::scoped_lock lock(tLock);
        for (FreeBlock *fb = head; fb; fb = fb->next)
            cnt++;
    }
    return cnt;
}

size_t Backend::Bin::reportFreeBlocks(FILE *f)
{
    size_t totalSz = 0;
    MallocMutex::scoped_lock lock(tLock);
    for (FreeBlock *fb = head; fb; fb = fb->next) {
        size_t sz = fb->tryLockBlock();
        fb->setMeFree(sz);
        fb->rightNeig(sz)->setLeftFree(sz);
        fprintf(f, " [%p;%p]", fb, (void*)((uintptr_t)fb+sz));
        totalSz += sz;
    }
    return totalSz;
}

void Backend::IndexedBins::reportStat(FILE *f)
{
    size_t totalSize = 0;

    for (int i=0; i<Backend::freeBinsNum; i++)
        if (size_t cnt = freeBins[i].countFreeBlocks()) {
            totalSize += freeBins[i].reportFreeBlocks(f);
            fprintf(f, " %d:%lu, ", i, cnt);
        }
    fprintf(f, "\ttotal size %lu KB", totalSize/1024);
}

void Backend::reportStat(FILE *f)
{
    scanCoalescQ(/*forceCoalescQDrop=*/false);

    fprintf(f, "\n regions:\n");
    int regNum = regionList.reportStat(f);
    fprintf(f, "\n%d regions, %lu KB in all regions\n free bins:\nlarge bins: ",
            regNum, totalMemSize/1024);
    freeLargeBlockBins.reportStat(f);
    fprintf(f, "\naligned bins: ");
    freeSlabAlignedBins.reportStat(f);
    fprintf(f, "\n");
}
#endif // __TBB_MALLOC_BACKEND_STAT

} } // namespaces