/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include <string.h>   /* for memset */
#include <errno.h>
#include "tbbmalloc_internal.h"

namespace rml {
namespace internal {

/*********** Code to acquire memory from the OS or other executive ****************/

/*
  A syscall/malloc can set a non-zero errno on failure, but the allocator might
  still be able to find memory elsewhere to fulfill the request, and we do not
  want a successful scalable_malloc call to change errno. To support this, the
  old errno is restored in (get|free)RawMemory, and errno is set in the frontend
  just before returning to user code.
  Please note: every syscall/libc call used inside scalable_malloc that sets
  errno must be protected this way, not just memory allocation per se.
*/
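
/* An illustrative sketch of the protection pattern described above (the names
   and control flow here are hypothetical, not the exact helper used by the
   allocator):

       int savedErrno = errno;              // remember errno as the caller left it
       void *mem = MapMemory(sz, REGULAR);  // may set errno on failure
       if (!mem)
           errno = savedErrno;              // hide the transient failure; the caller
                                            // may still satisfy the request elsewhere
*/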

#if USE_DEFAULT_MEMORY_MAPPING
#include "MapMemory.h"
#else
/* assume MapMemory and UnmapMemory are customized */
#endif

void* getRawMemory (size_t size, PageType pageType) {
    return MapMemory(size, pageType);
}

int freeRawMemory (void *object, size_t size) {
    return UnmapMemory(object, size);
}

#if CHECK_ALLOCATION_RANGE

void Backend::UsedAddressRange::registerAlloc(uintptr_t left, uintptr_t right)
{
    MallocMutex::scoped_lock lock(mutex);
    if (left < leftBound.load(std::memory_order_relaxed))
        leftBound.store(left, std::memory_order_relaxed);
    if (right > rightBound.load(std::memory_order_relaxed))
        rightBound.store(right, std::memory_order_relaxed);
    MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed), ASSERT_TEXT);
    MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
    MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) <= left && right <= rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
}

void Backend::UsedAddressRange::registerFree(uintptr_t left, uintptr_t right)
{
    MallocMutex::scoped_lock lock(mutex);
    if (leftBound.load(std::memory_order_relaxed) == left) {
        if (rightBound.load(std::memory_order_relaxed) == right) {
            leftBound.store(ADDRESS_UPPER_BOUND, std::memory_order_relaxed);
            rightBound.store(0, std::memory_order_relaxed);
        } else
            leftBound.store(right, std::memory_order_relaxed);
    } else if (rightBound.load(std::memory_order_relaxed) == right)
        rightBound.store(left, std::memory_order_relaxed);
    MALLOC_ASSERT((!rightBound.load(std::memory_order_relaxed) && leftBound.load(std::memory_order_relaxed) == ADDRESS_UPPER_BOUND)
                  || leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
}
#endif // CHECK_ALLOCATION_RANGE

// Initialized in frontend inside defaultMemPool
extern HugePagesStatus hugePages;

void *Backend::allocRawMem(size_t &size)
{
    void *res = nullptr;
    size_t allocSize = 0;

    if (extMemPool->userPool()) {
        if (extMemPool->fixedPool && bootsrapMemDone == bootsrapMemStatus.load(std::memory_order_acquire))
            return nullptr;
        MALLOC_ASSERT(bootsrapMemStatus != bootsrapMemNotDone,
                      "Backend::allocRawMem() called prematurely?");
        // TODO: support for raw mem not aligned at sizeof(uintptr_t)
        // memory from a fixed pool is requested once and only once
        allocSize = alignUpGeneric(size, extMemPool->granularity);
        res = (*extMemPool->rawAlloc)(extMemPool->poolId, allocSize);
    } else {
        // Align allocation on page size
        size_t pageSize = hugePages.isEnabled ? hugePages.getGranularity() : extMemPool->granularity;
        MALLOC_ASSERT(pageSize, "Page size cannot be zero.");
        allocSize = alignUpGeneric(size, pageSize);

        // If the user requested huge pages and they are available, try preallocated ones first.
        // If there are none, check transparent huge page support and use it instead.
        if (hugePages.isEnabled) {
            if (hugePages.isHPAvailable) {
                res = getRawMemory(allocSize, PREALLOCATED_HUGE_PAGE);
            }
            if (!res && hugePages.isTHPAvailable) {
                res = getRawMemory(allocSize, TRANSPARENT_HUGE_PAGE);
            }
        }

        if (!res) {
            res = getRawMemory(allocSize, REGULAR);
        }
    }

    if (res) {
        MALLOC_ASSERT(allocSize > 0, "Invalid size of an allocated region.");
        size = allocSize;
        if (!extMemPool->userPool())
            usedAddrRange.registerAlloc((uintptr_t)res, (uintptr_t)res+size);
#if MALLOC_DEBUG
        volatile size_t curTotalSize = totalMemSize; // to read global value once
        MALLOC_ASSERT(curTotalSize+size > curTotalSize, "Overflow allocation size.");
#endif
        totalMemSize.fetch_add(size);
    }

    return res;
}

bool Backend::freeRawMem(void *object, size_t size)
{
    bool fail;
#if MALLOC_DEBUG
    volatile size_t curTotalSize = totalMemSize; // to read global value once
    MALLOC_ASSERT(curTotalSize-size < curTotalSize, "Negative allocation size.");
#endif
    totalMemSize.fetch_sub(size);
    if (extMemPool->userPool()) {
        MALLOC_ASSERT(!extMemPool->fixedPool, "No free for fixed-size pools.");
        fail = (*extMemPool->rawFree)(extMemPool->poolId, object, size);
    } else {
        usedAddrRange.registerFree((uintptr_t)object, (uintptr_t)object + size);
        fail = freeRawMemory(object, size);
    }
    // TODO: use result in all freeRawMem() callers
    return !fail;
}

/********* End memory acquisition code ********************************/

// Protected object size. A successful lock returns the size of the locked block,
// and releasing requires setting the block size again.
class GuardedSize : tbb::detail::no_copy {
    std::atomic<uintptr_t> value;
public:
    enum State {
        LOCKED,
        COAL_BLOCK,        // block is being coalesced now
        MAX_LOCKED_VAL = COAL_BLOCK,
        LAST_REGION_BLOCK, // used to mark the last block in a region
        // values after this are "normal" block sizes
        MAX_SPEC_VAL = LAST_REGION_BLOCK
    };

    void initLocked() { value.store(LOCKED, std::memory_order_release); } // TBB_REVAMP_TODO: was relaxed
    void makeCoalscing() {
        MALLOC_ASSERT(value.load(std::memory_order_relaxed) == LOCKED, ASSERT_TEXT);
        value.store(COAL_BLOCK, std::memory_order_release); // TBB_REVAMP_TODO: was relaxed
    }
    size_t tryLock(State state) {
        MALLOC_ASSERT(state <= MAX_LOCKED_VAL, ASSERT_TEXT);
        size_t sz = value.load(std::memory_order_acquire);
        for (;;) {
            if (sz <= MAX_LOCKED_VAL) {
                break;
            }
            if (value.compare_exchange_strong(sz, state)) {
                break;
            }
        }
        return sz;
    }
    void unlock(size_t size) {
        MALLOC_ASSERT(value.load(std::memory_order_relaxed) <= MAX_LOCKED_VAL, "The lock is not locked");
        MALLOC_ASSERT(size > MAX_LOCKED_VAL, ASSERT_TEXT);
        value.store(size, std::memory_order_release);
    }
    bool isLastRegionBlock() const { return value.load(std::memory_order_relaxed) == LAST_REGION_BLOCK; }
    friend void Backend::IndexedBins::verify();
};
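
/* Illustrative usage of the GuardedSize protocol above (hypothetical snippet,
   not code from this allocator):

       GuardedSize g;                              // assume g currently holds a block size
       size_t sz = g.tryLock(GuardedSize::LOCKED);
       if (sz > GuardedSize::MAX_LOCKED_VAL) {
           // the block of size sz is now locked by us and can be manipulated safely
           g.unlock(sz);                           // publish a size again to release it
       } // otherwise the block was already locked or is being coalesced
*/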

struct MemRegion {
    MemRegion    *next,    // all regions of a pool are kept in a doubly-linked list,
                 *prev;    // so they can all be released on pool destruction
                           // and individual regions can be released as well.
    size_t        allocSz, // size obtained from the pool callback
                  blockSz; // initial and maximal inner block size
    MemRegionType type;
};

// This data must stay unmodified while the block is in use, so keep it separate.
class BlockMutexes {
protected:
    GuardedSize myL,   // lock for me
                leftL; // lock for the left neighbor
};

class FreeBlock : BlockMutexes {
public:
    static const size_t minBlockSize;
    friend void Backend::IndexedBins::verify();

    FreeBlock *prev,       // in a doubly-linked list related to a bin
              *next,
              *nextToFree; // used to form a queue during coalescing
    // sizeTmp is valid only while the block is in processing, i.e. it is
    size_t     sizeTmp;    // neither free nor used outside of the backend
    int        myBin;      // bin that owns the block
    bool       slabAligned;
    bool       blockInBin; // this block is in myBin already

    FreeBlock *rightNeig(size_t sz) const {
        MALLOC_ASSERT(sz, ASSERT_TEXT);
        return (FreeBlock*)((uintptr_t)this+sz);
    }
    FreeBlock *leftNeig(size_t sz) const {
        MALLOC_ASSERT(sz, ASSERT_TEXT);
        return (FreeBlock*)((uintptr_t)this - sz);
    }

    void initHeader() { myL.initLocked(); leftL.initLocked(); }
    void setMeFree(size_t size) { myL.unlock(size); }
    size_t trySetMeUsed(GuardedSize::State s) { return myL.tryLock(s); }
    bool isLastRegionBlock() const { return myL.isLastRegionBlock(); }

    void setLeftFree(size_t sz) { leftL.unlock(sz); }
    size_t trySetLeftUsed(GuardedSize::State s) { return leftL.tryLock(s); }

    size_t tryLockBlock() {
        size_t rSz, sz = trySetMeUsed(GuardedSize::LOCKED);

        if (sz <= GuardedSize::MAX_LOCKED_VAL)
            return false;
        rSz = rightNeig(sz)->trySetLeftUsed(GuardedSize::LOCKED);
        if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
            setMeFree(sz);
            return false;
        }
        MALLOC_ASSERT(rSz == sz, ASSERT_TEXT);
        return sz;
    }
    void markCoalescing(size_t blockSz) {
        myL.makeCoalscing();
        rightNeig(blockSz)->leftL.makeCoalscing();
        sizeTmp = blockSz;
        nextToFree = nullptr;
    }
    void markUsed() {
        myL.initLocked();
        rightNeig(sizeTmp)->leftL.initLocked();
        nextToFree = nullptr;
    }
    static void markBlocks(FreeBlock *fBlock, int num, size_t size) {
        for (int i=1; i<num; i++) {
            fBlock = (FreeBlock*)((uintptr_t)fBlock + size);
            fBlock->initHeader();
        }
    }
};

// The last block in any region. Its "size" field is GuardedSize::LAST_REGION_BLOCK.
// Blocks of this kind are used to find the region header
// and make it possible to return the region back to the OS.
struct LastFreeBlock : public FreeBlock {
    MemRegion *memRegion;
};

const size_t FreeBlock::minBlockSize = sizeof(FreeBlock);
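
/* A rough picture of a region as laid out by findBlockInRegion()/startUseBlock()
   below (widths are not to scale; the alignment padding and the unused tail may
   be absent, depending on the region type):

       | MemRegion header | pad | FreeBlock ... usable space ... | LastFreeBlock | tail |
       ^ region start            ^ first (and largest) block      ^ size field is
                                                                    LAST_REGION_BLOCK;
                                                                    memRegion points back
                                                                    to the region header
*/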

inline bool BackendSync::waitTillBlockReleased(intptr_t startModifiedCnt)
{
    AtomicBackoff backoff;
#if __TBB_MALLOC_BACKEND_STAT
    class ITT_Guard {
        void *ptr;
    public:
        ITT_Guard(void *p) : ptr(p) {
            MALLOC_ITT_SYNC_PREPARE(ptr);
        }
        ~ITT_Guard() {
            MALLOC_ITT_SYNC_ACQUIRED(ptr);
        }
    };
    ITT_Guard ittGuard(&inFlyBlocks);
#endif
    intptr_t myBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire);
    intptr_t myCoalescQInFlyBlocks = backend->blocksInCoalescing();
    while (true) {
        MALLOC_ASSERT(myBinsInFlyBlocks>=0 && myCoalescQInFlyBlocks>=0, nullptr);

        intptr_t currBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire);
        intptr_t currCoalescQInFlyBlocks = backend->blocksInCoalescing();
        WhiteboxTestingYield();
        // Stop waiting iff:

        // 1) blocks were removed from processing, not added
        if (myBinsInFlyBlocks > currBinsInFlyBlocks
            // 2) blocks were released from the delayed coalescing queue
            || myCoalescQInFlyBlocks > currCoalescQInFlyBlocks)
            break;
        // 3) if there are blocks in coalescing and no progress in their processing,
        //    try to scan the coalescing queue and stop waiting if changes were made
        //    (if there are no changes and in-fly blocks exist, we continue
        //    waiting so as not to increase the load on coalescQ)
        if (currCoalescQInFlyBlocks > 0 && backend->scanCoalescQ(/*forceCoalescQDrop=*/false))
            break;
        // 4) when there are no blocks
        if (!currBinsInFlyBlocks && !currCoalescQInFlyBlocks) {
            // a re-scan makes sense only if bins were modified since they were scanned
            auto pool = backend->extMemPool;
            if (pool->hardCachesCleanupInProgress.load(std::memory_order_acquire) ||
                pool->softCachesCleanupInProgress.load(std::memory_order_acquire)) {
                backoff.pause();
                continue;
            }

            return startModifiedCnt != getNumOfMods();
        }
        myBinsInFlyBlocks = currBinsInFlyBlocks;
        myCoalescQInFlyBlocks = currCoalescQInFlyBlocks;
        backoff.pause();
    }
    return true;
}

void CoalRequestQ::putBlock(FreeBlock *fBlock)
{
    MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, ASSERT_TEXT);
    fBlock->markUsed();
    // the block is in the queue, do not forget that it's here
    inFlyBlocks++;

    FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);
    for (;;) {
        fBlock->nextToFree = myBlToFree;
        if (blocksToFree.compare_exchange_strong(myBlToFree, fBlock)) {
            return;
        }
    }
}

FreeBlock *CoalRequestQ::getAll()
{
    for (;;) {
        FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);

        if (!myBlToFree) {
            return nullptr;
        } else {
            if (blocksToFree.compare_exchange_strong(myBlToFree, nullptr)) {
                return myBlToFree;
            } else {
                continue;
            }
        }
    }
}

inline void CoalRequestQ::blockWasProcessed()
{
    bkndSync->binsModified();
    int prev = inFlyBlocks.fetch_sub(1);
    tbb::detail::suppress_unused_warning(prev);
    MALLOC_ASSERT(prev > 0, ASSERT_TEXT);
}

// Try to get a block from a bin.
// If the remaining free space would stay in the same bin,
// split the block without removing it.
// If the free space should go to other bin(s), remove the block.
// alignedBin is true if all blocks in the bin have a slab-aligned right side.
FreeBlock *Backend::IndexedBins::getFromBin(int binIdx, BackendSync *sync, size_t size,
        bool needAlignedRes, bool alignedBin, bool wait, int *binLocked)
{
    Bin *b = &freeBins[binIdx];
try_next:
    FreeBlock *fBlock = nullptr;
    if (!b->empty()) {
        bool locked = false;
        MallocMutex::scoped_lock scopedLock(b->tLock, wait, &locked);

        if (!locked) {
            if (binLocked) (*binLocked)++;
            return nullptr;
        }

        for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; curr = curr->next) {
            size_t szBlock = curr->tryLockBlock();
            if (!szBlock) {
                // the block is locked; re-take the bin lock, as there is no place
                // to spin while the block is being coalesced
                goto try_next;
            }

            // GENERAL CASE
            if (alignedBin || !needAlignedRes) {
                size_t splitSz = szBlock - size;
                // If we get a block as a split result, it must have room for control structures.
                if (szBlock >= size && (splitSz >= FreeBlock::minBlockSize || !splitSz))
                    fBlock = curr;
            } else {
                // SPECIAL CASE: to get an aligned block from an unaligned bin we have to cut the
                // middle of a block and return the remaining left and right parts. Possible only
                // in the fixed pool scenario; the assert for this is inside the splitBlock() function.

                void *newB = alignUp(curr, slabSize);
                uintptr_t rightNew = (uintptr_t)newB + size;
                uintptr_t rightCurr = (uintptr_t)curr + szBlock;
                // Check that the block size is sufficient,
                // and that the left and right split results are either big enough or non-existent
                if (rightNew <= rightCurr
                    && (newB == curr || ((uintptr_t)newB - (uintptr_t)curr) >= FreeBlock::minBlockSize)
                    && (rightNew == rightCurr || (rightCurr - rightNew) >= FreeBlock::minBlockSize))
                    fBlock = curr;
            }

            if (fBlock) {
                // blockConsumed() must be called before the result of removing from a bin is visible externally.
                sync->blockConsumed();
                // TODO: think about cases when the block stays in the same bin
                b->removeBlock(fBlock);
                if (freeBins[binIdx].empty())
                    bitMask.set(binIdx, false);
                fBlock->sizeTmp = szBlock;
                break;
            } else { // the block size is not valid, search for the next block in the bin
                curr->setMeFree(szBlock);
                curr->rightNeig(szBlock)->setLeftFree(szBlock);
            }
        }
    }
    return fBlock;
}

bool Backend::IndexedBins::tryReleaseRegions(int binIdx, Backend *backend)
{
    Bin *b = &freeBins[binIdx];
    FreeBlock *fBlockList = nullptr;

    // take all blocks from the bin and re-do coalescing on them
    // to release single-block regions
try_next:
    if (!b->empty()) {
        MallocMutex::scoped_lock binLock(b->tLock);
        for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; ) {
            size_t szBlock = curr->tryLockBlock();
            if (!szBlock)
                goto try_next;

            FreeBlock *next = curr->next;

            b->removeBlock(curr);
            curr->sizeTmp = szBlock;
            curr->nextToFree = fBlockList;
            fBlockList = curr;
            curr = next;
        }
    }
    return backend->coalescAndPutList(fBlockList, /*forceCoalescQDrop=*/true,
                                      /*reportBlocksProcessed=*/false);
}

void Backend::Bin::removeBlock(FreeBlock *fBlock)
{
    MALLOC_ASSERT(fBlock->next || fBlock->prev || fBlock == head.load(std::memory_order_relaxed),
                  "Detected that a block is not in the bin.");
    if (head.load(std::memory_order_relaxed) == fBlock)
        head.store(fBlock->next, std::memory_order_relaxed);
    if (tail == fBlock)
        tail = fBlock->prev;
    if (fBlock->prev)
        fBlock->prev->next = fBlock->next;
    if (fBlock->next)
        fBlock->next->prev = fBlock->prev;
}

void Backend::IndexedBins::addBlock(int binIdx, FreeBlock *fBlock, size_t /* blockSz */, bool addToTail)
{
    Bin *b = &freeBins[binIdx];
    fBlock->myBin = binIdx;
    fBlock->next = fBlock->prev = nullptr;
    {
        MallocMutex::scoped_lock scopedLock(b->tLock);
        if (addToTail) {
            fBlock->prev = b->tail;
            b->tail = fBlock;
            if (fBlock->prev)
                fBlock->prev->next = fBlock;
            if (!b->head.load(std::memory_order_relaxed))
                b->head.store(fBlock, std::memory_order_relaxed);
        } else {
            fBlock->next = b->head.load(std::memory_order_relaxed);
            b->head.store(fBlock, std::memory_order_relaxed);
            if (fBlock->next)
                fBlock->next->prev = fBlock;
            if (!b->tail)
                b->tail = fBlock;
        }
    }
    bitMask.set(binIdx, true);
}

bool Backend::IndexedBins::tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail)
{
    bool locked = false;
    Bin *b = &freeBins[binIdx];
    fBlock->myBin = binIdx;
    if (addToTail) {
        fBlock->next = nullptr;
        {
            MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
            if (!locked)
                return false;
            fBlock->prev = b->tail;
            b->tail = fBlock;
            if (fBlock->prev)
                fBlock->prev->next = fBlock;
            if (!b->head.load(std::memory_order_relaxed))
                b->head.store(fBlock, std::memory_order_relaxed);
        }
    } else {
        fBlock->prev = nullptr;
        {
            MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
            if (!locked)
                return false;
            fBlock->next = b->head.load(std::memory_order_relaxed);
            b->head.store(fBlock, std::memory_order_relaxed);
            if (fBlock->next)
                fBlock->next->prev = fBlock;
            if (!b->tail)
                b->tail = fBlock;
        }
    }
    bitMask.set(binIdx, true);
    return true;
}

void Backend::IndexedBins::reset()
{
    for (unsigned i=0; i<Backend::freeBinsNum; i++)
        freeBins[i].reset();
    bitMask.reset();
}

void Backend::IndexedBins::lockRemoveBlock(int binIdx, FreeBlock *fBlock)
{
    MallocMutex::scoped_lock scopedLock(freeBins[binIdx].tLock);
    freeBins[binIdx].removeBlock(fBlock);
    if (freeBins[binIdx].empty())
        bitMask.set(binIdx, false);
}

bool ExtMemoryPool::regionsAreReleaseable() const
{
    return !keepAllMemory && !delayRegsReleasing;
}

FreeBlock *Backend::splitBlock(FreeBlock *fBlock, int num, size_t size, bool blockIsAligned, bool needAlignedBlock)
{
    const size_t totalSize = num * size;

    // SPECIAL CASE: for an unaligned block we have to cut out the middle of the block
    // and return the remaining left and right parts. Possible only in a fixed pool scenario.
    if (needAlignedBlock && !blockIsAligned) {
        MALLOC_ASSERT(extMemPool->fixedPool,
                      "Aligned block request from unaligned bin possible only in fixed pool scenario.");

        // Space to use is in the middle
        FreeBlock *newBlock = alignUp(fBlock, slabSize);
        FreeBlock *rightPart = (FreeBlock*)((uintptr_t)newBlock + totalSize);
        uintptr_t fBlockEnd = (uintptr_t)fBlock + fBlock->sizeTmp;

        // Return the free right part
        if ((uintptr_t)rightPart != fBlockEnd) {
            rightPart->initHeader(); // to prevent coalescing rightPart with fBlock
            size_t rightSize = fBlockEnd - (uintptr_t)rightPart;
            coalescAndPut(rightPart, rightSize, toAlignedBin(rightPart, rightSize));
        }
        // And free the left part
        if (newBlock != fBlock) {
            newBlock->initHeader(); // to prevent coalescing fBlock with newBlock
            size_t leftSize = (uintptr_t)newBlock - (uintptr_t)fBlock;
            coalescAndPut(fBlock, leftSize, toAlignedBin(fBlock, leftSize));
        }
        fBlock = newBlock;
    } else if (size_t splitSize = fBlock->sizeTmp - totalSize) { // need to split the block
        // GENERAL CASE: cut the left or right part of the block
        FreeBlock *splitBlock = nullptr;
        if (needAlignedBlock) {
            // For slab-aligned blocks cut the right side of the block
            // and return it to the requester; the original block returns to the backend
            splitBlock = fBlock;
            fBlock = (FreeBlock*)((uintptr_t)splitBlock + splitSize);
            fBlock->initHeader();
        } else {
            // For large object blocks cut the original block and put the free right part to the backend
            splitBlock = (FreeBlock*)((uintptr_t)fBlock + totalSize);
            splitBlock->initHeader();
        }
        // Mark the split-off block with its parent's alignment only when the requested type
        // (needAlignedBlock) and the type of the block returned from the bins/OS (blockIsAligned)
        // are equal (an XOR is used to check this)
        bool markAligned = (blockIsAligned ^ needAlignedBlock) ? toAlignedBin(splitBlock, splitSize) : blockIsAligned;
        coalescAndPut(splitBlock, splitSize, markAligned);
    }
    MALLOC_ASSERT(!needAlignedBlock || isAligned(fBlock, slabSize), "Expect to get aligned block, if one was requested.");
    FreeBlock::markBlocks(fBlock, num, size);
    return fBlock;
}

size_t Backend::getMaxBinnedSize() const
{
    return hugePages.isEnabled && !inUserPool() ?
        maxBinned_HugePage : maxBinned_SmallPage;
}

inline bool Backend::MaxRequestComparator::operator()(size_t oldMaxReq, size_t requestSize) const
{
    return requestSize > oldMaxReq && requestSize < backend->getMaxBinnedSize();
}
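
// The comparator above is used in genericGetBlock() as
//     AtomicUpdate(maxRequestedSize, totalReqSize, MaxRequestComparator(this));
// so maxRequestedSize only ever grows, and only while the new value stays below
// getMaxBinnedSize(); "huge" requests never inflate the advance-region size.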

// last chance to get memory
FreeBlock *Backend::releaseMemInCaches(intptr_t startModifiedCnt,
                                       int *lockedBinsThreshold, int numOfLockedBins)
{
    // something was released from the caches
    if (extMemPool->hardCachesCleanup(false))
        return (FreeBlock*)VALID_BLOCK_IN_BIN;

    if (bkndSync.waitTillBlockReleased(startModifiedCnt))
        return (FreeBlock*)VALID_BLOCK_IN_BIN;

    // the OS can't give us more memory, but we have some in locked bins
    if (*lockedBinsThreshold && numOfLockedBins) {
        *lockedBinsThreshold = 0;
        return (FreeBlock*)VALID_BLOCK_IN_BIN;
    }
    return nullptr; // nothing found, give up
}

FreeBlock *Backend::askMemFromOS(size_t blockSize, intptr_t startModifiedCnt,
                                 int *lockedBinsThreshold, int numOfLockedBins,
                                 bool *splittableRet, bool needSlabRegion)
{
    FreeBlock *block;
    // The block sizes can be divided into 3 groups:
    //   1. "quite small": a popular object size, we are in bootstrap or something
    //      similar; request several regions.
    //   2. "quite large": we want to have several such blocks in the region,
    //      but do not want several pre-allocated regions.
    //   3. "huge": exact fit, we allocate only one block and do not allow
    //      any other allocations to be placed in the region.
    // By dividing the block sizes into these groups we try to balance between
    // too-small regions (which lead to fragmentation) and too-large ones (which
    // lead to excessive address space consumption). If a region is "too
    // large", allocate only one, to prevent fragmentation. It supposedly
    // doesn't hurt performance, because the object requested by the user is large.
    // The bounds for the groups are:
    const size_t maxBinned = getMaxBinnedSize();
    const size_t quiteSmall = maxBinned / 8;
    const size_t quiteLarge = maxBinned;
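    // To illustrate the decision below (actual values depend on getMaxBinnedSize()):
    // a request of at least maxBinned is "huge" and gets its own exact-fit region;
    // a request below maxBinned/8 also pre-adds NUM_OF_REG "advance" regions to the bins;
    // anything in between gets a single shareable region sized from 4*maxRequestedSize.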

    if (blockSize >= quiteLarge) {
        // Do not interact with other threads via semaphores, as for an exact fit
        // we can't share regions with them; memory requesting is individual.
        block = addNewRegion(blockSize, MEMREG_ONE_BLOCK, /*addToBin=*/false);
        if (!block)
            return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
        *splittableRet = false;
    } else {
        const size_t regSz_sizeBased = alignUp(4*maxRequestedSize, 1024*1024);
        // Another thread is modifying the backend while we can't get the block.
        // Wait until it leaves and re-do the scan
        // before trying other ways to extend the backend.
        if (bkndSync.waitTillBlockReleased(startModifiedCnt)
            // the semaphore protects adding more memory from the OS
            || memExtendingSema.wait())
            return (FreeBlock*)VALID_BLOCK_IN_BIN;

        if (startModifiedCnt != bkndSync.getNumOfMods()) {
            memExtendingSema.signal();
            return (FreeBlock*)VALID_BLOCK_IN_BIN;
        }

        if (blockSize < quiteSmall) {
            // For blocks of this size, add NUM_OF_REG "advance" regions to the bin,
            // and return one as the result.
            // TODO: add to the bin first, because other threads can use them right away.
            //       This must be done carefully, because blocks in bins can be released
            //       in releaseCachesToLimit().
            const unsigned NUM_OF_REG = 3;
            MemRegionType regType = needSlabRegion ? MEMREG_SLAB_BLOCKS : MEMREG_LARGE_BLOCKS;
            block = addNewRegion(regSz_sizeBased, regType, /*addToBin=*/false);
            if (block)
                for (unsigned idx=0; idx<NUM_OF_REG; idx++)
                    if (! addNewRegion(regSz_sizeBased, regType, /*addToBin=*/true))
                        break;
        } else {
            block = addNewRegion(regSz_sizeBased, MEMREG_LARGE_BLOCKS, /*addToBin=*/false);
        }
        memExtendingSema.signal();

        // no regions found, try to clean the cache
        if (!block || block == (FreeBlock*)VALID_BLOCK_IN_BIN)
            return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
        // Since a region can hold more than one block, it can be split.
        *splittableRet = true;
    }
    // after asking the OS for memory, release caches if we are above the memory limits
    releaseCachesToLimit();

    return block;
}

void Backend::releaseCachesToLimit()
{
    if (!memSoftLimit.load(std::memory_order_relaxed)
        || totalMemSize.load(std::memory_order_relaxed) <= memSoftLimit.load(std::memory_order_relaxed)) {
        return;
    }
    size_t locTotalMemSize, locMemSoftLimit;

    scanCoalescQ(/*forceCoalescQDrop=*/false);
    if (extMemPool->softCachesCleanup() &&
        (locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
        (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
        return;
    // Clean the global large-object cache; if this is not enough, clean local caches.
    // Do this in several tries, because backend fragmentation can prevent
    // a region from being released.
    for (int cleanLocal = 0; cleanLocal<2; cleanLocal++)
        while (cleanLocal ?
               extMemPool->allLocalCaches.cleanup(/*cleanOnlyUnused=*/true) :
               extMemPool->loc.decreasingCleanup())
            if ((locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
                (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
                return;
    // last chance to match memSoftLimit
    extMemPool->hardCachesCleanup(true);
}

int Backend::IndexedBins::getMinNonemptyBin(unsigned startBin) const
{
    int p = bitMask.getMinTrue(startBin);
    return p == -1 ? Backend::freeBinsNum : p;
}

FreeBlock *Backend::IndexedBins::findBlock(int nativeBin, BackendSync *sync, size_t size,
                                           bool needAlignedBlock, bool alignedBin, int *numOfLockedBins)
{
    for (int i=getMinNonemptyBin(nativeBin); i<(int)freeBinsNum; i=getMinNonemptyBin(i+1))
        if (FreeBlock *block = getFromBin(i, sync, size, needAlignedBlock, alignedBin, /*wait=*/false, numOfLockedBins))
            return block;

    return nullptr;
}

void Backend::requestBootstrapMem()
{
    if (bootsrapMemDone == bootsrapMemStatus.load(std::memory_order_acquire))
        return;
    MallocMutex::scoped_lock lock( bootsrapMemStatusMutex );
    if (bootsrapMemDone == bootsrapMemStatus)
        return;
    MALLOC_ASSERT(bootsrapMemNotDone == bootsrapMemStatus, ASSERT_TEXT);
    bootsrapMemStatus = bootsrapMemInitializing;
    // request a rather big region in advance during bootstrap;
    // ok to get nullptr here, as later we re-do the request with a more modest size
    addNewRegion(2*1024*1024, MEMREG_SLAB_BLOCKS, /*addToBin=*/true);
    bootsrapMemStatus = bootsrapMemDone;
}

// try to allocate a block of num*size bytes from the available bins
// needAlignedBlock is true if the result must be slab-aligned
FreeBlock *Backend::genericGetBlock(int num, size_t size, bool needAlignedBlock)
{
    FreeBlock *block = nullptr;
    const size_t totalReqSize = num*size;
    // no splitting after requesting a new region; ask for the exact size
    const int nativeBin = sizeToBin(totalReqSize);

    requestBootstrapMem();
    // If we found 2 or fewer locked bins, it's time to ask the OS for more memory.
    // But nothing can be asked from a fixed pool. And we prefer to wait, not to ask
    // for more memory, if the block is quite large.
    int lockedBinsThreshold = extMemPool->fixedPool || size>=maxBinned_SmallPage? 0 : 2;

    // Find the maximal requested size limited by getMaxBinnedSize()
    AtomicUpdate(maxRequestedSize, totalReqSize, MaxRequestComparator(this));
    scanCoalescQ(/*forceCoalescQDrop=*/false);

    bool splittable = true;
    for (;;) {
        const intptr_t startModifiedCnt = bkndSync.getNumOfMods();
        int numOfLockedBins;
        intptr_t cleanCnt;
        do {
            cleanCnt = backendCleanCnt.load(std::memory_order_acquire);
            numOfLockedBins = 0;
            if (needAlignedBlock) {
                block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
                                                      /*alignedBin=*/true, &numOfLockedBins);
                if (!block && extMemPool->fixedPool)
                    block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
                                                         /*alignedBin=*/false, &numOfLockedBins);
            } else {
                block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
                                                     /*alignedBin=*/false, &numOfLockedBins);
                if (!block && extMemPool->fixedPool)
                    block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
                                                          /*alignedBin=*/true, &numOfLockedBins);
            }
        } while (!block && (numOfLockedBins>lockedBinsThreshold || cleanCnt % 2 == 1 ||
                            cleanCnt != backendCleanCnt.load(std::memory_order_acquire)));

        if (block)
            break;

        bool retScanCoalescQ = scanCoalescQ(/*forceCoalescQDrop=*/true);
        bool retSoftCachesCleanup = extMemPool->softCachesCleanup();
        if (!(retScanCoalescQ || retSoftCachesCleanup)) {
            // the bins were not updated,
            // so the only remaining possibility is to ask for more memory
            block = askMemFromOS(totalReqSize, startModifiedCnt, &lockedBinsThreshold,
                                 numOfLockedBins, &splittable, needAlignedBlock);
            if (!block)
                return nullptr;
            if (block != (FreeBlock*)VALID_BLOCK_IN_BIN) {
                // the size can be increased in askMemFromOS, that's why >=
                MALLOC_ASSERT(block->sizeTmp >= size, ASSERT_TEXT);
                break;
            }
            // a valid block is somewhere in the bins, let's find it
            block = nullptr;
        }
    }
    MALLOC_ASSERT(block, ASSERT_TEXT);
    if (splittable) {
        // At this point we have to be sure that the slabAligned attribute describes the right block state
        block = splitBlock(block, num, size, block->slabAligned, needAlignedBlock);
    }
    // matches blockConsumed() from startUseBlock()
    bkndSync.blockReleased();

    return block;
}

LargeMemoryBlock *Backend::getLargeBlock(size_t size)
{
    LargeMemoryBlock *lmb =
        (LargeMemoryBlock*)genericGetBlock(1, size, /*needAlignedBlock=*/false);
    if (lmb) {
        lmb->unalignedSize = size;
        if (extMemPool->userPool())
            extMemPool->lmbList.add(lmb);
    }
    return lmb;
}

BlockI *Backend::getSlabBlock(int num) {
    BlockI *b = (BlockI*)genericGetBlock(num, slabSize, /*needAlignedBlock=*/true);
    MALLOC_ASSERT(isAligned(b, slabSize), ASSERT_TEXT);
    return b;
}

void Backend::putSlabBlock(BlockI *block) {
    genericPutBlock((FreeBlock *)block, slabSize, /*slabAligned=*/true);
}

void *Backend::getBackRefSpace(size_t size, bool *rawMemUsed)
{
    // This block is released only at shutdown, so it could prevent an entire
    // region from being released if it were received from the backend;
    // therefore prefer using getRawMemory.
    if (void *ret = getRawMemory(size, REGULAR)) {
        *rawMemUsed = true;
        return ret;
    }
    void *ret = genericGetBlock(1, size, /*needAlignedBlock=*/false);
    if (ret) *rawMemUsed = false;
    return ret;
}

void Backend::putBackRefSpace(void *b, size_t size, bool rawMemUsed)
{
    if (rawMemUsed)
        freeRawMemory(b, size);
    // non-raw memory is ignored, as it is released when its region is released
}

void Backend::removeBlockFromBin(FreeBlock *fBlock)
{
    if (fBlock->myBin != Backend::NO_BIN) {
        if (fBlock->slabAligned)
            freeSlabAlignedBins.lockRemoveBlock(fBlock->myBin, fBlock);
        else
            freeLargeBlockBins.lockRemoveBlock(fBlock->myBin, fBlock);
    }
}

void Backend::genericPutBlock(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
{
    bkndSync.blockConsumed();
    coalescAndPut(fBlock, blockSz, slabAligned);
    bkndSync.blockReleased();
}

void AllLargeBlocksList::add(LargeMemoryBlock *lmb)
{
    MallocMutex::scoped_lock scoped_cs(largeObjLock);
    lmb->gPrev = nullptr;
    lmb->gNext = loHead;
    if (lmb->gNext)
        lmb->gNext->gPrev = lmb;
    loHead = lmb;
}

void AllLargeBlocksList::remove(LargeMemoryBlock *lmb)
{
    MallocMutex::scoped_lock scoped_cs(largeObjLock);
    if (loHead == lmb)
        loHead = lmb->gNext;
    if (lmb->gNext)
        lmb->gNext->gPrev = lmb->gPrev;
    if (lmb->gPrev)
        lmb->gPrev->gNext = lmb->gNext;
}

void Backend::putLargeBlock(LargeMemoryBlock *lmb)
{
    if (extMemPool->userPool())
        extMemPool->lmbList.remove(lmb);
    genericPutBlock((FreeBlock *)lmb, lmb->unalignedSize, false);
}

void Backend::returnLargeObject(LargeMemoryBlock *lmb)
{
    removeBackRef(lmb->backRefIdx);
    putLargeBlock(lmb);
    STAT_increment(getThreadId(), ThreadCommonCounters, freeLargeObj);
}

#if BACKEND_HAS_MREMAP
void *Backend::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
{
    // no remap for user pools or for objects small enough to live in bins
    if (inUserPool() || min(oldSize, newSize)<maxBinned_SmallPage
        // during remap we can't guarantee an alignment stricter than the current one
        // or stricter than page alignment
        || !isAligned(ptr, alignment) || alignment>extMemPool->granularity)
        return nullptr;
    const LargeMemoryBlock* lmbOld = ((LargeObjectHdr *)ptr - 1)->memoryBlock;
    const size_t oldUnalignedSize = lmbOld->unalignedSize;
    FreeBlock *oldFBlock = (FreeBlock *)lmbOld;
    FreeBlock *right = oldFBlock->rightNeig(oldUnalignedSize);
    // in every region only one block can have LAST_REGION_BLOCK on its right,
    // so no synchronization is needed
    if (!right->isLastRegionBlock())
        return nullptr;

    MemRegion *oldRegion = static_cast<LastFreeBlock*>(right)->memRegion;
    MALLOC_ASSERT( oldRegion < ptr, ASSERT_TEXT );
    const size_t oldRegionSize = oldRegion->allocSz;
    if (oldRegion->type != MEMREG_ONE_BLOCK)
        return nullptr; // we are not alone in the region
    const size_t userOffset = (uintptr_t)ptr - (uintptr_t)oldRegion;
    const size_t alignedSize = LargeObjectCache::alignToBin(newSize + userOffset);
    const size_t requestSize =
        alignUp(sizeof(MemRegion) + alignedSize + sizeof(LastFreeBlock), extMemPool->granularity);
    if (requestSize < alignedSize) // has it wrapped around?
        return nullptr;
    regionList.remove(oldRegion);

    // The deallocation should be registered in the address range before mremap to
    // prevent a race condition with an allocation on another thread
    // (the OS can reuse the memory and registerAlloc would be missed on another thread).
    usedAddrRange.registerFree((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize);

    void *ret = mremap(oldRegion, oldRegion->allocSz, requestSize, MREMAP_MAYMOVE);
    if (MAP_FAILED == ret) { // can't remap, revert and leave
        regionList.add(oldRegion);
        usedAddrRange.registerAlloc((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize);
        return nullptr;
    }
    MemRegion *region = (MemRegion*)ret;
    MALLOC_ASSERT(region->type == MEMREG_ONE_BLOCK, ASSERT_TEXT);
    region->allocSz = requestSize;
    region->blockSz = alignedSize;

    FreeBlock *fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion),
                                             largeObjectAlignment);

    regionList.add(region);
    startUseBlock(region, fBlock, /*addToBin=*/false);
    MALLOC_ASSERT(fBlock->sizeTmp == region->blockSz, ASSERT_TEXT);
    // matches blockConsumed() in startUseBlock()
    // TODO: get rid of the useless blockConsumed()/blockReleased() pair
    bkndSync.blockReleased();

    // the object must start at the same offset from the region's start
    void *object = (void*)((uintptr_t)region + userOffset);
    MALLOC_ASSERT(isAligned(object, alignment), ASSERT_TEXT);
    LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
    setBackRef(header->backRefIdx, header);

    LargeMemoryBlock *lmb = (LargeMemoryBlock*)fBlock;
    lmb->unalignedSize = region->blockSz;
    lmb->objectSize = newSize;
    lmb->backRefIdx = header->backRefIdx;
    header->memoryBlock = lmb;
    MALLOC_ASSERT((uintptr_t)lmb + lmb->unalignedSize >=
                  (uintptr_t)object + lmb->objectSize, "An object must fit to the block.");

    usedAddrRange.registerAlloc((uintptr_t)region, (uintptr_t)region + requestSize);
    totalMemSize.fetch_add(region->allocSz - oldRegionSize);

    return object;
}
#endif /* BACKEND_HAS_MREMAP */

void Backend::releaseRegion(MemRegion *memRegion)
{
    regionList.remove(memRegion);
    freeRawMem(memRegion, memRegion->allocSz);
}

// coalesce fBlock with its neighbors
FreeBlock *Backend::doCoalesc(FreeBlock *fBlock, MemRegion **mRegion)
{
    FreeBlock *resBlock = fBlock;
    size_t resSize = fBlock->sizeTmp;
    MemRegion *memRegion = nullptr;

    fBlock->markCoalescing(resSize);
    resBlock->blockInBin = false;

    // coalescing with left neighbor
    size_t leftSz = fBlock->trySetLeftUsed(GuardedSize::COAL_BLOCK);
    if (leftSz != GuardedSize::LOCKED) {
        if (leftSz == GuardedSize::COAL_BLOCK) {
            coalescQ.putBlock(fBlock);
            return nullptr;
        } else {
            FreeBlock *left = fBlock->leftNeig(leftSz);
            size_t lSz = left->trySetMeUsed(GuardedSize::COAL_BLOCK);
            if (lSz <= GuardedSize::MAX_LOCKED_VAL) {
                fBlock->setLeftFree(leftSz); // rollback
                coalescQ.putBlock(fBlock);
                return nullptr;
            } else {
                MALLOC_ASSERT(lSz == leftSz, "Invalid header");
                left->blockInBin = true;
                resBlock = left;
                resSize += leftSz;
                resBlock->sizeTmp = resSize;
            }
        }
    }
    // coalescing with right neighbor
    FreeBlock *right = fBlock->rightNeig(fBlock->sizeTmp);
    size_t rightSz = right->trySetMeUsed(GuardedSize::COAL_BLOCK);
    if (rightSz != GuardedSize::LOCKED) {
        // LastFreeBlock is on the right side
        if (GuardedSize::LAST_REGION_BLOCK == rightSz) {
            right->setMeFree(GuardedSize::LAST_REGION_BLOCK);
            memRegion = static_cast<LastFreeBlock*>(right)->memRegion;
        } else if (GuardedSize::COAL_BLOCK == rightSz) {
            if (resBlock->blockInBin) {
                resBlock->blockInBin = false;
                removeBlockFromBin(resBlock);
            }
            coalescQ.putBlock(resBlock);
            return nullptr;
        } else {
            size_t rSz = right->rightNeig(rightSz)->
                trySetLeftUsed(GuardedSize::COAL_BLOCK);
            if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
                right->setMeFree(rightSz); // rollback
                if (resBlock->blockInBin) {
                    resBlock->blockInBin = false;
                    removeBlockFromBin(resBlock);
                }
                coalescQ.putBlock(resBlock);
                return nullptr;
            } else {
                MALLOC_ASSERT(rSz == rightSz, "Invalid header");
                removeBlockFromBin(right);
                resSize += rightSz;

                // Is LastFreeBlock on the right side of right?
                FreeBlock *nextRight = right->rightNeig(rightSz);
                size_t nextRightSz = nextRight->
                    trySetMeUsed(GuardedSize::COAL_BLOCK);
                if (nextRightSz > GuardedSize::MAX_LOCKED_VAL) {
                    if (nextRightSz == GuardedSize::LAST_REGION_BLOCK)
                        memRegion = static_cast<LastFreeBlock*>(nextRight)->memRegion;

                    nextRight->setMeFree(nextRightSz);
                }
            }
        }
    }
    if (memRegion) {
        MALLOC_ASSERT((uintptr_t)memRegion + memRegion->allocSz >=
                      (uintptr_t)right + sizeof(LastFreeBlock), ASSERT_TEXT);
        MALLOC_ASSERT((uintptr_t)memRegion < (uintptr_t)resBlock, ASSERT_TEXT);
        *mRegion = memRegion;
    } else
        *mRegion = nullptr;
    resBlock->sizeTmp = resSize;
    return resBlock;
}

bool Backend::coalescAndPutList(FreeBlock *list, bool forceCoalescQDrop, bool reportBlocksProcessed)
{
    bool regionReleased = false;

    for (FreeBlock *helper; list;
         list = helper,
         // matches block enqueue in CoalRequestQ::putBlock()
         reportBlocksProcessed? coalescQ.blockWasProcessed() : (void)0) {
        MemRegion *memRegion;
        bool addToTail = false;

        helper = list->nextToFree;
        FreeBlock *toRet = doCoalesc(list, &memRegion);
        if (!toRet)
            continue;

        if (memRegion && memRegion->blockSz == toRet->sizeTmp
            && !extMemPool->fixedPool) {
            if (extMemPool->regionsAreReleaseable()) {
                // release the region, because there are no used blocks in it
                if (toRet->blockInBin)
                    removeBlockFromBin(toRet);
                releaseRegion(memRegion);
                regionReleased = true;
                continue;
            } else                // add the block from the empty region to the end of the bin,
                addToTail = true; // preserving it for an exact fit
        }
        size_t currSz = toRet->sizeTmp;
        int bin = sizeToBin(currSz);
        bool toAligned = extMemPool->fixedPool ? toAlignedBin(toRet, currSz) : toRet->slabAligned;
        bool needAddToBin = true;

        if (toRet->blockInBin) {
            // Does it stay in the same bin?
            if (toRet->myBin == bin && toRet->slabAligned == toAligned)
                needAddToBin = false;
            else {
                toRet->blockInBin = false;
                removeBlockFromBin(toRet);
            }
        }

        // Does not stay in the same bin, or is bin-less; add it
        if (needAddToBin) {
            toRet->prev = toRet->next = toRet->nextToFree = nullptr;
            toRet->myBin = NO_BIN;
            toRet->slabAligned = toAligned;

            // If the block is too small to fit in any bin, keep it bin-less.
            // It's not a leak because the block can later be coalesced.
            if (currSz >= minBinnedSize) {
                toRet->sizeTmp = currSz;
                IndexedBins *target = toRet->slabAligned ? &freeSlabAlignedBins : &freeLargeBlockBins;
                if (forceCoalescQDrop) {
                    target->addBlock(bin, toRet, toRet->sizeTmp, addToTail);
                } else if (!target->tryAddBlock(bin, toRet, addToTail)) {
                    coalescQ.putBlock(toRet);
                    continue;
                }
            }
            toRet->sizeTmp = 0;
        }
        // Free the (possibly coalesced) free block.
        // Adding to a bin must be done before this point,
        // because after a block is free it can be coalesced, and
        // using its pointer becomes unsafe.
        // Remember that coalescing is not done under any global lock.
        toRet->setMeFree(currSz);
        toRet->rightNeig(currSz)->setLeftFree(currSz);
    }
    return regionReleased;
}

// Coalesce fBlock and add it back to a bin,
// processing delayed coalescing requests along the way.
void Backend::coalescAndPut(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
{
    fBlock->sizeTmp = blockSz;
    fBlock->nextToFree = nullptr;
    fBlock->slabAligned = slabAligned;

    coalescAndPutList(fBlock, /*forceCoalescQDrop=*/false, /*reportBlocksProcessed=*/false);
}

bool Backend::scanCoalescQ(bool forceCoalescQDrop)
{
    FreeBlock *currCoalescList = coalescQ.getAll();

    if (currCoalescList)
        // reportBlocksProcessed=true informs that the blocks leave coalescQ;
        // matches blockConsumed() from CoalRequestQ::putBlock()
        coalescAndPutList(currCoalescList, forceCoalescQDrop,
                          /*reportBlocksProcessed=*/true);
    // return the status of coalescQ.getAll(), as an indication of possible changes in the backend
    // TODO: coalescAndPutList() may report whether some new free blocks became available or not
    return currCoalescList;
}

FreeBlock *Backend::findBlockInRegion(MemRegion *region, size_t exactBlockSize)
{
    FreeBlock *fBlock;
    size_t blockSz;
    uintptr_t fBlockEnd,
        lastFreeBlock = (uintptr_t)region + region->allocSz - sizeof(LastFreeBlock);

    static_assert(sizeof(LastFreeBlock) % sizeof(uintptr_t) == 0,
                  "Atomic applied on LastFreeBlock, and we put it at the end of region, that"
                  " is uintptr_t-aligned, so no unaligned atomic operations are possible.");
    // the right bound is slab-aligned, keep LastFreeBlock after it
    if (region->type == MEMREG_SLAB_BLOCKS) {
        fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), sizeof(uintptr_t));
        fBlockEnd = alignDown(lastFreeBlock, slabSize);
    } else {
        fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), largeObjectAlignment);
        fBlockEnd = (uintptr_t)fBlock + exactBlockSize;
        MALLOC_ASSERT(fBlockEnd <= lastFreeBlock, ASSERT_TEXT);
    }
    if (fBlockEnd <= (uintptr_t)fBlock)
        return nullptr; // allocSz is too small
    blockSz = fBlockEnd - (uintptr_t)fBlock;
    // TODO: extend getSlabBlock to support degradation, i.e. getting fewer blocks
    // than requested, and then relax this check
    // (now all-or-nothing is implemented, and the check follows that)
    if (blockSz < numOfSlabAllocOnMiss*slabSize)
        return nullptr;

    region->blockSz = blockSz;
    return fBlock;
}

// startUseBlock may add the free block to a bin; the block can be used and
// even released after this, so the region must already be added to regionList
void Backend::startUseBlock(MemRegion *region, FreeBlock *fBlock, bool addToBin)
{
    size_t blockSz = region->blockSz;
    fBlock->initHeader();
    fBlock->setMeFree(blockSz);

    LastFreeBlock *lastBl = static_cast<LastFreeBlock*>(fBlock->rightNeig(blockSz));
    // to avoid unaligned atomics during LastFreeBlock access
    MALLOC_ASSERT(isAligned(lastBl, sizeof(uintptr_t)), nullptr);
    lastBl->initHeader();
    lastBl->setMeFree(GuardedSize::LAST_REGION_BLOCK);
    lastBl->setLeftFree(blockSz);
    lastBl->myBin = NO_BIN;
    lastBl->memRegion = region;

    if (addToBin) {
        unsigned targetBin = sizeToBin(blockSz);
        // when adding advance regions, register the bin for the largest block in the region
        advRegBins.registerBin(targetBin);
        if (region->type == MEMREG_SLAB_BLOCKS) {
            fBlock->slabAligned = true;
            freeSlabAlignedBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
        } else {
            fBlock->slabAligned = false;
            freeLargeBlockBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
        }
    } else {
        // to match blockReleased() in genericGetBlock
        bkndSync.blockConsumed();
        // record our alignment for correct splitBlock operation
        fBlock->slabAligned = region->type == MEMREG_SLAB_BLOCKS ? true : false;
        fBlock->sizeTmp = fBlock->tryLockBlock();
        MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, "Locking must be successful");
    }
}

void MemRegionList::add(MemRegion *r)
{
    r->prev = nullptr;
    MallocMutex::scoped_lock lock(regionListLock);
    r->next = head;
    head = r;
    if (head->next)
        head->next->prev = head;
}

void MemRegionList::remove(MemRegion *r)
{
    MallocMutex::scoped_lock lock(regionListLock);
    if (head == r)
        head = head->next;
    if (r->next)
        r->next->prev = r->prev;
    if (r->prev)
        r->prev->next = r->next;
}

#if __TBB_MALLOC_BACKEND_STAT
int MemRegionList::reportStat(FILE *f)
{
    int regNum = 0;
    MallocMutex::scoped_lock lock(regionListLock);
    for (MemRegion *curr = head; curr; curr = curr->next) {
        fprintf(f, "%p: max block %lu B, ", curr, curr->blockSz);
        regNum++;
    }
    return regNum;
}
#endif

FreeBlock *Backend::addNewRegion(size_t size, MemRegionType memRegType, bool addToBin)
{
    static_assert(sizeof(BlockMutexes) <= sizeof(BlockI), "Header must be not overwritten in used blocks");
    MALLOC_ASSERT(FreeBlock::minBlockSize > GuardedSize::MAX_SPEC_VAL,
                  "Block length must not conflict with special values of GuardedSize");
    // If the region is not "for slabs" we should reserve some space for
    // a region header, the worst case alignment and the last block mark.
    const size_t requestSize = memRegType == MEMREG_SLAB_BLOCKS ? size :
        size + sizeof(MemRegion) + largeObjectAlignment
        + FreeBlock::minBlockSize + sizeof(LastFreeBlock);

    size_t rawSize = requestSize;
    MemRegion *region = (MemRegion*)allocRawMem(rawSize);
    if (!region) {
        MALLOC_ASSERT(rawSize==requestSize, "getRawMem has not allocated memory but changed the allocated size.");
        return nullptr;
    }
    if (rawSize < sizeof(MemRegion)) {
        if (!extMemPool->fixedPool)
            freeRawMem(region, rawSize);
        return nullptr;
    }

    region->type = memRegType;
    region->allocSz = rawSize;
    FreeBlock *fBlock = findBlockInRegion(region, size);
    if (!fBlock) {
        if (!extMemPool->fixedPool)
            freeRawMem(region, rawSize);
        return nullptr;
    }
    regionList.add(region);
    startUseBlock(region, fBlock, addToBin);
    bkndSync.binsModified();
    return addToBin? (FreeBlock*)VALID_BLOCK_IN_BIN : fBlock;
}

void Backend::init(ExtMemoryPool *extMemoryPool)
{
    extMemPool = extMemoryPool;
    usedAddrRange.init();
    coalescQ.init(&bkndSync);
    bkndSync.init(this);
}

void Backend::reset()
{
    MALLOC_ASSERT(extMemPool->userPool(), "Only user pool can be reset.");
    // no active threads are allowed in backend while reset() called
    verify();

    freeLargeBlockBins.reset();
    freeSlabAlignedBins.reset();
    advRegBins.reset();

    for (MemRegion *curr = regionList.head; curr; curr = curr->next) {
        FreeBlock *fBlock = findBlockInRegion(curr, curr->blockSz);
        MALLOC_ASSERT(fBlock, "A memory region unexpectedly got smaller");
        startUseBlock(curr, fBlock, /*addToBin=*/true);
    }
}

bool Backend::destroy()
{
    bool noError = true;
    // no active threads are allowed in backend while destroy() called
    verify();
    if (!inUserPool()) {
        freeLargeBlockBins.reset();
        freeSlabAlignedBins.reset();
    }
    while (regionList.head) {
        MemRegion *helper = regionList.head->next;
        noError &= freeRawMem(regionList.head, regionList.head->allocSz);
        regionList.head = helper;
    }
    return noError;
}

bool Backend::clean()
{
    scanCoalescQ(/*forceCoalescQDrop=*/false);
    // Backend::clean() is always called under synchronization, so only one thread
    // can enter this method at a time.
    // backendCleanCnt % 2 == 1 means that a clean operation is in progress
    backendCleanCnt.fetch_add(1, std::memory_order_acq_rel);
    bool res = false;
    // We can have several blocks occupying a whole region,
    // because such regions are added in advance (see askMemFromOS() and reset())
    // and never used. Release them all.
    for (int i = advRegBins.getMinUsedBin(0); i != -1; i = advRegBins.getMinUsedBin(i+1)) {
        if (i == freeSlabAlignedBins.getMinNonemptyBin(i))
            res |= freeSlabAlignedBins.tryReleaseRegions(i, this);
        if (i == freeLargeBlockBins.getMinNonemptyBin(i))
            res |= freeLargeBlockBins.tryReleaseRegions(i, this);
    }
    backendCleanCnt.fetch_add(1, std::memory_order_acq_rel);
    return res;
}

void Backend::IndexedBins::verify()
{
#if MALLOC_DEBUG
    for (int i=0; i<(int)freeBinsNum; i++) {
        for (FreeBlock *fb = freeBins[i].head.load(std::memory_order_relaxed); fb; fb=fb->next) {
            uintptr_t mySz = fb->myL.value;
            MALLOC_ASSERT(mySz>GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
            FreeBlock *right = (FreeBlock*)((uintptr_t)fb + mySz);
            suppress_unused_warning(right);
            MALLOC_ASSERT(right->myL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
            MALLOC_ASSERT(right->leftL.value==mySz, ASSERT_TEXT);
            MALLOC_ASSERT(fb->leftL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
        }
    }
#endif
}

// For correct operation, this must be called when no other thread
// is changing the backend.
void Backend::verify()
{
#if MALLOC_DEBUG
    scanCoalescQ(/*forceCoalescQDrop=*/false);
#endif // MALLOC_DEBUG

    freeLargeBlockBins.verify();
    freeSlabAlignedBins.verify();
}

#if __TBB_MALLOC_BACKEND_STAT
size_t Backend::Bin::countFreeBlocks()
{
    size_t cnt = 0;
    {
        MallocMutex::scoped_lock lock(tLock);
        for (FreeBlock *fb = head; fb; fb = fb->next)
            cnt++;
    }
    return cnt;
}

size_t Backend::Bin::reportFreeBlocks(FILE *f)
{
    size_t totalSz = 0;
    MallocMutex::scoped_lock lock(tLock);
    for (FreeBlock *fb = head; fb; fb = fb->next) {
        size_t sz = fb->tryLockBlock();
        fb->setMeFree(sz);
        fb->rightNeig(sz)->setLeftFree(sz);
        fprintf(f, " [%p;%p]", fb, (void*)((uintptr_t)fb+sz));
        totalSz += sz;
    }
    return totalSz;
}

void Backend::IndexedBins::reportStat(FILE *f)
{
    size_t totalSize = 0;

    for (int i=0; i<Backend::freeBinsNum; i++)
        if (size_t cnt = freeBins[i].countFreeBlocks()) {
            totalSize += freeBins[i].reportFreeBlocks(f);
            fprintf(f, " %d:%lu, ", i, cnt);
        }
    fprintf(f, "\ttotal size %lu KB", totalSize/1024);
}

void Backend::reportStat(FILE *f)
{
    scanCoalescQ(/*forceCoalescQDrop=*/false);

    fprintf(f, "\n regions:\n");
    int regNum = regionList.reportStat(f);
    fprintf(f, "\n%d regions, %lu KB in all regions\n free bins:\nlarge bins: ",
            regNum, totalMemSize/1024);
    freeLargeBlockBins.reportStat(f);
    fprintf(f, "\naligned bins: ");
    freeSlabAlignedBins.reportStat(f);
    fprintf(f, "\n");
}
#endif // __TBB_MALLOC_BACKEND_STAT

} } // namespaces