xref: /oneTBB/src/tbbmalloc/backend.cpp (revision ddc3bd03)
1 /*
2     Copyright (c) 2005-2023 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #include <string.h>   /* for memset */
18 #include <errno.h>
19 #include "tbbmalloc_internal.h"
20 
21 namespace rml {
22 namespace internal {
23 
24 /*********** Code to acquire memory from the OS or other executive ****************/
25 
26 /*
27   A syscall or malloc can set a non-zero errno on failure,
28   but the allocator might still be able to find memory to fulfill the request.
29   A successful scalable_malloc call must not change errno.
30   To support this, restore the old errno in (get|free)RawMemory, and set errno
31   in the frontend just before returning to user code.
32   Please note: every syscall/libc call used inside scalable_malloc that
33   sets errno must be protected this way, not just memory allocation per se.
34 */
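/*
  Illustrative sketch (editorial note, not code used by the allocator): the protection
  pattern described above amounts to saving and restoring errno around any call that
  may set it, e.g.
      int prevErrno = errno;
      void *mem = mmap(nullptr, sz, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
      if (mem == MAP_FAILED) mem = nullptr;  // mmap may set errno; allocator may try other sources
      errno = prevErrno;                     // a successful scalable_malloc keeps errno intact
*/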
35 
36 #if USE_DEFAULT_MEMORY_MAPPING
37 #include "MapMemory.h"
38 #else
39 /* assume MapMemory and UnmapMemory are customized */
40 #endif
41 
42 void* getRawMemory (size_t size, PageType pageType) {
43     return MapMemory(size, pageType);
44 }
45 
46 int freeRawMemory (void *object, size_t size) {
47     return UnmapMemory(object, size);
48 }
49 
50 #if CHECK_ALLOCATION_RANGE
51 
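// Under CHECK_ALLOCATION_RANGE the backend tracks a conservative [leftBound, rightBound)
// envelope of the memory it obtained from the OS: registerAlloc() widens the bounds,
// while registerFree() narrows them only when an exact bound is returned and resets the
// envelope to empty when the whole range is freed; interior frees are ignored.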
52 void Backend::UsedAddressRange::registerAlloc(uintptr_t left, uintptr_t right)
53 {
54     MallocMutex::scoped_lock lock(mutex);
55     if (left < leftBound.load(std::memory_order_relaxed))
56         leftBound.store(left, std::memory_order_relaxed);
57     if (right > rightBound.load(std::memory_order_relaxed))
58         rightBound.store(right, std::memory_order_relaxed);
59     MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed), ASSERT_TEXT);
60     MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
61     MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) <= left && right <= rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
62 }
63 
64 void Backend::UsedAddressRange::registerFree(uintptr_t left, uintptr_t right)
65 {
66     MallocMutex::scoped_lock lock(mutex);
67     if (leftBound.load(std::memory_order_relaxed) == left) {
68         if (rightBound.load(std::memory_order_relaxed) == right) {
69             leftBound.store(ADDRESS_UPPER_BOUND, std::memory_order_relaxed);
70             rightBound.store(0, std::memory_order_relaxed);
71         } else
72             leftBound.store(right, std::memory_order_relaxed);
73     } else if (rightBound.load(std::memory_order_relaxed) == right)
74         rightBound.store(left, std::memory_order_relaxed);
75     MALLOC_ASSERT((!rightBound.load(std::memory_order_relaxed) && leftBound.load(std::memory_order_relaxed) == ADDRESS_UPPER_BOUND)
76                   || leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
77 }
78 #endif // CHECK_ALLOCATION_RANGE
79 
80 // Initialized in frontend inside defaultMemPool
81 extern HugePagesStatus hugePages;
82 
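// allocRawMem() takes the size by reference: the request is rounded up to the pool
// granularity (or to the huge page granularity when huge pages are enabled) and the
// actually obtained amount is written back. User pools are served through the registered
// rawAlloc callback; otherwise memory comes from the OS, preferring preallocated huge
// pages, then transparent huge pages, then regular pages.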
83 void *Backend::allocRawMem(size_t &size)
84 {
85     void *res = nullptr;
86     size_t allocSize = 0;
87 
88     if (extMemPool->userPool()) {
89         if (extMemPool->fixedPool && bootsrapMemDone == bootsrapMemStatus.load(std::memory_order_acquire))
90             return nullptr;
91         MALLOC_ASSERT(bootsrapMemStatus != bootsrapMemNotDone,
92                       "Backend::allocRawMem() called prematurely?");
93         // TODO: support for raw mem not aligned at sizeof(uintptr_t)
94         // memory from a fixed pool is requested once and only once
95         allocSize = alignUpGeneric(size, extMemPool->granularity);
96         res = (*extMemPool->rawAlloc)(extMemPool->poolId, allocSize);
97     } else {
98         // Align allocation on page size
99         size_t pageSize = hugePages.isEnabled ? hugePages.getGranularity() : extMemPool->granularity;
100         MALLOC_ASSERT(pageSize, "Page size cannot be zero.");
101         allocSize = alignUpGeneric(size, pageSize);
102 
103         // If the user requested huge pages and they are available, try preallocated ones first.
104         // If there are none, check for transparent huge page support and use that instead.
105         if (hugePages.isEnabled) {
106             if (hugePages.isHPAvailable) {
107                 res = getRawMemory(allocSize, PREALLOCATED_HUGE_PAGE);
108             }
109             if (!res && hugePages.isTHPAvailable) {
110                 res = getRawMemory(allocSize, TRANSPARENT_HUGE_PAGE);
111             }
112         }
113 
114         if (!res) {
115             res = getRawMemory(allocSize, REGULAR);
116         }
117     }
118 
119     if (res) {
120         MALLOC_ASSERT(allocSize > 0, "Invalid size of an allocated region.");
121         size = allocSize;
122         if (!extMemPool->userPool())
123             usedAddrRange.registerAlloc((uintptr_t)res, (uintptr_t)res+size);
124 #if MALLOC_DEBUG
125         volatile size_t curTotalSize = totalMemSize; // to read global value once
126         MALLOC_ASSERT(curTotalSize+size > curTotalSize, "Overflow allocation size.");
127 #endif
128         totalMemSize.fetch_add(size);
129     }
130 
131     return res;
132 }
133 
134 bool Backend::freeRawMem(void *object, size_t size)
135 {
136     bool fail;
137 #if MALLOC_DEBUG
138     volatile size_t curTotalSize = totalMemSize; // to read global value once
139     MALLOC_ASSERT(curTotalSize-size < curTotalSize, "Negative allocation size.");
140 #endif
141     totalMemSize.fetch_sub(size);
142     if (extMemPool->userPool()) {
143         MALLOC_ASSERT(!extMemPool->fixedPool, "No free for fixed-size pools.");
144         fail = (*extMemPool->rawFree)(extMemPool->poolId, object, size);
145     } else {
146         usedAddrRange.registerFree((uintptr_t)object, (uintptr_t)object + size);
147         fail = freeRawMemory(object, size);
148     }
149     // TODO: use result in all freeRawMem() callers
150     return !fail;
151 }
152 
153 /********* End memory acquisition code ********************************/
154 
155 // Protected object size. A successful lock attempt returns the size of the locked block;
156 // releasing the lock requires storing the block size back.
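// The size and the lock state share one atomic word: values up to MAX_SPEC_VAL are
// reserved for states (LOCKED, COAL_BLOCK, LAST_REGION_BLOCK); anything larger is a real
// block size. tryLock() swaps a lock state in via CAS and returns the value it saw, so a
// caller either acquires the lock and learns the size in one step, or observes that the
// block is already locked.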
157 class GuardedSize : tbb::detail::no_copy {
158     std::atomic<uintptr_t> value;
159 public:
160     enum State {
161         LOCKED,
162         COAL_BLOCK,        // block is coalescing now
163         MAX_LOCKED_VAL = COAL_BLOCK,
164         LAST_REGION_BLOCK, // used to mark last block in region
165         // values after this are "normal" block sizes
166         MAX_SPEC_VAL = LAST_REGION_BLOCK
167     };
168 
169     void initLocked() { value.store(LOCKED, std::memory_order_release); } // TBB_REVAMP_TODO: was relaxed
170     void makeCoalscing() {
171         MALLOC_ASSERT(value.load(std::memory_order_relaxed) == LOCKED, ASSERT_TEXT);
172         value.store(COAL_BLOCK, std::memory_order_release); // TBB_REVAMP_TODO: was relaxed
173     }
174     size_t tryLock(State state) {
175         MALLOC_ASSERT(state <= MAX_LOCKED_VAL, ASSERT_TEXT);
176         size_t sz = value.load(std::memory_order_acquire);
177         for (;;) {
178             if (sz <= MAX_LOCKED_VAL) {
179                 break;
180             }
181             if (value.compare_exchange_strong(sz, state)) {
182                 break;
183             }
184         }
185         return sz;
186     }
187     void unlock(size_t size) {
188         MALLOC_ASSERT(value.load(std::memory_order_relaxed) <= MAX_LOCKED_VAL, "The lock is not locked");
189         MALLOC_ASSERT(size > MAX_LOCKED_VAL, ASSERT_TEXT);
190         value.store(size, std::memory_order_release);
191     }
192     bool isLastRegionBlock() const { return value.load(std::memory_order_relaxed) == LAST_REGION_BLOCK; }
193     friend void Backend::IndexedBins::verify();
194 };
195 
196 struct MemRegion {
197     MemRegion *next,      // all regions of a pool are kept in a doubly linked list
198               *prev;      // so that they can all be released when the pool is destroyed,
199                           // or removed and released individually.
200     size_t     allocSz,   // got from pool callback
201                blockSz;   // initial and maximal inner block size
202     MemRegionType type;
203 };
204 
205 // this data must remain unmodified while the block is in use, so it is kept separate
206 class BlockMutexes {
207 protected:
208     GuardedSize myL,   // lock for me
209                 leftL; // lock for left neighbor
210 };
211 
212 class FreeBlock : BlockMutexes {
213 public:
214     static const size_t minBlockSize;
215     friend void Backend::IndexedBins::verify();
216 
217     FreeBlock    *prev,       // in 2-linked list related to bin
218                  *next,
219                  *nextToFree; // used to form a queue during coalescing
220     // valid only while the block is being processed, i.e. it is neither free nor
221     size_t        sizeTmp;    // in use outside of the backend
222     int           myBin;      // bin that is owner of the block
223     bool          slabAligned;
224     bool          blockInBin; // this block in myBin already
225 
226     FreeBlock *rightNeig(size_t sz) const {
227         MALLOC_ASSERT(sz, ASSERT_TEXT);
228         return (FreeBlock*)((uintptr_t)this+sz);
229     }
230     FreeBlock *leftNeig(size_t sz) const {
231         MALLOC_ASSERT(sz, ASSERT_TEXT);
232         return (FreeBlock*)((uintptr_t)this - sz);
233     }
234 
235     void initHeader() { myL.initLocked(); leftL.initLocked(); }
236     void setMeFree(size_t size) { myL.unlock(size); }
237     size_t trySetMeUsed(GuardedSize::State s) { return myL.tryLock(s); }
238     bool isLastRegionBlock() const { return myL.isLastRegionBlock(); }
239 
240     void setLeftFree(size_t sz) { leftL.unlock(sz); }
241     size_t trySetLeftUsed(GuardedSize::State s) { return leftL.tryLock(s); }
242 
243     size_t tryLockBlock() {
244         size_t rSz, sz = trySetMeUsed(GuardedSize::LOCKED);
245 
246         if (sz <= GuardedSize::MAX_LOCKED_VAL)
247             return false;
248         rSz = rightNeig(sz)->trySetLeftUsed(GuardedSize::LOCKED);
249         if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
250             setMeFree(sz);
251             return false;
252         }
253         MALLOC_ASSERT(rSz == sz, ASSERT_TEXT);
254         return sz;
255     }
256     void markCoalescing(size_t blockSz) {
257         myL.makeCoalscing();
258         rightNeig(blockSz)->leftL.makeCoalscing();
259         sizeTmp = blockSz;
260         nextToFree = nullptr;
261     }
262     void markUsed() {
263         myL.initLocked();
264         rightNeig(sizeTmp)->leftL.initLocked();
265         nextToFree = nullptr;
266     }
267     static void markBlocks(FreeBlock *fBlock, int num, size_t size) {
268         for (int i=1; i<num; i++) {
269             fBlock = (FreeBlock*)((uintptr_t)fBlock + size);
270             fBlock->initHeader();
271         }
272     }
273 };
274 
275 // Last block in any region. Its "size" field is GuardedSize::LAST_REGION_BLOCK.
276 // This kind of block is used to find the region header
277 // and makes it possible to return the region back to the OS.
278 struct LastFreeBlock : public FreeBlock {
279     MemRegion *memRegion;
280 };
281 
282 const size_t FreeBlock::minBlockSize = sizeof(FreeBlock);
283 
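// waitTillBlockReleased() spins with backoff until some in-flight block returns to the
// bins, the delayed coalescing queue shrinks (or can be drained by this thread), or
// nothing is in flight at all; it returns true if the bins may have been refilled since
// startModifiedCnt was sampled.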
284 inline bool BackendSync::waitTillBlockReleased(intptr_t startModifiedCnt)
285 {
286     AtomicBackoff backoff;
287 #if __TBB_MALLOC_BACKEND_STAT
288     class ITT_Guard {
289         void *ptr;
290     public:
291         ITT_Guard(void *p) : ptr(p) {
292             MALLOC_ITT_SYNC_PREPARE(ptr);
293         }
294         ~ITT_Guard() {
295             MALLOC_ITT_SYNC_ACQUIRED(ptr);
296         }
297     };
298     ITT_Guard ittGuard(&inFlyBlocks);
299 #endif
300     intptr_t myBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire);
301     intptr_t myCoalescQInFlyBlocks = backend->blocksInCoalescing();
302     while (true) {
303         MALLOC_ASSERT(myBinsInFlyBlocks>=0 && myCoalescQInFlyBlocks>=0, nullptr);
304 
305         intptr_t currBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire);
306         intptr_t currCoalescQInFlyBlocks = backend->blocksInCoalescing();
307         WhiteboxTestingYield();
308         // Stop waiting iff:
309 
310         // 1) blocks were removed from processing, not added
311         if (myBinsInFlyBlocks > currBinsInFlyBlocks
312         // 2) blocks were released from the delayed coalescing queue
313             || myCoalescQInFlyBlocks > currCoalescQInFlyBlocks)
314             break;
315         // 3) if there are blocks being coalesced and no progress in their processing,
316         // try to scan the coalescing queue ourselves and stop waiting if that made changes
317         // (if there are no changes and in-fly blocks exist, we continue
318         //  waiting so as not to increase the load on coalescQ)
319         if (currCoalescQInFlyBlocks > 0 && backend->scanCoalescQ(/*forceCoalescQDrop=*/false))
320             break;
321         // 4) when there are no blocks
322         if (!currBinsInFlyBlocks && !currCoalescQInFlyBlocks) {
322             // re-scan makes sense only if the bins were modified since the last scan
324             auto pool = backend->extMemPool;
325             if (pool->hardCachesCleanupInProgress.load(std::memory_order_acquire) ||
326                 pool->softCachesCleanupInProgress.load(std::memory_order_acquire)) {
327                 backoff.pause();
328                 continue;
329             }
330 
331             return startModifiedCnt != getNumOfMods();
332         }
333         myBinsInFlyBlocks = currBinsInFlyBlocks;
334         myCoalescQInFlyBlocks = currCoalescQInFlyBlocks;
335         backoff.pause();
336     }
337     return true;
338 }
339 
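// CoalRequestQ is a lock-free LIFO of blocks whose coalescing had to be postponed
// because a neighbor was busy: putBlock() pushes with a CAS loop and bumps inFlyBlocks,
// getAll() detaches the whole list at once, and blockWasProcessed() drops the in-fly
// count when a block finally leaves the queue.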
340 void CoalRequestQ::putBlock(FreeBlock *fBlock)
341 {
342     MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, ASSERT_TEXT);
343     fBlock->markUsed();
344     // the block is in the queue, do not forget that it's here
345     inFlyBlocks++;
346 
347     FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);
348     for (;;) {
349         fBlock->nextToFree = myBlToFree;
350         if (blocksToFree.compare_exchange_strong(myBlToFree, fBlock)) {
351             return;
352         }
353     }
354 }
355 
356 FreeBlock *CoalRequestQ::getAll()
357 {
358     for (;;) {
359         FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);
360 
361         if (!myBlToFree) {
362             return nullptr;
363         } else {
364             if (blocksToFree.compare_exchange_strong(myBlToFree, nullptr)) {
365                 return myBlToFree;
366             } else {
367                 continue;
368             }
369         }
370     }
371 }
372 
373 inline void CoalRequestQ::blockWasProcessed()
374 {
375     bkndSync->binsModified();
376     int prev = inFlyBlocks.fetch_sub(1);
377     tbb::detail::suppress_unused_warning(prev);
378     MALLOC_ASSERT(prev > 0, ASSERT_TEXT);
379 }
380 
381 // Try to get a block from a bin.
382 // If the remaining free space would stay in the same bin,
383 //     split the block without removing it.
384 // If the free space should go to other bin(s), remove the block.
385 // alignedBin is true, if all blocks in the bin have slab-aligned right side.
386 FreeBlock *Backend::IndexedBins::getFromBin(int binIdx, BackendSync *sync, size_t size,
387         bool needAlignedRes, bool alignedBin,  bool wait, int *binLocked)
388 {
389     Bin *b = &freeBins[binIdx];
390 try_next:
391     FreeBlock *fBlock = nullptr;
392     if (!b->empty()) {
393         bool locked = false;
394         MallocMutex::scoped_lock scopedLock(b->tLock, wait, &locked);
395 
396         if (!locked) {
397             if (binLocked) (*binLocked)++;
398             return nullptr;
399         }
400 
401         for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; curr = curr->next) {
402             size_t szBlock = curr->tryLockBlock();
403             if (!szBlock) {
404                 // the block is locked, re-acquire the bin lock, as there is no place to spin
405                 // while the block is being coalesced
406                 goto try_next;
407             }
408 
409             // GENERAL CASE
410             if (alignedBin || !needAlignedRes) {
411                 size_t splitSz = szBlock - size;
412                 // If we get a block as a split result, it must have room for the control structures.
413                 if (szBlock >= size && (splitSz >= FreeBlock::minBlockSize || !splitSz))
414                     fBlock = curr;
415             } else {
416                 // SPECIAL CASE: to get an aligned block from an unaligned bin we have to cut the middle of a block
417                 // and return the remaining left and right parts. Possible only in the fixed pool scenario; the assert
418                 // for this is inside the splitBlock() function.
419 
420                 void *newB = alignUp(curr, slabSize);
421                 uintptr_t rightNew = (uintptr_t)newB + size;
422                 uintptr_t rightCurr = (uintptr_t)curr + szBlock;
423                 // Check if the block size is sufficient,
424                 // and also left and right split results are either big enough or non-existent
425                 if (rightNew <= rightCurr
426                         && (newB == curr || ((uintptr_t)newB - (uintptr_t)curr) >= FreeBlock::minBlockSize)
427                         && (rightNew == rightCurr || (rightCurr - rightNew) >= FreeBlock::minBlockSize))
428                     fBlock = curr;
429             }
430 
431             if (fBlock) {
432                 // consume must be called before result of removing from a bin is visible externally.
433                 sync->blockConsumed();
434                 // TODO: think about cases when block stays in the same bin
435                 b->removeBlock(fBlock);
436                 if (freeBins[binIdx].empty())
437                     bitMask.set(binIdx, false);
438                 fBlock->sizeTmp = szBlock;
439                 break;
440             } else { // block size is not valid, search for next block in the bin
441                 curr->setMeFree(szBlock);
442                 curr->rightNeig(szBlock)->setLeftFree(szBlock);
443             }
444         }
445     }
446     return fBlock;
447 }
448 
449 bool Backend::IndexedBins::tryReleaseRegions(int binIdx, Backend *backend)
450 {
451     Bin *b = &freeBins[binIdx];
452     FreeBlock *fBlockList = nullptr;
453 
454     // take all blocks from the bin and re-do coalescing on them
455     // to release single-block regions
456 try_next:
457     if (!b->empty()) {
458         MallocMutex::scoped_lock binLock(b->tLock);
459         for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; ) {
460             size_t szBlock = curr->tryLockBlock();
461             if (!szBlock)
462                 goto try_next;
463 
464             FreeBlock *next = curr->next;
465 
466             b->removeBlock(curr);
467             curr->sizeTmp = szBlock;
468             curr->nextToFree = fBlockList;
469             fBlockList = curr;
470             curr = next;
471         }
472     }
473     return backend->coalescAndPutList(fBlockList, /*forceCoalescQDrop=*/true,
474                                       /*reportBlocksProcessed=*/false);
475 }
476 
477 void Backend::Bin::removeBlock(FreeBlock *fBlock)
478 {
479     MALLOC_ASSERT(fBlock->next||fBlock->prev||fBlock== head.load(std::memory_order_relaxed),
480                   "Detected that a block is not in the bin.");
481     if (head.load(std::memory_order_relaxed) == fBlock)
482         head.store(fBlock->next, std::memory_order_relaxed);
483     if (tail == fBlock)
484         tail = fBlock->prev;
485     if (fBlock->prev)
486         fBlock->prev->next = fBlock->next;
487     if (fBlock->next)
488         fBlock->next->prev = fBlock->prev;
489 }
490 
491 void Backend::IndexedBins::addBlock(int binIdx, FreeBlock *fBlock, size_t /* blockSz */, bool addToTail)
492 {
493     Bin *b = &freeBins[binIdx];
494     fBlock->myBin = binIdx;
495     fBlock->next = fBlock->prev = nullptr;
496     {
497         MallocMutex::scoped_lock scopedLock(b->tLock);
498         if (addToTail) {
499             fBlock->prev = b->tail;
500             b->tail = fBlock;
501             if (fBlock->prev)
502                 fBlock->prev->next = fBlock;
503             if (!b->head.load(std::memory_order_relaxed))
504                 b->head.store(fBlock, std::memory_order_relaxed);
505         } else {
506             fBlock->next = b->head.load(std::memory_order_relaxed);
507             b->head.store(fBlock, std::memory_order_relaxed);
508             if (fBlock->next)
509                 fBlock->next->prev = fBlock;
510             if (!b->tail)
511                 b->tail = fBlock;
512         }
513     }
514     bitMask.set(binIdx, true);
515 }
516 
517 bool Backend::IndexedBins::tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail)
518 {
519     bool locked = false;
520     Bin *b = &freeBins[binIdx];
521     fBlock->myBin = binIdx;
522     if (addToTail) {
523         fBlock->next = nullptr;
524         {
525             MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
526             if (!locked)
527                 return false;
528             fBlock->prev = b->tail;
529             b->tail = fBlock;
530             if (fBlock->prev)
531                 fBlock->prev->next = fBlock;
532             if (!b->head.load(std::memory_order_relaxed))
533                 b->head.store(fBlock, std::memory_order_relaxed);
534         }
535     } else {
536         fBlock->prev = nullptr;
537         {
538             MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
539             if (!locked)
540                 return false;
541             fBlock->next = b->head.load(std::memory_order_relaxed);
542             b->head.store(fBlock, std::memory_order_relaxed);
543             if (fBlock->next)
544                 fBlock->next->prev = fBlock;
545             if (!b->tail)
546                 b->tail = fBlock;
547         }
548     }
549     bitMask.set(binIdx, true);
550     return true;
551 }
552 
553 void Backend::IndexedBins::reset()
554 {
555     for (unsigned i=0; i<Backend::freeBinsNum; i++)
556         freeBins[i].reset();
557     bitMask.reset();
558 }
559 
560 void Backend::IndexedBins::lockRemoveBlock(int binIdx, FreeBlock *fBlock)
561 {
562     MallocMutex::scoped_lock scopedLock(freeBins[binIdx].tLock);
563     freeBins[binIdx].removeBlock(fBlock);
564     if (freeBins[binIdx].empty())
565         bitMask.set(binIdx, false);
566 }
567 
568 bool ExtMemoryPool::regionsAreReleaseable() const
569 {
570     return !keepAllMemory && !delayRegsReleasing;
571 }
572 
573 FreeBlock *Backend::splitBlock(FreeBlock *fBlock, int num, size_t size, bool blockIsAligned, bool needAlignedBlock)
574 {
575     const size_t totalSize = num * size;
576 
577     // SPECIAL CASE: for an unaligned block we have to cut the middle of the block
578     // and return the remaining left and right parts. Possible only in a fixed pool scenario.
579     if (needAlignedBlock && !blockIsAligned) {
580         MALLOC_ASSERT(extMemPool->fixedPool,
581                 "Aligned block request from unaligned bin possible only in fixed pool scenario.");
582 
583         // Space to use is in the middle
584         FreeBlock *newBlock = alignUp(fBlock, slabSize);
585         FreeBlock *rightPart = (FreeBlock*)((uintptr_t)newBlock + totalSize);
586         uintptr_t fBlockEnd = (uintptr_t)fBlock + fBlock->sizeTmp;
587 
588         // Return free right part
589         if ((uintptr_t)rightPart != fBlockEnd) {
590             rightPart->initHeader();  // to prevent coalescing rightPart with fBlock
591             size_t rightSize = fBlockEnd - (uintptr_t)rightPart;
592             coalescAndPut(rightPart, rightSize, toAlignedBin(rightPart, rightSize));
593         }
594         // And free left part
595         if (newBlock != fBlock) {
596             newBlock->initHeader(); // to prevent coalescing fBlock with newB
597             size_t leftSize = (uintptr_t)newBlock - (uintptr_t)fBlock;
598             coalescAndPut(fBlock, leftSize, toAlignedBin(fBlock, leftSize));
599         }
600         fBlock = newBlock;
601     } else if (size_t splitSize = fBlock->sizeTmp - totalSize) { // need to split the block
602         // GENERAL CASE, cut the left or right part of the block
603         FreeBlock *splitBlock = nullptr;
604         if (needAlignedBlock) {
605             // For slab-aligned blocks, cut the right side of the block
606             // and return it to the requester; the original block goes back to the backend
607             splitBlock = fBlock;
608             fBlock = (FreeBlock*)((uintptr_t)splitBlock + splitSize);
609             fBlock->initHeader();
610         } else {
611             // For large object blocks, cut the original block and put the free right part back to the backend
612             splitBlock = (FreeBlock*)((uintptr_t)fBlock + totalSize);
613             splitBlock->initHeader();
614         }
615         // Mark the free block the same as its parent only when the requested type (needAlignedBlock)
616         // and the block returned from the bins/OS (blockIsAligned) are equal (the XOR detects a mismatch)
617         bool markAligned = (blockIsAligned ^ needAlignedBlock) ? toAlignedBin(splitBlock, splitSize) : blockIsAligned;
618         coalescAndPut(splitBlock, splitSize, markAligned);
619     }
620     MALLOC_ASSERT(!needAlignedBlock || isAligned(fBlock, slabSize), "Expect to get aligned block, if one was requested.");
621     FreeBlock::markBlocks(fBlock, num, size);
622     return fBlock;
623 }
624 
625 size_t Backend::getMaxBinnedSize() const
626 {
627     return hugePages.isEnabled && !inUserPool() ?
628         maxBinned_HugePage : maxBinned_SmallPage;
629 }
630 
631 inline bool Backend::MaxRequestComparator::operator()(size_t oldMaxReq, size_t requestSize) const
632 {
633     return requestSize > oldMaxReq && requestSize < backend->getMaxBinnedSize();
634 }
635 
636 // last chance to get memory
637 FreeBlock *Backend::releaseMemInCaches(intptr_t startModifiedCnt,
638                                     int *lockedBinsThreshold, int numOfLockedBins)
639 {
640     // something released from caches
641     if (extMemPool->hardCachesCleanup(false))
642         return (FreeBlock*)VALID_BLOCK_IN_BIN;
643 
644     if (bkndSync.waitTillBlockReleased(startModifiedCnt))
645         return (FreeBlock*)VALID_BLOCK_IN_BIN;
646 
647     // OS can't give us more memory, but we have some in locked bins
648     if (*lockedBinsThreshold && numOfLockedBins) {
649         *lockedBinsThreshold = 0;
650         return (FreeBlock*)VALID_BLOCK_IN_BIN;
651     }
652     return nullptr; // nothing found, give up
653 }
654 
655 FreeBlock *Backend::askMemFromOS(size_t blockSize, intptr_t startModifiedCnt,
656                                  int *lockedBinsThreshold, int numOfLockedBins,
657                                  bool *splittableRet, bool needSlabRegion)
658 {
659     FreeBlock *block;
660     // The block sizes can be divided into 3 groups:
661     //   1. "quite small": a popular object size, or we are in bootstrap or something
662     //      similar; request several regions.
663     //   2. "quite large": we want several such blocks in the region,
664     //      but do not want several pre-allocated regions.
665     //   3. "huge": exact fit, we allocate only one block and do not allow
666     //       any other allocations to be placed in the region.
667     // Dividing the block sizes into these groups, we try to balance between
668     // too-small regions (which lead to fragmentation) and too-large ones (which
669     // lead to excessive address space consumption). If a region would be "too
670     // large", allocate only one block in it, to prevent fragmentation. This supposedly
671     // doesn't hurt performance, because the object requested by the user is large.
672     // Bounds for the groups are:
673     const size_t maxBinned = getMaxBinnedSize();
674     const size_t quiteSmall = maxBinned / 8;
675     const size_t quiteLarge = maxBinned;
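    // E.g., with maxBinned == M: blockSize >= M is "huge" (an exact-fit, non-splittable
    // region); M/8 <= blockSize < M is "quite large" (one multi-block region);
    // blockSize < M/8 is "quite small" (several "advance" regions are added).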
676 
677     if (blockSize >= quiteLarge) {
678         // Do not interact with other threads via semaphores, as for exact fit
679         // we can't share regions with them; each memory request is individual.
680         block = addNewRegion(blockSize, MEMREG_ONE_BLOCK, /*addToBin=*/false);
681         if (!block)
682             return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
683         *splittableRet = false;
684     } else {
685         const size_t regSz_sizeBased = alignUp(4*maxRequestedSize, 1024*1024);
686         // Another thread is modifying the backend while we can't get the block.
687         // Wait until it finishes and re-do the scan
688         // before trying other ways to extend the backend.
689         if (bkndSync.waitTillBlockReleased(startModifiedCnt)
690             // the semaphore protects adding more memory from the OS
691             || memExtendingSema.wait())
692             return (FreeBlock*)VALID_BLOCK_IN_BIN;
693 
694         if (startModifiedCnt != bkndSync.getNumOfMods()) {
695             memExtendingSema.signal();
696             return (FreeBlock*)VALID_BLOCK_IN_BIN;
697         }
698 
699         if (blockSize < quiteSmall) {
700             // For this size of blocks, add NUM_OF_REG "advance" regions in bin,
701             // and return one as a result.
702             // TODO: add to bin first, because other threads can use them right away.
703             // This must be done carefully, because blocks in bins can be released
704             // in releaseCachesToLimit().
705             const unsigned NUM_OF_REG = 3;
706             MemRegionType regType = needSlabRegion ? MEMREG_SLAB_BLOCKS : MEMREG_LARGE_BLOCKS;
707             block = addNewRegion(regSz_sizeBased, regType, /*addToBin=*/false);
708             if (block)
709                 for (unsigned idx=0; idx<NUM_OF_REG; idx++)
710                     if (! addNewRegion(regSz_sizeBased, regType, /*addToBin=*/true))
711                         break;
712         } else {
713             block = addNewRegion(regSz_sizeBased, MEMREG_LARGE_BLOCKS, /*addToBin=*/false);
714         }
715         memExtendingSema.signal();
716 
717         // no regions found, try to clean cache
718         if (!block || block == (FreeBlock*)VALID_BLOCK_IN_BIN)
719             return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
720         // Since a region can hold more than one block it can be split.
721         *splittableRet = true;
722     }
723     // after asking the OS for memory, release caches if we are above the memory limits
724     releaseCachesToLimit();
725 
726     return block;
727 }
728 
729 void Backend::releaseCachesToLimit()
730 {
731     if (!memSoftLimit.load(std::memory_order_relaxed)
732             || totalMemSize.load(std::memory_order_relaxed) <= memSoftLimit.load(std::memory_order_relaxed)) {
733         return;
734     }
735     size_t locTotalMemSize, locMemSoftLimit;
736 
737     scanCoalescQ(/*forceCoalescQDrop=*/false);
738     if (extMemPool->softCachesCleanup() &&
739         (locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
740         (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
741         return;
742     // clean the global large-object cache; if this is not enough, clean local caches.
743     // Do this in several tries, because backend fragmentation can prevent
744     // a region from being released
745     for (int cleanLocal = 0; cleanLocal<2; cleanLocal++)
746         while (cleanLocal ?
747                  extMemPool->allLocalCaches.cleanup(/*cleanOnlyUnused=*/true) :
748                  extMemPool->loc.decreasingCleanup())
749             if ((locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
750                 (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
751                 return;
752     // last chance to match memSoftLimit
753     extMemPool->hardCachesCleanup(true);
754 }
755 
756 int Backend::IndexedBins::getMinNonemptyBin(unsigned startBin) const
757 {
758     int p = bitMask.getMinTrue(startBin);
759     return p == -1 ? Backend::freeBinsNum : p;
760 }
761 
762 FreeBlock *Backend::IndexedBins::findBlock(int nativeBin, BackendSync *sync, size_t size,
763         bool needAlignedBlock, bool alignedBin, int *numOfLockedBins)
764 {
765     for (int i=getMinNonemptyBin(nativeBin); i<(int)freeBinsNum; i=getMinNonemptyBin(i+1))
766         if (FreeBlock *block = getFromBin(i, sync, size, needAlignedBlock, alignedBin, /*wait=*/false, numOfLockedBins))
767             return block;
768 
769     return nullptr;
770 }
771 
772 void Backend::requestBootstrapMem()
773 {
774     if (bootsrapMemDone == bootsrapMemStatus.load(std::memory_order_acquire))
775         return;
776     MallocMutex::scoped_lock lock( bootsrapMemStatusMutex );
777     if (bootsrapMemDone == bootsrapMemStatus)
778         return;
779     MALLOC_ASSERT(bootsrapMemNotDone == bootsrapMemStatus, ASSERT_TEXT);
780     bootsrapMemStatus = bootsrapMemInitializing;
781     // request a rather big region in advance during bootstrap;
782     // OK to get nullptr here, as we later re-do the request with a more modest size
783     addNewRegion(2*1024*1024, MEMREG_SLAB_BLOCKS, /*addToBin=*/true);
784     bootsrapMemStatus = bootsrapMemDone;
785 }
786 
787 // try to allocate a block of num*size bytes from the available bins
788 // needAlignedBlock is true if the result must be slab-aligned
789 FreeBlock *Backend::genericGetBlock(int num, size_t size, bool needAlignedBlock)
790 {
791     FreeBlock *block = nullptr;
792     const size_t totalReqSize = num*size;
793     // no splitting after requesting a new region, ask for the exact size
794     const int nativeBin = sizeToBin(totalReqSize);
795 
796     requestBootstrapMem();
797     // If we found 2 or fewer locked bins, it's time to ask the OS for more memory.
798     // But nothing can be asked from a fixed pool. And for quite large blocks
799     // we prefer to wait rather than to ask for more memory.
800     int lockedBinsThreshold = extMemPool->fixedPool || size>=maxBinned_SmallPage? 0 : 2;
801 
802     // Find maximal requested size limited by getMaxBinnedSize()
803     AtomicUpdate(maxRequestedSize, totalReqSize, MaxRequestComparator(this));
804     scanCoalescQ(/*forceCoalescQDrop=*/false);
805 
806     bool splittable = true;
807     for (;;) {
808         const intptr_t startModifiedCnt = bkndSync.getNumOfMods();
809         int numOfLockedBins;
810         intptr_t cleanCnt;
811         do {
812             cleanCnt = backendCleanCnt.load(std::memory_order_acquire);
813             numOfLockedBins = 0;
814             if (needAlignedBlock) {
815                 block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
816                                                         /*alignedBin=*/true, &numOfLockedBins);
817                 if (!block && extMemPool->fixedPool)
818                     block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
819                                                         /*alignedBin=*/false, &numOfLockedBins);
820             } else {
821                 block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
822                                                         /*alignedBin=*/false, &numOfLockedBins);
823                 if (!block && extMemPool->fixedPool)
824                     block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
825                                                         /*alignedBin=*/true, &numOfLockedBins);
826             }
827         } while (!block && (numOfLockedBins>lockedBinsThreshold || cleanCnt % 2 == 1 ||
828                             cleanCnt != backendCleanCnt.load(std::memory_order_acquire)));
829 
830         if (block)
831             break;
832 
833         bool retScanCoalescQ = scanCoalescQ(/*forceCoalescQDrop=*/true);
834         bool retSoftCachesCleanup = extMemPool->softCachesCleanup();
835         if (!(retScanCoalescQ || retSoftCachesCleanup)) {
836             // the bins were not updated,
837             // so the only remaining possibility is to ask for more memory
838             block = askMemFromOS(totalReqSize, startModifiedCnt, &lockedBinsThreshold,
839                         numOfLockedBins, &splittable, needAlignedBlock);
840             if (!block)
841                 return nullptr;
842             if (block != (FreeBlock*)VALID_BLOCK_IN_BIN) {
843                 // size can be increased in askMemFromOS, that's why >=
844                 MALLOC_ASSERT(block->sizeTmp >= size, ASSERT_TEXT);
845                 break;
846             }
847             // valid block somewhere in bins, let's find it
848             block = nullptr;
849         }
850     }
851     MALLOC_ASSERT(block, ASSERT_TEXT);
852     if (splittable) {
853         // At this point we have to be sure that slabAligned attribute describes the right block state
854         block = splitBlock(block, num, size, block->slabAligned, needAlignedBlock);
855     }
856     // matched blockConsumed() from startUseBlock()
857     bkndSync.blockReleased();
858 
859     return block;
860 }
861 
862 LargeMemoryBlock *Backend::getLargeBlock(size_t size)
863 {
864     LargeMemoryBlock *lmb =
865         (LargeMemoryBlock*)genericGetBlock(1, size, /*needAlignedRes=*/false);
866     if (lmb) {
867         lmb->unalignedSize = size;
868         if (extMemPool->userPool())
869             extMemPool->lmbList.add(lmb);
870     }
871     return lmb;
872 }
873 
874 BlockI *Backend::getSlabBlock(int num) {
875     BlockI *b = (BlockI*)genericGetBlock(num, slabSize, /*slabAligned=*/true);
876     MALLOC_ASSERT(isAligned(b, slabSize), ASSERT_TEXT);
877     return b;
878 }
879 
880 void Backend::putSlabBlock(BlockI *block) {
881     genericPutBlock((FreeBlock *)block, slabSize, /*slabAligned=*/true);
882 }
883 
884 void *Backend::getBackRefSpace(size_t size, bool *rawMemUsed)
885 {
886     // This block is released only at shutdown, so when it is received from the
887     // backend it can prevent an entire region from being released;
888     // therefore prefer using getRawMemory.
889     if (void *ret = getRawMemory(size, REGULAR)) {
890         *rawMemUsed = true;
891         return ret;
892     }
893     void *ret = genericGetBlock(1, size, /*needAlignedRes=*/false);
894     if (ret) *rawMemUsed = false;
895     return ret;
896 }
897 
898 void Backend::putBackRefSpace(void *b, size_t size, bool rawMemUsed)
899 {
900     if (rawMemUsed)
901         freeRawMemory(b, size);
902     // ignore non-raw memory, as it is released when its region is released
903 }
904 
905 void Backend::removeBlockFromBin(FreeBlock *fBlock)
906 {
907     if (fBlock->myBin != Backend::NO_BIN) {
908         if (fBlock->slabAligned)
909             freeSlabAlignedBins.lockRemoveBlock(fBlock->myBin, fBlock);
910         else
911             freeLargeBlockBins.lockRemoveBlock(fBlock->myBin, fBlock);
912     }
913 }
914 
915 void Backend::genericPutBlock(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
916 {
917     bkndSync.blockConsumed();
918     coalescAndPut(fBlock, blockSz, slabAligned);
919     bkndSync.blockReleased();
920 }
921 
922 void AllLargeBlocksList::add(LargeMemoryBlock *lmb)
923 {
924     MallocMutex::scoped_lock scoped_cs(largeObjLock);
925     lmb->gPrev = nullptr;
926     lmb->gNext = loHead;
927     if (lmb->gNext)
928         lmb->gNext->gPrev = lmb;
929     loHead = lmb;
930 }
931 
932 void AllLargeBlocksList::remove(LargeMemoryBlock *lmb)
933 {
934     MallocMutex::scoped_lock scoped_cs(largeObjLock);
935     if (loHead == lmb)
936         loHead = lmb->gNext;
937     if (lmb->gNext)
938         lmb->gNext->gPrev = lmb->gPrev;
939     if (lmb->gPrev)
940         lmb->gPrev->gNext = lmb->gNext;
941 }
942 
943 void Backend::putLargeBlock(LargeMemoryBlock *lmb)
944 {
945     if (extMemPool->userPool())
946         extMemPool->lmbList.remove(lmb);
947     genericPutBlock((FreeBlock *)lmb, lmb->unalignedSize, false);
948 }
949 
950 void Backend::returnLargeObject(LargeMemoryBlock *lmb)
951 {
952     removeBackRef(lmb->backRefIdx);
953     putLargeBlock(lmb);
954     STAT_increment(getThreadId(), ThreadCommonCounters, freeLargeObj);
955 }
956 
957 #if BACKEND_HAS_MREMAP
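// remap() grows (or moves) a large object together with its region via mremap().
// Only objects that are not in a user pool, are too big to live in bins, and are the
// sole block of their region qualify. The region is unlinked and its address range
// unregistered before mremap() to avoid racing with registerAlloc() on another thread;
// afterwards it is re-linked, re-registered, and the LargeObjectHdr plus its back
// reference are rebuilt at the same offset from the region start.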
958 void *Backend::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
959 {
960     // no remap for user pools or for objects small enough to live in bins
961     if (inUserPool() || min(oldSize, newSize)<maxBinned_SmallPage
962         // during remap, we can't guarantee alignment stricter than the current one or
963         // stricter than page alignment
964         || !isAligned(ptr, alignment) || alignment>extMemPool->granularity)
965         return nullptr;
966     const LargeMemoryBlock* lmbOld = ((LargeObjectHdr *)ptr - 1)->memoryBlock;
967     const size_t oldUnalignedSize = lmbOld->unalignedSize;
968     FreeBlock *oldFBlock = (FreeBlock *)lmbOld;
969     FreeBlock *right = oldFBlock->rightNeig(oldUnalignedSize);
970     // in every region only one block can have LAST_REGION_BLOCK on right,
971     // so no synchronization is needed
972     if (!right->isLastRegionBlock())
973         return nullptr;
974 
975     MemRegion *oldRegion = static_cast<LastFreeBlock*>(right)->memRegion;
976     MALLOC_ASSERT( oldRegion < ptr, ASSERT_TEXT );
977     const size_t oldRegionSize = oldRegion->allocSz;
978     if (oldRegion->type != MEMREG_ONE_BLOCK)
979         return nullptr;  // we are not single in the region
980     const size_t userOffset = (uintptr_t)ptr - (uintptr_t)oldRegion;
981     const size_t alignedSize = LargeObjectCache::alignToBin(newSize + userOffset);
982     const size_t requestSize =
983         alignUp(sizeof(MemRegion) + alignedSize + sizeof(LastFreeBlock), extMemPool->granularity);
984     if (requestSize < alignedSize) // is wrapped around?
985         return nullptr;
986     regionList.remove(oldRegion);
987 
988     // The deallocation should be registered in address range before mremap to
989     // prevent a race condition with allocation on another thread.
990     // (OS can reuse the memory and registerAlloc will be missed on another thread)
991     usedAddrRange.registerFree((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize);
992 
993     void *ret = mremap(oldRegion, oldRegion->allocSz, requestSize, MREMAP_MAYMOVE);
994     if (MAP_FAILED == ret) { // can't remap, revert and leave
995         regionList.add(oldRegion);
996         usedAddrRange.registerAlloc((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize);
997         return nullptr;
998     }
999     MemRegion *region = (MemRegion*)ret;
1000     MALLOC_ASSERT(region->type == MEMREG_ONE_BLOCK, ASSERT_TEXT);
1001     region->allocSz = requestSize;
1002     region->blockSz = alignedSize;
1003 
1004     FreeBlock *fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion),
1005                                              largeObjectAlignment);
1006 
1007     regionList.add(region);
1008     startUseBlock(region, fBlock, /*addToBin=*/false);
1009     MALLOC_ASSERT(fBlock->sizeTmp == region->blockSz, ASSERT_TEXT);
1010     // matched blockConsumed() in startUseBlock().
1011     // TODO: get rid of useless pair blockConsumed()/blockReleased()
1012     bkndSync.blockReleased();
1013 
1014     // object must start at same offset from region's start
1015     void *object = (void*)((uintptr_t)region + userOffset);
1016     MALLOC_ASSERT(isAligned(object, alignment), ASSERT_TEXT);
1017     LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
1018     setBackRef(header->backRefIdx, header);
1019 
1020     LargeMemoryBlock *lmb = (LargeMemoryBlock*)fBlock;
1021     lmb->unalignedSize = region->blockSz;
1022     lmb->objectSize = newSize;
1023     lmb->backRefIdx = header->backRefIdx;
1024     header->memoryBlock = lmb;
1025     MALLOC_ASSERT((uintptr_t)lmb + lmb->unalignedSize >=
1026                   (uintptr_t)object + lmb->objectSize, "An object must fit to the block.");
1027 
1028     usedAddrRange.registerAlloc((uintptr_t)region, (uintptr_t)region + requestSize);
1029     totalMemSize.fetch_add(region->allocSz - oldRegionSize);
1030 
1031     return object;
1032 }
1033 #endif /* BACKEND_HAS_MREMAP */
1034 
1035 void Backend::releaseRegion(MemRegion *memRegion)
1036 {
1037     regionList.remove(memRegion);
1038     freeRawMem(memRegion, memRegion->allocSz);
1039 }
1040 
1041 // coalesce fBlock with its left and right neighbors
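// The protocol: lock this block and its neighbors through their GuardedSize words.
// In-use neighbors are simply skipped; if a neighbor is itself being coalesced or cannot
// be locked consistently, roll back and park the block on coalescQ for a later retry.
// When coalescing reaches the LastFreeBlock sentinel, the owning MemRegion is reported
// via *mRegion so that the caller can release the region if it became entirely free.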
1042 FreeBlock *Backend::doCoalesc(FreeBlock *fBlock, MemRegion **mRegion)
1043 {
1044     FreeBlock *resBlock = fBlock;
1045     size_t resSize = fBlock->sizeTmp;
1046     MemRegion *memRegion = nullptr;
1047 
1048     fBlock->markCoalescing(resSize);
1049     resBlock->blockInBin = false;
1050 
1051     // coalescing with left neighbor
1052     size_t leftSz = fBlock->trySetLeftUsed(GuardedSize::COAL_BLOCK);
1053     if (leftSz != GuardedSize::LOCKED) {
1054         if (leftSz == GuardedSize::COAL_BLOCK) {
1055             coalescQ.putBlock(fBlock);
1056             return nullptr;
1057         } else {
1058             FreeBlock *left = fBlock->leftNeig(leftSz);
1059             size_t lSz = left->trySetMeUsed(GuardedSize::COAL_BLOCK);
1060             if (lSz <= GuardedSize::MAX_LOCKED_VAL) {
1061                 fBlock->setLeftFree(leftSz); // rollback
1062                 coalescQ.putBlock(fBlock);
1063                 return nullptr;
1064             } else {
1065                 MALLOC_ASSERT(lSz == leftSz, "Invalid header");
1066                 left->blockInBin = true;
1067                 resBlock = left;
1068                 resSize += leftSz;
1069                 resBlock->sizeTmp = resSize;
1070             }
1071         }
1072     }
1073     // coalescing with right neighbor
1074     FreeBlock *right = fBlock->rightNeig(fBlock->sizeTmp);
1075     size_t rightSz = right->trySetMeUsed(GuardedSize::COAL_BLOCK);
1076     if (rightSz != GuardedSize::LOCKED) {
1077         // LastFreeBlock is on the right side
1078         if (GuardedSize::LAST_REGION_BLOCK == rightSz) {
1079             right->setMeFree(GuardedSize::LAST_REGION_BLOCK);
1080             memRegion = static_cast<LastFreeBlock*>(right)->memRegion;
1081         } else if (GuardedSize::COAL_BLOCK == rightSz) {
1082             if (resBlock->blockInBin) {
1083                 resBlock->blockInBin = false;
1084                 removeBlockFromBin(resBlock);
1085             }
1086             coalescQ.putBlock(resBlock);
1087             return nullptr;
1088         } else {
1089             size_t rSz = right->rightNeig(rightSz)->
1090                 trySetLeftUsed(GuardedSize::COAL_BLOCK);
1091             if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
1092                 right->setMeFree(rightSz);  // rollback
1093                 if (resBlock->blockInBin) {
1094                     resBlock->blockInBin = false;
1095                     removeBlockFromBin(resBlock);
1096                 }
1097                 coalescQ.putBlock(resBlock);
1098                 return nullptr;
1099             } else {
1100                 MALLOC_ASSERT(rSz == rightSz, "Invalid header");
1101                 removeBlockFromBin(right);
1102                 resSize += rightSz;
1103 
1104                 // Is LastFreeBlock on the right side of right?
1105                 FreeBlock *nextRight = right->rightNeig(rightSz);
1106                 size_t nextRightSz = nextRight->
1107                     trySetMeUsed(GuardedSize::COAL_BLOCK);
1108                 if (nextRightSz > GuardedSize::MAX_LOCKED_VAL) {
1109                     if (nextRightSz == GuardedSize::LAST_REGION_BLOCK)
1110                         memRegion = static_cast<LastFreeBlock*>(nextRight)->memRegion;
1111 
1112                     nextRight->setMeFree(nextRightSz);
1113                 }
1114             }
1115         }
1116     }
1117     if (memRegion) {
1118         MALLOC_ASSERT((uintptr_t)memRegion + memRegion->allocSz >=
1119                       (uintptr_t)right + sizeof(LastFreeBlock), ASSERT_TEXT);
1120         MALLOC_ASSERT((uintptr_t)memRegion < (uintptr_t)resBlock, ASSERT_TEXT);
1121         *mRegion = memRegion;
1122     } else
1123         *mRegion = nullptr;
1124     resBlock->sizeTmp = resSize;
1125     return resBlock;
1126 }
1127 
1128 bool Backend::coalescAndPutList(FreeBlock *list, bool forceCoalescQDrop, bool reportBlocksProcessed)
1129 {
1130     bool regionReleased = false;
1131 
1132     for (FreeBlock *helper; list;
1133          list = helper,
1134              // matches block enqueue in CoalRequestQ::putBlock()
1135              reportBlocksProcessed? coalescQ.blockWasProcessed() : (void)0) {
1136         MemRegion *memRegion;
1137         bool addToTail = false;
1138 
1139         helper = list->nextToFree;
1140         FreeBlock *toRet = doCoalesc(list, &memRegion);
1141         if (!toRet)
1142             continue;
1143 
1144         if (memRegion && memRegion->blockSz == toRet->sizeTmp
1145             && !extMemPool->fixedPool) {
1146             if (extMemPool->regionsAreReleaseable()) {
1147             // release the region, because there are no used blocks in it
1148                 if (toRet->blockInBin)
1149                     removeBlockFromBin(toRet);
1150                 releaseRegion(memRegion);
1151                 regionReleased = true;
1152                 continue;
1153             } else // add block from empty region to end of bin,
1154                 addToTail = true; // preserving for exact fit
1155         }
1156         size_t currSz = toRet->sizeTmp;
1157         int bin = sizeToBin(currSz);
1158         bool toAligned = extMemPool->fixedPool ? toAlignedBin(toRet, currSz) : toRet->slabAligned;
1159         bool needAddToBin = true;
1160 
1161         if (toRet->blockInBin) {
1162             // Does it stay in same bin?
1163             if (toRet->myBin == bin && toRet->slabAligned == toAligned)
1164                 needAddToBin = false;
1165             else {
1166                 toRet->blockInBin = false;
1167                 removeBlockFromBin(toRet);
1168             }
1169         }
1170 
1171         // Does not stay in same bin, or bin-less; add it
1172         if (needAddToBin) {
1173             toRet->prev = toRet->next = toRet->nextToFree = nullptr;
1174             toRet->myBin = NO_BIN;
1175             toRet->slabAligned = toAligned;
1176 
1177             // If the block is too small to fit in any bin, keep it bin-less.
1178             // It's not a leak because the block later can be coalesced.
1179             if (currSz >= minBinnedSize) {
1180                 toRet->sizeTmp = currSz;
1181                 IndexedBins *target = toRet->slabAligned ? &freeSlabAlignedBins : &freeLargeBlockBins;
1182                 if (forceCoalescQDrop) {
1183                     target->addBlock(bin, toRet, toRet->sizeTmp, addToTail);
1184                 } else if (!target->tryAddBlock(bin, toRet, addToTail)) {
1185                     coalescQ.putBlock(toRet);
1186                     continue;
1187                 }
1188             }
1189             toRet->sizeTmp = 0;
1190         }
1191         // Release the (possibly coalesced) free block.
1192         // Adding to a bin must be done before this point,
1193         // because once a block is free it can be coalesced, and
1194         // using its pointer becomes unsafe.
1195         // Remember that coalescing is not done under any global lock.
1196         toRet->setMeFree(currSz);
1197         toRet->rightNeig(currSz)->setLeftFree(currSz);
1198     }
1199     return regionReleased;
1200 }
1201 
1202 // Coalesce fBlock and add it back to a bin;
1203 // processing delayed coalescing requests.
1204 void Backend::coalescAndPut(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
1205 {
1206     fBlock->sizeTmp = blockSz;
1207     fBlock->nextToFree = nullptr;
1208     fBlock->slabAligned = slabAligned;
1209 
1210     coalescAndPutList(fBlock, /*forceCoalescQDrop=*/false, /*reportBlocksProcessed=*/false);
1211 }
1212 
1213 bool Backend::scanCoalescQ(bool forceCoalescQDrop)
1214 {
1215     FreeBlock *currCoalescList = coalescQ.getAll();
1216 
1217     if (currCoalescList)
1218         // reportBlocksProcessed=true informs that the blocks leave coalescQ,
1219         // matches the block enqueue in CoalRequestQ::putBlock()
1220         coalescAndPutList(currCoalescList, forceCoalescQDrop,
1221                           /*reportBlocksProcessed=*/true);
1222     // returns status of coalescQ.getAll(), as an indication of possible changes in backend
1223     // TODO: coalescAndPutList() could report whether some new free blocks became available or not
1224     return currCoalescList;
1225 }
1226 
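// Layout of a region as created by addNewRegion():
//   [MemRegion header][alignment padding][first FreeBlock ... usable space ...][LastFreeBlock]
// For MEMREG_SLAB_BLOCKS the right bound of the usable space is slab-aligned so that
// aligned slab blocks can be carved out of it; other region types align the first block
// to largeObjectAlignment and use exactly the requested block size.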
1227 FreeBlock *Backend::findBlockInRegion(MemRegion *region, size_t exactBlockSize)
1228 {
1229     FreeBlock *fBlock;
1230     size_t blockSz;
1231     uintptr_t fBlockEnd,
1232         lastFreeBlock = (uintptr_t)region + region->allocSz - sizeof(LastFreeBlock);
1233 
1234     static_assert(sizeof(LastFreeBlock) % sizeof(uintptr_t) == 0,
1235         "Atomic applied on LastFreeBlock, and we put it at the end of region, that"
1236         " is uintptr_t-aligned, so no unaligned atomic operations are possible.");
1237      // right bound is slab-aligned, keep LastFreeBlock after it
1238     if (region->type == MEMREG_SLAB_BLOCKS) {
1239         fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), sizeof(uintptr_t));
1240         fBlockEnd = alignDown(lastFreeBlock, slabSize);
1241     } else {
1242         fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), largeObjectAlignment);
1243         fBlockEnd = (uintptr_t)fBlock + exactBlockSize;
1244         MALLOC_ASSERT(fBlockEnd <= lastFreeBlock, ASSERT_TEXT);
1245     }
1246     if (fBlockEnd <= (uintptr_t)fBlock)
1247         return nullptr; // allocSz is too small
1248     blockSz = fBlockEnd - (uintptr_t)fBlock;
1249     // TODO: extend getSlabBlock to support degradation, i.e. getting fewer blocks
1250     // than requested, and then relax this check
1251     // (currently all-or-nothing is implemented; the check matches that)
1252     if (blockSz < numOfSlabAllocOnMiss*slabSize)
1253         return nullptr;
1254 
1255     region->blockSz = blockSz;
1256     return fBlock;
1257 }
1258 
1259 // startUseBlock may add the free block to a bin, the block can be used and
1260 // even released after this, so the region must be added to regionList already
1261 void Backend::startUseBlock(MemRegion *region, FreeBlock *fBlock, bool addToBin)
1262 {
1263     size_t blockSz = region->blockSz;
1264     fBlock->initHeader();
1265     fBlock->setMeFree(blockSz);
1266 
1267     LastFreeBlock *lastBl = static_cast<LastFreeBlock*>(fBlock->rightNeig(blockSz));
1268     // to not get unaligned atomics during LastFreeBlock access
1269     MALLOC_ASSERT(isAligned(lastBl, sizeof(uintptr_t)), nullptr);
1270     lastBl->initHeader();
1271     lastBl->setMeFree(GuardedSize::LAST_REGION_BLOCK);
1272     lastBl->setLeftFree(blockSz);
1273     lastBl->myBin = NO_BIN;
1274     lastBl->memRegion = region;
1275 
1276     if (addToBin) {
1277         unsigned targetBin = sizeToBin(blockSz);
1278         // when adding advance regions, register the bin for the largest block in the region
1279         advRegBins.registerBin(targetBin);
1280         if (region->type == MEMREG_SLAB_BLOCKS) {
1281             fBlock->slabAligned = true;
1282             freeSlabAlignedBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
1283         } else {
1284             fBlock->slabAligned = false;
1285             freeLargeBlockBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
1286         }
1287     } else {
1288         // to match with blockReleased() in genericGetBlock
1289         bkndSync.blockConsumed();
1290         // Understand our alignment for correct splitBlock operation
1291         fBlock->slabAligned = (region->type == MEMREG_SLAB_BLOCKS);
1292         fBlock->sizeTmp = fBlock->tryLockBlock();
1293         MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, "Locking must be successful");
1294     }
1295 }
1296 
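// MemRegionList is an intrusive doubly-linked list of regions guarded by regionListLock;
// new regions are pushed at the head.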
1297 void MemRegionList::add(MemRegion *r)
1298 {
1299     r->prev = nullptr;
1300     MallocMutex::scoped_lock lock(regionListLock);
1301     r->next = head;
1302     head = r;
1303     if (head->next)
1304         head->next->prev = head;
1305 }
1306 
1307 void MemRegionList::remove(MemRegion *r)
1308 {
1309     MallocMutex::scoped_lock lock(regionListLock);
1310     if (head == r)
1311         head = head->next;
1312     if (r->next)
1313         r->next->prev = r->prev;
1314     if (r->prev)
1315         r->prev->next = r->next;
1316 }
1317 
1318 #if __TBB_MALLOC_BACKEND_STAT
1319 int MemRegionList::reportStat(FILE *f)
1320 {
1321     int regNum = 0;
1322     MallocMutex::scoped_lock lock(regionListLock);
1323     for (MemRegion *curr = head; curr; curr = curr->next) {
1324         fprintf(f, "%p: max block %lu B, ", curr, curr->blockSz);
1325         regNum++;
1326     }
1327     return regNum;
1328 }
1329 #endif
1330 
1331 FreeBlock *Backend::addNewRegion(size_t size, MemRegionType memRegType, bool addToBin)
1332 {
1333     static_assert(sizeof(BlockMutexes) <= sizeof(BlockI), "Header must not be overwritten in used blocks");
1334     MALLOC_ASSERT(FreeBlock::minBlockSize > GuardedSize::MAX_SPEC_VAL,
1335           "Block length must not conflict with special values of GuardedSize");
1336     // If the region is not "for slabs" we should reserve some space for
1337     // a region header, the worst case alignment and the last block mark.
1338     const size_t requestSize = memRegType == MEMREG_SLAB_BLOCKS ? size :
1339         size + sizeof(MemRegion) + largeObjectAlignment
1340              +  FreeBlock::minBlockSize + sizeof(LastFreeBlock);
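    // For slab regions no extra space is requested: findBlockInRegion carves the header
    // and the trailing LastFreeBlock out of 'size' and aligns the usable part down to slabSize.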
1341 
1342     size_t rawSize = requestSize;
1343     MemRegion *region = (MemRegion*)allocRawMem(rawSize);
1344     if (!region) {
1345         MALLOC_ASSERT(rawSize == requestSize, "allocRawMem has not allocated memory but changed the allocated size.");
1346         return nullptr;
1347     }
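    // allocRawMem may return a different amount than requested (e.g. what remains of a
    // fixed pool's buffer); if it is not even enough for the region header, give it back.
    // A fixed pool never unmaps its memory, hence the extMemPool->fixedPool checks below.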
1348     if (rawSize < sizeof(MemRegion)) {
1349         if (!extMemPool->fixedPool)
1350             freeRawMem(region, rawSize);
1351         return nullptr;
1352     }
1353 
1354     region->type = memRegType;
1355     region->allocSz = rawSize;
1356     FreeBlock *fBlock = findBlockInRegion(region, size);
1357     if (!fBlock) {
1358         if (!extMemPool->fixedPool)
1359             freeRawMem(region, rawSize);
1360         return nullptr;
1361     }
1362     regionList.add(region);
1363     startUseBlock(region, fBlock, addToBin);
1364     bkndSync.binsModified();
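    // When the block was published to a bin, another thread may already have taken it,
    // so the pointer itself is not returned; VALID_BLOCK_IN_BIN only signals that new
    // memory became available through the bins.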
1365     return addToBin ? (FreeBlock*)VALID_BLOCK_IN_BIN : fBlock;
1366 }
1367 
1368 void Backend::init(ExtMemoryPool *extMemoryPool)
1369 {
1370     extMemPool = extMemoryPool;
1371     usedAddrRange.init();
1372     coalescQ.init(&bkndSync);
1373     bkndSync.init(this);
1374 }
1375 
1376 void Backend::reset()
1377 {
1378     MALLOC_ASSERT(extMemPool->userPool(), "Only user pool can be reset.");
1379     // no active threads are allowed in backend while reset() called
1380     verify();
1381 
1382     freeLargeBlockBins.reset();
1383     freeSlabAlignedBins.reset();
1384     advRegBins.reset();
1385 
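    // Re-create each region's single spanning free block and put it back into the bins.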
1386     for (MemRegion *curr = regionList.head; curr; curr = curr->next) {
1387         FreeBlock *fBlock = findBlockInRegion(curr, curr->blockSz);
1388         MALLOC_ASSERT(fBlock, "A memory region unexpectedly got smaller");
1389         startUseBlock(curr, fBlock, /*addToBin=*/true);
1390     }
1391 }
1392 
1393 bool Backend::destroy()
1394 {
1395     bool noError = true;
1396     // no active threads are allowed in backend while destroy() called
1397     verify();
1398     if (!inUserPool()) {
1399         freeLargeBlockBins.reset();
1400         freeSlabAlignedBins.reset();
1401     }
1402     while (regionList.head) {
1403         MemRegion *helper = regionList.head->next;
1404         noError &= freeRawMem(regionList.head, regionList.head->allocSz);
1405         regionList.head = helper;
1406     }
1407     return noError;
1408 }
1409 
1410 bool Backend::clean()
1411 {
1412     scanCoalescQ(/*forceCoalescQDrop=*/false);
1413     // Backend::clean is always called under synchronization, so only one thread can
1414     // enter this method at a time.
1415     // backendCleanCnt % 2 == 1 means that a clean operation is in progress
1416     backendCleanCnt.fetch_add(1, std::memory_order_acq_rel);
1417     bool res = false;
1418     // We can have several blocks occupying a whole region,
1419     // because such regions are added in advance (see askMemFromOS() and reset()),
1420     // and never used. Release them all.
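    // advRegBins remembers the bin where each such whole-region block was registered;
    // the getMinNonemptyBin(i) == i test confirms that bin i is currently non-empty
    // before whole regions are searched for and released from it.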
1421     for (int i = advRegBins.getMinUsedBin(0); i != -1; i = advRegBins.getMinUsedBin(i+1)) {
1422         if (i == freeSlabAlignedBins.getMinNonemptyBin(i))
1423             res |= freeSlabAlignedBins.tryReleaseRegions(i, this);
1424         if (i == freeLargeBlockBins.getMinNonemptyBin(i))
1425             res |= freeLargeBlockBins.tryReleaseRegions(i, this);
1426     }
1427     backendCleanCnt.fetch_add(1, std::memory_order_acq_rel);
1428     return res;
1429 }
1430 
1431 void Backend::IndexedBins::verify()
1432 {
1433 #if MALLOC_DEBUG
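    // For every free block in every bin: its own size must be a real length (greater
    // than any special GuardedSize value), the right neighbour must record that length
    // as its left size, and both the right neighbour's own size and the block's left
    // size must be special values, i.e. the adjacent blocks are not plain free blocks
    // (otherwise they would have been coalesced).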
1434     for (int i=0; i<(int)freeBinsNum; i++) {
1435         for (FreeBlock *fb = freeBins[i].head.load(std::memory_order_relaxed); fb; fb=fb->next) {
1436             uintptr_t mySz = fb->myL.value;
1437             MALLOC_ASSERT(mySz>GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
1438             FreeBlock *right = (FreeBlock*)((uintptr_t)fb + mySz);
1439             suppress_unused_warning(right);
1440             MALLOC_ASSERT(right->myL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
1441             MALLOC_ASSERT(right->leftL.value==mySz, ASSERT_TEXT);
1442             MALLOC_ASSERT(fb->leftL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
1443         }
1444     }
1445 #endif
1446 }
1447 
1448 // For correct operation, it must be called when no other thread
1449 // is changing the backend.
1450 void Backend::verify()
1451 {
1452 #if MALLOC_DEBUG
1453     scanCoalescQ(/*forceCoalescQDrop=*/false);
1454 #endif // MALLOC_DEBUG
1455 
1456     freeLargeBlockBins.verify();
1457     freeSlabAlignedBins.verify();
1458 }
1459 
1460 #if __TBB_MALLOC_BACKEND_STAT
1461 size_t Backend::Bin::countFreeBlocks()
1462 {
1463     size_t cnt = 0;
1464     {
1465         MallocMutex::scoped_lock lock(tLock);
1466         for (FreeBlock *fb = head; fb; fb = fb->next)
1467             cnt++;
1468     }
1469     return cnt;
1470 }
1471 
1472 size_t Backend::Bin::reportFreeBlocks(FILE *f)
1473 {
1474     size_t totalSz = 0;
1475     MallocMutex::scoped_lock lock(tLock);
1476     for (FreeBlock *fb = head; fb; fb = fb->next) {
1477         size_t sz = fb->tryLockBlock();
1478         fb->setMeFree(sz);
1479         fb->rightNeig(sz)->setLeftFree(sz);
1480         fprintf(f, " [%p;%p]", fb, (void*)((uintptr_t)fb+sz));
1481         totalSz += sz;
1482     }
1483     return totalSz;
1484 }
1485 
1486 void Backend::IndexedBins::reportStat(FILE *f)
1487 {
1488     size_t totalSize = 0;
1489 
1490     for (int i=0; i<Backend::freeBinsNum; i++)
1491         if (size_t cnt = freeBins[i].countFreeBlocks()) {
1492             totalSize += freeBins[i].reportFreeBlocks(f);
1493             fprintf(f, " %d:%lu, ", i, cnt);
1494         }
1495     fprintf(f, "\ttotal size %lu KB", totalSize/1024);
1496 }
1497 
1498 void Backend::reportStat(FILE *f)
1499 {
1500     scanCoalescQ(/*forceCoalescQDrop=*/false);
1501 
1502     fprintf(f, "\n  regions:\n");
1503     int regNum = regionList.reportStat(f);
1504     fprintf(f, "\n%d regions, %lu KB in all regions\n  free bins:\nlarge bins: ",
1505             regNum, totalMemSize/1024);
1506     freeLargeBlockBins.reportStat(f);
1507     fprintf(f, "\naligned bins: ");
1508     freeSlabAlignedBins.reportStat(f);
1509     fprintf(f, "\n");
1510 }
1511 #endif // __TBB_MALLOC_BACKEND_STAT
1512 
1513 } } // namespaces
1514