xref: /oneTBB/src/tbbmalloc/backend.cpp (revision 25c399f4)
1 /*
2     Copyright (c) 2005-2023 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #include <string.h>   /* for memset */
18 #include <errno.h>
19 #include "tbbmalloc_internal.h"
20 
21 namespace rml {
22 namespace internal {
23 
24 /*********** Code to acquire memory from the OS or other executive ****************/
25 
26 /*
27   A syscall or malloc can set a non-zero errno on failure,
28   but the allocator may still be able to find memory to fulfill the request,
29   and we do not want a successful scalable_malloc call to change errno.
30   To support this, restore the old errno in (get|free)RawMemory, and set errno
31   in the frontend just before returning to user code.
32   Please note: every syscall/libc call used inside scalable_malloc that
33   sets errno must be protected this way, not just memory allocation per se.
34 */
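/*
  Illustrative sketch of the save/restore idiom described above (not the exact
  implementation; the MapMemory/UnmapMemory wrappers and the frontend are
  assumed to do the equivalent internally):

      int prevErrno = errno;              // remember errno as user code left it
      void *mem = MapMemory(sz, REGULAR); // may set errno on failure
      errno = prevErrno;                  // restore it; the frontend sets errno only
                                          // when the whole user request finally fails
*/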
35 
36 #if USE_DEFAULT_MEMORY_MAPPING
37 #include "MapMemory.h"
38 #else
39 /* assume MapMemory and UnmapMemory are customized */
40 #endif
41 
42 void* getRawMemory (size_t size, PageType pageType) {
43     return MapMemory(size, pageType);
44 }
45 
46 int freeRawMemory (void *object, size_t size) {
47     return UnmapMemory(object, size);
48 }
49 
50 #if CHECK_ALLOCATION_RANGE
51 
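// UsedAddressRange keeps a conservative [leftBound, rightBound) envelope of all
// memory obtained from the OS. registerFree() shrinks the range only when an
// exact boundary is released, so the envelope may overestimate, but it never
// misses an address that is actually in use.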
52 void Backend::UsedAddressRange::registerAlloc(uintptr_t left, uintptr_t right)
53 {
54     MallocMutex::scoped_lock lock(mutex);
55     if (left < leftBound.load(std::memory_order_relaxed))
56         leftBound.store(left, std::memory_order_relaxed);
57     if (right > rightBound.load(std::memory_order_relaxed))
58         rightBound.store(right, std::memory_order_relaxed);
59     MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed), ASSERT_TEXT);
60     MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
61     MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) <= left && right <= rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
62 }
63 
64 void Backend::UsedAddressRange::registerFree(uintptr_t left, uintptr_t right)
65 {
66     MallocMutex::scoped_lock lock(mutex);
67     if (leftBound.load(std::memory_order_relaxed) == left) {
68         if (rightBound.load(std::memory_order_relaxed) == right) {
69             leftBound.store(ADDRESS_UPPER_BOUND, std::memory_order_relaxed);
70             rightBound.store(0, std::memory_order_relaxed);
71         } else
72             leftBound.store(right, std::memory_order_relaxed);
73     } else if (rightBound.load(std::memory_order_relaxed) == right)
74         rightBound.store(left, std::memory_order_relaxed);
75     MALLOC_ASSERT((!rightBound.load(std::memory_order_relaxed) && leftBound.load(std::memory_order_relaxed) == ADDRESS_UPPER_BOUND)
76                   || leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
77 }
78 #endif // CHECK_ALLOCATION_RANGE
79 
80 // Initialized in frontend inside defaultMemPool
81 extern HugePagesStatus hugePages;
82 
83 void *Backend::allocRawMem(size_t &size)
84 {
85     void *res = nullptr;
86     size_t allocSize = 0;
87 
88     if (extMemPool->userPool()) {
89         if (extMemPool->fixedPool && bootsrapMemDone == bootsrapMemStatus.load(std::memory_order_acquire))
90             return nullptr;
91         MALLOC_ASSERT(bootsrapMemStatus != bootsrapMemNotDone,
92                       "Backend::allocRawMem() called prematurely?");
93         // TODO: support for raw mem not aligned at sizeof(uintptr_t)
94         // memory from a fixed pool is requested once and only once
95         allocSize = alignUpGeneric(size, extMemPool->granularity);
96         res = (*extMemPool->rawAlloc)(extMemPool->poolId, allocSize);
97     } else {
98         // Align allocation on page size
99         size_t pageSize = hugePages.isEnabled ? hugePages.getGranularity() : extMemPool->granularity;
100         MALLOC_ASSERT(pageSize, "Page size cannot be zero.");
101         allocSize = alignUpGeneric(size, pageSize);
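        // Illustrative example: with a 2 MB huge-page granularity a 100 KB
        // request is rounded up to a single 2 MB page, while with regular
        // 4 KB pages the same request stays at 100 KB (already 4 KB-aligned).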
102 
103         // If the user requested huge pages and they are available, try to use preallocated ones first.
104         // If there are none, check transparent huge page support and use them instead.
105         if (hugePages.isEnabled) {
106             if (hugePages.isHPAvailable) {
107                 res = getRawMemory(allocSize, PREALLOCATED_HUGE_PAGE);
108             }
109             if (!res && hugePages.isTHPAvailable) {
110                 res = getRawMemory(allocSize, TRANSPARENT_HUGE_PAGE);
111             }
112         }
113 
114         if (!res) {
115             res = getRawMemory(allocSize, REGULAR);
116         }
117     }
118 
119     if (res) {
120         MALLOC_ASSERT(allocSize > 0, "Invalid size of an allocated region.");
121         size = allocSize;
122         if (!extMemPool->userPool())
123             usedAddrRange.registerAlloc((uintptr_t)res, (uintptr_t)res+size);
124 #if MALLOC_DEBUG
125         volatile size_t curTotalSize = totalMemSize; // to read global value once
126         MALLOC_ASSERT(curTotalSize+size > curTotalSize, "Overflow allocation size.");
127 #endif
128         totalMemSize.fetch_add(size);
129     }
130 
131     return res;
132 }
133 
134 bool Backend::freeRawMem(void *object, size_t size)
135 {
136     bool fail;
137 #if MALLOC_DEBUG
138     volatile size_t curTotalSize = totalMemSize; // to read global value once
139     MALLOC_ASSERT(curTotalSize-size < curTotalSize, "Negative allocation size.");
140 #endif
141     totalMemSize.fetch_sub(size);
142     if (extMemPool->userPool()) {
143         MALLOC_ASSERT(!extMemPool->fixedPool, "No free for fixed-size pools.");
144         fail = (*extMemPool->rawFree)(extMemPool->poolId, object, size);
145     } else {
146         usedAddrRange.registerFree((uintptr_t)object, (uintptr_t)object + size);
147         fail = freeRawMemory(object, size);
148     }
149     // TODO: use result in all freeRawMem() callers
150     return !fail;
151 }
152 
153 /********* End memory acquisition code ********************************/
154 
155 // Protected object size. A successful lock (tryLock) returns the size of the locked block,
156 // and releasing (unlock) requires setting the block size again.
157 class GuardedSize : tbb::detail::no_copy {
158     std::atomic<uintptr_t> value;
159 public:
160     enum State {
161         LOCKED,
162         COAL_BLOCK,        // block is coalescing now
163         MAX_LOCKED_VAL = COAL_BLOCK,
164         LAST_REGION_BLOCK, // used to mark last block in region
165         // values after this are "normal" block sizes
166         MAX_SPEC_VAL = LAST_REGION_BLOCK
167     };
168 
169     void initLocked() { value.store(LOCKED, std::memory_order_release); } // TBB_REVAMP_TODO: was relaxed
170     void makeCoalscing() {
171         MALLOC_ASSERT(value.load(std::memory_order_relaxed) == LOCKED, ASSERT_TEXT);
172         value.store(COAL_BLOCK, std::memory_order_release); // TBB_REVAMP_TODO: was relaxed
173     }
174     size_t tryLock(State state) {
175         MALLOC_ASSERT(state <= MAX_LOCKED_VAL, ASSERT_TEXT);
176         size_t sz = value.load(std::memory_order_acquire);
177         for (;;) {
178             if (sz <= MAX_LOCKED_VAL) {
179                 break;
180             }
181             if (value.compare_exchange_strong(sz, state)) {
182                 break;
183             }
184         }
185         return sz;
186     }
187     void unlock(size_t size) {
188         MALLOC_ASSERT(value.load(std::memory_order_relaxed) <= MAX_LOCKED_VAL, "The lock is not locked");
189         MALLOC_ASSERT(size > MAX_LOCKED_VAL, ASSERT_TEXT);
190         value.store(size, std::memory_order_release);
191     }
192     bool isLastRegionBlock() const { return value.load(std::memory_order_relaxed) == LAST_REGION_BLOCK; }
193     friend void Backend::IndexedBins::verify();
194 };
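// Typical usage pattern of GuardedSize as a combined lock-and-size word
// (illustrative; see FreeBlock::tryLockBlock() below for the real code):
//
//     size_t sz = guarded.tryLock(GuardedSize::LOCKED);
//     if (sz > GuardedSize::MAX_LOCKED_VAL) {   // we own the block of size sz
//         /* ... work with the block ... */
//         guarded.unlock(sz);                   // publish the size again
//     }                                         // else: someone else holds it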
195 
196 struct MemRegion {
197     MemRegion *next,      // keep all regions in any pool to release all them on
198               *prev;      // pool destroying, 2-linked list to release individual
199                           // regions.
200     size_t     allocSz,   // got from pool callback
201                blockSz;   // initial and maximal inner block size
202     MemRegionType type;
203 };
204 
205 // This data must stay unmodified while the block is in use, so it is kept separate.
206 class BlockMutexes {
207 protected:
208     GuardedSize myL,   // lock for me
209                 leftL; // lock for left neighbor
210 };
211 
212 class FreeBlock : BlockMutexes {
213 public:
214     static const size_t minBlockSize;
215     friend void Backend::IndexedBins::verify();
216 
217     FreeBlock    *prev,       // in 2-linked list related to bin
218                  *next,
219                  *nextToFree; // used to form a queue during coalescing
220     // valid only when block is in processing, i.e. one is not free and not
221     // sizeTmp is valid only while the block is in processing, i.e. while it is
222     size_t        sizeTmp;    // neither free nor in use outside of the backend
223     int           myBin;      // bin that owns the block
224     bool          slabAligned;
225     bool          blockInBin; // this block is already in myBin
226     FreeBlock *rightNeig(size_t sz) const {
227         MALLOC_ASSERT(sz, ASSERT_TEXT);
228         return (FreeBlock*)((uintptr_t)this+sz);
229     }
230     FreeBlock *leftNeig(size_t sz) const {
231         MALLOC_ASSERT(sz, ASSERT_TEXT);
232         return (FreeBlock*)((uintptr_t)this - sz);
233     }
234 
235     void initHeader() { myL.initLocked(); leftL.initLocked(); }
236     void setMeFree(size_t size) { myL.unlock(size); }
237     size_t trySetMeUsed(GuardedSize::State s) { return myL.tryLock(s); }
238     bool isLastRegionBlock() const { return myL.isLastRegionBlock(); }
239 
240     void setLeftFree(size_t sz) { leftL.unlock(sz); }
241     size_t trySetLeftUsed(GuardedSize::State s) { return leftL.tryLock(s); }
242 
243     size_t tryLockBlock() {
244         size_t rSz, sz = trySetMeUsed(GuardedSize::LOCKED);
245 
246         if (sz <= GuardedSize::MAX_LOCKED_VAL)
247             return false;
248         rSz = rightNeig(sz)->trySetLeftUsed(GuardedSize::LOCKED);
249         if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
250             setMeFree(sz);
251             return false;
252         }
253         MALLOC_ASSERT(rSz == sz, ASSERT_TEXT);
254         return sz;
255     }
256     void markCoalescing(size_t blockSz) {
257         myL.makeCoalscing();
258         rightNeig(blockSz)->leftL.makeCoalscing();
259         sizeTmp = blockSz;
260         nextToFree = nullptr;
261     }
262     void markUsed() {
263         myL.initLocked();
264         rightNeig(sizeTmp)->leftL.initLocked();
265         nextToFree = nullptr;
266     }
267     static void markBlocks(FreeBlock *fBlock, int num, size_t size) {
268         for (int i=1; i<num; i++) {
269             fBlock = (FreeBlock*)((uintptr_t)fBlock + size);
270             fBlock->initHeader();
271         }
272     }
273 };
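// Note on the two GuardedSize fields inherited from BlockMutexes: a block's myL
// holds its own size/lock, while the leftL stored in its right neighbor mirrors
// the same value (see tryLockBlock(), which asserts rSz == sz). Locking both
// sides of a boundary this way lets blocks be split and coalesced without any
// global lock.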
274 
275 // Last block in any region. Its "size" field is GuardedSize::LAST_REGION_BLOCK.
276 // Blocks of this kind are used to find the region header
277 // and make it possible to return the region back to the OS.
278 struct LastFreeBlock : public FreeBlock {
279     MemRegion *memRegion;
280 };
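// Resulting region layout (illustrative; see findBlockInRegion()/startUseBlock()):
//
//   [MemRegion hdr][align pad][FreeBlock ... usable space ...][LastFreeBlock][tail pad]
//    ^region                   ^fBlock                         ^fBlock + blockSz
//
// The trailing LastFreeBlock marker is how the rightmost free block finds its
// MemRegion when the whole region becomes free and can be returned to the OS.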
281 
282 const size_t FreeBlock::minBlockSize = sizeof(FreeBlock);
283 
284 inline bool BackendSync::waitTillBlockReleased(intptr_t startModifiedCnt)
285 {
286     AtomicBackoff backoff;
287 #if __TBB_MALLOC_BACKEND_STAT
288     class ITT_Guard {
289         void *ptr;
290     public:
291         ITT_Guard(void *p) : ptr(p) {
292             MALLOC_ITT_SYNC_PREPARE(ptr);
293         }
294         ~ITT_Guard() {
295             MALLOC_ITT_SYNC_ACQUIRED(ptr);
296         }
297     };
298     ITT_Guard ittGuard(&inFlyBlocks);
299 #endif
300     intptr_t myBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire);
301     intptr_t myCoalescQInFlyBlocks = backend->blocksInCoalescing();
302     while (true) {
303         MALLOC_ASSERT(myBinsInFlyBlocks>=0 && myCoalescQInFlyBlocks>=0, nullptr);
304 
305         intptr_t currBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire);
306         intptr_t currCoalescQInFlyBlocks = backend->blocksInCoalescing();
307         WhiteboxTestingYield();
308         // Stop waiting iff:
309 
310         // 1) blocks were removed from processing, not added
311         if (myBinsInFlyBlocks > currBinsInFlyBlocks
312         // 2) blocks were released via the delayed coalescing queue
313             || myCoalescQInFlyBlocks > currCoalescQInFlyBlocks)
314             break;
315         // 3) if there are blocks in coalescing and no progress in their processing,
316         // try to scan the coalescing queue and stop waiting if changes were made
317         // (if there are no changes and in-fly blocks exist, we keep waiting
318         //  so as not to increase the load on coalescQ)
319         if (currCoalescQInFlyBlocks > 0 && backend->scanCoalescQ(/*forceCoalescQDrop=*/false))
320             break;
321         // 4) when there are no blocks
322         if (!currBinsInFlyBlocks && !currCoalescQInFlyBlocks)
323             // re-scan makes sense only if bins were modified since the last scan
324             return startModifiedCnt != getNumOfMods();
325         myBinsInFlyBlocks = currBinsInFlyBlocks;
326         myCoalescQInFlyBlocks = currCoalescQInFlyBlocks;
327         backoff.pause();
328     }
329     return true;
330 }
331 
332 void CoalRequestQ::putBlock(FreeBlock *fBlock)
333 {
334     MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, ASSERT_TEXT);
335     fBlock->markUsed();
336     // the block is in the queue, do not forget that it's here
337     inFlyBlocks++;
338 
339     FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);
340     for (;;) {
341         fBlock->nextToFree = myBlToFree;
342         if (blocksToFree.compare_exchange_strong(myBlToFree, fBlock)) {
343             return;
344         }
345     }
346 }
347 
348 FreeBlock *CoalRequestQ::getAll()
349 {
350     for (;;) {
351         FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);
352 
353         if (!myBlToFree) {
354             return nullptr;
355         } else {
356             if (blocksToFree.compare_exchange_strong(myBlToFree, nullptr)) {
357                 return myBlToFree;
358             } else {
359                 continue;
360             }
361         }
362     }
363 }
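// putBlock()/getAll() above form a simple lock-free LIFO list (Treiber-stack
// style) linked through nextToFree: producers push one block at a time with a
// CAS on blocksToFree, and the consumer detaches the whole list at once by
// CAS-ing the head to nullptr.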
364 
365 inline void CoalRequestQ::blockWasProcessed()
366 {
367     bkndSync->binsModified();
368     int prev = inFlyBlocks.fetch_sub(1);
369     tbb::detail::suppress_unused_warning(prev);
370     MALLOC_ASSERT(prev > 0, ASSERT_TEXT);
371 }
372 
373 // Try to get a block from a bin.
374 // If the remaining free space would stay in the same bin,
375 //     split the block without removing it.
376 // If the free space should go to other bin(s), remove the block.
377 // alignedBin is true if all blocks in the bin have a slab-aligned right side.
378 FreeBlock *Backend::IndexedBins::getFromBin(int binIdx, BackendSync *sync, size_t size,
379         bool needAlignedRes, bool alignedBin,  bool wait, int *binLocked)
380 {
381     Bin *b = &freeBins[binIdx];
382 try_next:
383     FreeBlock *fBlock = nullptr;
384     if (!b->empty()) {
385         bool locked = false;
386         MallocMutex::scoped_lock scopedLock(b->tLock, wait, &locked);
387 
388         if (!locked) {
389             if (binLocked) (*binLocked)++;
390             return nullptr;
391         }
392 
393         for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; curr = curr->next) {
394             size_t szBlock = curr->tryLockBlock();
395             if (!szBlock) {
396                 // the block is locked; re-acquire the bin lock, as there is no place
397                 // to spin while the block is being coalesced
398                 goto try_next;
399             }
400 
401             // GENERAL CASE
402             if (alignedBin || !needAlignedRes) {
403                 size_t splitSz = szBlock - size;
404                 // If we get a block as a split result, it must have room for control structures.
405                 if (szBlock >= size && (splitSz >= FreeBlock::minBlockSize || !splitSz))
406                     fBlock = curr;
407             } else {
408                 // SPECIAL CASE: to get an aligned block from an unaligned bin we have to cut the middle of a block
409                 // and return the remaining left and right parts. Possible only in the fixed pool scenario; the assert
410                 // for this is inside the splitBlock() function.
411 
412                 void *newB = alignUp(curr, slabSize);
413                 uintptr_t rightNew = (uintptr_t)newB + size;
414                 uintptr_t rightCurr = (uintptr_t)curr + szBlock;
415                 // Check if the block size is sufficient,
416                 // and also left and right split results are either big enough or non-existent
417                 if (rightNew <= rightCurr
418                         && (newB == curr || ((uintptr_t)newB - (uintptr_t)curr) >= FreeBlock::minBlockSize)
419                         && (rightNew == rightCurr || (rightCurr - rightNew) >= FreeBlock::minBlockSize))
420                     fBlock = curr;
421             }
422 
423             if (fBlock) {
424                 // blockConsumed() must be called before the result of removing from a bin is visible externally.
425                 sync->blockConsumed();
426                 // TODO: think about cases when block stays in the same bin
427                 b->removeBlock(fBlock);
428                 if (freeBins[binIdx].empty())
429                     bitMask.set(binIdx, false);
430                 fBlock->sizeTmp = szBlock;
431                 break;
432             } else { // block size is not valid, search for next block in the bin
433                 curr->setMeFree(szBlock);
434                 curr->rightNeig(szBlock)->setLeftFree(szBlock);
435             }
436         }
437     }
438     return fBlock;
439 }
440 
441 bool Backend::IndexedBins::tryReleaseRegions(int binIdx, Backend *backend)
442 {
443     Bin *b = &freeBins[binIdx];
444     FreeBlock *fBlockList = nullptr;
445 
446     // take all blocks from the bin and re-do coalescing on them
447     // to release single-block regions
448 try_next:
449     if (!b->empty()) {
450         MallocMutex::scoped_lock binLock(b->tLock);
451         for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; ) {
452             size_t szBlock = curr->tryLockBlock();
453             if (!szBlock)
454                 goto try_next;
455 
456             FreeBlock *next = curr->next;
457 
458             b->removeBlock(curr);
459             curr->sizeTmp = szBlock;
460             curr->nextToFree = fBlockList;
461             fBlockList = curr;
462             curr = next;
463         }
464     }
465     return backend->coalescAndPutList(fBlockList, /*forceCoalescQDrop=*/true,
466                                       /*reportBlocksProcessed=*/false);
467 }
468 
469 void Backend::Bin::removeBlock(FreeBlock *fBlock)
470 {
471     MALLOC_ASSERT(fBlock->next||fBlock->prev||fBlock== head.load(std::memory_order_relaxed),
472                   "Detected that a block is not in the bin.");
473     if (head.load(std::memory_order_relaxed) == fBlock)
474         head.store(fBlock->next, std::memory_order_relaxed);
475     if (tail == fBlock)
476         tail = fBlock->prev;
477     if (fBlock->prev)
478         fBlock->prev->next = fBlock->next;
479     if (fBlock->next)
480         fBlock->next->prev = fBlock->prev;
481 }
482 
483 void Backend::IndexedBins::addBlock(int binIdx, FreeBlock *fBlock, size_t /* blockSz */, bool addToTail)
484 {
485     Bin *b = &freeBins[binIdx];
486     fBlock->myBin = binIdx;
487     fBlock->next = fBlock->prev = nullptr;
488     {
489         MallocMutex::scoped_lock scopedLock(b->tLock);
490         if (addToTail) {
491             fBlock->prev = b->tail;
492             b->tail = fBlock;
493             if (fBlock->prev)
494                 fBlock->prev->next = fBlock;
495             if (!b->head.load(std::memory_order_relaxed))
496                 b->head.store(fBlock, std::memory_order_relaxed);
497         } else {
498             fBlock->next = b->head.load(std::memory_order_relaxed);
499             b->head.store(fBlock, std::memory_order_relaxed);
500             if (fBlock->next)
501                 fBlock->next->prev = fBlock;
502             if (!b->tail)
503                 b->tail = fBlock;
504         }
505     }
506     bitMask.set(binIdx, true);
507 }
508 
509 bool Backend::IndexedBins::tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail)
510 {
511     bool locked = false;
512     Bin *b = &freeBins[binIdx];
513     fBlock->myBin = binIdx;
514     if (addToTail) {
515         fBlock->next = nullptr;
516         {
517             MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
518             if (!locked)
519                 return false;
520             fBlock->prev = b->tail;
521             b->tail = fBlock;
522             if (fBlock->prev)
523                 fBlock->prev->next = fBlock;
524             if (!b->head.load(std::memory_order_relaxed))
525                 b->head.store(fBlock, std::memory_order_relaxed);
526         }
527     } else {
528         fBlock->prev = nullptr;
529         {
530             MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
531             if (!locked)
532                 return false;
533             fBlock->next = b->head.load(std::memory_order_relaxed);
534             b->head.store(fBlock, std::memory_order_relaxed);
535             if (fBlock->next)
536                 fBlock->next->prev = fBlock;
537             if (!b->tail)
538                 b->tail = fBlock;
539         }
540     }
541     bitMask.set(binIdx, true);
542     return true;
543 }
544 
545 void Backend::IndexedBins::reset()
546 {
547     for (unsigned i=0; i<Backend::freeBinsNum; i++)
548         freeBins[i].reset();
549     bitMask.reset();
550 }
551 
552 void Backend::IndexedBins::lockRemoveBlock(int binIdx, FreeBlock *fBlock)
553 {
554     MallocMutex::scoped_lock scopedLock(freeBins[binIdx].tLock);
555     freeBins[binIdx].removeBlock(fBlock);
556     if (freeBins[binIdx].empty())
557         bitMask.set(binIdx, false);
558 }
559 
560 bool ExtMemoryPool::regionsAreReleaseable() const
561 {
562     return !keepAllMemory && !delayRegsReleasing;
563 }
564 
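// Illustrative example of the GENERAL CASE in splitBlock() below: for a slab-
// aligned request of totalSize = 2*slabSize taken from a block of 5*slabSize
// whose right side is slab-aligned, the right 2*slabSize part is returned to
// the caller and the remaining left 3*slabSize part goes back to the backend
// via coalescAndPut().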
565 FreeBlock *Backend::splitBlock(FreeBlock *fBlock, int num, size_t size, bool blockIsAligned, bool needAlignedBlock)
566 {
567     const size_t totalSize = num * size;
568 
569     // SPECIAL CASE: for an unaligned block we have to cut the middle of the block
570     // and return the remaining left and right parts. Possible only in a fixed pool scenario.
571     if (needAlignedBlock && !blockIsAligned) {
572         MALLOC_ASSERT(extMemPool->fixedPool,
573                 "Aligned block request from unaligned bin possible only in fixed pool scenario.");
574 
575         // Space to use is in the middle
576         FreeBlock *newBlock = alignUp(fBlock, slabSize);
577         FreeBlock *rightPart = (FreeBlock*)((uintptr_t)newBlock + totalSize);
578         uintptr_t fBlockEnd = (uintptr_t)fBlock + fBlock->sizeTmp;
579 
580         // Return free right part
581         if ((uintptr_t)rightPart != fBlockEnd) {
582             rightPart->initHeader();  // to prevent coalescing rightPart with fBlock
583             size_t rightSize = fBlockEnd - (uintptr_t)rightPart;
584             coalescAndPut(rightPart, rightSize, toAlignedBin(rightPart, rightSize));
585         }
586         // And free left part
587         if (newBlock != fBlock) {
588             newBlock->initHeader(); // to prevent coalescing fBlock with newB
589             size_t leftSize = (uintptr_t)newBlock - (uintptr_t)fBlock;
590             coalescAndPut(fBlock, leftSize, toAlignedBin(fBlock, leftSize));
591         }
592         fBlock = newBlock;
593     } else if (size_t splitSize = fBlock->sizeTmp - totalSize) { // need to split the block
594         // GENERAL CASE, cut the left or right part of the block
595         FreeBlock *splitBlock = nullptr;
596         if (needAlignedBlock) {
597             // For slab-aligned blocks, cut the right side of the block
598             // and return it to the requester; the original block goes back to the backend
599             splitBlock = fBlock;
600             fBlock = (FreeBlock*)((uintptr_t)splitBlock + splitSize);
601             fBlock->initHeader();
602         } else {
603             // For large object blocks, cut the original block and put the free right part back to the backend
604             splitBlock = (FreeBlock*)((uintptr_t)fBlock + totalSize);
605             splitBlock->initHeader();
606         }
607         // Mark the split-off free block the same way as its parent only when the requested type (needAlignedBlock)
608         // and the type of the block returned from the bins/OS (blockIsAligned) are equal; otherwise recompute it (XOR used)
609         bool markAligned = (blockIsAligned ^ needAlignedBlock) ? toAlignedBin(splitBlock, splitSize) : blockIsAligned;
610         coalescAndPut(splitBlock, splitSize, markAligned);
611     }
612     MALLOC_ASSERT(!needAlignedBlock || isAligned(fBlock, slabSize), "Expect to get aligned block, if one was requested.");
613     FreeBlock::markBlocks(fBlock, num, size);
614     return fBlock;
615 }
616 
617 size_t Backend::getMaxBinnedSize() const
618 {
619     return hugePages.isEnabled && !inUserPool() ?
620         maxBinned_HugePage : maxBinned_SmallPage;
621 }
622 
623 inline bool Backend::MaxRequestComparator::operator()(size_t oldMaxReq, size_t requestSize) const
624 {
625     return requestSize > oldMaxReq && requestSize < backend->getMaxBinnedSize();
626 }
627 
628 // last chance to get memory
629 FreeBlock *Backend::releaseMemInCaches(intptr_t startModifiedCnt,
630                                     int *lockedBinsThreshold, int numOfLockedBins)
631 {
632     // something released from caches
633     if (extMemPool->hardCachesCleanup()
634         // ..or can use blocks that are in processing now
635         || bkndSync.waitTillBlockReleased(startModifiedCnt))
636         return (FreeBlock*)VALID_BLOCK_IN_BIN;
637     // OS can't give us more memory, but we have some in locked bins
638     if (*lockedBinsThreshold && numOfLockedBins) {
639         *lockedBinsThreshold = 0;
640         return (FreeBlock*)VALID_BLOCK_IN_BIN;
641     }
642     return nullptr; // nothing found, give up
643 }
644 
645 FreeBlock *Backend::askMemFromOS(size_t blockSize, intptr_t startModifiedCnt,
646                                  int *lockedBinsThreshold, int numOfLockedBins,
647                                  bool *splittableRet, bool needSlabRegion)
648 {
649     FreeBlock *block;
650     // The block sizes can be divided into 3 groups:
651     //   1. "quite small": a popular object size, or we are in bootstrap or
652     //      something similar; request several regions.
653     //   2. "quite large": we want to have several such blocks in the region
654     //      but do not want several pre-allocated regions.
655     //   3. "huge": exact fit; we allocate only one block and do not allow
656     //      any other allocations to be placed in the region.
657     // Dividing the block sizes into these groups, we try to balance between
658     // too small regions (which lead to fragmentation) and too large ones (which
659     // lead to excessive address space consumption). If a region is "too
660     // large", allocate only one, to prevent fragmentation. This supposedly
661     // doesn't hurt performance, because the object requested by the user is large.
662     // Bounds for the groups are:
663     const size_t maxBinned = getMaxBinnedSize();
664     const size_t quiteSmall = maxBinned / 8;
665     const size_t quiteLarge = maxBinned;
666 
667     if (blockSize >= quiteLarge) {
668         // Do not interact with other threads via semaphores, as for exact fit
669         // we can't share regions with them, memory requesting is individual.
670         block = addNewRegion(blockSize, MEMREG_ONE_BLOCK, /*addToBin=*/false);
671         if (!block)
672             return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
673         *splittableRet = false;
674     } else {
675         const size_t regSz_sizeBased = alignUp(4*maxRequestedSize, 1024*1024);
676         // Another thread is modifying the backend while we can't get the block.
677         // Wait until it finishes and re-do the scan
678         // before trying other ways to extend the backend.
679         if (bkndSync.waitTillBlockReleased(startModifiedCnt)
680             // the semaphore protects adding more memory from the OS
681             || memExtendingSema.wait())
682             return (FreeBlock*)VALID_BLOCK_IN_BIN;
683 
684         if (startModifiedCnt != bkndSync.getNumOfMods()) {
685             memExtendingSema.signal();
686             return (FreeBlock*)VALID_BLOCK_IN_BIN;
687         }
688 
689         if (blockSize < quiteSmall) {
690             // For this size of blocks, add NUM_OF_REG "advance" regions in bin,
691             // and return one as a result.
692             // TODO: add to bin first, because other threads can use them right away.
693             // This must be done carefully, because blocks in bins can be released
694             // in releaseCachesToLimit().
695             const unsigned NUM_OF_REG = 3;
696             MemRegionType regType = needSlabRegion ? MEMREG_SLAB_BLOCKS : MEMREG_LARGE_BLOCKS;
697             block = addNewRegion(regSz_sizeBased, regType, /*addToBin=*/false);
698             if (block)
699                 for (unsigned idx=0; idx<NUM_OF_REG; idx++)
700                     if (! addNewRegion(regSz_sizeBased, regType, /*addToBin=*/true))
701                         break;
702         } else {
703             block = addNewRegion(regSz_sizeBased, MEMREG_LARGE_BLOCKS, /*addToBin=*/false);
704         }
705         memExtendingSema.signal();
706 
707         // no regions found, try to clean cache
708         if (!block || block == (FreeBlock*)VALID_BLOCK_IN_BIN)
709             return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
710         // Since a region can hold more than one block it can be split.
711         *splittableRet = true;
712     }
713     // after asking memory from OS, release caches if we are above the memory limits
714     releaseCachesToLimit();
715 
716     return block;
717 }
718 
719 void Backend::releaseCachesToLimit()
720 {
721     if (!memSoftLimit.load(std::memory_order_relaxed)
722             || totalMemSize.load(std::memory_order_relaxed) <= memSoftLimit.load(std::memory_order_relaxed)) {
723         return;
724     }
725     size_t locTotalMemSize, locMemSoftLimit;
726 
727     scanCoalescQ(/*forceCoalescQDrop=*/false);
728     if (extMemPool->softCachesCleanup() &&
729         (locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
730         (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
731         return;
732     // clean the global large-object cache; if this is not enough, clean local caches
733     // do this in several tries, because backend fragmentation can prevent
734     // a region from being released
735     for (int cleanLocal = 0; cleanLocal<2; cleanLocal++)
736         while (cleanLocal ?
737                  extMemPool->allLocalCaches.cleanup(/*cleanOnlyUnused=*/true) :
738                  extMemPool->loc.decreasingCleanup())
739             if ((locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
740                 (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
741                 return;
742     // last chance to match memSoftLimit
743     extMemPool->hardCachesCleanup();
744 }
745 
746 int Backend::IndexedBins::getMinNonemptyBin(unsigned startBin) const
747 {
748     int p = bitMask.getMinTrue(startBin);
749     return p == -1 ? Backend::freeBinsNum : p;
750 }
751 
752 FreeBlock *Backend::IndexedBins::findBlock(int nativeBin, BackendSync *sync, size_t size,
753         bool needAlignedBlock, bool alignedBin, int *numOfLockedBins)
754 {
755     for (int i=getMinNonemptyBin(nativeBin); i<(int)freeBinsNum; i=getMinNonemptyBin(i+1))
756         if (FreeBlock *block = getFromBin(i, sync, size, needAlignedBlock, alignedBin, /*wait=*/false, numOfLockedBins))
757             return block;
758 
759     return nullptr;
760 }
761 
762 void Backend::requestBootstrapMem()
763 {
764     if (bootsrapMemDone == bootsrapMemStatus.load(std::memory_order_acquire))
765         return;
766     MallocMutex::scoped_lock lock( bootsrapMemStatusMutex );
767     if (bootsrapMemDone == bootsrapMemStatus)
768         return;
769     MALLOC_ASSERT(bootsrapMemNotDone == bootsrapMemStatus, ASSERT_TEXT);
770     bootsrapMemStatus = bootsrapMemInitializing;
771     // request a rather big region in advance during bootstrap
772     // it is OK to get nullptr here, as we later re-do the request with a more modest size
773     addNewRegion(2*1024*1024, MEMREG_SLAB_BLOCKS, /*addToBin=*/true);
774     bootsrapMemStatus = bootsrapMemDone;
775 }
776 
777 // Try to allocate a block of num*size bytes from the available bins.
778 // needAlignedBlock is true if the result must be slab-aligned.
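// Informal summary of the strategy below:
//   1) scan the matching bins (the slab-aligned bins first for aligned requests);
//   2) if nothing is found, drain the coalescing queue and soft-clean the caches;
//   3) as a last resort, ask the OS via askMemFromOS(), possibly relaxing the
//      locked-bins threshold, and split the obtained block if it is splittable.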
779 FreeBlock *Backend::genericGetBlock(int num, size_t size, bool needAlignedBlock)
780 {
781     FreeBlock *block = nullptr;
782     const size_t totalReqSize = num*size;
783     // no splitting after requesting a new region; ask for the exact size
784     const int nativeBin = sizeToBin(totalReqSize);
785 
786     requestBootstrapMem();
787     // If we found 2 or fewer locked bins, it's time to ask for more memory from the OS.
788     // But nothing can be asked from a fixed pool. And we prefer to wait rather than
789     // ask for more memory if the block is quite large.
790     int lockedBinsThreshold = extMemPool->fixedPool || size>=maxBinned_SmallPage? 0 : 2;
791 
792     // Find maximal requested size limited by getMaxBinnedSize()
793     AtomicUpdate(maxRequestedSize, totalReqSize, MaxRequestComparator(this));
794     scanCoalescQ(/*forceCoalescQDrop=*/false);
795 
796     bool splittable = true;
797     for (;;) {
798         const intptr_t startModifiedCnt = bkndSync.getNumOfMods();
799         int numOfLockedBins;
800 
801         do {
802             numOfLockedBins = 0;
803             if (needAlignedBlock) {
804                 block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
805                                                         /*alignedBin=*/true, &numOfLockedBins);
806                 if (!block && extMemPool->fixedPool)
807                     block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
808                                                         /*alignedBin=*/false, &numOfLockedBins);
809             } else {
810                 block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
811                                                         /*alignedBin=*/false, &numOfLockedBins);
812                 if (!block && extMemPool->fixedPool)
813                     block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
814                                                         /*alignedBin=*/true, &numOfLockedBins);
815             }
816         } while (!block && numOfLockedBins>lockedBinsThreshold);
817 
818         if (block)
819             break;
820 
821         bool retScanCoalescQ = scanCoalescQ(/*forceCoalescQDrop=*/true);
822         bool retSoftCachesCleanup = extMemPool->softCachesCleanup();
823         if (!(retScanCoalescQ || retSoftCachesCleanup)) {
824             // bins are not updated,
825             // the only remaining possibility is to ask for more memory
826             block = askMemFromOS(totalReqSize, startModifiedCnt, &lockedBinsThreshold,
827                         numOfLockedBins, &splittable, needAlignedBlock);
828             if (!block)
829                 return nullptr;
830             if (block != (FreeBlock*)VALID_BLOCK_IN_BIN) {
831                 // size can be increased in askMemFromOS, that's why >=
832                 MALLOC_ASSERT(block->sizeTmp >= size, ASSERT_TEXT);
833                 break;
834             }
835             // valid block somewhere in bins, let's find it
836             block = nullptr;
837         }
838     }
839     MALLOC_ASSERT(block, ASSERT_TEXT);
840     if (splittable) {
841         // At this point we have to be sure that slabAligned attribute describes the right block state
842         block = splitBlock(block, num, size, block->slabAligned, needAlignedBlock);
843     }
844     // matched blockConsumed() from startUseBlock()
845     bkndSync.blockReleased();
846 
847     return block;
848 }
849 
850 LargeMemoryBlock *Backend::getLargeBlock(size_t size)
851 {
852     LargeMemoryBlock *lmb =
853         (LargeMemoryBlock*)genericGetBlock(1, size, /*needAlignedRes=*/false);
854     if (lmb) {
855         lmb->unalignedSize = size;
856         if (extMemPool->userPool())
857             extMemPool->lmbList.add(lmb);
858     }
859     return lmb;
860 }
861 
862 BlockI *Backend::getSlabBlock(int num) {
863     BlockI *b = (BlockI*)genericGetBlock(num, slabSize, /*slabAligned=*/true);
864     MALLOC_ASSERT(isAligned(b, slabSize), ASSERT_TEXT);
865     return b;
866 }
867 
868 void Backend::putSlabBlock(BlockI *block) {
869     genericPutBlock((FreeBlock *)block, slabSize, /*slabAligned=*/true);
870 }
871 
872 void *Backend::getBackRefSpace(size_t size, bool *rawMemUsed)
873 {
874     // This block is released only at shutdown, so it could prevent
875     // an entire region from being released if it were taken from the backend;
876     // therefore prefer using getRawMemory.
877     if (void *ret = getRawMemory(size, REGULAR)) {
878         *rawMemUsed = true;
879         return ret;
880     }
881     void *ret = genericGetBlock(1, size, /*needAlignedRes=*/false);
882     if (ret) *rawMemUsed = false;
883     return ret;
884 }
885 
886 void Backend::putBackRefSpace(void *b, size_t size, bool rawMemUsed)
887 {
888     if (rawMemUsed)
889         freeRawMemory(b, size);
890     // ignore non-raw memory, as it is released when its region is released
891 }
892 
893 void Backend::removeBlockFromBin(FreeBlock *fBlock)
894 {
895     if (fBlock->myBin != Backend::NO_BIN) {
896         if (fBlock->slabAligned)
897             freeSlabAlignedBins.lockRemoveBlock(fBlock->myBin, fBlock);
898         else
899             freeLargeBlockBins.lockRemoveBlock(fBlock->myBin, fBlock);
900     }
901 }
902 
903 void Backend::genericPutBlock(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
904 {
905     bkndSync.blockConsumed();
906     coalescAndPut(fBlock, blockSz, slabAligned);
907     bkndSync.blockReleased();
908 }
909 
910 void AllLargeBlocksList::add(LargeMemoryBlock *lmb)
911 {
912     MallocMutex::scoped_lock scoped_cs(largeObjLock);
913     lmb->gPrev = nullptr;
914     lmb->gNext = loHead;
915     if (lmb->gNext)
916         lmb->gNext->gPrev = lmb;
917     loHead = lmb;
918 }
919 
920 void AllLargeBlocksList::remove(LargeMemoryBlock *lmb)
921 {
922     MallocMutex::scoped_lock scoped_cs(largeObjLock);
923     if (loHead == lmb)
924         loHead = lmb->gNext;
925     if (lmb->gNext)
926         lmb->gNext->gPrev = lmb->gPrev;
927     if (lmb->gPrev)
928         lmb->gPrev->gNext = lmb->gNext;
929 }
930 
931 void Backend::putLargeBlock(LargeMemoryBlock *lmb)
932 {
933     if (extMemPool->userPool())
934         extMemPool->lmbList.remove(lmb);
935     genericPutBlock((FreeBlock *)lmb, lmb->unalignedSize, false);
936 }
937 
938 void Backend::returnLargeObject(LargeMemoryBlock *lmb)
939 {
940     removeBackRef(lmb->backRefIdx);
941     putLargeBlock(lmb);
942     STAT_increment(getThreadId(), ThreadCommonCounters, freeLargeObj);
943 }
944 
945 #if BACKEND_HAS_MREMAP
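// remap() tries to grow a large single-block region in place with mremap().
// It applies only to MEMREG_ONE_BLOCK regions obtained directly from the OS
// (not from a user pool); in all other cases it returns nullptr so the caller
// can fall back to an ordinary reallocation.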
946 void *Backend::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
947 {
948     // no remap for user pools or for objects small enough to live in bins
949     if (inUserPool() || min(oldSize, newSize)<maxBinned_SmallPage
950         // during remap, can't guarantee alignment more strict than current or
951         // more strict than page alignment
952         || !isAligned(ptr, alignment) || alignment>extMemPool->granularity)
953         return nullptr;
954     const LargeMemoryBlock* lmbOld = ((LargeObjectHdr *)ptr - 1)->memoryBlock;
955     const size_t oldUnalignedSize = lmbOld->unalignedSize;
956     FreeBlock *oldFBlock = (FreeBlock *)lmbOld;
957     FreeBlock *right = oldFBlock->rightNeig(oldUnalignedSize);
958     // in every region only one block can have LAST_REGION_BLOCK on its right,
959     // so no synchronization is needed
960     if (!right->isLastRegionBlock())
961         return nullptr;
962 
963     MemRegion *oldRegion = static_cast<LastFreeBlock*>(right)->memRegion;
964     MALLOC_ASSERT( oldRegion < ptr, ASSERT_TEXT );
965     const size_t oldRegionSize = oldRegion->allocSz;
966     if (oldRegion->type != MEMREG_ONE_BLOCK)
967         return nullptr;  // we are not single in the region
968     const size_t userOffset = (uintptr_t)ptr - (uintptr_t)oldRegion;
969     const size_t alignedSize = LargeObjectCache::alignToBin(newSize + userOffset);
970     const size_t requestSize =
971         alignUp(sizeof(MemRegion) + alignedSize + sizeof(LastFreeBlock), extMemPool->granularity);
972     if (requestSize < alignedSize) // is wrapped around?
973         return nullptr;
974     regionList.remove(oldRegion);
975 
976     // The deallocation should be registered in address range before mremap to
977     // prevent a race condition with allocation on another thread.
978     // (OS can reuse the memory and registerAlloc will be missed on another thread)
979     usedAddrRange.registerFree((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize);
980 
981     void *ret = mremap(oldRegion, oldRegion->allocSz, requestSize, MREMAP_MAYMOVE);
982     if (MAP_FAILED == ret) { // can't remap, revert and leave
983         regionList.add(oldRegion);
984         usedAddrRange.registerAlloc((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize);
985         return nullptr;
986     }
987     MemRegion *region = (MemRegion*)ret;
988     MALLOC_ASSERT(region->type == MEMREG_ONE_BLOCK, ASSERT_TEXT);
989     region->allocSz = requestSize;
990     region->blockSz = alignedSize;
991 
992     FreeBlock *fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion),
993                                              largeObjectAlignment);
994 
995     regionList.add(region);
996     startUseBlock(region, fBlock, /*addToBin=*/false);
997     MALLOC_ASSERT(fBlock->sizeTmp == region->blockSz, ASSERT_TEXT);
998     // matched blockConsumed() in startUseBlock().
999     // TODO: get rid of useless pair blockConsumed()/blockReleased()
1000     bkndSync.blockReleased();
1001 
1002     // object must start at same offset from region's start
1003     void *object = (void*)((uintptr_t)region + userOffset);
1004     MALLOC_ASSERT(isAligned(object, alignment), ASSERT_TEXT);
1005     LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
1006     setBackRef(header->backRefIdx, header);
1007 
1008     LargeMemoryBlock *lmb = (LargeMemoryBlock*)fBlock;
1009     lmb->unalignedSize = region->blockSz;
1010     lmb->objectSize = newSize;
1011     lmb->backRefIdx = header->backRefIdx;
1012     header->memoryBlock = lmb;
1013     MALLOC_ASSERT((uintptr_t)lmb + lmb->unalignedSize >=
1014                   (uintptr_t)object + lmb->objectSize, "An object must fit to the block.");
1015 
1016     usedAddrRange.registerAlloc((uintptr_t)region, (uintptr_t)region + requestSize);
1017     totalMemSize.fetch_add(region->allocSz - oldRegionSize);
1018 
1019     return object;
1020 }
1021 #endif /* BACKEND_HAS_MREMAP */
1022 
1023 void Backend::releaseRegion(MemRegion *memRegion)
1024 {
1025     regionList.remove(memRegion);
1026     freeRawMem(memRegion, memRegion->allocSz);
1027 }
1028 
1029 // coalesce fBlock with its neighbors
1030 FreeBlock *Backend::doCoalesc(FreeBlock *fBlock, MemRegion **mRegion)
1031 {
1032     FreeBlock *resBlock = fBlock;
1033     size_t resSize = fBlock->sizeTmp;
1034     MemRegion *memRegion = nullptr;
1035 
1036     fBlock->markCoalescing(resSize);
1037     resBlock->blockInBin = false;
1038 
1039     // coalescing with left neighbor
1040     size_t leftSz = fBlock->trySetLeftUsed(GuardedSize::COAL_BLOCK);
1041     if (leftSz != GuardedSize::LOCKED) {
1042         if (leftSz == GuardedSize::COAL_BLOCK) {
1043             coalescQ.putBlock(fBlock);
1044             return nullptr;
1045         } else {
1046             FreeBlock *left = fBlock->leftNeig(leftSz);
1047             size_t lSz = left->trySetMeUsed(GuardedSize::COAL_BLOCK);
1048             if (lSz <= GuardedSize::MAX_LOCKED_VAL) {
1049                 fBlock->setLeftFree(leftSz); // rollback
1050                 coalescQ.putBlock(fBlock);
1051                 return nullptr;
1052             } else {
1053                 MALLOC_ASSERT(lSz == leftSz, "Invalid header");
1054                 left->blockInBin = true;
1055                 resBlock = left;
1056                 resSize += leftSz;
1057                 resBlock->sizeTmp = resSize;
1058             }
1059         }
1060     }
1061     // coalescing with right neighbor
1062     FreeBlock *right = fBlock->rightNeig(fBlock->sizeTmp);
1063     size_t rightSz = right->trySetMeUsed(GuardedSize::COAL_BLOCK);
1064     if (rightSz != GuardedSize::LOCKED) {
1065         // LastFreeBlock is on the right side
1066         if (GuardedSize::LAST_REGION_BLOCK == rightSz) {
1067             right->setMeFree(GuardedSize::LAST_REGION_BLOCK);
1068             memRegion = static_cast<LastFreeBlock*>(right)->memRegion;
1069         } else if (GuardedSize::COAL_BLOCK == rightSz) {
1070             if (resBlock->blockInBin) {
1071                 resBlock->blockInBin = false;
1072                 removeBlockFromBin(resBlock);
1073             }
1074             coalescQ.putBlock(resBlock);
1075             return nullptr;
1076         } else {
1077             size_t rSz = right->rightNeig(rightSz)->
1078                 trySetLeftUsed(GuardedSize::COAL_BLOCK);
1079             if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
1080                 right->setMeFree(rightSz);  // rollback
1081                 if (resBlock->blockInBin) {
1082                     resBlock->blockInBin = false;
1083                     removeBlockFromBin(resBlock);
1084                 }
1085                 coalescQ.putBlock(resBlock);
1086                 return nullptr;
1087             } else {
1088                 MALLOC_ASSERT(rSz == rightSz, "Invalid header");
1089                 removeBlockFromBin(right);
1090                 resSize += rightSz;
1091 
1092                 // Is LastFreeBlock on the right side of right?
1093                 FreeBlock *nextRight = right->rightNeig(rightSz);
1094                 size_t nextRightSz = nextRight->
1095                     trySetMeUsed(GuardedSize::COAL_BLOCK);
1096                 if (nextRightSz > GuardedSize::MAX_LOCKED_VAL) {
1097                     if (nextRightSz == GuardedSize::LAST_REGION_BLOCK)
1098                         memRegion = static_cast<LastFreeBlock*>(nextRight)->memRegion;
1099 
1100                     nextRight->setMeFree(nextRightSz);
1101                 }
1102             }
1103         }
1104     }
1105     if (memRegion) {
1106         MALLOC_ASSERT((uintptr_t)memRegion + memRegion->allocSz >=
1107                       (uintptr_t)right + sizeof(LastFreeBlock), ASSERT_TEXT);
1108         MALLOC_ASSERT((uintptr_t)memRegion < (uintptr_t)resBlock, ASSERT_TEXT);
1109         *mRegion = memRegion;
1110     } else
1111         *mRegion = nullptr;
1112     resBlock->sizeTmp = resSize;
1113     return resBlock;
1114 }
1115 
1116 bool Backend::coalescAndPutList(FreeBlock *list, bool forceCoalescQDrop, bool reportBlocksProcessed)
1117 {
1118     bool regionReleased = false;
1119 
1120     for (FreeBlock *helper; list;
1121          list = helper,
1122              // matches block enqueue in CoalRequestQ::putBlock()
1123              reportBlocksProcessed? coalescQ.blockWasProcessed() : (void)0) {
1124         MemRegion *memRegion;
1125         bool addToTail = false;
1126 
1127         helper = list->nextToFree;
1128         FreeBlock *toRet = doCoalesc(list, &memRegion);
1129         if (!toRet)
1130             continue;
1131 
1132         if (memRegion && memRegion->blockSz == toRet->sizeTmp
1133             && !extMemPool->fixedPool) {
1134             if (extMemPool->regionsAreReleaseable()) {
1135                 // release the region, because there are no used blocks in it
1136                 if (toRet->blockInBin)
1137                     removeBlockFromBin(toRet);
1138                 releaseRegion(memRegion);
1139                 regionReleased = true;
1140                 continue;
1141             } else // add block from empty region to end of bin,
1142                 addToTail = true; // preserving for exact fit
1143         }
1144         size_t currSz = toRet->sizeTmp;
1145         int bin = sizeToBin(currSz);
1146         bool toAligned = extMemPool->fixedPool ? toAlignedBin(toRet, currSz) : toRet->slabAligned;
1147         bool needAddToBin = true;
1148 
1149         if (toRet->blockInBin) {
1150             // Does it stay in same bin?
1151             if (toRet->myBin == bin && toRet->slabAligned == toAligned)
1152                 needAddToBin = false;
1153             else {
1154                 toRet->blockInBin = false;
1155                 removeBlockFromBin(toRet);
1156             }
1157         }
1158 
1159         // Does not stay in same bin, or bin-less; add it
1160         if (needAddToBin) {
1161             toRet->prev = toRet->next = toRet->nextToFree = nullptr;
1162             toRet->myBin = NO_BIN;
1163             toRet->slabAligned = toAligned;
1164 
1165             // If the block is too small to fit in any bin, keep it bin-less.
1166             // It's not a leak because the block can later be coalesced.
1167             if (currSz >= minBinnedSize) {
1168                 toRet->sizeTmp = currSz;
1169                 IndexedBins *target = toRet->slabAligned ? &freeSlabAlignedBins : &freeLargeBlockBins;
1170                 if (forceCoalescQDrop) {
1171                     target->addBlock(bin, toRet, toRet->sizeTmp, addToTail);
1172                 } else if (!target->tryAddBlock(bin, toRet, addToTail)) {
1173                     coalescQ.putBlock(toRet);
1174                     continue;
1175                 }
1176             }
1177             toRet->sizeTmp = 0;
1178         }
1179         // Mark the (possibly coalesced) block as free.
1180         // Adding to a bin must be done before this point,
1181         // because once a block is free it can be coalesced, and
1182         // using its pointer becomes unsafe.
1183         // Remember that coalescing is not done under any global lock.
1184         toRet->setMeFree(currSz);
1185         toRet->rightNeig(currSz)->setLeftFree(currSz);
1186     }
1187     return regionReleased;
1188 }
1189 
1190 // Coalesce fBlock and add it back to a bin;
1191 // processing delayed coalescing requests.
1192 void Backend::coalescAndPut(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
1193 {
1194     fBlock->sizeTmp = blockSz;
1195     fBlock->nextToFree = nullptr;
1196     fBlock->slabAligned = slabAligned;
1197 
1198     coalescAndPutList(fBlock, /*forceCoalescQDrop=*/false, /*reportBlocksProcessed=*/false);
1199 }
1200 
1201 bool Backend::scanCoalescQ(bool forceCoalescQDrop)
1202 {
1203     FreeBlock *currCoalescList = coalescQ.getAll();
1204 
1205     if (currCoalescList)
1206         // reportBlocksProcessed=true informs that the blocks leave coalescQ,
1207         // matches blockConsumed() from CoalRequestQ::putBlock()
1208         coalescAndPutList(currCoalescList, forceCoalescQDrop,
1209                           /*reportBlocksProcessed=*/true);
1210     // returns status of coalescQ.getAll(), as an indication of possible changes in backend
1211     // TODO: coalescAndPutList() may report whether some new free blocks became available or not
1212     return currCoalescList;
1213 }
1214 
1215 FreeBlock *Backend::findBlockInRegion(MemRegion *region, size_t exactBlockSize)
1216 {
1217     FreeBlock *fBlock;
1218     size_t blockSz;
1219     uintptr_t fBlockEnd,
1220         lastFreeBlock = (uintptr_t)region + region->allocSz - sizeof(LastFreeBlock);
1221 
1222     static_assert(sizeof(LastFreeBlock) % sizeof(uintptr_t) == 0,
1223         "Atomic applied on LastFreeBlock, and we put it at the end of region, that"
1224         " is uintptr_t-aligned, so no unaligned atomic operations are possible.");
1225      // right bound is slab-aligned, keep LastFreeBlock after it
1226     if (region->type == MEMREG_SLAB_BLOCKS) {
1227         fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), sizeof(uintptr_t));
1228         fBlockEnd = alignDown(lastFreeBlock, slabSize);
1229     } else {
1230         fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), largeObjectAlignment);
1231         fBlockEnd = (uintptr_t)fBlock + exactBlockSize;
1232         MALLOC_ASSERT(fBlockEnd <= lastFreeBlock, ASSERT_TEXT);
1233     }
1234     if (fBlockEnd <= (uintptr_t)fBlock)
1235         return nullptr; // allocSz is too small
1236     blockSz = fBlockEnd - (uintptr_t)fBlock;
1237     // TODO: extend getSlabBlock to support degradation, i.e. getting fewer blocks
1238     // than requested, and then relax this check
1239     // (for now all-or-nothing is implemented; the check follows that)
1240     if (blockSz < numOfSlabAllocOnMiss*slabSize)
1241         return nullptr;
1242 
1243     region->blockSz = blockSz;
1244     return fBlock;
1245 }
1246 
1247 // startUseBlock may add the free block to a bin, the block can be used and
1248 // even released after this, so the region must be added to regionList already
1249 void Backend::startUseBlock(MemRegion *region, FreeBlock *fBlock, bool addToBin)
1250 {
1251     size_t blockSz = region->blockSz;
1252     fBlock->initHeader();
1253     fBlock->setMeFree(blockSz);
1254 
1255     LastFreeBlock *lastBl = static_cast<LastFreeBlock*>(fBlock->rightNeig(blockSz));
1256     // to not get unaligned atomics during LastFreeBlock access
1257     MALLOC_ASSERT(isAligned(lastBl, sizeof(uintptr_t)), nullptr);
1258     lastBl->initHeader();
1259     lastBl->setMeFree(GuardedSize::LAST_REGION_BLOCK);
1260     lastBl->setLeftFree(blockSz);
1261     lastBl->myBin = NO_BIN;
1262     lastBl->memRegion = region;
1263 
1264     if (addToBin) {
1265         unsigned targetBin = sizeToBin(blockSz);
1266         // when adding advance regions, register the bin for the largest block in the region
1267         advRegBins.registerBin(targetBin);
1268         if (region->type == MEMREG_SLAB_BLOCKS) {
1269             fBlock->slabAligned = true;
1270             freeSlabAlignedBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
1271         } else {
1272             fBlock->slabAligned = false;
1273             freeLargeBlockBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
1274         }
1275     } else {
1276         // to match with blockReleased() in genericGetBlock
1277         bkndSync.blockConsumed();
1278         // Record our alignment for correct splitBlock() operation
1279         fBlock->slabAligned = region->type == MEMREG_SLAB_BLOCKS ? true : false;
1280         fBlock->sizeTmp = fBlock->tryLockBlock();
1281         MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, "Locking must be successful");
1282     }
1283 }
1284 
1285 void MemRegionList::add(MemRegion *r)
1286 {
1287     r->prev = nullptr;
1288     MallocMutex::scoped_lock lock(regionListLock);
1289     r->next = head;
1290     head = r;
1291     if (head->next)
1292         head->next->prev = head;
1293 }
1294 
1295 void MemRegionList::remove(MemRegion *r)
1296 {
1297     MallocMutex::scoped_lock lock(regionListLock);
1298     if (head == r)
1299         head = head->next;
1300     if (r->next)
1301         r->next->prev = r->prev;
1302     if (r->prev)
1303         r->prev->next = r->next;
1304 }
1305 
1306 #if __TBB_MALLOC_BACKEND_STAT
1307 int MemRegionList::reportStat(FILE *f)
1308 {
1309     int regNum = 0;
1310     MallocMutex::scoped_lock lock(regionListLock);
1311     for (MemRegion *curr = head; curr; curr = curr->next) {
1312         fprintf(f, "%p: max block %lu B, ", curr, curr->blockSz);
1313         regNum++;
1314     }
1315     return regNum;
1316 }
1317 #endif
1318 
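// addNewRegion maps raw memory for the region, initializes the MemRegion header,
// carves the first free block out of it (findBlockInRegion), links the region into
// regionList and hands the block over to startUseBlock.
//
// A sketch of how the two return modes are meant to be consumed (deduced from the
// return values below; "backend" stands for a Backend instance):
//
//   // addToBin == true: the space is published to a bin; the caller re-scans the bins
//   if (backend->addNewRegion(sz, memRegType, /*addToBin=*/true) == (FreeBlock*)VALID_BLOCK_IN_BIN)
//       /* retry the bin search */;
//
//   // addToBin == false: take the freshly carved block directly, already locked
//   if (FreeBlock *fb = backend->addNewRegion(sz, memRegType, /*addToBin=*/false))
//       /* use fb; its size is recorded in the region as blockSz */;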
1319 FreeBlock *Backend::addNewRegion(size_t size, MemRegionType memRegType, bool addToBin)
1320 {
1321     static_assert(sizeof(BlockMutexes) <= sizeof(BlockI), "Header must not be overwritten in used blocks");
1322     MALLOC_ASSERT(FreeBlock::minBlockSize > GuardedSize::MAX_SPEC_VAL,
1323           "Block length must not conflict with special values of GuardedSize");
1324     // If the region is not "for slabs", we should reserve some space for
1325     // the region header, the worst-case alignment, and the last block mark.
1326     const size_t requestSize = memRegType == MEMREG_SLAB_BLOCKS ? size :
1327         size + sizeof(MemRegion) + largeObjectAlignment
1328              +  FreeBlock::minBlockSize + sizeof(LastFreeBlock);
1329 
1330     size_t rawSize = requestSize;
1331     MemRegion *region = (MemRegion*)allocRawMem(rawSize);
1332     if (!region) {
1333         MALLOC_ASSERT(rawSize==requestSize, "getRawMem did not allocate memory but changed the allocated size.");
1334         return nullptr;
1335     }
1336     if (rawSize < sizeof(MemRegion)) {
1337         if (!extMemPool->fixedPool)
1338             freeRawMem(region, rawSize);
1339         return nullptr;
1340     }
1341 
1342     region->type = memRegType;
1343     region->allocSz = rawSize;
1344     FreeBlock *fBlock = findBlockInRegion(region, size);
1345     if (!fBlock) {
1346         if (!extMemPool->fixedPool)
1347             freeRawMem(region, rawSize);
1348         return nullptr;
1349     }
1350     regionList.add(region);
1351     startUseBlock(region, fBlock, addToBin);
1352     bkndSync.binsModified();
1353     return addToBin? (FreeBlock*)VALID_BLOCK_IN_BIN : fBlock;
1354 }
1355 
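// Attach the backend to its owning ExtMemoryPool and initialize the cooperating
// pieces: the used-address-range tracker, the coalescing queue, and the backend
// synchronization helper.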
1356 void Backend::init(ExtMemoryPool *extMemoryPool)
1357 {
1358     extMemPool = extMemoryPool;
1359     usedAddrRange.init();
1360     coalescQ.init(&bkndSync);
1361     bkndSync.init(this);
1362 }
1363 
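// Re-publish every region as a single free block of its recorded maximal size,
// discarding all previous bin contents. Valid only for user pools and only while
// no other thread uses the backend.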
1364 void Backend::reset()
1365 {
1366     MALLOC_ASSERT(extMemPool->userPool(), "Only user pool can be reset.");
1367     // no active threads are allowed in backend while reset() called
1368     verify();
1369 
1370     freeLargeBlockBins.reset();
1371     freeSlabAlignedBins.reset();
1372     advRegBins.reset();
1373 
1374     for (MemRegion *curr = regionList.head; curr; curr = curr->next) {
1375         FreeBlock *fBlock = findBlockInRegion(curr, curr->blockSz);
1376         MALLOC_ASSERT(fBlock, "A memory region unexpectedly got smaller");
1377         startUseBlock(curr, fBlock, /*addToBin=*/true);
1378     }
1379 }
1380 
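// Return every region's raw memory to its source; the result is false if any
// freeRawMem call reports failure. For non-user pools the free bins are cleared first.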
1381 bool Backend::destroy()
1382 {
1383     bool noError = true;
1384     // no active threads are allowed in backend while destroy() called
1385     verify();
1386     if (!inUserPool()) {
1387         freeLargeBlockBins.reset();
1388         freeSlabAlignedBins.reset();
1389     }
1390     while (regionList.head) {
1391         MemRegion *helper = regionList.head->next;
1392         noError &= freeRawMem(regionList.head, regionList.head->allocSz);
1393         regionList.head = helper;
1394     }
1395     return noError;
1396 }
1397 
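// Attempt to return unused memory: drain the coalescing queue, then release regions
// that are entirely covered by a single binned free block (tracked via advRegBins).
// Returns true if at least one region was released.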
1398 bool Backend::clean()
1399 {
1400     scanCoalescQ(/*forceCoalescQDrop=*/false);
1401 
1402     bool res = false;
1403     // Several blocks may each occupy a whole region, because such regions
1404     // are added in advance (see askMemFromOS() and reset()) and may never
1405     // be used. Release all of them.
1406     for (int i = advRegBins.getMinUsedBin(0); i != -1; i = advRegBins.getMinUsedBin(i+1)) {
1407         if (i == freeSlabAlignedBins.getMinNonemptyBin(i))
1408             res |= freeSlabAlignedBins.tryReleaseRegions(i, this);
1409         if (i == freeLargeBlockBins.getMinNonemptyBin(i))
1410             res |= freeLargeBlockBins.tryReleaseRegions(i, this);
1411     }
1412 
1413     return res;
1414 }
1415 
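// Debug-only consistency check of a bin set: every listed block must carry a real
// size (greater than GuardedSize::MAX_SPEC_VAL), its right neighbour must record
// that size as its left size, and the size fields on both sides must hold special
// values, i.e. a binned free block never sits next to another free block.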
1416 void Backend::IndexedBins::verify()
1417 {
1418 #if MALLOC_DEBUG
1419     for (int i=0; i<(int)freeBinsNum; i++) {
1420         for (FreeBlock *fb = freeBins[i].head.load(std::memory_order_relaxed); fb; fb=fb->next) {
1421             uintptr_t mySz = fb->myL.value;
1422             MALLOC_ASSERT(mySz>GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
1423             FreeBlock *right = (FreeBlock*)((uintptr_t)fb + mySz);
1424             suppress_unused_warning(right);
1425             MALLOC_ASSERT(right->myL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
1426             MALLOC_ASSERT(right->leftL.value==mySz, ASSERT_TEXT);
1427             MALLOC_ASSERT(fb->leftL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
1428         }
1429     }
1430 #endif
1431 }
1432 
1433 // For correct operation, it must be called when no other thread
1434 // is changing the backend.
1435 void Backend::verify()
1436 {
1437 #if MALLOC_DEBUG
1438     scanCoalescQ(/*forceCoalescQDrop=*/false);
1439 #endif // MALLOC_DEBUG
1440 
1441     freeLargeBlockBins.verify();
1442     freeSlabAlignedBins.verify();
1443 }
1444 
1445 #if __TBB_MALLOC_BACKEND_STAT
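// Optional statistics reporting: walk the region list and both bin sets and print
// block counts and total sizes. Note that reportFreeBlocks briefly locks each block
// to read its size and immediately marks it free again.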
1446 size_t Backend::Bin::countFreeBlocks()
1447 {
1448     size_t cnt = 0;
1449     {
1450         MallocMutex::scoped_lock lock(tLock);
1451         for (FreeBlock *fb = head; fb; fb = fb->next)
1452             cnt++;
1453     }
1454     return cnt;
1455 }
1456 
1457 size_t Backend::Bin::reportFreeBlocks(FILE *f)
1458 {
1459     size_t totalSz = 0;
1460     MallocMutex::scoped_lock lock(tLock);
1461     for (FreeBlock *fb = head; fb; fb = fb->next) {
1462         size_t sz = fb->tryLockBlock();
1463         fb->setMeFree(sz);
1464         fb->rightNeig(sz)->setLeftFree(sz);
1465         fprintf(f, " [%p;%p]", fb, (void*)((uintptr_t)fb+sz));
1466         totalSz += sz;
1467     }
1468     return totalSz;
1469 }
1470 
1471 void Backend::IndexedBins::reportStat(FILE *f)
1472 {
1473     size_t totalSize = 0;
1474 
1475     for (int i=0; i<Backend::freeBinsNum; i++)
1476         if (size_t cnt = freeBins[i].countFreeBlocks()) {
1477             totalSize += freeBins[i].reportFreeBlocks(f);
1478             fprintf(f, " %d:%lu, ", i, cnt);
1479         }
1480     fprintf(f, "\ttotal size %lu KB", totalSize/1024);
1481 }
1482 
1483 void Backend::reportStat(FILE *f)
1484 {
1485     scanCoalescQ(/*forceCoalescQDrop=*/false);
1486 
1487     fprintf(f, "\n  regions:\n");
1488     int regNum = regionList.reportStat(f);
1489     fprintf(f, "\n%d regions, %lu KB in all regions\n  free bins:\nlarge bins: ",
1490             regNum, totalMemSize/1024);
1491     freeLargeBlockBins.reportStat(f);
1492     fprintf(f, "\naligned bins: ");
1493     freeSlabAlignedBins.reportStat(f);
1494     fprintf(f, "\n");
1495 }
1496 #endif // __TBB_MALLOC_BACKEND_STAT
1497 
1498 } } // namespaces
1499