xref: /oneTBB/src/tbbmalloc/backend.cpp (revision c4568449)
1 /*
2     Copyright (c) 2005-2023 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #include <string.h>   /* for memset */
18 #include <errno.h>
19 #include "tbbmalloc_internal.h"
20 
21 namespace rml {
22 namespace internal {
23 
24 /*********** Code to acquire memory from the OS or other executive ****************/
25 
26 /*
27   A syscall/malloc can set a non-zero errno on failure,
28   but the allocator may still be able to find memory to fulfill the request later,
29   and a successful scalable_malloc call must not change errno.
30   To support this, restore the old errno in (get|free)RawMemory, and set errno
31   in the frontend just before returning to user code.
32   Please note: every syscall/libc call used inside scalable_malloc that
33   sets errno must be protected this way, not just memory allocation per se.
34 */
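/*
  A minimal sketch of the errno-preserving pattern described above (illustrative
  only; the actual save/restore points live in the mapping layer and the frontend):

      int prevErrno = errno;
      void *mem = MapMemory(size, pageType);  // may set errno on failure
      if (mem) errno = prevErrno;             // a successful path must not leak errno
*/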
35 
36 #if USE_DEFAULT_MEMORY_MAPPING
37 #include "MapMemory.h"
38 #else
39 /* assume MapMemory and UnmapMemory are customized */
40 #endif
41 
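// Thin wrappers over the (possibly customized) mapping layer; a non-zero result
// from UnmapMemory is treated as a failure by freeRawMem() below.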
42 void* getRawMemory (size_t size, PageType pageType) {
43     return MapMemory(size, pageType);
44 }
45 
46 int freeRawMemory (void *object, size_t size) {
47     return UnmapMemory(object, size);
48 }
49 
50 #if CHECK_ALLOCATION_RANGE
51 
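// Track the smallest [leftBound, rightBound) interval covering every range passed
// to registerAlloc(); registerFree() shrinks the interval only when an outermost
// range is released.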
52 void Backend::UsedAddressRange::registerAlloc(uintptr_t left, uintptr_t right)
53 {
54     MallocMutex::scoped_lock lock(mutex);
55     if (left < leftBound.load(std::memory_order_relaxed))
56         leftBound.store(left, std::memory_order_relaxed);
57     if (right > rightBound.load(std::memory_order_relaxed))
58         rightBound.store(right, std::memory_order_relaxed);
59     MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed), ASSERT_TEXT);
60     MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
61     MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) <= left && right <= rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
62 }
63 
64 void Backend::UsedAddressRange::registerFree(uintptr_t left, uintptr_t right)
65 {
66     MallocMutex::scoped_lock lock(mutex);
67     if (leftBound.load(std::memory_order_relaxed) == left) {
68         if (rightBound.load(std::memory_order_relaxed) == right) {
69             leftBound.store(ADDRESS_UPPER_BOUND, std::memory_order_relaxed);
70             rightBound.store(0, std::memory_order_relaxed);
71         } else
72             leftBound.store(right, std::memory_order_relaxed);
73     } else if (rightBound.load(std::memory_order_relaxed) == right)
74         rightBound.store(left, std::memory_order_relaxed);
75     MALLOC_ASSERT((!rightBound.load(std::memory_order_relaxed) && leftBound.load(std::memory_order_relaxed) == ADDRESS_UPPER_BOUND)
76                   || leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
77 }
78 #endif // CHECK_ALLOCATION_RANGE
79 
80 // Initialized in frontend inside defaultMemPool
81 extern HugePagesStatus hugePages;
82 
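// Allocate raw memory for the backend: round the requested size up to the pool
// granularity (or to the huge page size when huge pages are enabled), obtain the
// memory from the user-provided callback or from the OS, and on success update
// `size` to the actually allocated amount, register the address range (for
// OS-backed pools) and account it in totalMemSize.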
83 void *Backend::allocRawMem(size_t &size)
84 {
85     void *res = nullptr;
86     size_t allocSize = 0;
87 
88     if (extMemPool->userPool()) {
89         if (extMemPool->fixedPool && bootsrapMemDone == bootsrapMemStatus.load(std::memory_order_acquire))
90             return nullptr;
91         MALLOC_ASSERT(bootsrapMemStatus != bootsrapMemNotDone,
92                       "Backend::allocRawMem() called prematurely?");
93         // TODO: support for raw mem not aligned at sizeof(uintptr_t)
94         // memory from a fixed pool is requested once and only once
95         allocSize = alignUpGeneric(size, extMemPool->granularity);
96         res = (*extMemPool->rawAlloc)(extMemPool->poolId, allocSize);
97     } else {
98         // Align allocation on page size
99         size_t pageSize = hugePages.isEnabled ? hugePages.getGranularity() : extMemPool->granularity;
100         MALLOC_ASSERT(pageSize, "Page size cannot be zero.");
101         allocSize = alignUpGeneric(size, pageSize);
102 
103         // If the user requested huge pages and they are available, try preallocated ones first.
104         // If there are none, check transparent huge page support and use that instead.
105         if (hugePages.isEnabled) {
106             if (hugePages.isHPAvailable) {
107                 res = getRawMemory(allocSize, PREALLOCATED_HUGE_PAGE);
108             }
109             if (!res && hugePages.isTHPAvailable) {
110                 res = getRawMemory(allocSize, TRANSPARENT_HUGE_PAGE);
111             }
112         }
113 
114         if (!res) {
115             res = getRawMemory(allocSize, REGULAR);
116         }
117     }
118 
119     if (res) {
120         MALLOC_ASSERT(allocSize > 0, "Invalid size of an allocated region.");
121         size = allocSize;
122         if (!extMemPool->userPool())
123             usedAddrRange.registerAlloc((uintptr_t)res, (uintptr_t)res+size);
124 #if MALLOC_DEBUG
125         volatile size_t curTotalSize = totalMemSize; // to read global value once
126         MALLOC_ASSERT(curTotalSize+size > curTotalSize, "Overflow allocation size.");
127 #endif
128         totalMemSize.fetch_add(size);
129     }
130 
131     return res;
132 }
133 
134 bool Backend::freeRawMem(void *object, size_t size)
135 {
136     bool fail;
137 #if MALLOC_DEBUG
138     volatile size_t curTotalSize = totalMemSize; // to read global value once
139     MALLOC_ASSERT(curTotalSize-size < curTotalSize, "Negative allocation size.");
140 #endif
141     totalMemSize.fetch_sub(size);
142     if (extMemPool->userPool()) {
143         MALLOC_ASSERT(!extMemPool->fixedPool, "No free for fixed-size pools.");
144         fail = (*extMemPool->rawFree)(extMemPool->poolId, object, size);
145     } else {
146         usedAddrRange.registerFree((uintptr_t)object, (uintptr_t)object + size);
147         fail = freeRawMemory(object, size);
148     }
149     // TODO: use result in all freeRawMem() callers
150     return !fail;
151 }
152 
153 /********* End memory acquisition code ********************************/
154 
155 // Protected object size. Successful locking returns the size of the locked block;
156 // releasing requires setting the block size.
157 class GuardedSize : tbb::detail::no_copy {
158     std::atomic<uintptr_t> value;
159 public:
160     enum State {
161         LOCKED,
162         COAL_BLOCK,        // block is coalescing now
163         MAX_LOCKED_VAL = COAL_BLOCK,
164         LAST_REGION_BLOCK, // used to mark last block in region
165         // values after this are "normal" block sizes
166         MAX_SPEC_VAL = LAST_REGION_BLOCK
167     };
168 
169     void initLocked() { value.store(LOCKED, std::memory_order_release); } // TBB_REVAMP_TODO: was relaxed
170     void makeCoalscing() {
171         MALLOC_ASSERT(value.load(std::memory_order_relaxed) == LOCKED, ASSERT_TEXT);
172         value.store(COAL_BLOCK, std::memory_order_release); // TBB_REVAMP_TODO: was relaxed
173     }
174     size_t tryLock(State state) {
175         MALLOC_ASSERT(state <= MAX_LOCKED_VAL, ASSERT_TEXT);
176         size_t sz = value.load(std::memory_order_acquire);
177         for (;;) {
178             if (sz <= MAX_LOCKED_VAL) {
179                 break;
180             }
181             if (value.compare_exchange_strong(sz, state)) {
182                 break;
183             }
184         }
185         return sz;
186     }
187     void unlock(size_t size) {
188         MALLOC_ASSERT(value.load(std::memory_order_relaxed) <= MAX_LOCKED_VAL, "The lock is not locked");
189         MALLOC_ASSERT(size > MAX_LOCKED_VAL, ASSERT_TEXT);
190         value.store(size, std::memory_order_release);
191     }
192     bool isLastRegionBlock() const { return value.load(std::memory_order_relaxed) == LAST_REGION_BLOCK; }
193     friend void Backend::IndexedBins::verify();
194 };
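// Encoding used by GuardedSize: values not greater than MAX_LOCKED_VAL denote a
// locked or coalescing state, LAST_REGION_BLOCK marks the terminal block of a
// region, and any larger value is the real size of a free block; tryLock() spins
// until it observes a size and then CASes the requested lock state in.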
195 
196 struct MemRegion {
197     MemRegion *next,      // keep all regions of a pool in a doubly-linked list, so that
198               *prev;      // they can all be released on pool destruction and individual
199                           // regions can be released as well.
200     size_t     allocSz,   // size obtained from the pool callback
201                blockSz;   // initial and maximal inner block size
202     MemRegionType type;
203 };
204 
205 // this data must stay unmodified while the block is in use, so keep it separate
206 class BlockMutexes {
207 protected:
208     GuardedSize myL,   // lock for me
209                 leftL; // lock for left neighbor
210 };
211 
212 class FreeBlock : BlockMutexes {
213 public:
214     static const size_t minBlockSize;
215     friend void Backend::IndexedBins::verify();
216 
217     FreeBlock    *prev,       // doubly-linked list within a bin
218                  *next,
219                  *nextToFree; // used to form a queue during coalescing
220     // valid only while the block is being processed, i.e. while it is neither
221     size_t        sizeTmp;    // free nor in use outside of the backend
222     int           myBin;      // bin that owns the block
223     bool          slabAligned;
224     bool          blockInBin; // this block is already in myBin
225 
226     FreeBlock *rightNeig(size_t sz) const {
227         MALLOC_ASSERT(sz, ASSERT_TEXT);
228         return (FreeBlock*)((uintptr_t)this+sz);
229     }
230     FreeBlock *leftNeig(size_t sz) const {
231         MALLOC_ASSERT(sz, ASSERT_TEXT);
232         return (FreeBlock*)((uintptr_t)this - sz);
233     }
234 
235     void initHeader() { myL.initLocked(); leftL.initLocked(); }
236     void setMeFree(size_t size) { myL.unlock(size); }
237     size_t trySetMeUsed(GuardedSize::State s) { return myL.tryLock(s); }
238     bool isLastRegionBlock() const { return myL.isLastRegionBlock(); }
239 
240     void setLeftFree(size_t sz) { leftL.unlock(sz); }
241     size_t trySetLeftUsed(GuardedSize::State s) { return leftL.tryLock(s); }
242 
243     size_t tryLockBlock() {
244         size_t rSz, sz = trySetMeUsed(GuardedSize::LOCKED);
245 
246         if (sz <= GuardedSize::MAX_LOCKED_VAL)
247             return false;
248         rSz = rightNeig(sz)->trySetLeftUsed(GuardedSize::LOCKED);
249         if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
250             setMeFree(sz);
251             return false;
252         }
253         MALLOC_ASSERT(rSz == sz, ASSERT_TEXT);
254         return sz;
255     }
256     void markCoalescing(size_t blockSz) {
257         myL.makeCoalscing();
258         rightNeig(blockSz)->leftL.makeCoalscing();
259         sizeTmp = blockSz;
260         nextToFree = nullptr;
261     }
262     void markUsed() {
263         myL.initLocked();
264         rightNeig(sizeTmp)->leftL.initLocked();
265         nextToFree = nullptr;
266     }
267     static void markBlocks(FreeBlock *fBlock, int num, size_t size) {
268         for (int i=1; i<num; i++) {
269             fBlock = (FreeBlock*)((uintptr_t)fBlock + size);
270             fBlock->initHeader();
271         }
272     }
273 };
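// Locking protocol for free blocks: the size of a free block is kept both in its
// own GuardedSize (myL) and in the leftL of its right neighbor, similar to
// boundary tags. tryLockBlock() succeeds only when both copies can be locked and
// agree on the size; otherwise the partially taken lock is rolled back.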
274 
275 // The last block in any region. Its "size" field is GuardedSize::LAST_REGION_BLOCK.
276 // Blocks of this kind are used to find the region header
277 // and thus make it possible to return the region back to the OS.
278 struct LastFreeBlock : public FreeBlock {
279     MemRegion *memRegion;
280 };
281 
282 const size_t FreeBlock::minBlockSize = sizeof(FreeBlock);
283 
284 inline bool BackendSync::waitTillBlockReleased(intptr_t startModifiedCnt)
285 {
286     AtomicBackoff backoff;
287 #if __TBB_MALLOC_BACKEND_STAT
288     class ITT_Guard {
289         void *ptr;
290     public:
291         ITT_Guard(void *p) : ptr(p) {
292             MALLOC_ITT_SYNC_PREPARE(ptr);
293         }
294         ~ITT_Guard() {
295             MALLOC_ITT_SYNC_ACQUIRED(ptr);
296         }
297     };
298     ITT_Guard ittGuard(&inFlyBlocks);
299 #endif
300     for (intptr_t myBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire),
301              myCoalescQInFlyBlocks = backend->blocksInCoalescing(); ; backoff.pause()) {
302         MALLOC_ASSERT(myBinsInFlyBlocks>=0 && myCoalescQInFlyBlocks>=0, nullptr);
303         intptr_t currBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire),
304             currCoalescQInFlyBlocks = backend->blocksInCoalescing();
305         WhiteboxTestingYield();
306         // Stop waiting iff:
307 
308         // 1) blocks were removed from processing, not added
309         if (myBinsInFlyBlocks > currBinsInFlyBlocks
310         // 2) a block was released from the delayed coalescing queue
311             || myCoalescQInFlyBlocks > currCoalescQInFlyBlocks)
312             break;
313         // 3) if there are blocks in coalescing, and no progress in its processing,
314         // try to scan coalescing queue and stop waiting, if changes were made
315         // (if there are no changes and in-fly blocks exist, we continue
316         //  waiting to not increase load on coalescQ)
317         if (currCoalescQInFlyBlocks > 0 && backend->scanCoalescQ(/*forceCoalescQDrop=*/false))
318             break;
319         // 4) when there are no blocks
320         if (!currBinsInFlyBlocks && !currCoalescQInFlyBlocks)
321             // re-scanning makes sense only if the bins were modified since the last scan
322             return startModifiedCnt != getNumOfMods();
323         myBinsInFlyBlocks = currBinsInFlyBlocks;
324         myCoalescQInFlyBlocks = currCoalescQInFlyBlocks;
325     }
326     return true;
327 }
328 
329 void CoalRequestQ::putBlock(FreeBlock *fBlock)
330 {
331     MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, ASSERT_TEXT);
332     fBlock->markUsed();
333     // the block is in the queue, do not forget that it's here
334     inFlyBlocks++;
335 
336     FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);
337     for (;;) {
338         fBlock->nextToFree = myBlToFree;
339         if (blocksToFree.compare_exchange_strong(myBlToFree, fBlock)) {
340             return;
341         }
342     }
343 }
344 
345 FreeBlock *CoalRequestQ::getAll()
346 {
347     for (;;) {
348         FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);
349 
350         if (!myBlToFree) {
351             return nullptr;
352         } else {
353             if (blocksToFree.compare_exchange_strong(myBlToFree, nullptr)) {
354                 return myBlToFree;
355             } else {
356                 continue;
357             }
358         }
359     }
360 }
361 
362 inline void CoalRequestQ::blockWasProcessed()
363 {
364     bkndSync->binsModified();
365     int prev = inFlyBlocks.fetch_sub(1);
366     tbb::detail::suppress_unused_warning(prev);
367     MALLOC_ASSERT(prev > 0, ASSERT_TEXT);
368 }
369 
370 // Try to get a block from a bin.
371 // If the remaining free space would stay in the same bin,
372 //     split the block without removing it.
373 // If the free space should go to other bin(s), remove the block.
374 // alignedBin is true if all blocks in the bin have a slab-aligned right side.
375 FreeBlock *Backend::IndexedBins::getFromBin(int binIdx, BackendSync *sync, size_t size,
376         bool needAlignedRes, bool alignedBin,  bool wait, int *binLocked)
377 {
378     Bin *b = &freeBins[binIdx];
379 try_next:
380     FreeBlock *fBlock = nullptr;
381     if (!b->empty()) {
382         bool locked = false;
383         MallocMutex::scoped_lock scopedLock(b->tLock, wait, &locked);
384 
385         if (!locked) {
386             if (binLocked) (*binLocked)++;
387             return nullptr;
388         }
389 
390         for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; curr = curr->next) {
391             size_t szBlock = curr->tryLockBlock();
392             if (!szBlock) {
393                 // the block is locked; retake the bin lock, as there is no good place
394                 // to spin while the block is being coalesced
395                 goto try_next;
396             }
397 
398             // GENERAL CASE
399             if (alignedBin || !needAlignedRes) {
400                 size_t splitSz = szBlock - size;
401                 // If we get a block as a split result, it must have room for control structures.
402                 if (szBlock >= size && (splitSz >= FreeBlock::minBlockSize || !splitSz))
403                     fBlock = curr;
404             } else {
405                 // SPECIAL CASE: to get an aligned block from an unaligned bin we have to cut out the middle
406                 // of a block and return the remaining left and right parts. Possible only in the fixed pool
407                 // scenario; the assert for this is inside the splitBlock() function.
408 
409                 void *newB = alignUp(curr, slabSize);
410                 uintptr_t rightNew = (uintptr_t)newB + size;
411                 uintptr_t rightCurr = (uintptr_t)curr + szBlock;
412                 // Check that the block size is sufficient,
413                 // and that the left and right split results are either big enough or non-existent
414                 if (rightNew <= rightCurr
415                         && (newB == curr || ((uintptr_t)newB - (uintptr_t)curr) >= FreeBlock::minBlockSize)
416                         && (rightNew == rightCurr || (rightCurr - rightNew) >= FreeBlock::minBlockSize))
417                     fBlock = curr;
418             }
419 
420             if (fBlock) {
421                 // blockConsumed() must be called before the result of removing the block from a bin becomes visible externally.
422                 sync->blockConsumed();
423                 // TODO: think about cases when block stays in the same bin
424                 b->removeBlock(fBlock);
425                 if (freeBins[binIdx].empty())
426                     bitMask.set(binIdx, false);
427                 fBlock->sizeTmp = szBlock;
428                 break;
429             } else { // the block size is not suitable, search for the next block in the bin
430                 curr->setMeFree(szBlock);
431                 curr->rightNeig(szBlock)->setLeftFree(szBlock);
432             }
433         }
434     }
435     return fBlock;
436 }
437 
438 bool Backend::IndexedBins::tryReleaseRegions(int binIdx, Backend *backend)
439 {
440     Bin *b = &freeBins[binIdx];
441     FreeBlock *fBlockList = nullptr;
442 
443     // take all blocks from the bin and re-do coalescing on them
444     // to release single-block regions
445 try_next:
446     if (!b->empty()) {
447         MallocMutex::scoped_lock binLock(b->tLock);
448         for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; ) {
449             size_t szBlock = curr->tryLockBlock();
450             if (!szBlock)
451                 goto try_next;
452 
453             FreeBlock *next = curr->next;
454 
455             b->removeBlock(curr);
456             curr->sizeTmp = szBlock;
457             curr->nextToFree = fBlockList;
458             fBlockList = curr;
459             curr = next;
460         }
461     }
462     return backend->coalescAndPutList(fBlockList, /*forceCoalescQDrop=*/true,
463                                       /*reportBlocksProcessed=*/false);
464 }
465 
466 void Backend::Bin::removeBlock(FreeBlock *fBlock)
467 {
468     MALLOC_ASSERT(fBlock->next||fBlock->prev||fBlock== head.load(std::memory_order_relaxed),
469                   "Detected that a block is not in the bin.");
470     if (head.load(std::memory_order_relaxed) == fBlock)
471         head.store(fBlock->next, std::memory_order_relaxed);
472     if (tail == fBlock)
473         tail = fBlock->prev;
474     if (fBlock->prev)
475         fBlock->prev->next = fBlock->next;
476     if (fBlock->next)
477         fBlock->next->prev = fBlock->prev;
478 }
479 
480 void Backend::IndexedBins::addBlock(int binIdx, FreeBlock *fBlock, size_t /* blockSz */, bool addToTail)
481 {
482     Bin *b = &freeBins[binIdx];
483     fBlock->myBin = binIdx;
484     fBlock->next = fBlock->prev = nullptr;
485     {
486         MallocMutex::scoped_lock scopedLock(b->tLock);
487         if (addToTail) {
488             fBlock->prev = b->tail;
489             b->tail = fBlock;
490             if (fBlock->prev)
491                 fBlock->prev->next = fBlock;
492             if (!b->head.load(std::memory_order_relaxed))
493                 b->head.store(fBlock, std::memory_order_relaxed);
494         } else {
495             fBlock->next = b->head.load(std::memory_order_relaxed);
496             b->head.store(fBlock, std::memory_order_relaxed);
497             if (fBlock->next)
498                 fBlock->next->prev = fBlock;
499             if (!b->tail)
500                 b->tail = fBlock;
501         }
502     }
503     bitMask.set(binIdx, true);
504 }
505 
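// Non-blocking variant of addBlock(): gives up and returns false
// if the bin lock cannot be acquired immediately.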
506 bool Backend::IndexedBins::tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail)
507 {
508     bool locked = false;
509     Bin *b = &freeBins[binIdx];
510     fBlock->myBin = binIdx;
511     if (addToTail) {
512         fBlock->next = nullptr;
513         {
514             MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
515             if (!locked)
516                 return false;
517             fBlock->prev = b->tail;
518             b->tail = fBlock;
519             if (fBlock->prev)
520                 fBlock->prev->next = fBlock;
521             if (!b->head.load(std::memory_order_relaxed))
522                 b->head.store(fBlock, std::memory_order_relaxed);
523         }
524     } else {
525         fBlock->prev = nullptr;
526         {
527             MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
528             if (!locked)
529                 return false;
530             fBlock->next = b->head.load(std::memory_order_relaxed);
531             b->head.store(fBlock, std::memory_order_relaxed);
532             if (fBlock->next)
533                 fBlock->next->prev = fBlock;
534             if (!b->tail)
535                 b->tail = fBlock;
536         }
537     }
538     bitMask.set(binIdx, true);
539     return true;
540 }
541 
542 void Backend::IndexedBins::reset()
543 {
544     for (unsigned i=0; i<Backend::freeBinsNum; i++)
545         freeBins[i].reset();
546     bitMask.reset();
547 }
548 
549 void Backend::IndexedBins::lockRemoveBlock(int binIdx, FreeBlock *fBlock)
550 {
551     MallocMutex::scoped_lock scopedLock(freeBins[binIdx].tLock);
552     freeBins[binIdx].removeBlock(fBlock);
553     if (freeBins[binIdx].empty())
554         bitMask.set(binIdx, false);
555 }
556 
557 bool ExtMemoryPool::regionsAreReleaseable() const
558 {
559     return !keepAllMemory && !delayRegsReleasing;
560 }
561 
562 FreeBlock *Backend::splitBlock(FreeBlock *fBlock, int num, size_t size, bool blockIsAligned, bool needAlignedBlock)
563 {
564     const size_t totalSize = num * size;
565 
566     // SPECIAL CASE: for an unaligned block we have to cut out the middle of the block
567     // and return the remaining left and right parts. Possible only in a fixed pool scenario.
568     if (needAlignedBlock && !blockIsAligned) {
569         MALLOC_ASSERT(extMemPool->fixedPool,
570                 "Aligned block request from unaligned bin possible only in fixed pool scenario.");
571 
572         // Space to use is in the middle
573         FreeBlock *newBlock = alignUp(fBlock, slabSize);
574         FreeBlock *rightPart = (FreeBlock*)((uintptr_t)newBlock + totalSize);
575         uintptr_t fBlockEnd = (uintptr_t)fBlock + fBlock->sizeTmp;
576 
577         // Return free right part
578         if ((uintptr_t)rightPart != fBlockEnd) {
579             rightPart->initHeader();  // to prevent coalescing rightPart with fBlock
580             size_t rightSize = fBlockEnd - (uintptr_t)rightPart;
581             coalescAndPut(rightPart, rightSize, toAlignedBin(rightPart, rightSize));
582         }
583         // And free left part
584         if (newBlock != fBlock) {
585             newBlock->initHeader(); // to prevent coalescing fBlock with newBlock
586             size_t leftSize = (uintptr_t)newBlock - (uintptr_t)fBlock;
587             coalescAndPut(fBlock, leftSize, toAlignedBin(fBlock, leftSize));
588         }
589         fBlock = newBlock;
590     } else if (size_t splitSize = fBlock->sizeTmp - totalSize) { // need to split the block
591         // GENERAL CASE, cut the left or right part of the block
592         FreeBlock *splitBlock = nullptr;
593         if (needAlignedBlock) {
594             // For slab-aligned blocks cut the right side of the block
595             // and return it to the requester; the original block goes back to the backend
596             splitBlock = fBlock;
597             fBlock = (FreeBlock*)((uintptr_t)splitBlock + splitSize);
598             fBlock->initHeader();
599         } else {
600             // For large object blocks cut the original block and put the free right part back to the backend
601             splitBlock = (FreeBlock*)((uintptr_t)fBlock + totalSize);
602             splitBlock->initHeader();
603         }
604         // Mark the free block like its parent only when the requested type (needAlignedBlock)
605         // matches the type of the block returned from the bins/OS (blockIsAligned); otherwise recompute it (hence the XOR)
606         bool markAligned = (blockIsAligned ^ needAlignedBlock) ? toAlignedBin(splitBlock, splitSize) : blockIsAligned;
607         coalescAndPut(splitBlock, splitSize, markAligned);
608     }
609     MALLOC_ASSERT(!needAlignedBlock || isAligned(fBlock, slabSize), "Expect to get aligned block, if one was requested.");
610     FreeBlock::markBlocks(fBlock, num, size);
611     return fBlock;
612 }
613 
614 size_t Backend::getMaxBinnedSize() const
615 {
616     return hugePages.isEnabled && !inUserPool() ?
617         maxBinned_HugePage : maxBinned_SmallPage;
618 }
619 
620 inline bool Backend::MaxRequestComparator::operator()(size_t oldMaxReq, size_t requestSize) const
621 {
622     return requestSize > oldMaxReq && requestSize < backend->getMaxBinnedSize();
623 }
624 
625 // last chance to get memory
626 FreeBlock *Backend::releaseMemInCaches(intptr_t startModifiedCnt,
627                                     int *lockedBinsThreshold, int numOfLockedBins)
628 {
629     // something released from caches
630     if (extMemPool->hardCachesCleanup()
631         // ...or we can use blocks that are being processed now
632         || bkndSync.waitTillBlockReleased(startModifiedCnt))
633         return (FreeBlock*)VALID_BLOCK_IN_BIN;
634     // OS can't give us more memory, but we have some in locked bins
635     if (*lockedBinsThreshold && numOfLockedBins) {
636         *lockedBinsThreshold = 0;
637         return (FreeBlock*)VALID_BLOCK_IN_BIN;
638     }
639     return nullptr; // nothing found, give up
640 }
641 
642 FreeBlock *Backend::askMemFromOS(size_t blockSize, intptr_t startModifiedCnt,
643                                  int *lockedBinsThreshold, int numOfLockedBins,
644                                  bool *splittableRet, bool needSlabRegion)
645 {
646     FreeBlock *block;
647     // The block sizes can be divided into 3 groups:
648     //   1. "quite small": a popular object size, or we are in bootstrap or
649     //      something similar; request several regions.
650     //   2. "quite large": we want to have several such blocks in a region,
651     //      but do not want several pre-allocated regions.
652     //   3. "huge": exact fit, we allocate only one block and do not allow
653     //      any other allocations to be placed in the region.
654     // Dividing the block sizes into these groups, we try to balance between
655     // too small regions (which lead to fragmentation) and too large ones (which
656     // lead to excessive address space consumption). If a region is "too
657     // large", allocate only one, to prevent fragmentation. This supposedly
658     // doesn't hurt performance, because the object requested by the user is large.
659     // Bounds for the groups are:
660     const size_t maxBinned = getMaxBinnedSize();
661     const size_t quiteSmall = maxBinned / 8;
662     const size_t quiteLarge = maxBinned;
663 
664     if (blockSize >= quiteLarge) {
665         // Do not interact with other threads via semaphores: for an exact fit
666         // we cannot share regions with them, so memory is requested individually.
667         block = addNewRegion(blockSize, MEMREG_ONE_BLOCK, /*addToBin=*/false);
668         if (!block)
669             return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
670         *splittableRet = false;
671     } else {
672         const size_t regSz_sizeBased = alignUp(4*maxRequestedSize, 1024*1024);
673         // Another thread is modifying the backend while we cannot get a block.
674         // Wait until it leaves and re-do the scan
675         // before trying other ways to extend the backend.
676         if (bkndSync.waitTillBlockReleased(startModifiedCnt)
677             // the semaphore protects against concurrently adding more memory from the OS
678             || memExtendingSema.wait())
679             return (FreeBlock*)VALID_BLOCK_IN_BIN;
680 
681         if (startModifiedCnt != bkndSync.getNumOfMods()) {
682             memExtendingSema.signal();
683             return (FreeBlock*)VALID_BLOCK_IN_BIN;
684         }
685 
686         if (blockSize < quiteSmall) {
687             // For this size of blocks, add NUM_OF_REG "advance" regions to the bin,
688             // and return one as a result.
689             // TODO: add to bin first, because other threads can use them right away.
690             // This must be done carefully, because blocks in bins can be released
691             // in releaseCachesToLimit().
692             const unsigned NUM_OF_REG = 3;
693             MemRegionType regType = needSlabRegion ? MEMREG_SLAB_BLOCKS : MEMREG_LARGE_BLOCKS;
694             block = addNewRegion(regSz_sizeBased, regType, /*addToBin=*/false);
695             if (block)
696                 for (unsigned idx=0; idx<NUM_OF_REG; idx++)
697                     if (! addNewRegion(regSz_sizeBased, regType, /*addToBin=*/true))
698                         break;
699         } else {
700             block = addNewRegion(regSz_sizeBased, MEMREG_LARGE_BLOCKS, /*addToBin=*/false);
701         }
702         memExtendingSema.signal();
703 
704         // no regions found, try to clean cache
705         if (!block || block == (FreeBlock*)VALID_BLOCK_IN_BIN)
706             return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
707         // Since a region can hold more than one block it can be split.
708         *splittableRet = true;
709     }
710     // after asking the OS for memory, release caches if we are above the memory limits
711     releaseCachesToLimit();
712 
713     return block;
714 }
715 
716 void Backend::releaseCachesToLimit()
717 {
718     if (!memSoftLimit.load(std::memory_order_relaxed)
719             || totalMemSize.load(std::memory_order_relaxed) <= memSoftLimit.load(std::memory_order_relaxed)) {
720         return;
721     }
722     size_t locTotalMemSize, locMemSoftLimit;
723 
724     scanCoalescQ(/*forceCoalescQDrop=*/false);
725     if (extMemPool->softCachesCleanup() &&
726         (locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
727         (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
728         return;
729     // clean the global large-object cache; if this is not enough, clean local caches.
730     // Do this in several tries, because backend fragmentation can prevent
731     // a region from being released
732     for (int cleanLocal = 0; cleanLocal<2; cleanLocal++)
733         while (cleanLocal ?
734                  extMemPool->allLocalCaches.cleanup(/*cleanOnlyUnused=*/true) :
735                  extMemPool->loc.decreasingCleanup())
736             if ((locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
737                 (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
738                 return;
739     // last chance to match memSoftLimit
740     extMemPool->hardCachesCleanup();
741 }
742 
743 int Backend::IndexedBins::getMinNonemptyBin(unsigned startBin) const
744 {
745     int p = bitMask.getMinTrue(startBin);
746     return p == -1 ? Backend::freeBinsNum : p;
747 }
748 
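// Walk the non-empty bins starting from nativeBin and return the first suitable
// block; bins whose lock is busy are skipped and counted via numOfLockedBins.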
749 FreeBlock *Backend::IndexedBins::findBlock(int nativeBin, BackendSync *sync, size_t size,
750         bool needAlignedBlock, bool alignedBin, int *numOfLockedBins)
751 {
752     for (int i=getMinNonemptyBin(nativeBin); i<(int)freeBinsNum; i=getMinNonemptyBin(i+1))
753         if (FreeBlock *block = getFromBin(i, sync, size, needAlignedBlock, alignedBin, /*wait=*/false, numOfLockedBins))
754             return block;
755 
756     return nullptr;
757 }
758 
759 void Backend::requestBootstrapMem()
760 {
761     if (bootsrapMemDone == bootsrapMemStatus.load(std::memory_order_acquire))
762         return;
763     MallocMutex::scoped_lock lock( bootsrapMemStatusMutex );
764     if (bootsrapMemDone == bootsrapMemStatus)
765         return;
766     MALLOC_ASSERT(bootsrapMemNotDone == bootsrapMemStatus, ASSERT_TEXT);
767     bootsrapMemStatus = bootsrapMemInitializing;
768     // request a rather big region in advance during bootstrap;
769     // ok to get nullptr here, as we later re-do the request with a more modest size
770     addNewRegion(2*1024*1024, MEMREG_SLAB_BLOCKS, /*addToBin=*/true);
771     bootsrapMemStatus = bootsrapMemDone;
772 }
773 
774 // try to allocate a block of num*size bytes from the available bins
775 // needAlignedBlock is true if the result must be slab-aligned
776 FreeBlock *Backend::genericGetBlock(int num, size_t size, bool needAlignedBlock)
777 {
778     FreeBlock *block = nullptr;
779     const size_t totalReqSize = num*size;
780     // no splitting after requesting a new region: ask for the exact size
781     const int nativeBin = sizeToBin(totalReqSize);
782 
783     requestBootstrapMem();
784     // If we found 2 or fewer locked bins, it's time to ask the OS for more memory.
785     // But nothing can be asked from a fixed pool. And for a quite large block we
786     // prefer to wait rather than ask for more memory.
787     int lockedBinsThreshold = extMemPool->fixedPool || size>=maxBinned_SmallPage? 0 : 2;
788 
789     // Track the maximal requested size, limited by getMaxBinnedSize()
790     AtomicUpdate(maxRequestedSize, totalReqSize, MaxRequestComparator(this));
791     scanCoalescQ(/*forceCoalescQDrop=*/false);
792 
793     bool splittable = true;
794     for (;;) {
795         const intptr_t startModifiedCnt = bkndSync.getNumOfMods();
796         int numOfLockedBins;
797 
798         do {
799             numOfLockedBins = 0;
800             if (needAlignedBlock) {
801                 block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
802                                                         /*alignedBin=*/true, &numOfLockedBins);
803                 if (!block && extMemPool->fixedPool)
804                     block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
805                                                         /*alignedBin=*/false, &numOfLockedBins);
806             } else {
807                 block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
808                                                         /*alignedBin=*/false, &numOfLockedBins);
809                 if (!block && extMemPool->fixedPool)
810                     block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
811                                                         /*alignedBin=*/true, &numOfLockedBins);
812             }
813         } while (!block && numOfLockedBins>lockedBinsThreshold);
814 
815         if (block)
816             break;
817 
818         bool retScanCoalescQ = scanCoalescQ(/*forceCoalescQDrop=*/true);
819         bool retSoftCachesCleanup = extMemPool->softCachesCleanup();
820         if (!(retScanCoalescQ || retSoftCachesCleanup)) {
821             // the bins were not updated,
822             // so the only remaining possibility is to ask for more memory
823             block = askMemFromOS(totalReqSize, startModifiedCnt, &lockedBinsThreshold,
824                         numOfLockedBins, &splittable, needAlignedBlock);
825             if (!block)
826                 return nullptr;
827             if (block != (FreeBlock*)VALID_BLOCK_IN_BIN) {
828                 // size can be increased in askMemFromOS, that's why >=
829                 MALLOC_ASSERT(block->sizeTmp >= size, ASSERT_TEXT);
830                 break;
831             }
832             // valid block somewhere in bins, let's find it
833             block = nullptr;
834         }
835     }
836     MALLOC_ASSERT(block, ASSERT_TEXT);
837     if (splittable) {
838         // At this point we have to be sure that slabAligned attribute describes the right block state
839         block = splitBlock(block, num, size, block->slabAligned, needAlignedBlock);
840     }
841     // matches blockConsumed() from startUseBlock()
842     bkndSync.blockReleased();
843 
844     return block;
845 }
846 
847 LargeMemoryBlock *Backend::getLargeBlock(size_t size)
848 {
849     LargeMemoryBlock *lmb =
850         (LargeMemoryBlock*)genericGetBlock(1, size, /*needAlignedRes=*/false);
851     if (lmb) {
852         lmb->unalignedSize = size;
853         if (extMemPool->userPool())
854             extMemPool->lmbList.add(lmb);
855     }
856     return lmb;
857 }
858 
859 BlockI *Backend::getSlabBlock(int num) {
860     BlockI *b = (BlockI*)genericGetBlock(num, slabSize, /*slabAligned=*/true);
861     MALLOC_ASSERT(isAligned(b, slabSize), ASSERT_TEXT);
862     return b;
863 }
864 
865 void Backend::putSlabBlock(BlockI *block) {
866     genericPutBlock((FreeBlock *)block, slabSize, /*slabAligned=*/true);
867 }
868 
869 void *Backend::getBackRefSpace(size_t size, bool *rawMemUsed)
870 {
871     // This block is released only at shutdown, so if it were received from the
872     // backend it could prevent an entire region from being released;
873     // therefore prefer using getRawMemory.
874     if (void *ret = getRawMemory(size, REGULAR)) {
875         *rawMemUsed = true;
876         return ret;
877     }
878     void *ret = genericGetBlock(1, size, /*needAlignedRes=*/false);
879     if (ret) *rawMemUsed = false;
880     return ret;
881 }
882 
883 void Backend::putBackRefSpace(void *b, size_t size, bool rawMemUsed)
884 {
885     if (rawMemUsed)
886         freeRawMemory(b, size);
887     // ignore non-raw memory, as it is released when its region is released
888 }
889 
890 void Backend::removeBlockFromBin(FreeBlock *fBlock)
891 {
892     if (fBlock->myBin != Backend::NO_BIN) {
893         if (fBlock->slabAligned)
894             freeSlabAlignedBins.lockRemoveBlock(fBlock->myBin, fBlock);
895         else
896             freeLargeBlockBins.lockRemoveBlock(fBlock->myBin, fBlock);
897     }
898 }
899 
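// Return a block to the backend: coalesce it with free neighbors and put the result
// into a bin; the blockConsumed()/blockReleased() pair makes the block visible as
// "in-fly" to waitTillBlockReleased() while it is being processed.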
900 void Backend::genericPutBlock(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
901 {
902     bkndSync.blockConsumed();
903     coalescAndPut(fBlock, blockSz, slabAligned);
904     bkndSync.blockReleased();
905 }
906 
907 void AllLargeBlocksList::add(LargeMemoryBlock *lmb)
908 {
909     MallocMutex::scoped_lock scoped_cs(largeObjLock);
910     lmb->gPrev = nullptr;
911     lmb->gNext = loHead;
912     if (lmb->gNext)
913         lmb->gNext->gPrev = lmb;
914     loHead = lmb;
915 }
916 
917 void AllLargeBlocksList::remove(LargeMemoryBlock *lmb)
918 {
919     MallocMutex::scoped_lock scoped_cs(largeObjLock);
920     if (loHead == lmb)
921         loHead = lmb->gNext;
922     if (lmb->gNext)
923         lmb->gNext->gPrev = lmb->gPrev;
924     if (lmb->gPrev)
925         lmb->gPrev->gNext = lmb->gNext;
926 }
927 
928 void Backend::putLargeBlock(LargeMemoryBlock *lmb)
929 {
930     if (extMemPool->userPool())
931         extMemPool->lmbList.remove(lmb);
932     genericPutBlock((FreeBlock *)lmb, lmb->unalignedSize, false);
933 }
934 
935 void Backend::returnLargeObject(LargeMemoryBlock *lmb)
936 {
937     removeBackRef(lmb->backRefIdx);
938     putLargeBlock(lmb);
939     STAT_increment(getThreadId(), ThreadCommonCounters, freeLargeObj);
940 }
941 
942 #if BACKEND_HAS_MREMAP
943 void *Backend::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
944 {
945     // no remap for user pools, nor for objects small enough to live in bins
946     if (inUserPool() || min(oldSize, newSize)<maxBinned_SmallPage
947         // during remap we cannot guarantee alignment stricter than the current one
948         // or stricter than page alignment
949         || !isAligned(ptr, alignment) || alignment>extMemPool->granularity)
950         return nullptr;
951     const LargeMemoryBlock* lmbOld = ((LargeObjectHdr *)ptr - 1)->memoryBlock;
952     const size_t oldUnalignedSize = lmbOld->unalignedSize;
953     FreeBlock *oldFBlock = (FreeBlock *)lmbOld;
954     FreeBlock *right = oldFBlock->rightNeig(oldUnalignedSize);
955     // in every region only one block can have LAST_REGION_BLOCK on its right,
956     // so no synchronization is needed
957     if (!right->isLastRegionBlock())
958         return nullptr;
959 
960     MemRegion *oldRegion = static_cast<LastFreeBlock*>(right)->memRegion;
961     MALLOC_ASSERT( oldRegion < ptr, ASSERT_TEXT );
962     const size_t oldRegionSize = oldRegion->allocSz;
963     if (oldRegion->type != MEMREG_ONE_BLOCK)
964         return nullptr;  // we are not single in the region
965     const size_t userOffset = (uintptr_t)ptr - (uintptr_t)oldRegion;
966     const size_t alignedSize = LargeObjectCache::alignToBin(newSize + userOffset);
967     const size_t requestSize =
968         alignUp(sizeof(MemRegion) + alignedSize + sizeof(LastFreeBlock), extMemPool->granularity);
969     if (requestSize < alignedSize) // did the size wrap around?
970         return nullptr;
971     regionList.remove(oldRegion);
972 
973     // The deallocation should be registered in address range before mremap to
974     // prevent a race condition with allocation on another thread.
975     // (OS can reuse the memory and registerAlloc will be missed on another thread)
976     usedAddrRange.registerFree((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize);
977 
978     void *ret = mremap(oldRegion, oldRegion->allocSz, requestSize, MREMAP_MAYMOVE);
979     if (MAP_FAILED == ret) { // can't remap, revert and leave
980         regionList.add(oldRegion);
981         usedAddrRange.registerAlloc((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize);
982         return nullptr;
983     }
984     MemRegion *region = (MemRegion*)ret;
985     MALLOC_ASSERT(region->type == MEMREG_ONE_BLOCK, ASSERT_TEXT);
986     region->allocSz = requestSize;
987     region->blockSz = alignedSize;
988 
989     FreeBlock *fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion),
990                                              largeObjectAlignment);
991 
992     regionList.add(region);
993     startUseBlock(region, fBlock, /*addToBin=*/false);
994     MALLOC_ASSERT(fBlock->sizeTmp == region->blockSz, ASSERT_TEXT);
995     // matches blockConsumed() in startUseBlock().
996     // TODO: get rid of useless pair blockConsumed()/blockReleased()
997     bkndSync.blockReleased();
998 
999     // object must start at same offset from region's start
1000     void *object = (void*)((uintptr_t)region + userOffset);
1001     MALLOC_ASSERT(isAligned(object, alignment), ASSERT_TEXT);
1002     LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
1003     setBackRef(header->backRefIdx, header);
1004 
1005     LargeMemoryBlock *lmb = (LargeMemoryBlock*)fBlock;
1006     lmb->unalignedSize = region->blockSz;
1007     lmb->objectSize = newSize;
1008     lmb->backRefIdx = header->backRefIdx;
1009     header->memoryBlock = lmb;
1010     MALLOC_ASSERT((uintptr_t)lmb + lmb->unalignedSize >=
1011                   (uintptr_t)object + lmb->objectSize, "An object must fit to the block.");
1012 
1013     usedAddrRange.registerAlloc((uintptr_t)region, (uintptr_t)region + requestSize);
1014     totalMemSize.fetch_add(region->allocSz - oldRegionSize);
1015 
1016     return object;
1017 }
1018 #endif /* BACKEND_HAS_MREMAP */
1019 
1020 void Backend::releaseRegion(MemRegion *memRegion)
1021 {
1022     regionList.remove(memRegion);
1023     freeRawMem(memRegion, memRegion->allocSz);
1024 }
1025 
1026 // coalesce fBlock with its neighbors
1027 FreeBlock *Backend::doCoalesc(FreeBlock *fBlock, MemRegion **mRegion)
1028 {
1029     FreeBlock *resBlock = fBlock;
1030     size_t resSize = fBlock->sizeTmp;
1031     MemRegion *memRegion = nullptr;
1032 
1033     fBlock->markCoalescing(resSize);
1034     resBlock->blockInBin = false;
1035 
1036     // coalescing with left neighbor
1037     size_t leftSz = fBlock->trySetLeftUsed(GuardedSize::COAL_BLOCK);
1038     if (leftSz != GuardedSize::LOCKED) {
1039         if (leftSz == GuardedSize::COAL_BLOCK) {
1040             coalescQ.putBlock(fBlock);
1041             return nullptr;
1042         } else {
1043             FreeBlock *left = fBlock->leftNeig(leftSz);
1044             size_t lSz = left->trySetMeUsed(GuardedSize::COAL_BLOCK);
1045             if (lSz <= GuardedSize::MAX_LOCKED_VAL) {
1046                 fBlock->setLeftFree(leftSz); // rollback
1047                 coalescQ.putBlock(fBlock);
1048                 return nullptr;
1049             } else {
1050                 MALLOC_ASSERT(lSz == leftSz, "Invalid header");
1051                 left->blockInBin = true;
1052                 resBlock = left;
1053                 resSize += leftSz;
1054                 resBlock->sizeTmp = resSize;
1055             }
1056         }
1057     }
1058     // coalescing with right neighbor
1059     FreeBlock *right = fBlock->rightNeig(fBlock->sizeTmp);
1060     size_t rightSz = right->trySetMeUsed(GuardedSize::COAL_BLOCK);
1061     if (rightSz != GuardedSize::LOCKED) {
1062         // LastFreeBlock is on the right side
1063         if (GuardedSize::LAST_REGION_BLOCK == rightSz) {
1064             right->setMeFree(GuardedSize::LAST_REGION_BLOCK);
1065             memRegion = static_cast<LastFreeBlock*>(right)->memRegion;
1066         } else if (GuardedSize::COAL_BLOCK == rightSz) {
1067             if (resBlock->blockInBin) {
1068                 resBlock->blockInBin = false;
1069                 removeBlockFromBin(resBlock);
1070             }
1071             coalescQ.putBlock(resBlock);
1072             return nullptr;
1073         } else {
1074             size_t rSz = right->rightNeig(rightSz)->
1075                 trySetLeftUsed(GuardedSize::COAL_BLOCK);
1076             if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
1077                 right->setMeFree(rightSz);  // rollback
1078                 if (resBlock->blockInBin) {
1079                     resBlock->blockInBin = false;
1080                     removeBlockFromBin(resBlock);
1081                 }
1082                 coalescQ.putBlock(resBlock);
1083                 return nullptr;
1084             } else {
1085                 MALLOC_ASSERT(rSz == rightSz, "Invalid header");
1086                 removeBlockFromBin(right);
1087                 resSize += rightSz;
1088 
1089                 // Is LastFreeBlock on the right side of right?
1090                 FreeBlock *nextRight = right->rightNeig(rightSz);
1091                 size_t nextRightSz = nextRight->
1092                     trySetMeUsed(GuardedSize::COAL_BLOCK);
1093                 if (nextRightSz > GuardedSize::MAX_LOCKED_VAL) {
1094                     if (nextRightSz == GuardedSize::LAST_REGION_BLOCK)
1095                         memRegion = static_cast<LastFreeBlock*>(nextRight)->memRegion;
1096 
1097                     nextRight->setMeFree(nextRightSz);
1098                 }
1099             }
1100         }
1101     }
1102     if (memRegion) {
1103         MALLOC_ASSERT((uintptr_t)memRegion + memRegion->allocSz >=
1104                       (uintptr_t)right + sizeof(LastFreeBlock), ASSERT_TEXT);
1105         MALLOC_ASSERT((uintptr_t)memRegion < (uintptr_t)resBlock, ASSERT_TEXT);
1106         *mRegion = memRegion;
1107     } else
1108         *mRegion = nullptr;
1109     resBlock->sizeTmp = resSize;
1110     return resBlock;
1111 }
1112 
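// Coalesce every block from `list` and either return it to a bin, release its
// now-empty region, or re-queue it to coalescQ when a neighbor is busy.
// Returns whether at least one region was released.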
1113 bool Backend::coalescAndPutList(FreeBlock *list, bool forceCoalescQDrop, bool reportBlocksProcessed)
1114 {
1115     bool regionReleased = false;
1116 
1117     for (FreeBlock *helper; list;
1118          list = helper,
1119              // matches block enqueue in CoalRequestQ::putBlock()
1120              reportBlocksProcessed? coalescQ.blockWasProcessed() : (void)0) {
1121         MemRegion *memRegion;
1122         bool addToTail = false;
1123 
1124         helper = list->nextToFree;
1125         FreeBlock *toRet = doCoalesc(list, &memRegion);
1126         if (!toRet)
1127             continue;
1128 
1129         if (memRegion && memRegion->blockSz == toRet->sizeTmp
1130             && !extMemPool->fixedPool) {
1131             if (extMemPool->regionsAreReleaseable()) {
1132                 // release the region, because there are no used blocks in it
1133                 if (toRet->blockInBin)
1134                     removeBlockFromBin(toRet);
1135                 releaseRegion(memRegion);
1136                 regionReleased = true;
1137                 continue;
1138             } else // add the block from the empty region to the end of the bin,
1139                 addToTail = true; // preserving it for an exact fit
1140         }
1141         size_t currSz = toRet->sizeTmp;
1142         int bin = sizeToBin(currSz);
1143         bool toAligned = extMemPool->fixedPool ? toAlignedBin(toRet, currSz) : toRet->slabAligned;
1144         bool needAddToBin = true;
1145 
1146         if (toRet->blockInBin) {
1147             // Does it stay in same bin?
1148             if (toRet->myBin == bin && toRet->slabAligned == toAligned)
1149                 needAddToBin = false;
1150             else {
1151                 toRet->blockInBin = false;
1152                 removeBlockFromBin(toRet);
1153             }
1154         }
1155 
1156         // It does not stay in the same bin, or it is bin-less; add it
1157         if (needAddToBin) {
1158             toRet->prev = toRet->next = toRet->nextToFree = nullptr;
1159             toRet->myBin = NO_BIN;
1160             toRet->slabAligned = toAligned;
1161 
1162             // If the block is too small to fit in any bin, keep it bin-less.
1163             // It's not a leak because the block can later be coalesced.
1164             if (currSz >= minBinnedSize) {
1165                 toRet->sizeTmp = currSz;
1166                 IndexedBins *target = toRet->slabAligned ? &freeSlabAlignedBins : &freeLargeBlockBins;
1167                 if (forceCoalescQDrop) {
1168                     target->addBlock(bin, toRet, toRet->sizeTmp, addToTail);
1169                 } else if (!target->tryAddBlock(bin, toRet, addToTail)) {
1170                     coalescQ.putBlock(toRet);
1171                     continue;
1172                 }
1173             }
1174             toRet->sizeTmp = 0;
1175         }
1176         // Mark the (possibly coalesced) block as free.
1177         // Adding it to a bin must be done before this point,
1178         // because once the block is free it can be coalesced again, and
1179         // using its pointer becomes unsafe.
1180         // Remember that coalescing is not done under any global lock.
1181         toRet->setMeFree(currSz);
1182         toRet->rightNeig(currSz)->setLeftFree(currSz);
1183     }
1184     return regionReleased;
1185 }
1186 
1187 // Coalesce fBlock and add it back to a bin;
1188 // processing delayed coalescing requests.
1189 void Backend::coalescAndPut(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
1190 {
1191     fBlock->sizeTmp = blockSz;
1192     fBlock->nextToFree = nullptr;
1193     fBlock->slabAligned = slabAligned;
1194 
1195     coalescAndPutList(fBlock, /*forceCoalescQDrop=*/false, /*reportBlocksProcessed=*/false);
1196 }
1197 
1198 bool Backend::scanCoalescQ(bool forceCoalescQDrop)
1199 {
1200     FreeBlock *currCoalescList = coalescQ.getAll();
1201 
1202     if (currCoalescList)
1203         // reportBlocksProcessed=true signals that the blocks leave coalescQ;
1204         // this matches the inFlyBlocks accounting in CoalRequestQ::putBlock()
1205         coalescAndPutList(currCoalescList, forceCoalescQDrop,
1206                           /*reportBlocksProcessed=*/true);
1207     // returns status of coalescQ.getAll(), as an indication of possible changes in backend
1208     // TODO: coalescAndPutList() could report whether some new free blocks became available
1209     return currCoalescList;
1210 }
1211 
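// Lay out the initial free block in a freshly allocated region: align its start
// (and, for slab regions, its end) appropriately, check that the region is large
// enough, and record the resulting block size in region->blockSz.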
1212 FreeBlock *Backend::findBlockInRegion(MemRegion *region, size_t exactBlockSize)
1213 {
1214     FreeBlock *fBlock;
1215     size_t blockSz;
1216     uintptr_t fBlockEnd,
1217         lastFreeBlock = (uintptr_t)region + region->allocSz - sizeof(LastFreeBlock);
1218 
1219     static_assert(sizeof(LastFreeBlock) % sizeof(uintptr_t) == 0,
1220         "Atomic applied on LastFreeBlock, and we put it at the end of region, that"
1221         " is uintptr_t-aligned, so no unaligned atomic operations are possible.");
1222     // the right bound is slab-aligned; keep the LastFreeBlock after it
1223     if (region->type == MEMREG_SLAB_BLOCKS) {
1224         fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), sizeof(uintptr_t));
1225         fBlockEnd = alignDown(lastFreeBlock, slabSize);
1226     } else {
1227         fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), largeObjectAlignment);
1228         fBlockEnd = (uintptr_t)fBlock + exactBlockSize;
1229         MALLOC_ASSERT(fBlockEnd <= lastFreeBlock, ASSERT_TEXT);
1230     }
1231     if (fBlockEnd <= (uintptr_t)fBlock)
1232         return nullptr; // allocSz is too small
1233     blockSz = fBlockEnd - (uintptr_t)fBlock;
1234     // TODO: extend getSlabBlock to support degradation, i.e. getting fewer blocks
1235     // than requested, and then relax this check
1236     // (currently it is all-or-nothing, and the check reflects that)
1237     if (blockSz < numOfSlabAllocOnMiss*slabSize)
1238         return nullptr;
1239 
1240     region->blockSz = blockSz;
1241     return fBlock;
1242 }
1243 
1244 // startUseBlock() may add the free block to a bin; the block can then be used and
1245 // even released, so the region must already be added to regionList
1246 void Backend::startUseBlock(MemRegion *region, FreeBlock *fBlock, bool addToBin)
1247 {
1248     size_t blockSz = region->blockSz;
1249     fBlock->initHeader();
1250     fBlock->setMeFree(blockSz);
1251 
1252     LastFreeBlock *lastBl = static_cast<LastFreeBlock*>(fBlock->rightNeig(blockSz));
1253     // to not get unaligned atomics during LastFreeBlock access
1254     MALLOC_ASSERT(isAligned(lastBl, sizeof(uintptr_t)), nullptr);
1255     lastBl->initHeader();
1256     lastBl->setMeFree(GuardedSize::LAST_REGION_BLOCK);
1257     lastBl->setLeftFree(blockSz);
1258     lastBl->myBin = NO_BIN;
1259     lastBl->memRegion = region;
1260 
1261     if (addToBin) {
1262         unsigned targetBin = sizeToBin(blockSz);
1263         // while adding advance regions, register the bin for the largest block in the region
1264         advRegBins.registerBin(targetBin);
1265         if (region->type == MEMREG_SLAB_BLOCKS) {
1266             fBlock->slabAligned = true;
1267             freeSlabAlignedBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
1268         } else {
1269             fBlock->slabAligned = false;
1270             freeLargeBlockBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
1271         }
1272     } else {
1273         // to match with blockReleased() in genericGetBlock
1274         bkndSync.blockConsumed();
1275         // Record our alignment for a correct splitBlock() operation
1276         fBlock->slabAligned = (region->type == MEMREG_SLAB_BLOCKS);
1277         fBlock->sizeTmp = fBlock->tryLockBlock();
1278         MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, "Locking must be successful");
1279     }
1280 }
1281 
1282 void MemRegionList::add(MemRegion *r)
1283 {
1284     r->prev = nullptr;
1285     MallocMutex::scoped_lock lock(regionListLock);
1286     r->next = head;
1287     head = r;
1288     if (head->next)
1289         head->next->prev = head;
1290 }
1291 
1292 void MemRegionList::remove(MemRegion *r)
1293 {
1294     MallocMutex::scoped_lock lock(regionListLock);
1295     if (head == r)
1296         head = head->next;
1297     if (r->next)
1298         r->next->prev = r->prev;
1299     if (r->prev)
1300         r->prev->next = r->next;
1301 }
1302 
1303 #if __TBB_MALLOC_BACKEND_STAT
1304 int MemRegionList::reportStat(FILE *f)
1305 {
1306     int regNum = 0;
1307     MallocMutex::scoped_lock lock(regionListLock);
1308     for (MemRegion *curr = head; curr; curr = curr->next) {
1309         fprintf(f, "%p: max block %lu B, ", curr, curr->blockSz);
1310         regNum++;
1311     }
1312     return regNum;
1313 }
1314 #endif
1315 
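// Allocates a fresh region of raw memory, carves a usable free block out of it,
// registers the region in regionList, and then either publishes the block to the
// free bins (returning VALID_BLOCK_IN_BIN) or returns the block locked for the caller.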
1316 FreeBlock *Backend::addNewRegion(size_t size, MemRegionType memRegType, bool addToBin)
1317 {
1318     static_assert(sizeof(BlockMutexes) <= sizeof(BlockI), "Header must not be overwritten in used blocks");
1319     MALLOC_ASSERT(FreeBlock::minBlockSize > GuardedSize::MAX_SPEC_VAL,
1320           "Block length must not conflict with special values of GuardedSize");
1321     // If the region is not "for slabs", we should reserve some space for
1322     // the region header, the worst-case alignment, and the last block mark.
1323     const size_t requestSize = memRegType == MEMREG_SLAB_BLOCKS ? size :
1324         size + sizeof(MemRegion) + largeObjectAlignment
1325              +  FreeBlock::minBlockSize + sizeof(LastFreeBlock);
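    // A sketch of the intended large-object region layout (not normative):
    //   [ MemRegion header | padding up to largeObjectAlignment | free block of >= size bytes | LastFreeBlock ]
    // findBlockInRegion() below computes the actual block bounds.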
1326 
1327     size_t rawSize = requestSize;
1328     MemRegion *region = (MemRegion*)allocRawMem(rawSize);
1329     if (!region) {
1330         MALLOC_ASSERT(rawSize==requestSize, "getRawMem did not allocate memory, yet it changed the allocation size.");
1331         return nullptr;
1332     }
1333     if (rawSize < sizeof(MemRegion)) {
1334         if (!extMemPool->fixedPool)
1335             freeRawMem(region, rawSize);
1336         return nullptr;
1337     }
1338 
1339     region->type = memRegType;
1340     region->allocSz = rawSize;
1341     FreeBlock *fBlock = findBlockInRegion(region, size);
1342     if (!fBlock) {
1343         if (!extMemPool->fixedPool)
1344             freeRawMem(region, rawSize);
1345         return nullptr;
1346     }
1347     regionList.add(region);
1348     startUseBlock(region, fBlock, addToBin);
1349     bkndSync.binsModified();
1350     return addToBin? (FreeBlock*)VALID_BLOCK_IN_BIN : fBlock;
1351 }
1352 
1353 void Backend::init(ExtMemoryPool *extMemoryPool)
1354 {
1355     extMemPool = extMemoryPool;
1356     usedAddrRange.init();
1357     coalescQ.init(&bkndSync);
1358     bkndSync.init(this);
1359 }
1360 
1361 void Backend::reset()
1362 {
1363     MALLOC_ASSERT(extMemPool->userPool(), "Only user pool can be reset.");
1364     // no active threads are allowed in the backend while reset() is called
1365     verify();
1366 
1367     freeLargeBlockBins.reset();
1368     freeSlabAlignedBins.reset();
1369     advRegBins.reset();
1370 
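    // Re-create the single maximal free block in every region and publish it to the
    // bins, as if each region had just been added.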
1371     for (MemRegion *curr = regionList.head; curr; curr = curr->next) {
1372         FreeBlock *fBlock = findBlockInRegion(curr, curr->blockSz);
1373         MALLOC_ASSERT(fBlock, "A memory region unexpectedly got smaller");
1374         startUseBlock(curr, fBlock, /*addToBin=*/true);
1375     }
1376 }
1377 
1378 bool Backend::destroy()
1379 {
1380     bool noError = true;
1381     // no active threads are allowed in the backend while destroy() is called
1382     verify();
1383     if (!inUserPool()) {
1384         freeLargeBlockBins.reset();
1385         freeSlabAlignedBins.reset();
1386     }
1387     while (regionList.head) {
1388         MemRegion *helper = regionList.head->next;
1389         noError &= freeRawMem(regionList.head, regionList.head->allocSz);
1390         regionList.head = helper;
1391     }
1392     return noError;
1393 }
1394 
1395 bool Backend::clean()
1396 {
1397     scanCoalescQ(/*forceCoalescQDrop=*/false);
1398 
1399     // We can have several blocks, each occupying a whole region,
1400     // because such regions are added in advance (see askMemFromOS() and reset())
1401     // and have never been used. Release them all.
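    // Note: getMinNonemptyBin(i) == i means bin i itself is still non-empty, i.e. the
    // whole-region block registered for this bin has apparently not been consumed yet.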
1402     // and never used. Release them all.
1403     for (int i = advRegBins.getMinUsedBin(0); i != -1; i = advRegBins.getMinUsedBin(i+1)) {
1404         if (i == freeSlabAlignedBins.getMinNonemptyBin(i))
1405             res |= freeSlabAlignedBins.tryReleaseRegions(i, this);
1406         if (i == freeLargeBlockBins.getMinNonemptyBin(i))
1407             res |= freeLargeBlockBins.tryReleaseRegions(i, this);
1408     }
1409 
1410     return res;
1411 }
1412 
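// Debug-only consistency walk over the free bins: every listed block must carry a real
// size (above any GuardedSize special value), its right neighbour must record that same
// size as its left-neighbour tag, and the tags on both sides must stay in the special
// range, which suggests a binned free block never borders another ordinary free block.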
1413 void Backend::IndexedBins::verify()
1414 {
1415 #if MALLOC_DEBUG
1416     for (int i=0; i<(int)freeBinsNum; i++) {
1417         for (FreeBlock *fb = freeBins[i].head.load(std::memory_order_relaxed); fb; fb=fb->next) {
1418             uintptr_t mySz = fb->myL.value;
1419             MALLOC_ASSERT(mySz>GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
1420             FreeBlock *right = (FreeBlock*)((uintptr_t)fb + mySz);
1421             suppress_unused_warning(right);
1422             MALLOC_ASSERT(right->myL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
1423             MALLOC_ASSERT(right->leftL.value==mySz, ASSERT_TEXT);
1424             MALLOC_ASSERT(fb->leftL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
1425         }
1426     }
1427 #endif
1428 }
1429 
1430 // For correct operation, it must be called when no other thread
1431 // is changing the backend.
1432 void Backend::verify()
1433 {
1434 #if MALLOC_DEBUG
1435     scanCoalescQ(/*forceCoalescQDrop=*/false);
1436 #endif // MALLOC_DEBUG
1437 
1438     freeLargeBlockBins.verify();
1439     freeSlabAlignedBins.verify();
1440 }
1441 
1442 #if __TBB_MALLOC_BACKEND_STAT
1443 size_t Backend::Bin::countFreeBlocks()
1444 {
1445     size_t cnt = 0;
1446     {
1447         MallocMutex::scoped_lock lock(tLock);
1448         for (FreeBlock *fb = head; fb; fb = fb->next)
1449             cnt++;
1450     }
1451     return cnt;
1452 }
1453 
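// Debug statistics: prints the address range of every free block in the bin.
// tryLockBlock() is used to read a stable size; setMeFree()/setLeftFree() restore
// the free state right away, so the walk leaves the bin unchanged.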
1454 size_t Backend::Bin::reportFreeBlocks(FILE *f)
1455 {
1456     size_t totalSz = 0;
1457     MallocMutex::scoped_lock lock(tLock);
1458     for (FreeBlock *fb = head; fb; fb = fb->next) {
1459         size_t sz = fb->tryLockBlock();
1460         fb->setMeFree(sz);
1461         fb->rightNeig(sz)->setLeftFree(sz);
1462         fprintf(f, " [%p;%p]", fb, (void*)((uintptr_t)fb+sz));
1463         totalSz += sz;
1464     }
1465     return totalSz;
1466 }
1467 
1468 void Backend::IndexedBins::reportStat(FILE *f)
1469 {
1470     size_t totalSize = 0;
1471 
1472     for (int i=0; i<Backend::freeBinsNum; i++)
1473         if (size_t cnt = freeBins[i].countFreeBlocks()) {
1474             totalSize += freeBins[i].reportFreeBlocks(f);
1475             fprintf(f, " %d:%lu, ", i, cnt);
1476         }
1477     fprintf(f, "\ttotal size %lu KB", totalSize/1024);
1478 }
1479 
1480 void Backend::reportStat(FILE *f)
1481 {
1482     scanCoalescQ(/*forceCoalescQDrop=*/false);
1483 
1484     fprintf(f, "\n  regions:\n");
1485     int regNum = regionList.reportStat(f);
1486     fprintf(f, "\n%d regions, %lu KB in all regions\n  free bins:\nlarge bins: ",
1487             regNum, totalMemSize/1024);
1488     freeLargeBlockBins.reportStat(f);
1489     fprintf(f, "\naligned bins: ");
1490     freeSlabAlignedBins.reportStat(f);
1491     fprintf(f, "\n");
1492 }
1493 #endif // __TBB_MALLOC_BACKEND_STAT
1494 
1495 } } // namespaces
1496