xref: /oneTBB/src/tbbmalloc/backend.cpp (revision cd6a5f9f)
1 /*
2     Copyright (c) 2005-2021 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #include <string.h>   /* for memset */
18 #include <errno.h>
19 #include "tbbmalloc_internal.h"
20 
21 namespace rml {
22 namespace internal {
23 
24 /*********** Code to acquire memory from the OS or other executive ****************/
25 
26 /*
27   A syscall/malloc can set a non-zero errno in case of failure,
28   but the allocator might still be able to find memory to fulfill the request,
29   and a successful scalable_malloc call must not change errno.
30   To support this, restore the old errno in (get|free)RawMemory, and set errno
31   in the frontend just before returning to user code.
32   Please note: every syscall/libc call used inside scalable_malloc that
33   sets errno must be protected this way, not just memory allocation per se.
34 */
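/*
  Illustrative sketch of that protection (hedged: names are hypothetical and the
  real frontend code differs in detail):

      int prevErrno = errno;              // remember errno as set by user code
      void *p = MapMemory(size, REGULAR); // a failing syscall may overwrite errno
      if (!p) errno = prevErrno;          // restore it; the allocator may still recover
      // the frontend sets errno (e.g. to ENOMEM) only if the whole request fails
*/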
35 
36 #if USE_DEFAULT_MEMORY_MAPPING
37 #include "MapMemory.h"
38 #else
39 /* assume MapMemory and UnmapMemory are customized */
40 #endif
41 
42 void* getRawMemory (size_t size, PageType pageType) {
43     return MapMemory(size, pageType);
44 }
45 
46 int freeRawMemory (void *object, size_t size) {
47     return UnmapMemory(object, size);
48 }
49 
50 #if CHECK_ALLOCATION_RANGE
51 
52 void Backend::UsedAddressRange::registerAlloc(uintptr_t left, uintptr_t right)
53 {
54     MallocMutex::scoped_lock lock(mutex);
55     if (left < leftBound.load(std::memory_order_relaxed))
56         leftBound.store(left, std::memory_order_relaxed);
57     if (right > rightBound.load(std::memory_order_relaxed))
58         rightBound.store(right, std::memory_order_relaxed);
59     MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed), ASSERT_TEXT);
60     MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
61     MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) <= left && right <= rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
62 }
63 
64 void Backend::UsedAddressRange::registerFree(uintptr_t left, uintptr_t right)
65 {
66     MallocMutex::scoped_lock lock(mutex);
67     if (leftBound.load(std::memory_order_relaxed) == left) {
68         if (rightBound.load(std::memory_order_relaxed) == right) {
69             leftBound.store(ADDRESS_UPPER_BOUND, std::memory_order_relaxed);
70             rightBound.store(0, std::memory_order_relaxed);
71         } else
72             leftBound.store(right, std::memory_order_relaxed);
73     } else if (rightBound.load(std::memory_order_relaxed) == right)
74         rightBound.store(left, std::memory_order_relaxed);
75     MALLOC_ASSERT((!rightBound.load(std::memory_order_relaxed) && leftBound.load(std::memory_order_relaxed) == ADDRESS_UPPER_BOUND)
76                   || leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
77 }
78 #endif // CHECK_ALLOCATION_RANGE
79 
80 // Initialized in frontend inside defaultMemPool
81 extern HugePagesStatus hugePages;
82 
83 void *Backend::allocRawMem(size_t &size)
84 {
85     void *res = nullptr;
86     size_t allocSize = 0;
87 
88     if (extMemPool->userPool()) {
89         if (extMemPool->fixedPool && bootsrapMemDone == bootsrapMemStatus.load(std::memory_order_acquire))
90             return nullptr;
91         MALLOC_ASSERT(bootsrapMemStatus != bootsrapMemNotDone,
92                       "Backend::allocRawMem() called prematurely?");
93         // TODO: support for raw mem not aligned at sizeof(uintptr_t)
94         // memory from fixed pool is asked once and only once
95         allocSize = alignUpGeneric(size, extMemPool->granularity);
96         res = (*extMemPool->rawAlloc)(extMemPool->poolId, allocSize);
97     } else {
98         // Align allocation on page size
99         size_t pageSize = hugePages.isEnabled ? hugePages.getGranularity() : extMemPool->granularity;
100         MALLOC_ASSERT(pageSize, "Page size cannot be zero.");
101         allocSize = alignUpGeneric(size, pageSize);
102 
103         // If the user requested huge pages and they are available, try to use preallocated ones first.
104         // If there are none, check transparent huge page support and use it instead.
105         if (hugePages.isEnabled) {
106             if (hugePages.isHPAvailable) {
107                 res = getRawMemory(allocSize, PREALLOCATED_HUGE_PAGE);
108             }
109             if (!res && hugePages.isTHPAvailable) {
110                 res = getRawMemory(allocSize, TRANSPARENT_HUGE_PAGE);
111             }
112         }
113 
114         if (!res) {
115             res = getRawMemory(allocSize, REGULAR);
116         }
117     }
118 
119     if (res) {
120         MALLOC_ASSERT(allocSize > 0, "Invalid size of an allocated region.");
121         size = allocSize;
122         if (!extMemPool->userPool())
123             usedAddrRange.registerAlloc((uintptr_t)res, (uintptr_t)res+size);
124 #if MALLOC_DEBUG
125         volatile size_t curTotalSize = totalMemSize; // to read global value once
126         MALLOC_ASSERT(curTotalSize+size > curTotalSize, "Overflow allocation size.");
127 #endif
128         totalMemSize.fetch_add(size);
129     }
130 
131     return res;
132 }
133 
134 bool Backend::freeRawMem(void *object, size_t size)
135 {
136     bool fail;
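    // Both the user-provided rawFree callback and freeRawMemory() are expected to
    // return non-zero on failure; the result is inverted below so that freeRawMem()
    // returns true on success.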
137 #if MALLOC_DEBUG
138     volatile size_t curTotalSize = totalMemSize; // to read global value once
139     MALLOC_ASSERT(curTotalSize-size < curTotalSize, "Negative allocation size.");
140 #endif
141     totalMemSize.fetch_sub(size);
142     if (extMemPool->userPool()) {
143         MALLOC_ASSERT(!extMemPool->fixedPool, "No free for fixed-size pools.");
144         fail = (*extMemPool->rawFree)(extMemPool->poolId, object, size);
145     } else {
146         usedAddrRange.registerFree((uintptr_t)object, (uintptr_t)object + size);
147         fail = freeRawMemory(object, size);
148     }
149     // TODO: use result in all freeRawMem() callers
150     return !fail;
151 }
152 
153 /********* End memory acquisition code ********************************/
154 
155 // A block size protected by a lock. A successful lock returns the size of the locked block,
156 // and releasing requires setting the block size.
157 class GuardedSize : tbb::detail::no_copy {
158     std::atomic<uintptr_t> value;
159 public:
160     enum State {
161         LOCKED,
162         COAL_BLOCK,        // block is coalescing now
163         MAX_LOCKED_VAL = COAL_BLOCK,
164         LAST_REGION_BLOCK, // used to mark last block in region
165         // values after this are "normal" block sizes
166         MAX_SPEC_VAL = LAST_REGION_BLOCK
167     };
168 
169     void initLocked() { value.store(LOCKED, std::memory_order_release); } // TBB_REVAMP_TODO: was relaxed
170     void makeCoalscing() {
171         MALLOC_ASSERT(value.load(std::memory_order_relaxed) == LOCKED, ASSERT_TEXT);
172         value.store(COAL_BLOCK, std::memory_order_release); // TBB_REVAMP_TODO: was relaxed
173     }
174     size_t tryLock(State state) {
175         MALLOC_ASSERT(state <= MAX_LOCKED_VAL, ASSERT_TEXT);
176         size_t sz = value.load(std::memory_order_acquire);
177         for (;;) {
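            // A stored value <= MAX_LOCKED_VAL means someone else already holds the
            // block (locked or coalescing): report that state to the caller.
            // Otherwise try to install our own lock state with a CAS; on success,
            // sz is the real block size that the guard was protecting.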
178             if (sz <= MAX_LOCKED_VAL) {
179                 break;
180             }
181             if (value.compare_exchange_strong(sz, state)) {
182                 break;
183             }
184         }
185         return sz;
186     }
187     void unlock(size_t size) {
188         MALLOC_ASSERT(value.load(std::memory_order_relaxed) <= MAX_LOCKED_VAL, "The lock is not locked");
189         MALLOC_ASSERT(size > MAX_LOCKED_VAL, ASSERT_TEXT);
190         value.store(size, std::memory_order_release);
191     }
192     bool isLastRegionBlock() const { return value.load(std::memory_order_relaxed) == LAST_REGION_BLOCK; }
193     friend void Backend::IndexedBins::verify();
194 };
195 
196 struct MemRegion {
197     MemRegion *next,      // keep all regions in any pool to release all them on
198               *prev;      // pool destroying, 2-linked list to release individual
199                           // regions.
200     size_t     allocSz,   // got from pool callback
201                blockSz;   // initial and maximal inner block size
202     MemRegionType type;
203 };
204 
205 // This data must stay unmodified while the block is in use, so it is kept separate.
206 class BlockMutexes {
207 protected:
208     GuardedSize myL,   // lock for me
209                 leftL; // lock for left neighbor
210 };
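// Thus every block carries two boundary tags: myL guards this block's own size,
// while the right neighbor's leftL holds a copy of the same size. Taking a block
// means locking both myL and the right neighbor's leftL (see
// FreeBlock::tryLockBlock()), which lets adjacent blocks be coalesced without a
// global lock.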
211 
212 class FreeBlock : BlockMutexes {
213 public:
214     static const size_t minBlockSize;
215     friend void Backend::IndexedBins::verify();
216 
217     FreeBlock    *prev,       // in 2-linked list related to bin
218                  *next,
219                  *nextToFree; // used to form a queue during coalescing
220     // valid only while the block is being processed, i.e. it is neither free
221     size_t        sizeTmp;    // nor used outside of the backend
222     int           myBin;      // bin that is owner of the block
223     bool          slabAligned;
224     bool          blockInBin; // this block in myBin already
225 
226     FreeBlock *rightNeig(size_t sz) const {
227         MALLOC_ASSERT(sz, ASSERT_TEXT);
228         return (FreeBlock*)((uintptr_t)this+sz);
229     }
230     FreeBlock *leftNeig(size_t sz) const {
231         MALLOC_ASSERT(sz, ASSERT_TEXT);
232         return (FreeBlock*)((uintptr_t)this - sz);
233     }
234 
235     void initHeader() { myL.initLocked(); leftL.initLocked(); }
236     void setMeFree(size_t size) { myL.unlock(size); }
237     size_t trySetMeUsed(GuardedSize::State s) { return myL.tryLock(s); }
238     bool isLastRegionBlock() const { return myL.isLastRegionBlock(); }
239 
240     void setLeftFree(size_t sz) { leftL.unlock(sz); }
241     size_t trySetLeftUsed(GuardedSize::State s) { return leftL.tryLock(s); }
242 
243     size_t tryLockBlock() {
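        // Lock both ends of this block: our own guard (myL) and the left guard that
        // our right neighbor keeps for us. Returns the block size on success, or
        // 0 (false) if either guard is already held by another thread.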
244         size_t rSz, sz = trySetMeUsed(GuardedSize::LOCKED);
245 
246         if (sz <= GuardedSize::MAX_LOCKED_VAL)
247             return false;
248         rSz = rightNeig(sz)->trySetLeftUsed(GuardedSize::LOCKED);
249         if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
250             setMeFree(sz);
251             return false;
252         }
253         MALLOC_ASSERT(rSz == sz, ASSERT_TEXT);
254         return sz;
255     }
256     void markCoalescing(size_t blockSz) {
257         myL.makeCoalscing();
258         rightNeig(blockSz)->leftL.makeCoalscing();
259         sizeTmp = blockSz;
260         nextToFree = nullptr;
261     }
262     void markUsed() {
263         myL.initLocked();
264         rightNeig(sizeTmp)->leftL.initLocked();
265         nextToFree = nullptr;
266     }
267     static void markBlocks(FreeBlock *fBlock, int num, size_t size) {
268         for (int i=1; i<num; i++) {
269             fBlock = (FreeBlock*)((uintptr_t)fBlock + size);
270             fBlock->initHeader();
271         }
272     }
273 };
274 
275 // The last block in any region. Its "size" field is GuardedSize::LAST_REGION_BLOCK.
276 // Blocks of this kind are used to find the region header
277 // and make it possible to return the region back to the OS.
278 struct LastFreeBlock : public FreeBlock {
279     MemRegion *memRegion;
280 };
281 
282 const size_t FreeBlock::minBlockSize = sizeof(FreeBlock);
283 
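// Wait for "in-fly" blocks (blocks being consumed or coalesced by other threads)
// to make progress. Returns true when re-scanning the bins may succeed, and false
// when nothing is in flight and the bins have not been modified since the caller
// scanned them (so a re-scan would be pointless).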
284 inline bool BackendSync::waitTillBlockReleased(intptr_t startModifiedCnt)
285 {
286     AtomicBackoff backoff;
287 #if __TBB_MALLOC_BACKEND_STAT
288     class ITT_Guard {
289         void *ptr;
290     public:
291         ITT_Guard(void *p) : ptr(p) {
292             MALLOC_ITT_SYNC_PREPARE(ptr);
293         }
294         ~ITT_Guard() {
295             MALLOC_ITT_SYNC_ACQUIRED(ptr);
296         }
297     };
298     ITT_Guard ittGuard(&inFlyBlocks);
299 #endif
300     for (intptr_t myBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire),
301              myCoalescQInFlyBlocks = backend->blocksInCoalescing(); ; backoff.pause()) {
302         MALLOC_ASSERT(myBinsInFlyBlocks>=0 && myCoalescQInFlyBlocks>=0, nullptr);
303         intptr_t currBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire),
304             currCoalescQInFlyBlocks = backend->blocksInCoalescing();
305         WhiteboxTestingYield();
306         // Stop waiting iff:
307 
308         // 1) blocks were removed from processing, not added
309         if (myBinsInFlyBlocks > currBinsInFlyBlocks
310         // 2) a block was released during delayed coalescing queue processing
311             || myCoalescQInFlyBlocks > currCoalescQInFlyBlocks)
312             break;
313         // 3) if there are blocks in coalescing, and no progress in its processing,
314         // try to scan coalescing queue and stop waiting, if changes were made
315         // (if there are no changes and in-fly blocks exist, we continue
316         //  waiting to not increase load on coalescQ)
317         if (currCoalescQInFlyBlocks > 0 && backend->scanCoalescQ(/*forceCoalescQDrop=*/false))
318             break;
319         // 4) when there are no blocks
320         if (!currBinsInFlyBlocks && !currCoalescQInFlyBlocks)
321             // a re-scan makes sense only if the bins were modified since they were scanned
322             return startModifiedCnt != getNumOfMods();
323         myBinsInFlyBlocks = currBinsInFlyBlocks;
324         myCoalescQInFlyBlocks = currCoalescQInFlyBlocks;
325     }
326     return true;
327 }
328 
329 void CoalRequestQ::putBlock(FreeBlock *fBlock)
330 {
331     MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, ASSERT_TEXT);
332     fBlock->markUsed();
333     // the block is in the queue, do not forget that it's here
334     inFlyBlocks++;
335 
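    // Lock-free LIFO push: link the block in front of the current head and publish
    // it with compare_exchange, retrying if another thread pushed concurrently.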
336     FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);
337     for (;;) {
338         fBlock->nextToFree = myBlToFree;
339         if (blocksToFree.compare_exchange_strong(myBlToFree, fBlock)) {
340             return;
341         }
342     }
343 }
344 
345 FreeBlock *CoalRequestQ::getAll()
346 {
347     for (;;) {
348         FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);
349 
350         if (!myBlToFree) {
351             return nullptr;
352         } else {
353             if (blocksToFree.compare_exchange_strong(myBlToFree, nullptr)) {
354                 return myBlToFree;
355             } else {
356                 continue;
357             }
358         }
359     }
360 }
361 
362 inline void CoalRequestQ::blockWasProcessed()
363 {
364     bkndSync->binsModified();
365     int prev = inFlyBlocks.fetch_sub(1);
366     MALLOC_ASSERT(prev > 0, ASSERT_TEXT);
367 }
368 
369 // Try to get a block from a bin.
370 // If the remaining free space would stay in the same bin,
371 //     split the block without removing it.
372 // If the free space should go to other bin(s), remove the block.
373 // alignedBin is true, if all blocks in the bin have slab-aligned right side.
374 FreeBlock *Backend::IndexedBins::getFromBin(int binIdx, BackendSync *sync, size_t size,
375         bool needAlignedRes, bool alignedBin,  bool wait, int *binLocked)
376 {
377     Bin *b = &freeBins[binIdx];
378 try_next:
379     FreeBlock *fBlock = nullptr;
380     if (!b->empty()) {
381         bool locked;
382         MallocMutex::scoped_lock scopedLock(b->tLock, wait, &locked);
383 
384         if (!locked) {
385             if (binLocked) (*binLocked)++;
386             return nullptr;
387         }
388 
389         for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; curr = curr->next) {
390             size_t szBlock = curr->tryLockBlock();
391             if (!szBlock) {
392                 // the block is locked; re-take the bin lock, as there is no place to spin
393                 // while the block is being coalesced
394                 goto try_next;
395             }
396 
397             // GENERAL CASE
398             if (alignedBin || !needAlignedRes) {
399                 size_t splitSz = szBlock - size;
400                 // If we get a block as a split result, it must have room for the control structures.
401                 if (szBlock >= size && (splitSz >= FreeBlock::minBlockSize || !splitSz))
402                     fBlock = curr;
403             } else {
404                 // SPECIAL CASE: to get an aligned block from an unaligned bin we have to cut out the middle of a block
405                 // and return the remaining left and right parts. Possible only in the fixed pool scenario; the assert for this
406                 // is inside the splitBlock() function.
407 
408                 void *newB = alignUp(curr, slabSize);
409                 uintptr_t rightNew = (uintptr_t)newB + size;
410                 uintptr_t rightCurr = (uintptr_t)curr + szBlock;
411                 // Check if the block size is sufficient,
412                 // and also left and right split results are either big enough or non-existent
413                 if (rightNew <= rightCurr
414                         && (newB == curr || ((uintptr_t)newB - (uintptr_t)curr) >= FreeBlock::minBlockSize)
415                         && (rightNew == rightCurr || (rightCurr - rightNew) >= FreeBlock::minBlockSize))
416                     fBlock = curr;
417             }
418 
419             if (fBlock) {
420                 // consume must be called before result of removing from a bin is visible externally.
421                 sync->blockConsumed();
422                 // TODO: think about cases when block stays in the same bin
423                 b->removeBlock(fBlock);
424                 if (freeBins[binIdx].empty())
425                     bitMask.set(binIdx, false);
426                 fBlock->sizeTmp = szBlock;
427                 break;
428             } else { // block size is not valid, search for next block in the bin
429                 curr->setMeFree(szBlock);
430                 curr->rightNeig(szBlock)->setLeftFree(szBlock);
431             }
432         }
433     }
434     return fBlock;
435 }
436 
437 bool Backend::IndexedBins::tryReleaseRegions(int binIdx, Backend *backend)
438 {
439     Bin *b = &freeBins[binIdx];
440     FreeBlock *fBlockList = nullptr;
441 
442     // take all blocks from the bin and re-do coalescing on them
443     // to release single-block regions
444 try_next:
445     if (!b->empty()) {
446         MallocMutex::scoped_lock binLock(b->tLock);
447         for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; ) {
448             size_t szBlock = curr->tryLockBlock();
449             if (!szBlock)
450                 goto try_next;
451 
452             FreeBlock *next = curr->next;
453 
454             b->removeBlock(curr);
455             curr->sizeTmp = szBlock;
456             curr->nextToFree = fBlockList;
457             fBlockList = curr;
458             curr = next;
459         }
460     }
461     return backend->coalescAndPutList(fBlockList, /*forceCoalescQDrop=*/true,
462                                       /*reportBlocksProcessed=*/false);
463 }
464 
465 void Backend::Bin::removeBlock(FreeBlock *fBlock)
466 {
467     MALLOC_ASSERT(fBlock->next||fBlock->prev||fBlock== head.load(std::memory_order_relaxed),
468                   "Detected that a block is not in the bin.");
469     if (head.load(std::memory_order_relaxed) == fBlock)
470         head.store(fBlock->next, std::memory_order_relaxed);
471     if (tail == fBlock)
472         tail = fBlock->prev;
473     if (fBlock->prev)
474         fBlock->prev->next = fBlock->next;
475     if (fBlock->next)
476         fBlock->next->prev = fBlock->prev;
477 }
478 
479 void Backend::IndexedBins::addBlock(int binIdx, FreeBlock *fBlock, size_t /* blockSz */, bool addToTail)
480 {
481     Bin *b = &freeBins[binIdx];
482     fBlock->myBin = binIdx;
483     fBlock->next = fBlock->prev = nullptr;
484     {
485         MallocMutex::scoped_lock scopedLock(b->tLock);
486         if (addToTail) {
487             fBlock->prev = b->tail;
488             b->tail = fBlock;
489             if (fBlock->prev)
490                 fBlock->prev->next = fBlock;
491             if (!b->head.load(std::memory_order_relaxed))
492                 b->head.store(fBlock, std::memory_order_relaxed);
493         } else {
494             fBlock->next = b->head.load(std::memory_order_relaxed);
495             b->head.store(fBlock, std::memory_order_relaxed);
496             if (fBlock->next)
497                 fBlock->next->prev = fBlock;
498             if (!b->tail)
499                 b->tail = fBlock;
500         }
501     }
502     bitMask.set(binIdx, true);
503 }
504 
505 bool Backend::IndexedBins::tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail)
506 {
507     bool locked;
508     Bin *b = &freeBins[binIdx];
509     fBlock->myBin = binIdx;
510     if (addToTail) {
511         fBlock->next = nullptr;
512         {
513             MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
514             if (!locked)
515                 return false;
516             fBlock->prev = b->tail;
517             b->tail = fBlock;
518             if (fBlock->prev)
519                 fBlock->prev->next = fBlock;
520             if (!b->head.load(std::memory_order_relaxed))
521                 b->head.store(fBlock, std::memory_order_relaxed);
522         }
523     } else {
524         fBlock->prev = nullptr;
525         {
526             MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
527             if (!locked)
528                 return false;
529             fBlock->next = b->head.load(std::memory_order_relaxed);
530             b->head.store(fBlock, std::memory_order_relaxed);
531             if (fBlock->next)
532                 fBlock->next->prev = fBlock;
533             if (!b->tail)
534                 b->tail = fBlock;
535         }
536     }
537     bitMask.set(binIdx, true);
538     return true;
539 }
540 
541 void Backend::IndexedBins::reset()
542 {
543     for (unsigned i=0; i<Backend::freeBinsNum; i++)
544         freeBins[i].reset();
545     bitMask.reset();
546 }
547 
548 void Backend::IndexedBins::lockRemoveBlock(int binIdx, FreeBlock *fBlock)
549 {
550     MallocMutex::scoped_lock scopedLock(freeBins[binIdx].tLock);
551     freeBins[binIdx].removeBlock(fBlock);
552     if (freeBins[binIdx].empty())
553         bitMask.set(binIdx, false);
554 }
555 
556 bool ExtMemoryPool::regionsAreReleaseable() const
557 {
558     return !keepAllMemory && !delayRegsReleasing;
559 }
560 
561 FreeBlock *Backend::splitBlock(FreeBlock *fBlock, int num, size_t size, bool blockIsAligned, bool needAlignedBlock)
562 {
563     const size_t totalSize = num * size;
564 
565     // SPECIAL CASE: for an unaligned block we have to cut out the middle of the block
566     // and return the remaining left and right parts. Possible only in a fixed pool scenario.
567     if (needAlignedBlock && !blockIsAligned) {
568         MALLOC_ASSERT(extMemPool->fixedPool,
569                 "Aligned block request from unaligned bin possible only in fixed pool scenario.");
570 
571         // Space to use is in the middle
572         FreeBlock *newBlock = alignUp(fBlock, slabSize);
573         FreeBlock *rightPart = (FreeBlock*)((uintptr_t)newBlock + totalSize);
574         uintptr_t fBlockEnd = (uintptr_t)fBlock + fBlock->sizeTmp;
575 
576         // Return free right part
577         if ((uintptr_t)rightPart != fBlockEnd) {
578             rightPart->initHeader();  // to prevent coalescing rightPart with fBlock
579             size_t rightSize = fBlockEnd - (uintptr_t)rightPart;
580             coalescAndPut(rightPart, rightSize, toAlignedBin(rightPart, rightSize));
581         }
582         // And free left part
583         if (newBlock != fBlock) {
584             newBlock->initHeader(); // to prevent coalescing fBlock with newB
585             size_t leftSize = (uintptr_t)newBlock - (uintptr_t)fBlock;
586             coalescAndPut(fBlock, leftSize, toAlignedBin(fBlock, leftSize));
587         }
588         fBlock = newBlock;
589     } else if (size_t splitSize = fBlock->sizeTmp - totalSize) { // need to split the block
590         // GENERAL CASE, cut the left or right part of the block
591         FreeBlock *splitBlock = nullptr;
592         if (needAlignedBlock) {
593             // For slab aligned blocks cut the right side of the block
594             // and return it to a requester, original block returns to backend
595             splitBlock = fBlock;
596             fBlock = (FreeBlock*)((uintptr_t)splitBlock + splitSize);
597             fBlock->initHeader();
598         } else {
599             // For large object blocks cut the original block and put the free right part back to the backend
600             splitBlock = (FreeBlock*)((uintptr_t)fBlock + totalSize);
601             splitBlock->initHeader();
602         }
603         // Mark the free block like its parent only when the requested type (needAlignedBlock)
604         // and the type of the block obtained from the bins/OS (blockIsAligned) are equal (XOR detects a mismatch)
605         bool markAligned = (blockIsAligned ^ needAlignedBlock) ? toAlignedBin(splitBlock, splitSize) : blockIsAligned;
606         coalescAndPut(splitBlock, splitSize, markAligned);
607     }
608     MALLOC_ASSERT(!needAlignedBlock || isAligned(fBlock, slabSize), "Expect to get aligned block, if one was requested.");
609     FreeBlock::markBlocks(fBlock, num, size);
610     return fBlock;
611 }
612 
613 size_t Backend::getMaxBinnedSize() const
614 {
615     return hugePages.isEnabled && !inUserPool() ?
616         maxBinned_HugePage : maxBinned_SmallPage;
617 }
618 
619 inline bool Backend::MaxRequestComparator::operator()(size_t oldMaxReq, size_t requestSize) const
620 {
621     return requestSize > oldMaxReq && requestSize < backend->getMaxBinnedSize();
622 }
623 
624 // last chance to get memory
625 FreeBlock *Backend::releaseMemInCaches(intptr_t startModifiedCnt,
626                                     int *lockedBinsThreshold, int numOfLockedBins)
627 {
628     // something released from caches
629     if (extMemPool->hardCachesCleanup()
630         // ..or can use blocks that are in processing now
631         || bkndSync.waitTillBlockReleased(startModifiedCnt))
632         return (FreeBlock*)VALID_BLOCK_IN_BIN;
633     // OS can't give us more memory, but we have some in locked bins
634     if (*lockedBinsThreshold && numOfLockedBins) {
635         *lockedBinsThreshold = 0;
636         return (FreeBlock*)VALID_BLOCK_IN_BIN;
637     }
638     return nullptr; // nothing found, give up
639 }
640 
641 FreeBlock *Backend::askMemFromOS(size_t blockSize, intptr_t startModifiedCnt,
642                                  int *lockedBinsThreshold, int numOfLockedBins,
643                                  bool *splittableRet, bool needSlabRegion)
644 {
645     FreeBlock *block;
646     // The block sizes can be divided into 3 groups:
647     //   1. "quite small": popular object sizes, e.g. during bootstrap;
648     //      request several regions in advance.
649     //   2. "quite large": we want several such blocks in a region,
650     //      but do not want several pre-allocated regions.
651     //   3. "huge": exact fit, we allocate only one block and do not allow
652     //      any other allocations to be placed in the region.
653     // Dividing the block sizes into these groups, we try to balance between
654     // too-small regions (which lead to fragmentation) and too-large ones (which
655     // lead to excessive address space consumption). If a region is "too
656     // large", allocate only one, to prevent fragmentation. It supposedly
657     // doesn't hurt performance, because the object requested by user is large.
658     // Bounds for the groups are:
659     const size_t maxBinned = getMaxBinnedSize();
660     const size_t quiteSmall = maxBinned / 8;
661     const size_t quiteLarge = maxBinned;
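    // For example (purely hypothetical numbers): if getMaxBinnedSize() returned 8MB,
    // then requests below 1MB ("quite small") get several advance regions, requests
    // of 8MB and above ("huge") get an exact-fit one-block region, and sizes in
    // between share a single multi-block region.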
662 
663     if (blockSize >= quiteLarge) {
664         // Do not interact with other threads via semaphores, as for an exact fit
665         // we cannot share regions with them; each memory request is individual.
666         block = addNewRegion(blockSize, MEMREG_ONE_BLOCK, /*addToBin=*/false);
667         if (!block)
668             return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
669         *splittableRet = false;
670     } else {
671         const size_t regSz_sizeBased = alignUp(4*maxRequestedSize, 1024*1024);
672         // Another thread is modifying backend while we can't get the block.
673         // Wait while it leaves and re-do the scan
674         // before trying other ways to extend the backend.
675         if (bkndSync.waitTillBlockReleased(startModifiedCnt)
676             // the semaphore protects adding more memory from the OS
677             || memExtendingSema.wait())
678             return (FreeBlock*)VALID_BLOCK_IN_BIN;
679 
680         if (startModifiedCnt != bkndSync.getNumOfMods()) {
681             memExtendingSema.signal();
682             return (FreeBlock*)VALID_BLOCK_IN_BIN;
683         }
684 
685         if (blockSize < quiteSmall) {
686             // For this size of blocks, add NUM_OF_REG "advance" regions in bin,
687             // and return one as a result.
688             // TODO: add to bin first, because other threads can use them right away.
689             // This must be done carefully, because blocks in bins can be released
690             // in releaseCachesToLimit().
691             const unsigned NUM_OF_REG = 3;
692             MemRegionType regType = needSlabRegion ? MEMREG_SLAB_BLOCKS : MEMREG_LARGE_BLOCKS;
693             block = addNewRegion(regSz_sizeBased, regType, /*addToBin=*/false);
694             if (block)
695                 for (unsigned idx=0; idx<NUM_OF_REG; idx++)
696                     if (! addNewRegion(regSz_sizeBased, regType, /*addToBin=*/true))
697                         break;
698         } else {
699             block = addNewRegion(regSz_sizeBased, MEMREG_LARGE_BLOCKS, /*addToBin=*/false);
700         }
701         memExtendingSema.signal();
702 
703         // no regions found, try to clean cache
704         if (!block || block == (FreeBlock*)VALID_BLOCK_IN_BIN)
705             return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
706         // Since a region can hold more than one block it can be split.
707         *splittableRet = true;
708     }
709     // after asking the OS for memory, release caches if we are above the memory limits
710     releaseCachesToLimit();
711 
712     return block;
713 }
714 
715 void Backend::releaseCachesToLimit()
716 {
717     if (!memSoftLimit.load(std::memory_order_relaxed)
718             || totalMemSize.load(std::memory_order_relaxed) <= memSoftLimit.load(std::memory_order_relaxed)) {
719         return;
720     }
721     size_t locTotalMemSize, locMemSoftLimit;
722 
723     scanCoalescQ(/*forceCoalescQDrop=*/false);
724     if (extMemPool->softCachesCleanup() &&
725         (locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
726         (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
727         return;
728     // clean global large-object cache, if this is not enough, clean local caches
729     // do this in several tries, because backend fragmentation can prevent
730     // region from releasing
731     for (int cleanLocal = 0; cleanLocal<2; cleanLocal++)
732         while (cleanLocal ?
733                  extMemPool->allLocalCaches.cleanup(/*cleanOnlyUnused=*/true) :
734                  extMemPool->loc.decreasingCleanup())
735             if ((locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
736                 (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
737                 return;
738     // last chance to match memSoftLimit
739     extMemPool->hardCachesCleanup();
740 }
741 
742 int Backend::IndexedBins::getMinNonemptyBin(unsigned startBin) const
743 {
744     int p = bitMask.getMinTrue(startBin);
745     return p == -1 ? Backend::freeBinsNum : p;
746 }
747 
748 FreeBlock *Backend::IndexedBins::findBlock(int nativeBin, BackendSync *sync, size_t size,
749         bool needAlignedBlock, bool alignedBin, int *numOfLockedBins)
750 {
751     for (int i=getMinNonemptyBin(nativeBin); i<freeBinsNum; i=getMinNonemptyBin(i+1))
752         if (FreeBlock *block = getFromBin(i, sync, size, needAlignedBlock, alignedBin, /*wait=*/false, numOfLockedBins))
753             return block;
754 
755     return nullptr;
756 }
757 
758 void Backend::requestBootstrapMem()
759 {
760     if (bootsrapMemDone == bootsrapMemStatus.load(std::memory_order_acquire))
761         return;
762     MallocMutex::scoped_lock lock( bootsrapMemStatusMutex );
763     if (bootsrapMemDone == bootsrapMemStatus)
764         return;
765     MALLOC_ASSERT(bootsrapMemNotDone == bootsrapMemStatus, ASSERT_TEXT);
766     bootsrapMemStatus = bootsrapMemInitializing;
767     // request some rather big region during bootstrap in advance
768     // ok to get nullptr here, as later we re-do a request with more modest size
769     addNewRegion(2*1024*1024, MEMREG_SLAB_BLOCKS, /*addToBin=*/true);
770     bootsrapMemStatus = bootsrapMemDone;
771 }
772 
773 // try to allocate a block of num*size bytes from the available bins
774 // needAlignedBlock is true if the result must be slab-aligned
775 FreeBlock *Backend::genericGetBlock(int num, size_t size, bool needAlignedBlock)
776 {
777     FreeBlock *block = nullptr;
778     const size_t totalReqSize = num*size;
779     // no splitting after requesting a new region; ask for the exact size
780     const int nativeBin = sizeToBin(totalReqSize);
781 
782     requestBootstrapMem();
783     // If we found 2 or fewer locked bins, it's time to ask the OS for more memory.
784     // But nothing can be asked from a fixed pool. And we prefer to wait, not ask
785     // for more memory, if the block is quite large.
786     int lockedBinsThreshold = extMemPool->fixedPool || size>=maxBinned_SmallPage? 0 : 2;
787 
788     // Find maximal requested size limited by getMaxBinnedSize()
789     AtomicUpdate(maxRequestedSize, totalReqSize, MaxRequestComparator(this));
790     scanCoalescQ(/*forceCoalescQDrop=*/false);
791 
792     bool splittable = true;
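    // Retry loop, roughly: scan the bins; if too many bins were locked by other
    // threads, scan again; if still nothing, try draining the coalescing queue and
    // the caches, and only when that changes nothing ask the OS for a new region.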
793     for (;;) {
794         const intptr_t startModifiedCnt = bkndSync.getNumOfMods();
795         int numOfLockedBins;
796 
797         do {
798             numOfLockedBins = 0;
799             if (needAlignedBlock) {
800                 block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
801                                                         /*alignedBin=*/true, &numOfLockedBins);
802                 if (!block && extMemPool->fixedPool)
803                     block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
804                                                         /*alignedBin=*/false, &numOfLockedBins);
805             } else {
806                 block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
807                                                         /*alignedBin=*/false, &numOfLockedBins);
808                 if (!block && extMemPool->fixedPool)
809                     block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
810                                                         /*alignedBin=*/true, &numOfLockedBins);
811             }
812         } while (!block && numOfLockedBins>lockedBinsThreshold);
813 
814         if (block)
815             break;
816 
817         if (!(scanCoalescQ(/*forceCoalescQDrop=*/true) | extMemPool->softCachesCleanup())) {
818             // the bins were not updated,
819             // so the only remaining possibility is to ask for more memory
820             block = askMemFromOS(totalReqSize, startModifiedCnt, &lockedBinsThreshold,
821                         numOfLockedBins, &splittable, needAlignedBlock);
822             if (!block)
823                 return nullptr;
824             if (block != (FreeBlock*)VALID_BLOCK_IN_BIN) {
825                 // size can be increased in askMemFromOS, that's why >=
826                 MALLOC_ASSERT(block->sizeTmp >= size, ASSERT_TEXT);
827                 break;
828             }
829             // valid block somewhere in bins, let's find it
830             block = nullptr;
831         }
832     }
833     MALLOC_ASSERT(block, ASSERT_TEXT);
834     if (splittable) {
835         // At this point we have to be sure that slabAligned attribute describes the right block state
836         block = splitBlock(block, num, size, block->slabAligned, needAlignedBlock);
837     }
838     // matched blockConsumed() from startUseBlock()
839     bkndSync.blockReleased();
840 
841     return block;
842 }
843 
844 LargeMemoryBlock *Backend::getLargeBlock(size_t size)
845 {
846     LargeMemoryBlock *lmb =
847         (LargeMemoryBlock*)genericGetBlock(1, size, /*needAlignedRes=*/false);
848     if (lmb) {
849         lmb->unalignedSize = size;
850         if (extMemPool->userPool())
851             extMemPool->lmbList.add(lmb);
852     }
853     return lmb;
854 }
855 
856 BlockI *Backend::getSlabBlock(int num) {
857     BlockI *b = (BlockI*)genericGetBlock(num, slabSize, /*slabAligned=*/true);
858     MALLOC_ASSERT(isAligned(b, slabSize), ASSERT_TEXT);
859     return b;
860 }
861 
862 void Backend::putSlabBlock(BlockI *block) {
863     genericPutBlock((FreeBlock *)block, slabSize, /*slabAligned=*/true);
864 }
865 
866 void *Backend::getBackRefSpace(size_t size, bool *rawMemUsed)
867 {
868     // This block is released only at shutdown, so it can prevent
869     // an entire region from being released when it is taken from the backend;
870     // therefore prefer using getRawMemory.
871     if (void *ret = getRawMemory(size, REGULAR)) {
872         *rawMemUsed = true;
873         return ret;
874     }
875     void *ret = genericGetBlock(1, size, /*needAlignedRes=*/false);
876     if (ret) *rawMemUsed = false;
877     return ret;
878 }
879 
880 void Backend::putBackRefSpace(void *b, size_t size, bool rawMemUsed)
881 {
882     if (rawMemUsed)
883         freeRawMemory(b, size);
884     // non-raw memory is ignored, as it is released when its region is released
885 }
886 
887 void Backend::removeBlockFromBin(FreeBlock *fBlock)
888 {
889     if (fBlock->myBin != Backend::NO_BIN) {
890         if (fBlock->slabAligned)
891             freeSlabAlignedBins.lockRemoveBlock(fBlock->myBin, fBlock);
892         else
893             freeLargeBlockBins.lockRemoveBlock(fBlock->myBin, fBlock);
894     }
895 }
896 
897 void Backend::genericPutBlock(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
898 {
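    // Mark the block as "in fly" while it is coalesced and returned to a bin, so
    // that threads waiting in BackendSync::waitTillBlockReleased() can observe the
    // in-flight work and re-scan the bins.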
899     bkndSync.blockConsumed();
900     coalescAndPut(fBlock, blockSz, slabAligned);
901     bkndSync.blockReleased();
902 }
903 
904 void AllLargeBlocksList::add(LargeMemoryBlock *lmb)
905 {
906     MallocMutex::scoped_lock scoped_cs(largeObjLock);
907     lmb->gPrev = nullptr;
908     lmb->gNext = loHead;
909     if (lmb->gNext)
910         lmb->gNext->gPrev = lmb;
911     loHead = lmb;
912 }
913 
914 void AllLargeBlocksList::remove(LargeMemoryBlock *lmb)
915 {
916     MallocMutex::scoped_lock scoped_cs(largeObjLock);
917     if (loHead == lmb)
918         loHead = lmb->gNext;
919     if (lmb->gNext)
920         lmb->gNext->gPrev = lmb->gPrev;
921     if (lmb->gPrev)
922         lmb->gPrev->gNext = lmb->gNext;
923 }
924 
925 void Backend::putLargeBlock(LargeMemoryBlock *lmb)
926 {
927     if (extMemPool->userPool())
928         extMemPool->lmbList.remove(lmb);
929     genericPutBlock((FreeBlock *)lmb, lmb->unalignedSize, false);
930 }
931 
932 void Backend::returnLargeObject(LargeMemoryBlock *lmb)
933 {
934     removeBackRef(lmb->backRefIdx);
935     putLargeBlock(lmb);
936     STAT_increment(getThreadId(), ThreadCommonCounters, freeLargeObj);
937 }
938 
939 #if BACKEND_HAS_MREMAP
940 void *Backend::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
941 {
942     // no remap for user pools, nor for objects so small that they live in bins
943     if (inUserPool() || min(oldSize, newSize)<maxBinned_SmallPage
944         // during remap, we cannot guarantee alignment stricter than the current one,
945         // nor stricter than page alignment
946         || !isAligned(ptr, alignment) || alignment>extMemPool->granularity)
947         return nullptr;
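    // From here on, we handle only a large single-block region owned by the default
    // pool: locate its MemRegion through the LastFreeBlock on the right, grow the
    // mapping with mremap (the kernel may move it), and rebuild the block and object
    // headers at the same offsets inside the (possibly moved) region.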
948     const LargeMemoryBlock* lmbOld = ((LargeObjectHdr *)ptr - 1)->memoryBlock;
949     const size_t oldUnalignedSize = lmbOld->unalignedSize;
950     FreeBlock *oldFBlock = (FreeBlock *)lmbOld;
951     FreeBlock *right = oldFBlock->rightNeig(oldUnalignedSize);
952     // in every region only one block can have LAST_REGION_BLOCK on its right,
953     // so no synchronization is needed
954     if (!right->isLastRegionBlock())
955         return nullptr;
956 
957     MemRegion *oldRegion = static_cast<LastFreeBlock*>(right)->memRegion;
958     MALLOC_ASSERT( oldRegion < ptr, ASSERT_TEXT );
959     const size_t oldRegionSize = oldRegion->allocSz;
960     if (oldRegion->type != MEMREG_ONE_BLOCK)
961         return nullptr;  // we are not single in the region
962     const size_t userOffset = (uintptr_t)ptr - (uintptr_t)oldRegion;
963     const size_t alignedSize = LargeObjectCache::alignToBin(newSize + userOffset);
964     const size_t requestSize =
965         alignUp(sizeof(MemRegion) + alignedSize + sizeof(LastFreeBlock), extMemPool->granularity);
966     if (requestSize < alignedSize) // is wrapped around?
967         return nullptr;
968     regionList.remove(oldRegion);
969 
970     // The deallocation should be registered in address range before mremap to
971     // prevent a race condition with allocation on another thread.
972     // (OS can reuse the memory and registerAlloc will be missed on another thread)
973     usedAddrRange.registerFree((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize);
974 
975     void *ret = mremap(oldRegion, oldRegion->allocSz, requestSize, MREMAP_MAYMOVE);
976     if (MAP_FAILED == ret) { // can't remap, revert and leave
977         regionList.add(oldRegion);
978         usedAddrRange.registerAlloc((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize);
979         return nullptr;
980     }
981     MemRegion *region = (MemRegion*)ret;
982     MALLOC_ASSERT(region->type == MEMREG_ONE_BLOCK, ASSERT_TEXT);
983     region->allocSz = requestSize;
984     region->blockSz = alignedSize;
985 
986     FreeBlock *fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion),
987                                              largeObjectAlignment);
988 
989     regionList.add(region);
990     startUseBlock(region, fBlock, /*addToBin=*/false);
991     MALLOC_ASSERT(fBlock->sizeTmp == region->blockSz, ASSERT_TEXT);
992     // matched blockConsumed() in startUseBlock().
993     // TODO: get rid of useless pair blockConsumed()/blockReleased()
994     bkndSync.blockReleased();
995 
996     // object must start at same offset from region's start
997     void *object = (void*)((uintptr_t)region + userOffset);
998     MALLOC_ASSERT(isAligned(object, alignment), ASSERT_TEXT);
999     LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
1000     setBackRef(header->backRefIdx, header);
1001 
1002     LargeMemoryBlock *lmb = (LargeMemoryBlock*)fBlock;
1003     lmb->unalignedSize = region->blockSz;
1004     lmb->objectSize = newSize;
1005     lmb->backRefIdx = header->backRefIdx;
1006     header->memoryBlock = lmb;
1007     MALLOC_ASSERT((uintptr_t)lmb + lmb->unalignedSize >=
1008                   (uintptr_t)object + lmb->objectSize, "An object must fit to the block.");
1009 
1010     usedAddrRange.registerAlloc((uintptr_t)region, (uintptr_t)region + requestSize);
1011     totalMemSize.fetch_add(region->allocSz - oldRegionSize);
1012 
1013     return object;
1014 }
1015 #endif /* BACKEND_HAS_MREMAP */
1016 
1017 void Backend::releaseRegion(MemRegion *memRegion)
1018 {
1019     regionList.remove(memRegion);
1020     freeRawMem(memRegion, memRegion->allocSz);
1021 }
1022 
1023 // coalesce fBlock with its neighbors
1024 FreeBlock *Backend::doCoalesc(FreeBlock *fBlock, MemRegion **mRegion)
1025 {
1026     FreeBlock *resBlock = fBlock;
1027     size_t resSize = fBlock->sizeTmp;
1028     MemRegion *memRegion = nullptr;
1029 
1030     fBlock->markCoalescing(resSize);
1031     resBlock->blockInBin = false;
1032 
1033     // coalescing with left neighbor
1034     size_t leftSz = fBlock->trySetLeftUsed(GuardedSize::COAL_BLOCK);
1035     if (leftSz != GuardedSize::LOCKED) {
1036         if (leftSz == GuardedSize::COAL_BLOCK) {
1037             coalescQ.putBlock(fBlock);
1038             return nullptr;
1039         } else {
1040             FreeBlock *left = fBlock->leftNeig(leftSz);
1041             size_t lSz = left->trySetMeUsed(GuardedSize::COAL_BLOCK);
1042             if (lSz <= GuardedSize::MAX_LOCKED_VAL) {
1043                 fBlock->setLeftFree(leftSz); // rollback
1044                 coalescQ.putBlock(fBlock);
1045                 return nullptr;
1046             } else {
1047                 MALLOC_ASSERT(lSz == leftSz, "Invalid header");
1048                 left->blockInBin = true;
1049                 resBlock = left;
1050                 resSize += leftSz;
1051                 resBlock->sizeTmp = resSize;
1052             }
1053         }
1054     }
1055     // coalescing with right neighbor
1056     FreeBlock *right = fBlock->rightNeig(fBlock->sizeTmp);
1057     size_t rightSz = right->trySetMeUsed(GuardedSize::COAL_BLOCK);
1058     if (rightSz != GuardedSize::LOCKED) {
1059         // LastFreeBlock is on the right side
1060         if (GuardedSize::LAST_REGION_BLOCK == rightSz) {
1061             right->setMeFree(GuardedSize::LAST_REGION_BLOCK);
1062             memRegion = static_cast<LastFreeBlock*>(right)->memRegion;
1063         } else if (GuardedSize::COAL_BLOCK == rightSz) {
1064             if (resBlock->blockInBin) {
1065                 resBlock->blockInBin = false;
1066                 removeBlockFromBin(resBlock);
1067             }
1068             coalescQ.putBlock(resBlock);
1069             return nullptr;
1070         } else {
1071             size_t rSz = right->rightNeig(rightSz)->
1072                 trySetLeftUsed(GuardedSize::COAL_BLOCK);
1073             if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
1074                 right->setMeFree(rightSz);  // rollback
1075                 if (resBlock->blockInBin) {
1076                     resBlock->blockInBin = false;
1077                     removeBlockFromBin(resBlock);
1078                 }
1079                 coalescQ.putBlock(resBlock);
1080                 return nullptr;
1081             } else {
1082                 MALLOC_ASSERT(rSz == rightSz, "Invalid header");
1083                 removeBlockFromBin(right);
1084                 resSize += rightSz;
1085 
1086                 // Is LastFreeBlock on the right side of right?
1087                 FreeBlock *nextRight = right->rightNeig(rightSz);
1088                 size_t nextRightSz = nextRight->
1089                     trySetMeUsed(GuardedSize::COAL_BLOCK);
1090                 if (nextRightSz > GuardedSize::MAX_LOCKED_VAL) {
1091                     if (nextRightSz == GuardedSize::LAST_REGION_BLOCK)
1092                         memRegion = static_cast<LastFreeBlock*>(nextRight)->memRegion;
1093 
1094                     nextRight->setMeFree(nextRightSz);
1095                 }
1096             }
1097         }
1098     }
1099     if (memRegion) {
1100         MALLOC_ASSERT((uintptr_t)memRegion + memRegion->allocSz >=
1101                       (uintptr_t)right + sizeof(LastFreeBlock), ASSERT_TEXT);
1102         MALLOC_ASSERT((uintptr_t)memRegion < (uintptr_t)resBlock, ASSERT_TEXT);
1103         *mRegion = memRegion;
1104     } else
1105         *mRegion = nullptr;
1106     resBlock->sizeTmp = resSize;
1107     return resBlock;
1108 }
1109 
1110 bool Backend::coalescAndPutList(FreeBlock *list, bool forceCoalescQDrop, bool reportBlocksProcessed)
1111 {
1112     bool regionReleased = false;
1113 
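    // Walk the nextToFree list; "helper" saves the next element up front because
    // doCoalesc() may hand the current block over to the coalescing queue. When
    // requested, every processed block is reported back to coalescQ to match the
    // accounting done in CoalRequestQ::putBlock().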
1114     for (FreeBlock *helper; list;
1115          list = helper,
1116              // matches block enqueue in CoalRequestQ::putBlock()
1117              reportBlocksProcessed? coalescQ.blockWasProcessed() : (void)0) {
1118         MemRegion *memRegion;
1119         bool addToTail = false;
1120 
1121         helper = list->nextToFree;
1122         FreeBlock *toRet = doCoalesc(list, &memRegion);
1123         if (!toRet)
1124             continue;
1125 
1126         if (memRegion && memRegion->blockSz == toRet->sizeTmp
1127             && !extMemPool->fixedPool) {
1128             if (extMemPool->regionsAreReleaseable()) {
1129                 // release the region, because there is no used blocks in it
1130                 if (toRet->blockInBin)
1131                     removeBlockFromBin(toRet);
1132                 releaseRegion(memRegion);
1133                 regionReleased = true;
1134                 continue;
1135             } else // add block from empty region to end of bin,
1136                 addToTail = true; // preserving for exact fit
1137         }
1138         size_t currSz = toRet->sizeTmp;
1139         int bin = sizeToBin(currSz);
1140         bool toAligned = extMemPool->fixedPool ? toAlignedBin(toRet, currSz) : toRet->slabAligned;
1141         bool needAddToBin = true;
1142 
1143         if (toRet->blockInBin) {
1144             // Does it stay in same bin?
1145             if (toRet->myBin == bin && toRet->slabAligned == toAligned)
1146                 needAddToBin = false;
1147             else {
1148                 toRet->blockInBin = false;
1149                 removeBlockFromBin(toRet);
1150             }
1151         }
1152 
1153         // Does not stay in same bin, or bin-less; add it
1154         if (needAddToBin) {
1155             toRet->prev = toRet->next = toRet->nextToFree = nullptr;
1156             toRet->myBin = NO_BIN;
1157             toRet->slabAligned = toAligned;
1158 
1159             // If the block is too small to fit in any bin, keep it bin-less.
1160             // It's not a leak because the block later can be coalesced.
1161             if (currSz >= minBinnedSize) {
1162                 toRet->sizeTmp = currSz;
1163                 IndexedBins *target = toRet->slabAligned ? &freeSlabAlignedBins : &freeLargeBlockBins;
1164                 if (forceCoalescQDrop) {
1165                     target->addBlock(bin, toRet, toRet->sizeTmp, addToTail);
1166                 } else if (!target->tryAddBlock(bin, toRet, addToTail)) {
1167                     coalescQ.putBlock(toRet);
1168                     continue;
1169                 }
1170             }
1171             toRet->sizeTmp = 0;
1172         }
1173         // Release the (possibly coalesced) free block.
1174         // Adding it to a bin must be done before this point,
1175         // because once the block is free it can be coalesced, and
1176         // using its pointer becomes unsafe.
1177         // Remember that coalescing is not done under any global lock.
1178         toRet->setMeFree(currSz);
1179         toRet->rightNeig(currSz)->setLeftFree(currSz);
1180     }
1181     return regionReleased;
1182 }
1183 
1184 // Coalesce fBlock and add it back to a bin;
1185 // processing delayed coalescing requests.
1186 void Backend::coalescAndPut(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
1187 {
1188     fBlock->sizeTmp = blockSz;
1189     fBlock->nextToFree = nullptr;
1190     fBlock->slabAligned = slabAligned;
1191 
1192     coalescAndPutList(fBlock, /*forceCoalescQDrop=*/false, /*reportBlocksProcessed=*/false);
1193 }
1194 
1195 bool Backend::scanCoalescQ(bool forceCoalescQDrop)
1196 {
1197     FreeBlock *currCoalescList = coalescQ.getAll();
1198 
1199     if (currCoalescList)
1200         // reportBlocksProcessed=true informs that the blocks leave coalescQ,
1201         // matches blockConsumed() from CoalRequestQ::putBlock()
1202         coalescAndPutList(currCoalescList, forceCoalescQDrop,
1203                           /*reportBlocksProcessed=*/true);
1204     // returns the status of coalescQ.getAll(), as an indication of possible changes in the backend
1205     // TODO: coalescAndPutList() may report whether some new free blocks became available or not
1206     return currCoalescList;
1207 }
1208 
1209 FreeBlock *Backend::findBlockInRegion(MemRegion *region, size_t exactBlockSize)
1210 {
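    // Region layout, schematically:
    //   [MemRegion header][alignment padding][fBlock ... usable space ... fBlockEnd][LastFreeBlock]
    // For slab regions the usable space is slab-aligned on the right; for other
    // regions it is exactly exactBlockSize bytes starting at largeObjectAlignment.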
1211     FreeBlock *fBlock;
1212     size_t blockSz;
1213     uintptr_t fBlockEnd,
1214         lastFreeBlock = (uintptr_t)region + region->allocSz - sizeof(LastFreeBlock);
1215 
1216     static_assert(sizeof(LastFreeBlock) % sizeof(uintptr_t) == 0,
1217         "Atomic applied on LastFreeBlock, and we put it at the end of region, that"
1218         " is uintptr_t-aligned, so no unaligned atomic operations are possible.");
1219      // right bound is slab-aligned, keep LastFreeBlock after it
1220     if (region->type == MEMREG_SLAB_BLOCKS) {
1221         fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), sizeof(uintptr_t));
1222         fBlockEnd = alignDown(lastFreeBlock, slabSize);
1223     } else {
1224         fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), largeObjectAlignment);
1225         fBlockEnd = (uintptr_t)fBlock + exactBlockSize;
1226         MALLOC_ASSERT(fBlockEnd <= lastFreeBlock, ASSERT_TEXT);
1227     }
1228     if (fBlockEnd <= (uintptr_t)fBlock)
1229         return nullptr; // allocSz is too small
1230     blockSz = fBlockEnd - (uintptr_t)fBlock;
1231     // TODO: extend getSlabBlock to support degradation, i.e. getting fewer blocks
1232     // than requested, and then relax this check
1233     // (for now "all or nothing" is implemented; the check reflects that)
1234     if (blockSz < numOfSlabAllocOnMiss*slabSize)
1235         return nullptr;
1236 
1237     region->blockSz = blockSz;
1238     return fBlock;
1239 }
1240 
1241 // startUseBlock may add the free block to a bin; the block can be used and
1242 // even released after this, so the region must already be added to regionList
1243 void Backend::startUseBlock(MemRegion *region, FreeBlock *fBlock, bool addToBin)
1244 {
1245     size_t blockSz = region->blockSz;
1246     fBlock->initHeader();
1247     fBlock->setMeFree(blockSz);
1248 
1249     LastFreeBlock *lastBl = static_cast<LastFreeBlock*>(fBlock->rightNeig(blockSz));
1250     // to not get unaligned atomics during LastFreeBlock access
1251     MALLOC_ASSERT(isAligned(lastBl, sizeof(uintptr_t)), nullptr);
1252     lastBl->initHeader();
1253     lastBl->setMeFree(GuardedSize::LAST_REGION_BLOCK);
1254     lastBl->setLeftFree(blockSz);
1255     lastBl->myBin = NO_BIN;
1256     lastBl->memRegion = region;
1257 
1258     if (addToBin) {
1259         unsigned targetBin = sizeToBin(blockSz);
1260         // while adding advance regions, register the bin for the largest block in the region
1261         advRegBins.registerBin(targetBin);
1262         if (region->type == MEMREG_SLAB_BLOCKS) {
1263             fBlock->slabAligned = true;
1264             freeSlabAlignedBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
1265         } else {
1266             fBlock->slabAligned = false;
1267             freeLargeBlockBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
1268         }
1269     } else {
1270         // to match with blockReleased() in genericGetBlock
1271         bkndSync.blockConsumed();
1272         // Understand our alignment for correct splitBlock operation
1273         fBlock->slabAligned = region->type == MEMREG_SLAB_BLOCKS ? true : false;
1274         fBlock->sizeTmp = fBlock->tryLockBlock();
1275         MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, "Locking must be successful");
1276     }
1277 }
1278 
1279 void MemRegionList::add(MemRegion *r)
1280 {
1281     r->prev = nullptr;
1282     MallocMutex::scoped_lock lock(regionListLock);
1283     r->next = head;
1284     head = r;
1285     if (head->next)
1286         head->next->prev = head;
1287 }
1288 
1289 void MemRegionList::remove(MemRegion *r)
1290 {
1291     MallocMutex::scoped_lock lock(regionListLock);
1292     if (head == r)
1293         head = head->next;
1294     if (r->next)
1295         r->next->prev = r->prev;
1296     if (r->prev)
1297         r->prev->next = r->next;
1298 }
1299 
1300 #if __TBB_MALLOC_BACKEND_STAT
1301 int MemRegionList::reportStat(FILE *f)
1302 {
1303     int regNum = 0;
1304     MallocMutex::scoped_lock lock(regionListLock);
1305     for (MemRegion *curr = head; curr; curr = curr->next) {
1306         fprintf(f, "%p: max block %lu B, ", curr, curr->blockSz);
1307         regNum++;
1308     }
1309     return regNum;
1310 }
1311 #endif
1312 
1313 FreeBlock *Backend::addNewRegion(size_t size, MemRegionType memRegType, bool addToBin)
1314 {
1315     static_assert(sizeof(BlockMutexes) <= sizeof(BlockI), "Header must be not overwritten in used blocks");
1316     MALLOC_ASSERT(FreeBlock::minBlockSize > GuardedSize::MAX_SPEC_VAL,
1317           "Block length must not conflict with special values of GuardedSize");
1318     // If the region is not "for slabs" we should reserve some space for
1319     // a region header, the worst case alignment and the last block mark.
1320     const size_t requestSize = memRegType == MEMREG_SLAB_BLOCKS ? size :
1321         size + sizeof(MemRegion) + largeObjectAlignment
1322              +  FreeBlock::minBlockSize + sizeof(LastFreeBlock);
1323 
1324     size_t rawSize = requestSize;
1325     MemRegion *region = (MemRegion*)allocRawMem(rawSize);
1326     if (!region) {
1327         MALLOC_ASSERT(rawSize==requestSize, "getRawMem has not allocated memory but changed the allocated size.");
1328         return nullptr;
1329     }
1330     if (rawSize < sizeof(MemRegion)) {
1331         if (!extMemPool->fixedPool)
1332             freeRawMem(region, rawSize);
1333         return nullptr;
1334     }
1335 
1336     region->type = memRegType;
1337     region->allocSz = rawSize;
1338     FreeBlock *fBlock = findBlockInRegion(region, size);
1339     if (!fBlock) {
1340         if (!extMemPool->fixedPool)
1341             freeRawMem(region, rawSize);
1342         return nullptr;
1343     }
1344     regionList.add(region);
1345     startUseBlock(region, fBlock, addToBin);
1346     bkndSync.binsModified();
1347     return addToBin? (FreeBlock*)VALID_BLOCK_IN_BIN : fBlock;
1348 }
1349 
1350 void Backend::init(ExtMemoryPool *extMemoryPool)
1351 {
1352     extMemPool = extMemoryPool;
1353     usedAddrRange.init();
1354     coalescQ.init(&bkndSync);
1355     bkndSync.init(this);
1356 }
1357 
1358 void Backend::reset()
1359 {
1360     MALLOC_ASSERT(extMemPool->userPool(), "Only user pool can be reset.");
1361     // no active threads are allowed in backend while reset() called
1362     verify();
1363 
1364     freeLargeBlockBins.reset();
1365     freeSlabAlignedBins.reset();
1366     advRegBins.reset();
1367 
1368     for (MemRegion *curr = regionList.head; curr; curr = curr->next) {
1369         FreeBlock *fBlock = findBlockInRegion(curr, curr->blockSz);
1370         MALLOC_ASSERT(fBlock, "A memory region unexpectedly got smaller");
1371         startUseBlock(curr, fBlock, /*addToBin=*/true);
1372     }
1373 }
1374 
1375 bool Backend::destroy()
1376 {
1377     bool noError = true;
1378     // no active threads are allowed in backend while destroy() called
1379     verify();
1380     if (!inUserPool()) {
1381         freeLargeBlockBins.reset();
1382         freeSlabAlignedBins.reset();
1383     }
1384     while (regionList.head) {
1385         MemRegion *helper = regionList.head->next;
1386         noError &= freeRawMem(regionList.head, regionList.head->allocSz);
1387         regionList.head = helper;
1388     }
1389     return noError;
1390 }
1391 
1392 bool Backend::clean()
1393 {
1394     scanCoalescQ(/*forceCoalescQDrop=*/false);
1395 
1396     bool res = false;
1397     // We can have several blocks occupying a whole region,
1398     // because such regions are added in advance (see askMemFromOS() and reset()),
1399     // and never used. Release them all.
1400     for (int i = advRegBins.getMinUsedBin(0); i != -1; i = advRegBins.getMinUsedBin(i+1)) {
1401         if (i == freeSlabAlignedBins.getMinNonemptyBin(i))
1402             res |= freeSlabAlignedBins.tryReleaseRegions(i, this);
1403         if (i == freeLargeBlockBins.getMinNonemptyBin(i))
1404             res |= freeLargeBlockBins.tryReleaseRegions(i, this);
1405     }
1406 
1407     return res;
1408 }
1409 
1410 void Backend::IndexedBins::verify()
1411 {
1412 #if MALLOC_DEBUG
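    // Check the boundary tags of every free block in every bin: the block's own
    // guard holds a real size, the right neighbor's left guard holds that same size,
    // and both the right neighbor's own guard and this block's left guard hold
    // special (<= MAX_SPEC_VAL) values, i.e. the neighbors are not plain free blocks.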
1413     for (int i=0; i<freeBinsNum; i++) {
1414         for (FreeBlock *fb = freeBins[i].head.load(std::memory_order_relaxed); fb; fb=fb->next) {
1415             uintptr_t mySz = fb->myL.value;
1416             MALLOC_ASSERT(mySz>GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
1417             FreeBlock *right = (FreeBlock*)((uintptr_t)fb + mySz);
1418             suppress_unused_warning(right);
1419             MALLOC_ASSERT(right->myL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
1420             MALLOC_ASSERT(right->leftL.value==mySz, ASSERT_TEXT);
1421             MALLOC_ASSERT(fb->leftL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
1422         }
1423     }
1424 #endif
1425 }
1426 
1427 // For correct operation, this must be called when no other thread
1428 // is changing the backend.
1429 void Backend::verify()
1430 {
1431 #if MALLOC_DEBUG
1432     scanCoalescQ(/*forceCoalescQDrop=*/false);
1433 #endif // MALLOC_DEBUG
1434 
1435     freeLargeBlockBins.verify();
1436     freeSlabAlignedBins.verify();
1437 }
1438 
1439 #if __TBB_MALLOC_BACKEND_STAT
1440 size_t Backend::Bin::countFreeBlocks()
1441 {
1442     size_t cnt = 0;
1443     {
1444         MallocMutex::scoped_lock lock(tLock);
1445         for (FreeBlock *fb = head; fb; fb = fb->next)
1446             cnt++;
1447     }
1448     return cnt;
1449 }
1450 
1451 size_t Backend::Bin::reportFreeBlocks(FILE *f)
1452 {
1453     size_t totalSz = 0;
1454     MallocMutex::scoped_lock lock(tLock);
1455     for (FreeBlock *fb = head; fb; fb = fb->next) {
1456         size_t sz = fb->tryLockBlock();
1457         fb->setMeFree(sz);
1458         fprintf(f, " [%p;%p]", fb, (void*)((uintptr_t)fb+sz));
1459         totalSz += sz;
1460     }
1461     return totalSz;
1462 }
1463 
1464 void Backend::IndexedBins::reportStat(FILE *f)
1465 {
1466     size_t totalSize = 0;
1467 
1468     for (int i=0; i<Backend::freeBinsNum; i++)
1469         if (size_t cnt = freeBins[i].countFreeBlocks()) {
1470             totalSize += freeBins[i].reportFreeBlocks(f);
1471             fprintf(f, " %d:%lu, ", i, cnt);
1472         }
1473     fprintf(f, "\ttotal size %lu KB", totalSize/1024);
1474 }
1475 
1476 void Backend::reportStat(FILE *f)
1477 {
1478     scanCoalescQ(/*forceCoalescQDrop=*/false);
1479 
1480     fprintf(f, "\n  regions:\n");
1481     int regNum = regionList.reportStat(f);
1482     fprintf(f, "\n%d regions, %lu KB in all regions\n  free bins:\nlarge bins: ",
1483             regNum, totalMemSize/1024);
1484     freeLargeBlockBins.reportStat(f);
1485     fprintf(f, "\naligned bins: ");
1486     freeSlabAlignedBins.reportStat(f);
1487     fprintf(f, "\n");
1488 }
1489 #endif // __TBB_MALLOC_BACKEND_STAT
1490 
1491 } } // namespaces
1492