xref: /oneTBB/src/tbbmalloc/backend.cpp (revision 6caecf96)
1 /*
2     Copyright (c) 2005-2021 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #include <string.h>   /* for memset */
18 #include <errno.h>
19 #include "tbbmalloc_internal.h"
20 
21 namespace rml {
22 namespace internal {
23 
24 /*********** Code to acquire memory from the OS or other executive ****************/
25 
26 /*
27   A syscall/malloc can set a non-zero errno on failure,
28   but the allocator might still be able to find memory to fulfill the request,
29   and we do not want a successful scalable_malloc call to change errno.
30   To support this, restore the old errno in (get|free)RawMemory, and set errno
31   in the frontend just before returning to user code.
32   Please note: every syscall/libc call used inside scalable_malloc that
33   sets errno must be protected this way, not just memory allocation per se.
34 */
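/*
  Illustrative sketch of the pattern described above (not the actual frontend
  code; the variable names are hypothetical): save errno around the raw request
  and restore it on the success path, so a successful allocation leaves errno
  untouched.

      int prevErrno = errno;
      void *mem = MapMemory(size, pageType); // may clobber errno on failure
      if (mem)
          errno = prevErrno;                 // success must not change errno
*/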
35 
36 #if USE_DEFAULT_MEMORY_MAPPING
37 #include "MapMemory.h"
38 #else
39 /* assume MapMemory and UnmapMemory are customized */
40 #endif
41 
42 void* getRawMemory (size_t size, PageType pageType) {
43     return MapMemory(size, pageType);
44 }
45 
46 int freeRawMemory (void *object, size_t size) {
47     return UnmapMemory(object, size);
48 }
49 
50 #if CHECK_ALLOCATION_RANGE
51 
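// UsedAddressRange maintains a conservative [leftBound, rightBound) envelope of
// all memory obtained directly from the OS; with CHECK_ALLOCATION_RANGE enabled,
// a pointer can presumably be checked cheaply against the address range the
// backend has ever handed out.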
52 void Backend::UsedAddressRange::registerAlloc(uintptr_t left, uintptr_t right)
53 {
54     MallocMutex::scoped_lock lock(mutex);
55     if (left < leftBound.load(std::memory_order_relaxed))
56         leftBound.store(left, std::memory_order_relaxed);
57     if (right > rightBound.load(std::memory_order_relaxed))
58         rightBound.store(right, std::memory_order_relaxed);
59     MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed), ASSERT_TEXT);
60     MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
61     MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) <= left && right <= rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
62 }
63 
64 void Backend::UsedAddressRange::registerFree(uintptr_t left, uintptr_t right)
65 {
66     MallocMutex::scoped_lock lock(mutex);
67     if (leftBound.load(std::memory_order_relaxed) == left) {
68         if (rightBound.load(std::memory_order_relaxed) == right) {
69             leftBound.store(ADDRESS_UPPER_BOUND, std::memory_order_relaxed);
70             rightBound.store(0, std::memory_order_relaxed);
71         } else
72             leftBound.store(right, std::memory_order_relaxed);
73     } else if (rightBound.load(std::memory_order_relaxed) == right)
74         rightBound.store(left, std::memory_order_relaxed);
75     MALLOC_ASSERT((!rightBound.load(std::memory_order_relaxed) && leftBound.load(std::memory_order_relaxed) == ADDRESS_UPPER_BOUND)
76                   || leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
77 }
78 #endif // CHECK_ALLOCATION_RANGE
79 
80 // Initialized in frontend inside defaultMemPool
81 extern HugePagesStatus hugePages;
82 
83 void *Backend::allocRawMem(size_t &size)
84 {
85     void *res = NULL;
86     size_t allocSize = 0;
87 
88     if (extMemPool->userPool()) {
89         if (extMemPool->fixedPool && bootsrapMemDone == bootsrapMemStatus.load(std::memory_order_acquire))
90             return NULL;
91         MALLOC_ASSERT(bootsrapMemStatus != bootsrapMemNotDone,
92                       "Backend::allocRawMem() called prematurely?");
93         // TODO: support for raw mem not aligned at sizeof(uintptr_t)
94         // memory from a fixed pool is requested once and only once
95         allocSize = alignUpGeneric(size, extMemPool->granularity);
96         res = (*extMemPool->rawAlloc)(extMemPool->poolId, allocSize);
97     } else {
98         // Align allocation on page size
99         size_t pageSize = hugePages.isEnabled ? hugePages.getGranularity() : extMemPool->granularity;
100         MALLOC_ASSERT(pageSize, "Page size cannot be zero.");
101         allocSize = alignUpGeneric(size, pageSize);
102 
103         // If the user requested huge pages and they are available, try to use preallocated ones first.
104         // If there are none, check for transparent huge page support and use that instead.
105         if (hugePages.isEnabled) {
106             if (hugePages.isHPAvailable) {
107                 res = getRawMemory(allocSize, PREALLOCATED_HUGE_PAGE);
108             }
109             if (!res && hugePages.isTHPAvailable) {
110                 res = getRawMemory(allocSize, TRANSPARENT_HUGE_PAGE);
111             }
112         }
113 
114         if (!res) {
115             res = getRawMemory(allocSize, REGULAR);
116         }
117     }
118 
119     if (res) {
120         MALLOC_ASSERT(allocSize > 0, "Invalid size of an allocated region.");
121         size = allocSize;
122         if (!extMemPool->userPool())
123             usedAddrRange.registerAlloc((uintptr_t)res, (uintptr_t)res+size);
124 #if MALLOC_DEBUG
125         volatile size_t curTotalSize = totalMemSize; // to read global value once
126         MALLOC_ASSERT(curTotalSize+size > curTotalSize, "Overflow allocation size.");
127 #endif
128         totalMemSize.fetch_add(size);
129     }
130 
131     return res;
132 }
133 
134 bool Backend::freeRawMem(void *object, size_t size)
135 {
136     bool fail;
137 #if MALLOC_DEBUG
138     volatile size_t curTotalSize = totalMemSize; // to read global value once
139     MALLOC_ASSERT(curTotalSize-size < curTotalSize, "Negative allocation size.");
140 #endif
141     totalMemSize.fetch_sub(size);
142     if (extMemPool->userPool()) {
143         MALLOC_ASSERT(!extMemPool->fixedPool, "No free for fixed-size pools.");
144         fail = (*extMemPool->rawFree)(extMemPool->poolId, object, size);
145     } else {
146         usedAddrRange.registerFree((uintptr_t)object, (uintptr_t)object + size);
147         fail = freeRawMemory(object, size);
148     }
149     // TODO: use result in all freeRawMem() callers
150     return !fail;
151 }
152 
153 /********* End memory acquisition code ********************************/
154 
155 // Protected object size. After successful locking, tryLock() returns the size of the locked block;
156 // releasing the lock requires setting the block size.
157 class GuardedSize : tbb::detail::no_copy {
158     std::atomic<uintptr_t> value;
159 public:
160     enum State {
161         LOCKED,
162         COAL_BLOCK,        // block is being coalesced now
163         MAX_LOCKED_VAL = COAL_BLOCK,
164         LAST_REGION_BLOCK, // used to mark last block in region
165         // values after this are "normal" block sizes
166         MAX_SPEC_VAL = LAST_REGION_BLOCK
167     };
168 
169     void initLocked() { value.store(LOCKED, std::memory_order_release); } // TBB_REVAMP_TODO: was relaxed
170     void makeCoalscing() {
171         MALLOC_ASSERT(value.load(std::memory_order_relaxed) == LOCKED, ASSERT_TEXT);
172         value.store(COAL_BLOCK, std::memory_order_release); // TBB_REVAMP_TODO: was relaxed
173     }
174     size_t tryLock(State state) {
175         MALLOC_ASSERT(state <= MAX_LOCKED_VAL, ASSERT_TEXT);
176         size_t sz = value.load(std::memory_order_acquire);
177         for (;;) {
178             if (sz <= MAX_LOCKED_VAL) {
179                 break;
180             }
181             if (value.compare_exchange_strong(sz, state)) {
182                 break;
183             }
184         }
185         return sz;
186     }
187     void unlock(size_t size) {
188         MALLOC_ASSERT(value.load(std::memory_order_relaxed) <= MAX_LOCKED_VAL, "The lock is not locked");
189         MALLOC_ASSERT(size > MAX_LOCKED_VAL, ASSERT_TEXT);
190         value.store(size, std::memory_order_release);
191     }
192     bool isLastRegionBlock() const { return value.load(std::memory_order_relaxed) == LAST_REGION_BLOCK; }
193     friend void Backend::IndexedBins::verify();
194 };
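// Roughly, the protocol GuardedSize implements (illustrative sketch only, not
// code used elsewhere in this file): the same atomic word stores either a real
// block size (> MAX_SPEC_VAL) or a lock/marker state, so locking and size
// publication are a single atomic operation.
//
//     GuardedSize gs;
//     gs.initLocked();                         // starts out locked
//     gs.unlock(1024);                         // publish: free block of 1024 bytes
//     size_t sz = gs.tryLock(GuardedSize::LOCKED);
//     if (sz > GuardedSize::MAX_LOCKED_VAL) {
//         // we own the block of size sz; later publish it again via unlock(sz)
//     }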
195 
196 struct MemRegion {
197     MemRegion *next,      // all regions of a pool are kept in a doubly-linked list,
198               *prev;      // so that they can all be released on pool destruction
199                           // and individual regions can be released as well.
200     size_t     allocSz,   // as returned by the pool callback
201                blockSz;   // initial and maximal inner block size
202     MemRegionType type;
203 };
204 
205 // this data must remain unmodified while the block is in use, so keep it separate
206 class BlockMutexes {
207 protected:
208     GuardedSize myL,   // lock for me
209                 leftL; // lock for left neighbor
210 };
211 
212 class FreeBlock : BlockMutexes {
213 public:
214     static const size_t minBlockSize;
215     friend void Backend::IndexedBins::verify();
216 
217     FreeBlock    *prev,       // in 2-linked list related to bin
218                  *next,
219                  *nextToFree; // used to form a queue during coalescing
220     // valid only while the block is being processed, i.e. it is neither free
221     size_t        sizeTmp;    // nor used outside of the backend
222     int           myBin;      // bin that owns the block
223     bool          slabAligned;
224     bool          blockInBin; // this block is already in myBin
225 
226     FreeBlock *rightNeig(size_t sz) const {
227         MALLOC_ASSERT(sz, ASSERT_TEXT);
228         return (FreeBlock*)((uintptr_t)this+sz);
229     }
230     FreeBlock *leftNeig(size_t sz) const {
231         MALLOC_ASSERT(sz, ASSERT_TEXT);
232         return (FreeBlock*)((uintptr_t)this - sz);
233     }
234 
235     void initHeader() { myL.initLocked(); leftL.initLocked(); }
236     void setMeFree(size_t size) { myL.unlock(size); }
237     size_t trySetMeUsed(GuardedSize::State s) { return myL.tryLock(s); }
238     bool isLastRegionBlock() const { return myL.isLastRegionBlock(); }
239 
240     void setLeftFree(size_t sz) { leftL.unlock(sz); }
241     size_t trySetLeftUsed(GuardedSize::State s) { return leftL.tryLock(s); }
242 
243     size_t tryLockBlock() {
244         size_t rSz, sz = trySetMeUsed(GuardedSize::LOCKED);
245 
246         if (sz <= GuardedSize::MAX_LOCKED_VAL)
247             return 0;
248         rSz = rightNeig(sz)->trySetLeftUsed(GuardedSize::LOCKED);
249         if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
250             setMeFree(sz);
251             return 0;
252         }
253         MALLOC_ASSERT(rSz == sz, ASSERT_TEXT);
254         return sz;
255     }
256     void markCoalescing(size_t blockSz) {
257         myL.makeCoalscing();
258         rightNeig(blockSz)->leftL.makeCoalscing();
259         sizeTmp = blockSz;
260         nextToFree = NULL;
261     }
262     void markUsed() {
263         myL.initLocked();
264         rightNeig(sizeTmp)->leftL.initLocked();
265         nextToFree = NULL;
266     }
267     static void markBlocks(FreeBlock *fBlock, int num, size_t size) {
268         for (int i=1; i<num; i++) {
269             fBlock = (FreeBlock*)((uintptr_t)fBlock + size);
270             fBlock->initHeader();
271         }
272     }
273 };
274 
275 // The last block in any region. Its "size" field is GuardedSize::LAST_REGION_BLOCK.
276 // Blocks of this kind are used to find the region header
277 // and make it possible to return the region back to the OS.
278 struct LastFreeBlock : public FreeBlock {
279     MemRegion *memRegion;
280 };
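// Presumed region layout (for illustration; see findBlockInRegion() and
// startUseBlock() for the authoritative math):
//
//   | MemRegion header | alignment pad | FreeBlock ... usable space ... | LastFreeBlock |
//   ^ region start                                                      ^ size field is
//                                                                         LAST_REGION_BLOCK,
//                                                                         memRegion points back
//                                                                         to the MemRegion header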
281 
282 const size_t FreeBlock::minBlockSize = sizeof(FreeBlock);
283 
284 inline bool BackendSync::waitTillBlockReleased(intptr_t startModifiedCnt)
285 {
286     AtomicBackoff backoff;
287 #if __TBB_MALLOC_BACKEND_STAT
288     class ITT_Guard {
289         void *ptr;
290     public:
291         ITT_Guard(void *p) : ptr(p) {
292             MALLOC_ITT_SYNC_PREPARE(ptr);
293         }
294         ~ITT_Guard() {
295             MALLOC_ITT_SYNC_ACQUIRED(ptr);
296         }
297     };
298     ITT_Guard ittGuard(&inFlyBlocks);
299 #endif
300     for (intptr_t myBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire),
301              myCoalescQInFlyBlocks = backend->blocksInCoalescing(); ; backoff.pause()) {
302         MALLOC_ASSERT(myBinsInFlyBlocks>=0 && myCoalescQInFlyBlocks>=0, NULL);
303         intptr_t currBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire),
304             currCoalescQInFlyBlocks = backend->blocksInCoalescing();
305         WhiteboxTestingYield();
306         // Stop waiting iff:
307 
308         // 1) blocks were removed from processing, not added
309         if (myBinsInFlyBlocks > currBinsInFlyBlocks
310         // 2) released during delayed coalescing queue
311             || myCoalescQInFlyBlocks > currCoalescQInFlyBlocks)
312             break;
313         // 3) if there are blocks in coalescing, and no progress in its processing,
314         // try to scan coalescing queue and stop waiting, if changes were made
315         // (if there are no changes and in-fly blocks exist, we continue
316         //  waiting to not increase load on coalescQ)
317         if (currCoalescQInFlyBlocks > 0 && backend->scanCoalescQ(/*forceCoalescQDrop=*/false))
318             break;
319         // 4) when there are no blocks
320         if (!currBinsInFlyBlocks && !currCoalescQInFlyBlocks)
321             // re-scan make sense only if bins were modified since scanned
322             return startModifiedCnt != getNumOfMods();
323         myBinsInFlyBlocks = currBinsInFlyBlocks;
324         myCoalescQInFlyBlocks = currCoalescQInFlyBlocks;
325     }
326     return true;
327 }
328 
329 void CoalRequestQ::putBlock(FreeBlock *fBlock)
330 {
331     MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, ASSERT_TEXT);
332     fBlock->markUsed();
333     // the block is in the queue, do not forget that it's here
334     inFlyBlocks++;
335 
336     FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);
337     for (;;) {
338         fBlock->nextToFree = myBlToFree;
339         if (blocksToFree.compare_exchange_strong(myBlToFree, fBlock)) {
340             return;
341         }
342     }
343 }
344 
345 FreeBlock *CoalRequestQ::getAll()
346 {
347     for (;;) {
348         FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);
349 
350         if (!myBlToFree) {
351             return NULL;
352         } else {
353             if (blocksToFree.compare_exchange_strong(myBlToFree, 0)) {
354                 return myBlToFree;
355             } else {
356                 continue;
357             }
358         }
359     }
360 }
361 
362 inline void CoalRequestQ::blockWasProcessed()
363 {
364     bkndSync->binsModified();
365     int prev = inFlyBlocks.fetch_sub(1);
366     MALLOC_ASSERT(prev > 0, ASSERT_TEXT);
367 }
368 
369 // Try to get a block from a bin.
370 // If the remaining free space would stay in the same bin,
371 //     split the block without removing it.
372 // If the free space should go to other bin(s), remove the block.
373 // alignedBin is true if all blocks in the bin have a slab-aligned right side.
374 FreeBlock *Backend::IndexedBins::getFromBin(int binIdx, BackendSync *sync, size_t size,
375         bool needAlignedRes, bool alignedBin,  bool wait, int *binLocked)
376 {
377     Bin *b = &freeBins[binIdx];
378 try_next:
379     FreeBlock *fBlock = NULL;
380     if (!b->empty()) {
381         bool locked;
382         MallocMutex::scoped_lock scopedLock(b->tLock, wait, &locked);
383 
384         if (!locked) {
385             if (binLocked) (*binLocked)++;
386             return NULL;
387         }
388 
389         for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; curr = curr->next) {
390             size_t szBlock = curr->tryLockBlock();
391             if (!szBlock) {
392                 // block is locked, re-do the bin lock, as there is no place to spin
393                 // while the block is being coalesced
394                 goto try_next;
395             }
396 
397             // GENERAL CASE
398             if (alignedBin || !needAlignedRes) {
399                 size_t splitSz = szBlock - size;
400                 // If we get a block as a split result, it must have room for control structures.
401                 if (szBlock >= size && (splitSz >= FreeBlock::minBlockSize || !splitSz))
402                     fBlock = curr;
403             } else {
404                 // SPECIAL CASE: to get an aligned block from an unaligned bin, we have to cut out the middle of a block
405                 // and return the remaining left and right parts. Possible only in the fixed pool scenario; the assert
406                 // for this is inside the splitBlock() function.
407 
408                 void *newB = alignUp(curr, slabSize);
409                 uintptr_t rightNew = (uintptr_t)newB + size;
410                 uintptr_t rightCurr = (uintptr_t)curr + szBlock;
411                 // Check if the block size is sufficient,
412                 // and also left and right split results are either big enough or non-existent
413                 if (rightNew <= rightCurr
414                         && (newB == curr || ((uintptr_t)newB - (uintptr_t)curr) >= FreeBlock::minBlockSize)
415                         && (rightNew == rightCurr || (rightCurr - rightNew) >= FreeBlock::minBlockSize))
416                     fBlock = curr;
417             }
418 
419             if (fBlock) {
420                 // blockConsumed() must be called before the result of removing from a bin becomes visible externally.
421                 sync->blockConsumed();
422                 // TODO: think about cases when block stays in the same bin
423                 b->removeBlock(fBlock);
424                 if (freeBins[binIdx].empty())
425                     bitMask.set(binIdx, false);
426                 fBlock->sizeTmp = szBlock;
427                 break;
428             } else { // block size is not valid, search for next block in the bin
429                 curr->setMeFree(szBlock);
430                 curr->rightNeig(szBlock)->setLeftFree(szBlock);
431             }
432         }
433     }
434     return fBlock;
435 }
436 
437 bool Backend::IndexedBins::tryReleaseRegions(int binIdx, Backend *backend)
438 {
439     Bin *b = &freeBins[binIdx];
440     FreeBlock *fBlockList = NULL;
441 
442     // take all blocks from the bin and re-do coalescing on them
443     // to release single-block regions
444 try_next:
445     if (!b->empty()) {
446         MallocMutex::scoped_lock binLock(b->tLock);
447         for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; ) {
448             size_t szBlock = curr->tryLockBlock();
449             if (!szBlock)
450                 goto try_next;
451 
452             FreeBlock *next = curr->next;
453 
454             b->removeBlock(curr);
455             curr->sizeTmp = szBlock;
456             curr->nextToFree = fBlockList;
457             fBlockList = curr;
458             curr = next;
459         }
460     }
461     return backend->coalescAndPutList(fBlockList, /*forceCoalescQDrop=*/true,
462                                       /*reportBlocksProcessed=*/false);
463 }
464 
465 void Backend::Bin::removeBlock(FreeBlock *fBlock)
466 {
467     MALLOC_ASSERT(fBlock->next||fBlock->prev||fBlock== head.load(std::memory_order_relaxed),
468                   "Detected that a block is not in the bin.");
469     if (head.load(std::memory_order_relaxed) == fBlock)
470         head.store(fBlock->next, std::memory_order_relaxed);
471     if (tail == fBlock)
472         tail = fBlock->prev;
473     if (fBlock->prev)
474         fBlock->prev->next = fBlock->next;
475     if (fBlock->next)
476         fBlock->next->prev = fBlock->prev;
477 }
478 
479 void Backend::IndexedBins::addBlock(int binIdx, FreeBlock *fBlock, size_t /* blockSz */, bool addToTail)
480 {
481     Bin *b = &freeBins[binIdx];
482     fBlock->myBin = binIdx;
483     fBlock->next = fBlock->prev = NULL;
484     {
485         MallocMutex::scoped_lock scopedLock(b->tLock);
486         if (addToTail) {
487             fBlock->prev = b->tail;
488             b->tail = fBlock;
489             if (fBlock->prev)
490                 fBlock->prev->next = fBlock;
491             if (!b->head.load(std::memory_order_relaxed))
492                 b->head.store(fBlock, std::memory_order_relaxed);
493         } else {
494             fBlock->next = b->head.load(std::memory_order_relaxed);
495             b->head.store(fBlock, std::memory_order_relaxed);
496             if (fBlock->next)
497                 fBlock->next->prev = fBlock;
498             if (!b->tail)
499                 b->tail = fBlock;
500         }
501     }
502     bitMask.set(binIdx, true);
503 }
504 
505 bool Backend::IndexedBins::tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail)
506 {
507     bool locked;
508     Bin *b = &freeBins[binIdx];
509     fBlock->myBin = binIdx;
510     if (addToTail) {
511         fBlock->next = NULL;
512         {
513             MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
514             if (!locked)
515                 return false;
516             fBlock->prev = b->tail;
517             b->tail = fBlock;
518             if (fBlock->prev)
519                 fBlock->prev->next = fBlock;
520             if (!b->head.load(std::memory_order_relaxed))
521                 b->head.store(fBlock, std::memory_order_relaxed);
522         }
523     } else {
524         fBlock->prev = NULL;
525         {
526             MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
527             if (!locked)
528                 return false;
529             fBlock->next = b->head.load(std::memory_order_relaxed);
530             b->head.store(fBlock, std::memory_order_relaxed);
531             if (fBlock->next)
532                 fBlock->next->prev = fBlock;
533             if (!b->tail)
534                 b->tail = fBlock;
535         }
536     }
537     bitMask.set(binIdx, true);
538     return true;
539 }
540 
541 void Backend::IndexedBins::reset()
542 {
543     for (unsigned i=0; i<Backend::freeBinsNum; i++)
544         freeBins[i].reset();
545     bitMask.reset();
546 }
547 
548 void Backend::IndexedBins::lockRemoveBlock(int binIdx, FreeBlock *fBlock)
549 {
550     MallocMutex::scoped_lock scopedLock(freeBins[binIdx].tLock);
551     freeBins[binIdx].removeBlock(fBlock);
552     if (freeBins[binIdx].empty())
553         bitMask.set(binIdx, false);
554 }
555 
556 bool ExtMemoryPool::regionsAreReleaseable() const
557 {
558     return !keepAllMemory && !delayRegsReleasing;
559 }
560 
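// Carve num*size bytes for the caller out of the locked block fBlock (which
// currently spans fBlock->sizeTmp bytes); leftover left/right parts go back to
// the backend via coalescAndPut(). The returned pointer is slab-aligned when
// needAlignedBlock is set.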
561 FreeBlock *Backend::splitBlock(FreeBlock *fBlock, int num, size_t size, bool blockIsAligned, bool needAlignedBlock)
562 {
563     const size_t totalSize = num * size;
564 
565     // SPECIAL CASE: for an unaligned block we have to cut out the middle of the block
566     // and return the remaining left and right parts. Possible only in a fixed pool scenario.
567     if (needAlignedBlock && !blockIsAligned) {
568         MALLOC_ASSERT(extMemPool->fixedPool,
569                 "Aligned block request from unaligned bin possible only in fixed pool scenario.");
570 
571         // Space to use is in the middle
572         FreeBlock *newBlock = alignUp(fBlock, slabSize);
573         FreeBlock *rightPart = (FreeBlock*)((uintptr_t)newBlock + totalSize);
574         uintptr_t fBlockEnd = (uintptr_t)fBlock + fBlock->sizeTmp;
575 
576         // Return free right part
577         if ((uintptr_t)rightPart != fBlockEnd) {
578             rightPart->initHeader();  // to prevent coalescing rightPart with fBlock
579             size_t rightSize = fBlockEnd - (uintptr_t)rightPart;
580             coalescAndPut(rightPart, rightSize, toAlignedBin(rightPart, rightSize));
581         }
582         // And free left part
583         if (newBlock != fBlock) {
584             newBlock->initHeader(); // to prevent coalescing fBlock with newB
585             size_t leftSize = (uintptr_t)newBlock - (uintptr_t)fBlock;
586             coalescAndPut(fBlock, leftSize, toAlignedBin(fBlock, leftSize));
587         }
588         fBlock = newBlock;
589     } else if (size_t splitSize = fBlock->sizeTmp - totalSize) { // need to split the block
590         // GENERAL CASE, cut the left or right part of the block
591         FreeBlock *splitBlock = NULL;
592         if (needAlignedBlock) {
593             // For slab-aligned blocks, cut the right side of the block
594             // and return it to the requester; the original block goes back to the backend
595             splitBlock = fBlock;
596             fBlock = (FreeBlock*)((uintptr_t)splitBlock + splitSize);
597             fBlock->initHeader();
598         } else {
599             // For large object blocks, cut the original block and put the free right part back to the backend
600             splitBlock = (FreeBlock*)((uintptr_t)fBlock + totalSize);
601             splitBlock->initHeader();
602         }
603         // Mark the free block the same as its parent only when the requested type (needAlignedBlock)
604         // and the block returned from the bins/OS (blockIsAligned) are equal; otherwise recompute (XOR used)
605         bool markAligned = (blockIsAligned ^ needAlignedBlock) ? toAlignedBin(splitBlock, splitSize) : blockIsAligned;
606         coalescAndPut(splitBlock, splitSize, markAligned);
607     }
608     MALLOC_ASSERT(!needAlignedBlock || isAligned(fBlock, slabSize), "Expect to get aligned block, if one was requested.");
609     FreeBlock::markBlocks(fBlock, num, size);
610     return fBlock;
611 }
612 
613 size_t Backend::getMaxBinnedSize() const
614 {
615     return hugePages.isEnabled && !inUserPool() ?
616         maxBinned_HugePage : maxBinned_SmallPage;
617 }
618 
619 inline bool Backend::MaxRequestComparator::operator()(size_t oldMaxReq, size_t requestSize) const
620 {
621     return requestSize > oldMaxReq && requestSize < backend->getMaxBinnedSize();
622 }
623 
624 // last chance to get memory
625 FreeBlock *Backend::releaseMemInCaches(intptr_t startModifiedCnt,
626                                     int *lockedBinsThreshold, int numOfLockedBins)
627 {
628     // something released from caches
629     if (extMemPool->hardCachesCleanup()
630         // ..or can use blocks that are in processing now
631         || bkndSync.waitTillBlockReleased(startModifiedCnt))
632         return (FreeBlock*)VALID_BLOCK_IN_BIN;
633     // OS can't give us more memory, but we have some in locked bins
634     if (*lockedBinsThreshold && numOfLockedBins) {
635         *lockedBinsThreshold = 0;
636         return (FreeBlock*)VALID_BLOCK_IN_BIN;
637     }
638     return NULL; // nothing found, give up
639 }
640 
641 FreeBlock *Backend::askMemFromOS(size_t blockSize, intptr_t startModifiedCnt,
642                                  int *lockedBinsThreshold, int numOfLockedBins,
643                                  bool *splittableRet, bool needSlabRegion)
644 {
645     FreeBlock *block;
646     // The block sizes can be divided into 3 groups:
647     //   1. "quite small": a popular object size, or we are in bootstrap or something
648     //      similar; request several regions.
649     //   2. "quite large": we want to have several such blocks in the region,
650     //      but do not want several pre-allocated regions.
651     //   3. "huge": exact fit, we allocate only one block and do not allow
652     //       any other allocations to be placed in the region.
653     // By dividing the block sizes into these groups we try to balance between
654     // too-small regions (which lead to fragmentation) and too-large ones (which
655     // lead to excessive address space consumption). If a region is "too
656     // large", allocate only one, to prevent fragmentation. It supposedly
657     // doesn't hurt performance, because the object requested by the user is large.
658     // Bounds for the groups are:
659     const size_t maxBinned = getMaxBinnedSize();
660     const size_t quiteSmall = maxBinned / 8;
661     const size_t quiteLarge = maxBinned;
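    // For illustration only: if maxBinned happened to be 8 MB, blocks below
    // 1 MB (maxBinned/8) would fall into group 1 and blocks of 8 MB and above
    // into group 3; everything in between is group 2.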
662 
663     if (blockSize >= quiteLarge) {
664         // Do not interact with other threads via semaphores: for an exact fit
665         // we can't share regions with them, so memory requests are individual.
666         block = addNewRegion(blockSize, MEMREG_ONE_BLOCK, /*addToBin=*/false);
667         if (!block)
668             return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
669         *splittableRet = false;
670     } else {
671         const size_t regSz_sizeBased = alignUp(4*maxRequestedSize, 1024*1024);
672         // Another thread is modifying the backend while we can't get the block.
673         // Wait until it finishes and re-do the scan
674         // before trying other ways to extend the backend.
675         if (bkndSync.waitTillBlockReleased(startModifiedCnt)
676             // the semaphore protects adding more memory from the OS
677             || memExtendingSema.wait())
678             return (FreeBlock*)VALID_BLOCK_IN_BIN;
679 
680         if (startModifiedCnt != bkndSync.getNumOfMods()) {
681             memExtendingSema.signal();
682             return (FreeBlock*)VALID_BLOCK_IN_BIN;
683         }
684 
685         if (blockSize < quiteSmall) {
686             // For this size of blocks, add NUM_OF_REG "advance" regions in bin,
687             // and return one as a result.
688             // TODO: add to bin first, because other threads can use them right away.
689             // This must be done carefully, because blocks in bins can be released
690             // in releaseCachesToLimit().
691             const unsigned NUM_OF_REG = 3;
692             MemRegionType regType = needSlabRegion ? MEMREG_SLAB_BLOCKS : MEMREG_LARGE_BLOCKS;
693             block = addNewRegion(regSz_sizeBased, regType, /*addToBin=*/false);
694             if (block)
695                 for (unsigned idx=0; idx<NUM_OF_REG; idx++)
696                     if (! addNewRegion(regSz_sizeBased, regType, /*addToBin=*/true))
697                         break;
698         } else {
699             block = addNewRegion(regSz_sizeBased, MEMREG_LARGE_BLOCKS, /*addToBin=*/false);
700         }
701         memExtendingSema.signal();
702 
703         // no regions found, try to clean cache
704         if (!block || block == (FreeBlock*)VALID_BLOCK_IN_BIN)
705             return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
706         // Since a region can hold more than one block it can be split.
707         *splittableRet = true;
708     }
709     // after asking the OS for memory, release caches if we are above the memory limits
710     releaseCachesToLimit();
711 
712     return block;
713 }
714 
715 void Backend::releaseCachesToLimit()
716 {
717     if (!memSoftLimit.load(std::memory_order_relaxed)
718             || totalMemSize.load(std::memory_order_relaxed) <= memSoftLimit.load(std::memory_order_relaxed)) {
719         return;
720     }
721     size_t locTotalMemSize, locMemSoftLimit;
722 
723     scanCoalescQ(/*forceCoalescQDrop=*/false);
724     if (extMemPool->softCachesCleanup() &&
725         (locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
726         (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
727         return;
728     // clean the global large-object cache; if this is not enough, clean local caches.
729     // Do this in several tries, because backend fragmentation can prevent
730     // a region from being released
731     for (int cleanLocal = 0; cleanLocal<2; cleanLocal++)
732         while (cleanLocal ?
733                  extMemPool->allLocalCaches.cleanup(/*cleanOnlyUnused=*/true) :
734                  extMemPool->loc.decreasingCleanup())
735             if ((locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
736                 (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
737                 return;
738     // last chance to match memSoftLimit
739     extMemPool->hardCachesCleanup();
740 }
741 
742 int Backend::IndexedBins::getMinNonemptyBin(unsigned startBin) const
743 {
744     int p = bitMask.getMinTrue(startBin);
745     return p == -1 ? Backend::freeBinsNum : p;
746 }
747 
748 FreeBlock *Backend::IndexedBins::findBlock(int nativeBin, BackendSync *sync, size_t size,
749         bool needAlignedBlock, bool alignedBin, int *numOfLockedBins)
750 {
751     for (int i=getMinNonemptyBin(nativeBin); i<freeBinsNum; i=getMinNonemptyBin(i+1))
752         if (FreeBlock *block = getFromBin(i, sync, size, needAlignedBlock, alignedBin, /*wait=*/false, numOfLockedBins))
753             return block;
754 
755     return NULL;
756 }
757 
758 void Backend::requestBootstrapMem()
759 {
760     if (bootsrapMemDone == bootsrapMemStatus.load(std::memory_order_acquire))
761         return;
762     MallocMutex::scoped_lock lock( bootsrapMemStatusMutex );
763     if (bootsrapMemDone == bootsrapMemStatus)
764         return;
765     MALLOC_ASSERT(bootsrapMemNotDone == bootsrapMemStatus, ASSERT_TEXT);
766     bootsrapMemStatus = bootsrapMemInitializing;
767     // request a rather big region in advance during bootstrap;
768     // it is OK to get NULL here, as later we re-do the request with a more modest size
769     addNewRegion(2*1024*1024, MEMREG_SLAB_BLOCKS, /*addToBin=*/true);
770     bootsrapMemStatus = bootsrapMemDone;
771 }
772 
773 // try to allocate a block of num*size bytes from the available bins;
774 // needAlignedBlock is true if the result must be slab-aligned
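// The overall strategy (as implemented below): scan the bins first, then drain
// the coalescing queue and the soft caches, and only then fall back to
// askMemFromOS(); the VALID_BLOCK_IN_BIN sentinel means "something usable was
// placed into a bin, so re-scan the bins instead of using the returned pointer".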
775 FreeBlock *Backend::genericGetBlock(int num, size_t size, bool needAlignedBlock)
776 {
777     FreeBlock *block = NULL;
778     const size_t totalReqSize = num*size;
779     // no splitting after requesting a new region; ask for the exact size
780     const int nativeBin = sizeToBin(totalReqSize);
781 
782     requestBootstrapMem();
783     // If we found 2 or fewer locked bins, it's time to ask the OS for more memory.
784     // But nothing can be asked from a fixed pool. And we prefer to wait, not ask
785     // for more memory, if the block is quite large.
786     int lockedBinsThreshold = extMemPool->fixedPool || size>=maxBinned_SmallPage? 0 : 2;
787 
788     // Find maximal requested size limited by getMaxBinnedSize()
789     AtomicUpdate(maxRequestedSize, totalReqSize, MaxRequestComparator(this));
790     scanCoalescQ(/*forceCoalescQDrop=*/false);
791 
792     bool splittable = true;
793     for (;;) {
794         const intptr_t startModifiedCnt = bkndSync.getNumOfMods();
795         int numOfLockedBins;
796 
797         do {
798             numOfLockedBins = 0;
799             if (needAlignedBlock) {
800                 block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
801                                                         /*alignedBin=*/true, &numOfLockedBins);
802                 if (!block && extMemPool->fixedPool)
803                     block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
804                                                         /*alignedBin=*/false, &numOfLockedBins);
805             } else {
806                 block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
807                                                         /*alignedBin=*/false, &numOfLockedBins);
808                 if (!block && extMemPool->fixedPool)
809                     block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
810                                                         /*alignedBin=*/true, &numOfLockedBins);
811             }
812         } while (!block && numOfLockedBins>lockedBinsThreshold);
813 
814         if (block)
815             break;
816 
817         if (!(scanCoalescQ(/*forceCoalescQDrop=*/true) | extMemPool->softCachesCleanup())) {
818             // bins are not updated,
819             // only remaining possibility is to ask for more memory
820             block = askMemFromOS(totalReqSize, startModifiedCnt, &lockedBinsThreshold,
821                         numOfLockedBins, &splittable, needAlignedBlock);
822             if (!block)
823                 return NULL;
824             if (block != (FreeBlock*)VALID_BLOCK_IN_BIN) {
825                 // size can be increased in askMemFromOS, that's why >=
826                 MALLOC_ASSERT(block->sizeTmp >= size, ASSERT_TEXT);
827                 break;
828             }
829             // valid block somewhere in bins, let's find it
830             block = NULL;
831         }
832     }
833     MALLOC_ASSERT(block, ASSERT_TEXT);
834     if (splittable) {
835         // At this point we have to be sure that slabAligned attribute describes the right block state
836         block = splitBlock(block, num, size, block->slabAligned, needAlignedBlock);
837     }
838     // matched blockConsumed() from startUseBlock()
839     bkndSync.blockReleased();
840 
841     return block;
842 }
843 
844 LargeMemoryBlock *Backend::getLargeBlock(size_t size)
845 {
846     LargeMemoryBlock *lmb =
847         (LargeMemoryBlock*)genericGetBlock(1, size, /*needAlignedRes=*/false);
848     if (lmb) {
849         lmb->unalignedSize = size;
850         if (extMemPool->userPool())
851             extMemPool->lmbList.add(lmb);
852     }
853     return lmb;
854 }
855 
856 BlockI *Backend::getSlabBlock(int num) {
857     BlockI *b = (BlockI*)genericGetBlock(num, slabSize, /*slabAligned=*/true);
858     MALLOC_ASSERT(isAligned(b, slabSize), ASSERT_TEXT);
859     return b;
860 }
861 
862 void Backend::putSlabBlock(BlockI *block) {
863     genericPutBlock((FreeBlock *)block, slabSize, /*slabAligned=*/true);
864 }
865 
866 void *Backend::getBackRefSpace(size_t size, bool *rawMemUsed)
867 {
868     // This block is released only at shutdown, so it could prevent
869     // an entire region from being released if it were taken from the backend;
870     // therefore prefer using getRawMemory.
871     if (void *ret = getRawMemory(size, REGULAR)) {
872         *rawMemUsed = true;
873         return ret;
874     }
875     void *ret = genericGetBlock(1, size, /*needAlignedRes=*/false);
876     if (ret) *rawMemUsed = false;
877     return ret;
878 }
879 
880 void Backend::putBackRefSpace(void *b, size_t size, bool rawMemUsed)
881 {
882     if (rawMemUsed)
883         freeRawMemory(b, size);
884     // ignore non-raw memory, as it is released when the region is released
885 }
886 
887 void Backend::removeBlockFromBin(FreeBlock *fBlock)
888 {
889     if (fBlock->myBin != Backend::NO_BIN) {
890         if (fBlock->slabAligned)
891             freeSlabAlignedBins.lockRemoveBlock(fBlock->myBin, fBlock);
892         else
893             freeLargeBlockBins.lockRemoveBlock(fBlock->myBin, fBlock);
894     }
895 }
896 
897 void Backend::genericPutBlock(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
898 {
899     bkndSync.blockConsumed();
900     coalescAndPut(fBlock, blockSz, slabAligned);
901     bkndSync.blockReleased();
902 }
903 
904 void AllLargeBlocksList::add(LargeMemoryBlock *lmb)
905 {
906     MallocMutex::scoped_lock scoped_cs(largeObjLock);
907     lmb->gPrev = NULL;
908     lmb->gNext = loHead;
909     if (lmb->gNext)
910         lmb->gNext->gPrev = lmb;
911     loHead = lmb;
912 }
913 
914 void AllLargeBlocksList::remove(LargeMemoryBlock *lmb)
915 {
916     MallocMutex::scoped_lock scoped_cs(largeObjLock);
917     if (loHead == lmb)
918         loHead = lmb->gNext;
919     if (lmb->gNext)
920         lmb->gNext->gPrev = lmb->gPrev;
921     if (lmb->gPrev)
922         lmb->gPrev->gNext = lmb->gNext;
923 }
924 
925 void Backend::putLargeBlock(LargeMemoryBlock *lmb)
926 {
927     if (extMemPool->userPool())
928         extMemPool->lmbList.remove(lmb);
929     genericPutBlock((FreeBlock *)lmb, lmb->unalignedSize, false);
930 }
931 
932 void Backend::returnLargeObject(LargeMemoryBlock *lmb)
933 {
934     removeBackRef(lmb->backRefIdx);
935     putLargeBlock(lmb);
936     STAT_increment(getThreadId(), ThreadCommonCounters, freeLargeObj);
937 }
938 
939 #if BACKEND_HAS_MREMAP
940 void *Backend::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
941 {
942     // no remap for user pools, nor for objects so small that they live in bins
943     if (inUserPool() || min(oldSize, newSize)<maxBinned_SmallPage
944         // during remap, can't guarantee alignment more strict than current or
945         // more strict than page alignment
946         || !isAligned(ptr, alignment) || alignment>extMemPool->granularity)
947         return NULL;
948     const LargeMemoryBlock* lmbOld = ((LargeObjectHdr *)ptr - 1)->memoryBlock;
949     const size_t oldUnalignedSize = lmbOld->unalignedSize;
950     FreeBlock *oldFBlock = (FreeBlock *)lmbOld;
951     FreeBlock *right = oldFBlock->rightNeig(oldUnalignedSize);
952     // in every region only one block can have LAST_REGION_BLOCK on its right,
953     // so no synchronization is needed
954     if (!right->isLastRegionBlock())
955         return NULL;
956 
957     MemRegion *oldRegion = static_cast<LastFreeBlock*>(right)->memRegion;
958     MALLOC_ASSERT( oldRegion < ptr, ASSERT_TEXT );
959     const size_t oldRegionSize = oldRegion->allocSz;
960     if (oldRegion->type != MEMREG_ONE_BLOCK)
961         return NULL;  // we are not single in the region
962     const size_t userOffset = (uintptr_t)ptr - (uintptr_t)oldRegion;
963     const size_t alignedSize = LargeObjectCache::alignToBin(newSize + userOffset);
964     const size_t requestSize =
965         alignUp(sizeof(MemRegion) + alignedSize + sizeof(LastFreeBlock), extMemPool->granularity);
966     if (requestSize < alignedSize) // is wrapped around?
967         return NULL;
968     regionList.remove(oldRegion);
969 
970     void *ret = mremap(oldRegion, oldRegion->allocSz, requestSize, MREMAP_MAYMOVE);
971     if (MAP_FAILED == ret) { // can't remap, revert and leave
972         regionList.add(oldRegion);
973         return NULL;
974     }
975     MemRegion *region = (MemRegion*)ret;
976     MALLOC_ASSERT(region->type == MEMREG_ONE_BLOCK, ASSERT_TEXT);
977     region->allocSz = requestSize;
978     region->blockSz = alignedSize;
979 
980     FreeBlock *fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion),
981                                              largeObjectAlignment);
982 
983     regionList.add(region);
984     startUseBlock(region, fBlock, /*addToBin=*/false);
985     MALLOC_ASSERT(fBlock->sizeTmp == region->blockSz, ASSERT_TEXT);
986     // matched blockConsumed() in startUseBlock().
987     // TODO: get rid of useless pair blockConsumed()/blockReleased()
988     bkndSync.blockReleased();
989 
990     // object must start at same offset from region's start
991     void *object = (void*)((uintptr_t)region + userOffset);
992     MALLOC_ASSERT(isAligned(object, alignment), ASSERT_TEXT);
993     LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
994     setBackRef(header->backRefIdx, header);
995 
996     LargeMemoryBlock *lmb = (LargeMemoryBlock*)fBlock;
997     lmb->unalignedSize = region->blockSz;
998     lmb->objectSize = newSize;
999     lmb->backRefIdx = header->backRefIdx;
1000     header->memoryBlock = lmb;
1001     MALLOC_ASSERT((uintptr_t)lmb + lmb->unalignedSize >=
1002                   (uintptr_t)object + lmb->objectSize, "An object must fit to the block.");
1003 
1004     usedAddrRange.registerFree((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize);
1005     usedAddrRange.registerAlloc((uintptr_t)region, (uintptr_t)region + requestSize);
1006     totalMemSize.fetch_add(region->allocSz - oldRegionSize);
1007 
1008     return object;
1009 }
1010 #endif /* BACKEND_HAS_MREMAP */
1011 
1012 void Backend::releaseRegion(MemRegion *memRegion)
1013 {
1014     regionList.remove(memRegion);
1015     freeRawMem(memRegion, memRegion->allocSz);
1016 }
1017 
1018 // coalesce fBlock with its neighbors
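// The protocol below is lock-based on the GuardedSize words: the left neighbor is
// reached through our leftL mirror and its own size word, and the right neighbor
// through its size word; when a conflicting coalescing is detected, the block is
// deferred to coalescQ instead of spinning.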
1019 FreeBlock *Backend::doCoalesc(FreeBlock *fBlock, MemRegion **mRegion)
1020 {
1021     FreeBlock *resBlock = fBlock;
1022     size_t resSize = fBlock->sizeTmp;
1023     MemRegion *memRegion = NULL;
1024 
1025     fBlock->markCoalescing(resSize);
1026     resBlock->blockInBin = false;
1027 
1028     // coalescing with left neighbor
1029     size_t leftSz = fBlock->trySetLeftUsed(GuardedSize::COAL_BLOCK);
1030     if (leftSz != GuardedSize::LOCKED) {
1031         if (leftSz == GuardedSize::COAL_BLOCK) {
1032             coalescQ.putBlock(fBlock);
1033             return NULL;
1034         } else {
1035             FreeBlock *left = fBlock->leftNeig(leftSz);
1036             size_t lSz = left->trySetMeUsed(GuardedSize::COAL_BLOCK);
1037             if (lSz <= GuardedSize::MAX_LOCKED_VAL) {
1038                 fBlock->setLeftFree(leftSz); // rollback
1039                 coalescQ.putBlock(fBlock);
1040                 return NULL;
1041             } else {
1042                 MALLOC_ASSERT(lSz == leftSz, "Invalid header");
1043                 left->blockInBin = true;
1044                 resBlock = left;
1045                 resSize += leftSz;
1046                 resBlock->sizeTmp = resSize;
1047             }
1048         }
1049     }
1050     // coalescing with right neighbor
1051     FreeBlock *right = fBlock->rightNeig(fBlock->sizeTmp);
1052     size_t rightSz = right->trySetMeUsed(GuardedSize::COAL_BLOCK);
1053     if (rightSz != GuardedSize::LOCKED) {
1054         // LastFreeBlock is on the right side
1055         if (GuardedSize::LAST_REGION_BLOCK == rightSz) {
1056             right->setMeFree(GuardedSize::LAST_REGION_BLOCK);
1057             memRegion = static_cast<LastFreeBlock*>(right)->memRegion;
1058         } else if (GuardedSize::COAL_BLOCK == rightSz) {
1059             if (resBlock->blockInBin) {
1060                 resBlock->blockInBin = false;
1061                 removeBlockFromBin(resBlock);
1062             }
1063             coalescQ.putBlock(resBlock);
1064             return NULL;
1065         } else {
1066             size_t rSz = right->rightNeig(rightSz)->
1067                 trySetLeftUsed(GuardedSize::COAL_BLOCK);
1068             if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
1069                 right->setMeFree(rightSz);  // rollback
1070                 if (resBlock->blockInBin) {
1071                     resBlock->blockInBin = false;
1072                     removeBlockFromBin(resBlock);
1073                 }
1074                 coalescQ.putBlock(resBlock);
1075                 return NULL;
1076             } else {
1077                 MALLOC_ASSERT(rSz == rightSz, "Invalid header");
1078                 removeBlockFromBin(right);
1079                 resSize += rightSz;
1080 
1081                 // Is LastFreeBlock on the right side of right?
1082                 FreeBlock *nextRight = right->rightNeig(rightSz);
1083                 size_t nextRightSz = nextRight->
1084                     trySetMeUsed(GuardedSize::COAL_BLOCK);
1085                 if (nextRightSz > GuardedSize::MAX_LOCKED_VAL) {
1086                     if (nextRightSz == GuardedSize::LAST_REGION_BLOCK)
1087                         memRegion = static_cast<LastFreeBlock*>(nextRight)->memRegion;
1088 
1089                     nextRight->setMeFree(nextRightSz);
1090                 }
1091             }
1092         }
1093     }
1094     if (memRegion) {
1095         MALLOC_ASSERT((uintptr_t)memRegion + memRegion->allocSz >=
1096                       (uintptr_t)right + sizeof(LastFreeBlock), ASSERT_TEXT);
1097         MALLOC_ASSERT((uintptr_t)memRegion < (uintptr_t)resBlock, ASSERT_TEXT);
1098         *mRegion = memRegion;
1099     } else
1100         *mRegion = NULL;
1101     resBlock->sizeTmp = resSize;
1102     return resBlock;
1103 }
1104 
1105 bool Backend::coalescAndPutList(FreeBlock *list, bool forceCoalescQDrop, bool reportBlocksProcessed)
1106 {
1107     bool regionReleased = false;
1108 
1109     for (FreeBlock *helper; list;
1110          list = helper,
1111              // matches block enqueue in CoalRequestQ::putBlock()
1112              reportBlocksProcessed? coalescQ.blockWasProcessed() : (void)0) {
1113         MemRegion *memRegion;
1114         bool addToTail = false;
1115 
1116         helper = list->nextToFree;
1117         FreeBlock *toRet = doCoalesc(list, &memRegion);
1118         if (!toRet)
1119             continue;
1120 
1121         if (memRegion && memRegion->blockSz == toRet->sizeTmp
1122             && !extMemPool->fixedPool) {
1123             if (extMemPool->regionsAreReleaseable()) {
1124                 // release the region, because there are no used blocks in it
1125                 if (toRet->blockInBin)
1126                     removeBlockFromBin(toRet);
1127                 releaseRegion(memRegion);
1128                 regionReleased = true;
1129                 continue;
1130             } else // add the block from the empty region to the end of the bin,
1131                 addToTail = true; // preserving it for exact fit
1132         }
1133         size_t currSz = toRet->sizeTmp;
1134         int bin = sizeToBin(currSz);
1135         bool toAligned = extMemPool->fixedPool ? toAlignedBin(toRet, currSz) : toRet->slabAligned;
1136         bool needAddToBin = true;
1137 
1138         if (toRet->blockInBin) {
1139             // Does it stay in same bin?
1140             if (toRet->myBin == bin && toRet->slabAligned == toAligned)
1141                 needAddToBin = false;
1142             else {
1143                 toRet->blockInBin = false;
1144                 removeBlockFromBin(toRet);
1145             }
1146         }
1147 
1148         // Does not stay in same bin, or bin-less; add it
1149         if (needAddToBin) {
1150             toRet->prev = toRet->next = toRet->nextToFree = NULL;
1151             toRet->myBin = NO_BIN;
1152             toRet->slabAligned = toAligned;
1153 
1154             // If the block is too small to fit in any bin, keep it bin-less.
1155             // It's not a leak because the block later can be coalesced.
1156             if (currSz >= minBinnedSize) {
1157                 toRet->sizeTmp = currSz;
1158                 IndexedBins *target = toRet->slabAligned ? &freeSlabAlignedBins : &freeLargeBlockBins;
1159                 if (forceCoalescQDrop) {
1160                     target->addBlock(bin, toRet, toRet->sizeTmp, addToTail);
1161                 } else if (!target->tryAddBlock(bin, toRet, addToTail)) {
1162                     coalescQ.putBlock(toRet);
1163                     continue;
1164                 }
1165             }
1166             toRet->sizeTmp = 0;
1167         }
1168         // Free the (possibly coalesced) block.
1169         // Adding to a bin must be done before this point,
1170         // because once a block is free it can be coalesced, and
1171         // using its pointer becomes unsafe.
1172         // Remember that coalescing is not done under any global lock.
1173         toRet->setMeFree(currSz);
1174         toRet->rightNeig(currSz)->setLeftFree(currSz);
1175     }
1176     return regionReleased;
1177 }
1178 
1179 // Coalesce fBlock and add it back to a bin;
1180 // processing delayed coalescing requests.
1181 void Backend::coalescAndPut(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
1182 {
1183     fBlock->sizeTmp = blockSz;
1184     fBlock->nextToFree = NULL;
1185     fBlock->slabAligned = slabAligned;
1186 
1187     coalescAndPutList(fBlock, /*forceCoalescQDrop=*/false, /*reportBlocksProcessed=*/false);
1188 }
1189 
1190 bool Backend::scanCoalescQ(bool forceCoalescQDrop)
1191 {
1192     FreeBlock *currCoalescList = coalescQ.getAll();
1193 
1194     if (currCoalescList)
1195         // reportBlocksProcessed=true informs that the blocks leave coalescQ,
1196         // matches blockConsumed() from CoalRequestQ::putBlock()
1197         coalescAndPutList(currCoalescList, forceCoalescQDrop,
1198                           /*reportBlocksProcessed=*/true);
1199     // return the status of coalescQ.getAll(), as an indication of possible changes in the backend
1200     // TODO: coalescAndPutList() may report whether some new free blocks became available or not
1201     return currCoalescList != NULL;
1202 }
1203 
1204 FreeBlock *Backend::findBlockInRegion(MemRegion *region, size_t exactBlockSize)
1205 {
1206     FreeBlock *fBlock;
1207     size_t blockSz;
1208     uintptr_t fBlockEnd,
1209         lastFreeBlock = (uintptr_t)region + region->allocSz - sizeof(LastFreeBlock);
1210 
1211     static_assert(sizeof(LastFreeBlock) % sizeof(uintptr_t) == 0,
1212         "Atomic applied on LastFreeBlock, and we put it at the end of region, that"
1213         " is uintptr_t-aligned, so no unaligned atomic operations are possible.");
1214      // right bound is slab-aligned, keep LastFreeBlock after it
1215     if (region->type == MEMREG_SLAB_BLOCKS) {
1216         fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), sizeof(uintptr_t));
1217         fBlockEnd = alignDown(lastFreeBlock, slabSize);
1218     } else {
1219         fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), largeObjectAlignment);
1220         fBlockEnd = (uintptr_t)fBlock + exactBlockSize;
1221         MALLOC_ASSERT(fBlockEnd <= lastFreeBlock, ASSERT_TEXT);
1222     }
1223     if (fBlockEnd <= (uintptr_t)fBlock)
1224         return NULL; // allocSz is too small
1225     blockSz = fBlockEnd - (uintptr_t)fBlock;
1226     // TODO: extend getSlabBlock to support degradation, i.e. getting fewer blocks
1227     // than requested, and then relax this check
1228     // (for now all-or-nothing is implemented; check accordingly)
1229     if (blockSz < numOfSlabAllocOnMiss*slabSize)
1230         return NULL;
1231 
1232     region->blockSz = blockSz;
1233     return fBlock;
1234 }
1235 
1236 // startUseBlock may add the free block to a bin, the block can be used and
1237 // even released after this, so the region must be added to regionList already
1238 void Backend::startUseBlock(MemRegion *region, FreeBlock *fBlock, bool addToBin)
1239 {
1240     size_t blockSz = region->blockSz;
1241     fBlock->initHeader();
1242     fBlock->setMeFree(blockSz);
1243 
1244     LastFreeBlock *lastBl = static_cast<LastFreeBlock*>(fBlock->rightNeig(blockSz));
1245     // to avoid unaligned atomics during LastFreeBlock access
1246     MALLOC_ASSERT(isAligned(lastBl, sizeof(uintptr_t)), NULL);
1247     lastBl->initHeader();
1248     lastBl->setMeFree(GuardedSize::LAST_REGION_BLOCK);
1249     lastBl->setLeftFree(blockSz);
1250     lastBl->myBin = NO_BIN;
1251     lastBl->memRegion = region;
1252 
1253     if (addToBin) {
1254         unsigned targetBin = sizeToBin(blockSz);
1255         // when adding advance regions, register the bin for the largest block in the region
1256         advRegBins.registerBin(targetBin);
1257         if (region->type == MEMREG_SLAB_BLOCKS) {
1258             fBlock->slabAligned = true;
1259             freeSlabAlignedBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
1260         } else {
1261             fBlock->slabAligned = false;
1262             freeLargeBlockBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
1263         }
1264     } else {
1265         // to match with blockReleased() in genericGetBlock
1266         bkndSync.blockConsumed();
1267         // Record our alignment for correct splitBlock operation
1268         fBlock->slabAligned = (region->type == MEMREG_SLAB_BLOCKS);
1269         fBlock->sizeTmp = fBlock->tryLockBlock();
1270         MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, "Locking must be successful");
1271     }
1272 }
1273 
1274 void MemRegionList::add(MemRegion *r)
1275 {
1276     r->prev = NULL;
1277     MallocMutex::scoped_lock lock(regionListLock);
1278     r->next = head;
1279     head = r;
1280     if (head->next)
1281         head->next->prev = head;
1282 }
1283 
1284 void MemRegionList::remove(MemRegion *r)
1285 {
1286     MallocMutex::scoped_lock lock(regionListLock);
1287     if (head == r)
1288         head = head->next;
1289     if (r->next)
1290         r->next->prev = r->prev;
1291     if (r->prev)
1292         r->prev->next = r->next;
1293 }
1294 
1295 #if __TBB_MALLOC_BACKEND_STAT
1296 int MemRegionList::reportStat(FILE *f)
1297 {
1298     int regNum = 0;
1299     MallocMutex::scoped_lock lock(regionListLock);
1300     for (MemRegion *curr = head; curr; curr = curr->next) {
1301         fprintf(f, "%p: max block %lu B, ", curr, curr->blockSz);
1302         regNum++;
1303     }
1304     return regNum;
1305 }
1306 #endif
1307 
1308 FreeBlock *Backend::addNewRegion(size_t size, MemRegionType memRegType, bool addToBin)
1309 {
1310     static_assert(sizeof(BlockMutexes) <= sizeof(BlockI), "Header must be not overwritten in used blocks");
1311     MALLOC_ASSERT(FreeBlock::minBlockSize > GuardedSize::MAX_SPEC_VAL,
1312           "Block length must not conflict with special values of GuardedSize");
1313     // If the region is not "for slabs" we should reserve some space for
1314     // a region header, the worst case alignment and the last block mark.
1315     const size_t requestSize = memRegType == MEMREG_SLAB_BLOCKS ? size :
1316         size + sizeof(MemRegion) + largeObjectAlignment
1317              +  FreeBlock::minBlockSize + sizeof(LastFreeBlock);
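    // For illustration: a non-slab request of N bytes therefore reserves N plus
    // the MemRegion header, worst-case alignment padding, a minimal FreeBlock and
    // the LastFreeBlock marker; allocRawMem() then rounds the total up to the OS
    // allocation granularity.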
1318 
1319     size_t rawSize = requestSize;
1320     MemRegion *region = (MemRegion*)allocRawMem(rawSize);
1321     if (!region) {
1322         MALLOC_ASSERT(rawSize==requestSize, "getRawMem has not allocated memory but changed the allocated size.");
1323         return NULL;
1324     }
1325     if (rawSize < sizeof(MemRegion)) {
1326         if (!extMemPool->fixedPool)
1327             freeRawMem(region, rawSize);
1328         return NULL;
1329     }
1330 
1331     region->type = memRegType;
1332     region->allocSz = rawSize;
1333     FreeBlock *fBlock = findBlockInRegion(region, size);
1334     if (!fBlock) {
1335         if (!extMemPool->fixedPool)
1336             freeRawMem(region, rawSize);
1337         return NULL;
1338     }
1339     regionList.add(region);
1340     startUseBlock(region, fBlock, addToBin);
1341     bkndSync.binsModified();
1342     return addToBin? (FreeBlock*)VALID_BLOCK_IN_BIN : fBlock;
1343 }
1344 
1345 void Backend::init(ExtMemoryPool *extMemoryPool)
1346 {
1347     extMemPool = extMemoryPool;
1348     usedAddrRange.init();
1349     coalescQ.init(&bkndSync);
1350     bkndSync.init(this);
1351 }
1352 
1353 void Backend::reset()
1354 {
1355     MALLOC_ASSERT(extMemPool->userPool(), "Only user pool can be reset.");
1356     // no active threads are allowed in the backend while reset() is called
1357     verify();
1358 
1359     freeLargeBlockBins.reset();
1360     freeSlabAlignedBins.reset();
1361     advRegBins.reset();
1362 
1363     for (MemRegion *curr = regionList.head; curr; curr = curr->next) {
1364         FreeBlock *fBlock = findBlockInRegion(curr, curr->blockSz);
1365         MALLOC_ASSERT(fBlock, "A memory region unexpectedly got smaller");
1366         startUseBlock(curr, fBlock, /*addToBin=*/true);
1367     }
1368 }
1369 
1370 bool Backend::destroy()
1371 {
1372     bool noError = true;
1373     // no active threads are allowed in backend while destroy() called
1374     verify();
1375     if (!inUserPool()) {
1376         freeLargeBlockBins.reset();
1377         freeSlabAlignedBins.reset();
1378     }
1379     while (regionList.head) {
1380         MemRegion *helper = regionList.head->next;
1381         noError &= freeRawMem(regionList.head, regionList.head->allocSz);
1382         regionList.head = helper;
1383     }
1384     return noError;
1385 }
1386 
1387 bool Backend::clean()
1388 {
1389     scanCoalescQ(/*forceCoalescQDrop=*/false);
1390 
1391     bool res = false;
1392     // We can have several blocks occupying a whole region,
1393     // because such regions are added in advance (see askMemFromOS() and reset()),
1394     // and never used. Release them all.
1395     for (int i = advRegBins.getMinUsedBin(0); i != -1; i = advRegBins.getMinUsedBin(i+1)) {
1396         if (i == freeSlabAlignedBins.getMinNonemptyBin(i))
1397             res |= freeSlabAlignedBins.tryReleaseRegions(i, this);
1398         if (i == freeLargeBlockBins.getMinNonemptyBin(i))
1399             res |= freeLargeBlockBins.tryReleaseRegions(i, this);
1400     }
1401 
1402     return res;
1403 }
1404 
1405 void Backend::IndexedBins::verify()
1406 {
1407 #if MALLOC_DEBUG
1408     for (int i=0; i<freeBinsNum; i++) {
1409         for (FreeBlock *fb = freeBins[i].head.load(std::memory_order_relaxed); fb; fb=fb->next) {
1410             uintptr_t mySz = fb->myL.value;
1411             MALLOC_ASSERT(mySz>GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
1412             FreeBlock *right = (FreeBlock*)((uintptr_t)fb + mySz);
1413             suppress_unused_warning(right);
1414             MALLOC_ASSERT(right->myL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
1415             MALLOC_ASSERT(right->leftL.value==mySz, ASSERT_TEXT);
1416             MALLOC_ASSERT(fb->leftL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
1417         }
1418     }
1419 #endif
1420 }
1421 
1422 // For correct operation, it must be called when no other thread
1423 // is changing the backend.
1424 void Backend::verify()
1425 {
1426 #if MALLOC_DEBUG
1427     scanCoalescQ(/*forceCoalescQDrop=*/false);
1428 #endif // MALLOC_DEBUG
1429 
1430     freeLargeBlockBins.verify();
1431     freeSlabAlignedBins.verify();
1432 }
1433 
1434 #if __TBB_MALLOC_BACKEND_STAT
1435 size_t Backend::Bin::countFreeBlocks()
1436 {
1437     size_t cnt = 0;
1438     {
1439         MallocMutex::scoped_lock lock(tLock);
1440         for (FreeBlock *fb = head; fb; fb = fb->next)
1441             cnt++;
1442     }
1443     return cnt;
1444 }
1445 
1446 size_t Backend::Bin::reportFreeBlocks(FILE *f)
1447 {
1448     size_t totalSz = 0;
1449     MallocMutex::scoped_lock lock(tLock);
1450     for (FreeBlock *fb = head; fb; fb = fb->next) {
1451         size_t sz = fb->tryLockBlock();
1452         fb->setMeFree(sz);
1453         fprintf(f, " [%p;%p]", fb, (void*)((uintptr_t)fb+sz));
1454         totalSz += sz;
1455     }
1456     return totalSz;
1457 }
1458 
1459 void Backend::IndexedBins::reportStat(FILE *f)
1460 {
1461     size_t totalSize = 0;
1462 
1463     for (int i=0; i<Backend::freeBinsNum; i++)
1464         if (size_t cnt = freeBins[i].countFreeBlocks()) {
1465             totalSize += freeBins[i].reportFreeBlocks(f);
1466             fprintf(f, " %d:%lu, ", i, cnt);
1467         }
1468     fprintf(f, "\ttotal size %lu KB", totalSize/1024);
1469 }
1470 
1471 void Backend::reportStat(FILE *f)
1472 {
1473     scanCoalescQ(/*forceCoalescQDrop=*/false);
1474 
1475     fprintf(f, "\n  regions:\n");
1476     int regNum = regionList.reportStat(f);
1477     fprintf(f, "\n%d regions, %lu KB in all regions\n  free bins:\nlarge bins: ",
1478             regNum, totalMemSize/1024);
1479     freeLargeBlockBins.reportStat(f);
1480     fprintf(f, "\naligned bins: ");
1481     freeSlabAlignedBins.reportStat(f);
1482     fprintf(f, "\n");
1483 }
1484 #endif // __TBB_MALLOC_BACKEND_STAT
1485 
1486 } } // namespaces
1487