xref: /oneTBB/src/tbbmalloc/backend.cpp (revision d5fd1e97)
1 /*
2     Copyright (c) 2005-2023 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #include <string.h>   /* for memset */
18 #include <errno.h>
19 #include "tbbmalloc_internal.h"
20 
21 namespace rml {
22 namespace internal {
23 
24 /*********** Code to acquire memory from the OS or other executive ****************/
25 
26 /*
27   syscall/malloc can set non-zero errno in case of failure,
28   but the allocator might still be able to find memory to fulfill the request.
29   A successful scalable_malloc call must not change errno.
30   To support this, restore the old errno in (get|free)RawMemory, and set errno
31   in the frontend just before returning to user code.
32   Please note: every syscall/libc call used inside scalable_malloc that
33   sets errno must be protected this way, not just memory allocation per se.
34 */
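/*
  A minimal sketch of the errno-preservation pattern described above (illustrative
  only, excluded from the build; the helper name is hypothetical -- the real code
  keeps this logic in the frontend and in the memory mapping layer):
*/
#if 0
static void *errnoPreservingMap(size_t size, PageType pageType)
{
    int savedErrno = errno;          // remember errno before the syscall
    void *mem = MapMemory(size, pageType);
    if (mem)
        errno = savedErrno;          // a successful request must not change errno
    return mem;                      // on failure the frontend sets errno itself
}
#endif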
35 
36 #if USE_DEFAULT_MEMORY_MAPPING
37 #include "MapMemory.h"
38 #else
39 /* assume MapMemory and UnmapMemory are customized */
40 #endif
41 
42 void* getRawMemory (size_t size, PageType pageType) {
43     return MapMemory(size, pageType);
44 }
45 
46 int freeRawMemory (void *object, size_t size) {
47     return UnmapMemory(object, size);
48 }
49 
50 #if CHECK_ALLOCATION_RANGE
51 
52 void Backend::UsedAddressRange::registerAlloc(uintptr_t left, uintptr_t right)
53 {
54     MallocMutex::scoped_lock lock(mutex);
55     if (left < leftBound.load(std::memory_order_relaxed))
56         leftBound.store(left, std::memory_order_relaxed);
57     if (right > rightBound.load(std::memory_order_relaxed))
58         rightBound.store(right, std::memory_order_relaxed);
59     MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed), ASSERT_TEXT);
60     MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
61     MALLOC_ASSERT(leftBound.load(std::memory_order_relaxed) <= left && right <= rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
62 }
63 
64 void Backend::UsedAddressRange::registerFree(uintptr_t left, uintptr_t right)
65 {
66     MallocMutex::scoped_lock lock(mutex);
67     if (leftBound.load(std::memory_order_relaxed) == left) {
68         if (rightBound.load(std::memory_order_relaxed) == right) {
69             leftBound.store(ADDRESS_UPPER_BOUND, std::memory_order_relaxed);
70             rightBound.store(0, std::memory_order_relaxed);
71         } else
72             leftBound.store(right, std::memory_order_relaxed);
73     } else if (rightBound.load(std::memory_order_relaxed) == right)
74         rightBound.store(left, std::memory_order_relaxed);
75     MALLOC_ASSERT((!rightBound.load(std::memory_order_relaxed) && leftBound.load(std::memory_order_relaxed) == ADDRESS_UPPER_BOUND)
76                   || leftBound.load(std::memory_order_relaxed) < rightBound.load(std::memory_order_relaxed), ASSERT_TEXT);
77 }
78 #endif // CHECK_ALLOCATION_RANGE
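// Illustrative sketch (not compiled) of how the range bookkeeping above is driven:
// allocRawMem() widens [leftBound, rightBound] via registerAlloc(), while freeRawMem()
// narrows it, or resets it to the "empty" sentinels, via registerFree(). The helper
// below is hypothetical and assumes the calls are accessible from its context.
#if 0
static void trackRange(Backend::UsedAddressRange &range, void *p, size_t sz)
{
    range.registerAlloc((uintptr_t)p, (uintptr_t)p + sz); // bounds grow to cover [p, p+sz)
    // ... the memory is in use ...
    range.registerFree((uintptr_t)p, (uintptr_t)p + sz);  // bounds shrink or reset
}
#endif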
79 
80 // Initialized in frontend inside defaultMemPool
81 extern HugePagesStatus hugePages;
82 
83 void *Backend::allocRawMem(size_t &size)
84 {
85     void *res = nullptr;
86     size_t allocSize = 0;
87 
88     if (extMemPool->userPool()) {
89         if (extMemPool->fixedPool && bootsrapMemDone == bootsrapMemStatus.load(std::memory_order_acquire))
90             return nullptr;
91         MALLOC_ASSERT(bootsrapMemStatus != bootsrapMemNotDone,
92                       "Backend::allocRawMem() called prematurely?");
93         // TODO: support for raw mem not aligned at sizeof(uintptr_t)
94         // memory from fixed pool is asked once and only once
95         allocSize = alignUpGeneric(size, extMemPool->granularity);
96         res = (*extMemPool->rawAlloc)(extMemPool->poolId, allocSize);
97     } else {
98         // Align allocation on page size
99         size_t pageSize = hugePages.isEnabled ? hugePages.getGranularity() : extMemPool->granularity;
100         MALLOC_ASSERT(pageSize, "Page size cannot be zero.");
101         allocSize = alignUpGeneric(size, pageSize);
102 
103         // If the user requested huge pages and they are available, try to use preallocated ones first.
104         // If there are none, check transparent huge page support and use it instead.
105         if (hugePages.isEnabled) {
106             if (hugePages.isHPAvailable) {
107                 res = getRawMemory(allocSize, PREALLOCATED_HUGE_PAGE);
108             }
109             if (!res && hugePages.isTHPAvailable) {
110                 res = getRawMemory(allocSize, TRANSPARENT_HUGE_PAGE);
111             }
112         }
113 
114         if (!res) {
115             res = getRawMemory(allocSize, REGULAR);
116         }
117     }
118 
119     if (res) {
120         MALLOC_ASSERT(allocSize > 0, "Invalid size of an allocated region.");
121         size = allocSize;
122         if (!extMemPool->userPool())
123             usedAddrRange.registerAlloc((uintptr_t)res, (uintptr_t)res+size);
124 #if MALLOC_DEBUG
125         volatile size_t curTotalSize = totalMemSize; // to read global value once
126         MALLOC_ASSERT(curTotalSize+size > curTotalSize, "Overflow allocation size.");
127 #endif
128         totalMemSize.fetch_add(size);
129     }
130 
131     return res;
132 }
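// Illustrative usage sketch (not compiled): note that `size` is an in/out parameter;
// the caller passes the requested size and gets back the actually reserved,
// granularity- or page-aligned size, which must later be passed to freeRawMem().
// The `backend` object and the calling context are hypothetical.
#if 0
size_t sz = 100*1024;                  // requested size
void *raw = backend->allocRawMem(sz);  // sz is rounded up, e.g. to the huge page size
if (raw)
    backend->freeRawMem(raw, sz);      // must free with the updated size
#endif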
133 
134 bool Backend::freeRawMem(void *object, size_t size)
135 {
136     bool fail;
137 #if MALLOC_DEBUG
138     volatile size_t curTotalSize = totalMemSize; // to read global value once
139     MALLOC_ASSERT(curTotalSize-size < curTotalSize, "Negative allocation size.");
140 #endif
141     totalMemSize.fetch_sub(size);
142     if (extMemPool->userPool()) {
143         MALLOC_ASSERT(!extMemPool->fixedPool, "No free for fixed-size pools.");
144         fail = (*extMemPool->rawFree)(extMemPool->poolId, object, size);
145     } else {
146         usedAddrRange.registerFree((uintptr_t)object, (uintptr_t)object + size);
147         fail = freeRawMemory(object, size);
148     }
149     // TODO: use result in all freeRawMem() callers
150     return !fail;
151 }
152 
153 /********* End memory acquisition code ********************************/
154 
155 // Protected object size. After successful locking, returns the size of the locked block;
156 // releasing requires setting the block size.
157 class GuardedSize : tbb::detail::no_copy {
158     std::atomic<uintptr_t> value;
159 public:
160     enum State {
161         LOCKED,
162         COAL_BLOCK,        // block is coalescing now
163         MAX_LOCKED_VAL = COAL_BLOCK,
164         LAST_REGION_BLOCK, // used to mark last block in region
165         // values after this are "normal" block sizes
166         MAX_SPEC_VAL = LAST_REGION_BLOCK
167     };
168 
169     void initLocked() { value.store(LOCKED, std::memory_order_release); } // TBB_REVAMP_TODO: was relaxed
170     void makeCoalscing() {
171         MALLOC_ASSERT(value.load(std::memory_order_relaxed) == LOCKED, ASSERT_TEXT);
172         value.store(COAL_BLOCK, std::memory_order_release); // TBB_REVAMP_TODO: was relaxed
173     }
174     size_t tryLock(State state) {
175         MALLOC_ASSERT(state <= MAX_LOCKED_VAL, ASSERT_TEXT);
176         size_t sz = value.load(std::memory_order_acquire);
177         for (;;) {
178             if (sz <= MAX_LOCKED_VAL) {
179                 break;
180             }
181             if (value.compare_exchange_strong(sz, state)) {
182                 break;
183             }
184         }
185         return sz;
186     }
187     void unlock(size_t size) {
188         MALLOC_ASSERT(value.load(std::memory_order_relaxed) <= MAX_LOCKED_VAL, "The lock is not locked");
189         MALLOC_ASSERT(size > MAX_LOCKED_VAL, ASSERT_TEXT);
190         value.store(size, std::memory_order_release);
191     }
192     bool isLastRegionBlock() const { return value.load(std::memory_order_relaxed) == LAST_REGION_BLOCK; }
193     friend void Backend::IndexedBins::verify();
194 };
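// Illustrative sketch (not compiled) of the GuardedSize protocol: the block size
// doubles as a lock word, where values <= MAX_LOCKED_VAL mean "locked/special" and
// larger values are real sizes. A successful tryLock() returns the stored size, and
// unlock() publishes a size again. The function below is hypothetical.
#if 0
static void guardedSizeRoundTrip(GuardedSize &gs)
{
    size_t sz = gs.tryLock(GuardedSize::LOCKED); // CAS the size out, put LOCKED in
    if (sz > GuardedSize::MAX_LOCKED_VAL) {      // we own the block now
        // ... work with the block of size sz ...
        gs.unlock(sz);                           // store the size back, releasing it
    }                                            // otherwise someone else holds it
}
#endif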
195 
196 struct MemRegion {
197     MemRegion *next,      // all regions of a pool are kept in a 2-linked list so that
198               *prev;      // they can all be released on pool destruction and individual
199                           // regions can be released as well.
200     size_t     allocSz,   // got from pool callback
201                blockSz;   // initial and maximal inner block size
202     MemRegionType type;
203 };
204 
205 // this data must remain unmodified while the block is in use, so it is kept separate
206 class BlockMutexes {
207 protected:
208     GuardedSize myL,   // lock for me
209                 leftL; // lock for left neighbor
210 };
211 
212 class FreeBlock : BlockMutexes {
213 public:
214     static const size_t minBlockSize;
215     friend void Backend::IndexedBins::verify();
216 
217     FreeBlock    *prev,       // in 2-linked list related to bin
218                  *next,
219                  *nextToFree; // used to form a queue during coalescing
220     // valid only while the block is being processed, i.e. it is neither free
221     size_t        sizeTmp;    // nor used outside of the backend
222     int           myBin;      // bin that is owner of the block
223     bool          slabAligned;
224     bool          blockInBin; // this block in myBin already
225 
226     FreeBlock *rightNeig(size_t sz) const {
227         MALLOC_ASSERT(sz, ASSERT_TEXT);
228         return (FreeBlock*)((uintptr_t)this+sz);
229     }
230     FreeBlock *leftNeig(size_t sz) const {
231         MALLOC_ASSERT(sz, ASSERT_TEXT);
232         return (FreeBlock*)((uintptr_t)this - sz);
233     }
234 
235     void initHeader() { myL.initLocked(); leftL.initLocked(); }
236     void setMeFree(size_t size) { myL.unlock(size); }
237     size_t trySetMeUsed(GuardedSize::State s) { return myL.tryLock(s); }
238     bool isLastRegionBlock() const { return myL.isLastRegionBlock(); }
239 
240     void setLeftFree(size_t sz) { leftL.unlock(sz); }
241     size_t trySetLeftUsed(GuardedSize::State s) { return leftL.tryLock(s); }
242 
243     size_t tryLockBlock() {
244         size_t rSz, sz = trySetMeUsed(GuardedSize::LOCKED);
245 
246         if (sz <= GuardedSize::MAX_LOCKED_VAL)
247             return false;
248         rSz = rightNeig(sz)->trySetLeftUsed(GuardedSize::LOCKED);
249         if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
250             setMeFree(sz);
251             return false;
252         }
253         MALLOC_ASSERT(rSz == sz, ASSERT_TEXT);
254         return sz;
255     }
256     void markCoalescing(size_t blockSz) {
257         myL.makeCoalscing();
258         rightNeig(blockSz)->leftL.makeCoalscing();
259         sizeTmp = blockSz;
260         nextToFree = nullptr;
261     }
262     void markUsed() {
263         myL.initLocked();
264         rightNeig(sizeTmp)->leftL.initLocked();
265         nextToFree = nullptr;
266     }
267     static void markBlocks(FreeBlock *fBlock, int num, size_t size) {
268         for (int i=1; i<num; i++) {
269             fBlock = (FreeBlock*)((uintptr_t)fBlock + size);
270             fBlock->initHeader();
271         }
272     }
273 };
274 
275 // Last block in any region. Its "size" field is GuardedSize::LAST_REGION_BLOCK.
276 // Blocks of this kind are used to find the region header
277 // and thus make it possible to return the region back to the OS.
278 struct LastFreeBlock : public FreeBlock {
279     MemRegion *memRegion;
280 };
281 
282 const size_t FreeBlock::minBlockSize = sizeof(FreeBlock);
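// Illustrative sketch (not compiled): the myL/leftL pair acts as boundary tags.
// A block stores its own guarded size (myL) and, at its start, the guarded size of
// its left neighbor (leftL), so both physical neighbors are reachable in O(1).
// The helper below is hypothetical; the sizes are obtained by locking the tags.
#if 0
static void neighborNavigation(FreeBlock *fb, size_t mySz, size_t leftSz)
{
    FreeBlock *right = fb->rightNeig(mySz);   // (uintptr_t)fb + mySz
    FreeBlock *left  = fb->leftNeig(leftSz);  // (uintptr_t)fb - leftSz
    (void)right; (void)left;                  // this is how doCoalesc() walks neighbors
}
#endif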
283 
284 inline bool BackendSync::waitTillBlockReleased(intptr_t startModifiedCnt)
285 {
286     AtomicBackoff backoff;
287 #if __TBB_MALLOC_BACKEND_STAT
288     class ITT_Guard {
289         void *ptr;
290     public:
291         ITT_Guard(void *p) : ptr(p) {
292             MALLOC_ITT_SYNC_PREPARE(ptr);
293         }
294         ~ITT_Guard() {
295             MALLOC_ITT_SYNC_ACQUIRED(ptr);
296         }
297     };
298     ITT_Guard ittGuard(&inFlyBlocks);
299 #endif
300     for (intptr_t myBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire),
301              myCoalescQInFlyBlocks = backend->blocksInCoalescing(); ; backoff.pause()) {
302         MALLOC_ASSERT(myBinsInFlyBlocks>=0 && myCoalescQInFlyBlocks>=0, nullptr);
303         intptr_t currBinsInFlyBlocks = inFlyBlocks.load(std::memory_order_acquire),
304             currCoalescQInFlyBlocks = backend->blocksInCoalescing();
305         WhiteboxTestingYield();
306         // Stop waiting iff:
307 
308         // 1) blocks were removed from processing, not added
309         if (myBinsInFlyBlocks > currBinsInFlyBlocks
310         // 2) blocks were released from the delayed coalescing queue
311             || myCoalescQInFlyBlocks > currCoalescQInFlyBlocks)
312             break;
313         // 3) if there are blocks in coalescing, and no progress in its processing,
314         // try to scan coalescing queue and stop waiting, if changes were made
315         // (if there are no changes and in-fly blocks exist, we continue
316         //  waiting to not increase load on coalescQ)
317         if (currCoalescQInFlyBlocks > 0 && backend->scanCoalescQ(/*forceCoalescQDrop=*/false))
318             break;
319         // 4) when there are no blocks
320         if (!currBinsInFlyBlocks && !currCoalescQInFlyBlocks)
321             // a re-scan makes sense only if bins were modified since the last scan
322             return startModifiedCnt != getNumOfMods();
323         myBinsInFlyBlocks = currBinsInFlyBlocks;
324         myCoalescQInFlyBlocks = currCoalescQInFlyBlocks;
325     }
326     return true;
327 }
328 
329 void CoalRequestQ::putBlock(FreeBlock *fBlock)
330 {
331     MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, ASSERT_TEXT);
332     fBlock->markUsed();
333     // the block is in the queue, do not forget that it's here
334     inFlyBlocks++;
335 
336     FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);
337     for (;;) {
338         fBlock->nextToFree = myBlToFree;
339         if (blocksToFree.compare_exchange_strong(myBlToFree, fBlock)) {
340             return;
341         }
342     }
343 }
344 
345 FreeBlock *CoalRequestQ::getAll()
346 {
347     for (;;) {
348         FreeBlock *myBlToFree = blocksToFree.load(std::memory_order_acquire);
349 
350         if (!myBlToFree) {
351             return nullptr;
352         } else {
353             if (blocksToFree.compare_exchange_strong(myBlToFree, nullptr)) {
354                 return myBlToFree;
355             } else {
356                 continue;
357             }
358         }
359     }
360 }
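// The two functions above form a lock-free "push one / grab all" list (a Treiber
// stack drained in one shot). A simplified generic sketch of the same pattern, with
// hypothetical names and not compiled, purely for illustration:
#if 0
struct Node { Node *next; };
static std::atomic<Node*> top{nullptr};

static void pushOne(Node *n) {                // CoalRequestQ::putBlock() analogue
    Node *old = top.load(std::memory_order_acquire);
    do {
        n->next = old;                        // link to the current head
    } while (!top.compare_exchange_strong(old, n));
}

static Node *grabAll() {                      // CoalRequestQ::getAll() analogue
    return top.exchange(nullptr, std::memory_order_acq_rel); // detach the whole list
}
#endif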
361 
362 inline void CoalRequestQ::blockWasProcessed()
363 {
364     bkndSync->binsModified();
365     int prev = inFlyBlocks.fetch_sub(1);
366     tbb::detail::suppress_unused_warning(prev);
367     MALLOC_ASSERT(prev > 0, ASSERT_TEXT);
368 }
369 
370 // Try to get a block from a bin.
371 // If the remaining free space would stay in the same bin,
372 //     split the block without removing it.
373 // If the free space should go to other bin(s), remove the block.
374 // alignedBin is true if all blocks in the bin have a slab-aligned right side.
375 FreeBlock *Backend::IndexedBins::getFromBin(int binIdx, BackendSync *sync, size_t size,
376         bool needAlignedRes, bool alignedBin,  bool wait, int *binLocked)
377 {
378     Bin *b = &freeBins[binIdx];
379 try_next:
380     FreeBlock *fBlock = nullptr;
381     if (!b->empty()) {
382         bool locked;
383         MallocMutex::scoped_lock scopedLock(b->tLock, wait, &locked);
384 
385         if (!locked) {
386             if (binLocked) (*binLocked)++;
387             return nullptr;
388         }
389 
390         for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; curr = curr->next) {
391             size_t szBlock = curr->tryLockBlock();
392             if (!szBlock) {
393                 // the block is locked; re-take the bin lock, as there is no place to spin
394                 // while the block is being coalesced
395                 goto try_next;
396             }
397 
398             // GENERAL CASE
399             if (alignedBin || !needAlignedRes) {
400                 size_t splitSz = szBlock - size;
401                 // If we got a block as a split result, it must have room for the control structures.
402                 if (szBlock >= size && (splitSz >= FreeBlock::minBlockSize || !splitSz))
403                     fBlock = curr;
404             } else {
405                 // SPECIAL CASE: to get an aligned block from an unaligned bin we have to cut out the middle of a block
406                 // and return the remaining left and right parts. Possible only in the fixed pool scenario; the assert
407                 // for this is set inside the splitBlock() function.
408 
409                 void *newB = alignUp(curr, slabSize);
410                 uintptr_t rightNew = (uintptr_t)newB + size;
411                 uintptr_t rightCurr = (uintptr_t)curr + szBlock;
412                 // Check if the block size is sufficient,
413                 // and also left and right split results are either big enough or non-existent
414                 if (rightNew <= rightCurr
415                         && (newB == curr || ((uintptr_t)newB - (uintptr_t)curr) >= FreeBlock::minBlockSize)
416                         && (rightNew == rightCurr || (rightCurr - rightNew) >= FreeBlock::minBlockSize))
417                     fBlock = curr;
418             }
419 
420             if (fBlock) {
421                 // blockConsumed() must be called before the result of removing from a bin is visible externally.
422                 sync->blockConsumed();
423                 // TODO: think about cases when block stays in the same bin
424                 b->removeBlock(fBlock);
425                 if (freeBins[binIdx].empty())
426                     bitMask.set(binIdx, false);
427                 fBlock->sizeTmp = szBlock;
428                 break;
429             } else { // block size is not valid, search for next block in the bin
430                 curr->setMeFree(szBlock);
431                 curr->rightNeig(szBlock)->setLeftFree(szBlock);
432             }
433         }
434     }
435     return fBlock;
436 }
437 
438 bool Backend::IndexedBins::tryReleaseRegions(int binIdx, Backend *backend)
439 {
440     Bin *b = &freeBins[binIdx];
441     FreeBlock *fBlockList = nullptr;
442 
443     // get all blocks from the bin and re-do coalescing on them
444     // to release single-block regions
445 try_next:
446     if (!b->empty()) {
447         MallocMutex::scoped_lock binLock(b->tLock);
448         for (FreeBlock *curr = b->head.load(std::memory_order_relaxed); curr; ) {
449             size_t szBlock = curr->tryLockBlock();
450             if (!szBlock)
451                 goto try_next;
452 
453             FreeBlock *next = curr->next;
454 
455             b->removeBlock(curr);
456             curr->sizeTmp = szBlock;
457             curr->nextToFree = fBlockList;
458             fBlockList = curr;
459             curr = next;
460         }
461     }
462     return backend->coalescAndPutList(fBlockList, /*forceCoalescQDrop=*/true,
463                                       /*reportBlocksProcessed=*/false);
464 }
465 
466 void Backend::Bin::removeBlock(FreeBlock *fBlock)
467 {
468     MALLOC_ASSERT(fBlock->next||fBlock->prev||fBlock== head.load(std::memory_order_relaxed),
469                   "Detected that a block is not in the bin.");
470     if (head.load(std::memory_order_relaxed) == fBlock)
471         head.store(fBlock->next, std::memory_order_relaxed);
472     if (tail == fBlock)
473         tail = fBlock->prev;
474     if (fBlock->prev)
475         fBlock->prev->next = fBlock->next;
476     if (fBlock->next)
477         fBlock->next->prev = fBlock->prev;
478 }
479 
480 void Backend::IndexedBins::addBlock(int binIdx, FreeBlock *fBlock, size_t /* blockSz */, bool addToTail)
481 {
482     Bin *b = &freeBins[binIdx];
483     fBlock->myBin = binIdx;
484     fBlock->next = fBlock->prev = nullptr;
485     {
486         MallocMutex::scoped_lock scopedLock(b->tLock);
487         if (addToTail) {
488             fBlock->prev = b->tail;
489             b->tail = fBlock;
490             if (fBlock->prev)
491                 fBlock->prev->next = fBlock;
492             if (!b->head.load(std::memory_order_relaxed))
493                 b->head.store(fBlock, std::memory_order_relaxed);
494         } else {
495             fBlock->next = b->head.load(std::memory_order_relaxed);
496             b->head.store(fBlock, std::memory_order_relaxed);
497             if (fBlock->next)
498                 fBlock->next->prev = fBlock;
499             if (!b->tail)
500                 b->tail = fBlock;
501         }
502     }
503     bitMask.set(binIdx, true);
504 }
505 
506 bool Backend::IndexedBins::tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail)
507 {
508     bool locked;
509     Bin *b = &freeBins[binIdx];
510     fBlock->myBin = binIdx;
511     if (addToTail) {
512         fBlock->next = nullptr;
513         {
514             MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
515             if (!locked)
516                 return false;
517             fBlock->prev = b->tail;
518             b->tail = fBlock;
519             if (fBlock->prev)
520                 fBlock->prev->next = fBlock;
521             if (!b->head.load(std::memory_order_relaxed))
522                 b->head.store(fBlock, std::memory_order_relaxed);
523         }
524     } else {
525         fBlock->prev = nullptr;
526         {
527             MallocMutex::scoped_lock scopedLock(b->tLock, /*wait=*/false, &locked);
528             if (!locked)
529                 return false;
530             fBlock->next = b->head.load(std::memory_order_relaxed);
531             b->head.store(fBlock, std::memory_order_relaxed);
532             if (fBlock->next)
533                 fBlock->next->prev = fBlock;
534             if (!b->tail)
535                 b->tail = fBlock;
536         }
537     }
538     bitMask.set(binIdx, true);
539     return true;
540 }
541 
542 void Backend::IndexedBins::reset()
543 {
544     for (unsigned i=0; i<Backend::freeBinsNum; i++)
545         freeBins[i].reset();
546     bitMask.reset();
547 }
548 
549 void Backend::IndexedBins::lockRemoveBlock(int binIdx, FreeBlock *fBlock)
550 {
551     MallocMutex::scoped_lock scopedLock(freeBins[binIdx].tLock);
552     freeBins[binIdx].removeBlock(fBlock);
553     if (freeBins[binIdx].empty())
554         bitMask.set(binIdx, false);
555 }
556 
557 bool ExtMemoryPool::regionsAreReleaseable() const
558 {
559     return !keepAllMemory && !delayRegsReleasing;
560 }
561 
562 FreeBlock *Backend::splitBlock(FreeBlock *fBlock, int num, size_t size, bool blockIsAligned, bool needAlignedBlock)
563 {
564     const size_t totalSize = num * size;
565 
566     // SPECIAL CASE: for an unaligned block we have to cut out the middle of the block
567     // and return the remaining left and right parts. Possible only in a fixed pool scenario.
568     if (needAlignedBlock && !blockIsAligned) {
569         MALLOC_ASSERT(extMemPool->fixedPool,
570                 "Aligned block request from unaligned bin possible only in fixed pool scenario.");
571 
572         // Space to use is in the middle
573         FreeBlock *newBlock = alignUp(fBlock, slabSize);
574         FreeBlock *rightPart = (FreeBlock*)((uintptr_t)newBlock + totalSize);
575         uintptr_t fBlockEnd = (uintptr_t)fBlock + fBlock->sizeTmp;
576 
577         // Return free right part
578         if ((uintptr_t)rightPart != fBlockEnd) {
579             rightPart->initHeader();  // to prevent coalescing rightPart with fBlock
580             size_t rightSize = fBlockEnd - (uintptr_t)rightPart;
581             coalescAndPut(rightPart, rightSize, toAlignedBin(rightPart, rightSize));
582         }
583         // And free left part
584         if (newBlock != fBlock) {
585             newBlock->initHeader(); // to prevent coalescing fBlock with newB
586             size_t leftSize = (uintptr_t)newBlock - (uintptr_t)fBlock;
587             coalescAndPut(fBlock, leftSize, toAlignedBin(fBlock, leftSize));
588         }
589         fBlock = newBlock;
590     } else if (size_t splitSize = fBlock->sizeTmp - totalSize) { // need to split the block
591         // GENERAL CASE, cut the left or right part of the block
592         FreeBlock *splitBlock = nullptr;
593         if (needAlignedBlock) {
594             // For slab-aligned blocks, cut the right side of the block
595             // and return it to the requester; the original block goes back to the backend
596             splitBlock = fBlock;
597             fBlock = (FreeBlock*)((uintptr_t)splitBlock + splitSize);
598             fBlock->initHeader();
599         } else {
600             // For large object blocks, cut the original block and put the free right part back to the backend
601             splitBlock = (FreeBlock*)((uintptr_t)fBlock + totalSize);
602             splitBlock->initHeader();
603         }
604         // Mark the free block the same as its parent only when the requested type (needAlignedBlock)
605         // and the type of the block returned from the bins/OS (blockIsAligned) are equal (XOR operation used)
606         bool markAligned = (blockIsAligned ^ needAlignedBlock) ? toAlignedBin(splitBlock, splitSize) : blockIsAligned;
607         coalescAndPut(splitBlock, splitSize, markAligned);
608     }
609     MALLOC_ASSERT(!needAlignedBlock || isAligned(fBlock, slabSize), "Expect to get aligned block, if one was requested.");
610     FreeBlock::markBlocks(fBlock, num, size);
611     return fBlock;
612 }
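// Illustrative layout of the splitBlock() paths above (comment only):
//
//   GENERAL CASE, aligned request from an aligned block:
//     [ splitBlock -> back to backend | fBlock (num*size) -> requester ]
//
//   GENERAL CASE, unaligned request:
//     [ fBlock (num*size) -> requester | splitBlock -> back to backend ]
//
//   SPECIAL CASE, aligned request from an unaligned block (fixed pool only):
//     [ left part -> backend | newBlock (num*size) -> requester | right part -> backend ]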
613 
614 size_t Backend::getMaxBinnedSize() const
615 {
616     return hugePages.isEnabled && !inUserPool() ?
617         maxBinned_HugePage : maxBinned_SmallPage;
618 }
619 
620 inline bool Backend::MaxRequestComparator::operator()(size_t oldMaxReq, size_t requestSize) const
621 {
622     return requestSize > oldMaxReq && requestSize < backend->getMaxBinnedSize();
623 }
624 
625 // last chance to get memory
626 FreeBlock *Backend::releaseMemInCaches(intptr_t startModifiedCnt,
627                                     int *lockedBinsThreshold, int numOfLockedBins)
628 {
629     // something was released from the caches
630     if (extMemPool->hardCachesCleanup()
631         // ...or we can use blocks that are being processed now
632         || bkndSync.waitTillBlockReleased(startModifiedCnt))
633         return (FreeBlock*)VALID_BLOCK_IN_BIN;
634     // OS can't give us more memory, but we have some in locked bins
635     if (*lockedBinsThreshold && numOfLockedBins) {
636         *lockedBinsThreshold = 0;
637         return (FreeBlock*)VALID_BLOCK_IN_BIN;
638     }
639     return nullptr; // nothing found, give up
640 }
641 
642 FreeBlock *Backend::askMemFromOS(size_t blockSize, intptr_t startModifiedCnt,
643                                  int *lockedBinsThreshold, int numOfLockedBins,
644                                  bool *splittableRet, bool needSlabRegion)
645 {
646     FreeBlock *block;
647     // The block sizes can be divided into 3 groups:
648     //   1. "quite small": a popular object size, or we are in bootstrap or a
649     //      similar situation; request several regions.
650     //   2. "quite large": we want to have several such blocks in the region,
651     //      but do not want several pre-allocated regions.
652     //   3. "huge": exact fit; we allocate only one block and do not allow
653     //      any other allocations to be placed in the region.
654     // By dividing the block sizes into these groups we try to balance between
655     // too-small regions (which lead to fragmentation) and too-large ones (which
656     // lead to excessive address space consumption). If a region is "too
657     // large", allocate only one, to prevent fragmentation. It supposedly
658     // doesn't hurt performance, because the object requested by the user is large.
659     // Bounds for the groups are:
660     const size_t maxBinned = getMaxBinnedSize();
661     const size_t quiteSmall = maxBinned / 8;
662     const size_t quiteLarge = maxBinned;
663 
664     if (blockSize >= quiteLarge) {
665         // Do not interact with other threads via semaphores, as for an exact fit
666         // we can't share regions with them; memory requests are individual.
667         block = addNewRegion(blockSize, MEMREG_ONE_BLOCK, /*addToBin=*/false);
668         if (!block)
669             return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
670         *splittableRet = false;
671     } else {
672         const size_t regSz_sizeBased = alignUp(4*maxRequestedSize, 1024*1024);
673         // Another thread is modifying the backend while we can't get the block.
674         // Wait until it leaves and re-do the scan
675         // before trying other ways to extend the backend.
676         if (bkndSync.waitTillBlockReleased(startModifiedCnt)
677             // the semaphore protects adding more memory from the OS
678             || memExtendingSema.wait())
679             return (FreeBlock*)VALID_BLOCK_IN_BIN;
680 
681         if (startModifiedCnt != bkndSync.getNumOfMods()) {
682             memExtendingSema.signal();
683             return (FreeBlock*)VALID_BLOCK_IN_BIN;
684         }
685 
686         if (blockSize < quiteSmall) {
687             // For this size of blocks, add NUM_OF_REG "advance" regions in bin,
688             // and return one as a result.
689             // TODO: add to bin first, because other threads can use them right away.
690             // This must be done carefully, because blocks in bins can be released
691             // in releaseCachesToLimit().
692             const unsigned NUM_OF_REG = 3;
693             MemRegionType regType = needSlabRegion ? MEMREG_SLAB_BLOCKS : MEMREG_LARGE_BLOCKS;
694             block = addNewRegion(regSz_sizeBased, regType, /*addToBin=*/false);
695             if (block)
696                 for (unsigned idx=0; idx<NUM_OF_REG; idx++)
697                     if (! addNewRegion(regSz_sizeBased, regType, /*addToBin=*/true))
698                         break;
699         } else {
700             block = addNewRegion(regSz_sizeBased, MEMREG_LARGE_BLOCKS, /*addToBin=*/false);
701         }
702         memExtendingSema.signal();
703 
704         // no regions found, try to clean cache
705         if (!block || block == (FreeBlock*)VALID_BLOCK_IN_BIN)
706             return releaseMemInCaches(startModifiedCnt, lockedBinsThreshold, numOfLockedBins);
707         // Since a region can hold more than one block it can be split.
708         *splittableRet = true;
709     }
710     // after asking the OS for memory, release caches if we are above the memory limits
711     releaseCachesToLimit();
712 
713     return block;
714 }
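// Illustrative sketch (not compiled) of the size classification described in the
// comment above; the helper name and the enum are hypothetical:
#if 0
enum class RequestClass { QuiteSmall, QuiteLarge, Huge };

static RequestClass classify(size_t blockSize, size_t maxBinned)
{
    if (blockSize >= maxBinned)      // "huge": one exact-fit MEMREG_ONE_BLOCK region
        return RequestClass::Huge;
    if (blockSize < maxBinned / 8)   // "quite small": several advance regions are added
        return RequestClass::QuiteSmall;
    return RequestClass::QuiteLarge; // "quite large": one shared region, no advance ones
}
#endif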
715 
716 void Backend::releaseCachesToLimit()
717 {
718     if (!memSoftLimit.load(std::memory_order_relaxed)
719             || totalMemSize.load(std::memory_order_relaxed) <= memSoftLimit.load(std::memory_order_relaxed)) {
720         return;
721     }
722     size_t locTotalMemSize, locMemSoftLimit;
723 
724     scanCoalescQ(/*forceCoalescQDrop=*/false);
725     if (extMemPool->softCachesCleanup() &&
726         (locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
727         (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
728         return;
729     // clean global large-object cache, if this is not enough, clean local caches
730     // do this in several tries, because backend fragmentation can prevent
731     // region from releasing
732     for (int cleanLocal = 0; cleanLocal<2; cleanLocal++)
733         while (cleanLocal ?
734                  extMemPool->allLocalCaches.cleanup(/*cleanOnlyUnused=*/true) :
735                  extMemPool->loc.decreasingCleanup())
736             if ((locTotalMemSize = totalMemSize.load(std::memory_order_acquire)) <=
737                 (locMemSoftLimit = memSoftLimit.load(std::memory_order_acquire)))
738                 return;
739     // last chance to match memSoftLimit
740     extMemPool->hardCachesCleanup();
741 }
742 
743 int Backend::IndexedBins::getMinNonemptyBin(unsigned startBin) const
744 {
745     int p = bitMask.getMinTrue(startBin);
746     return p == -1 ? Backend::freeBinsNum : p;
747 }
748 
749 FreeBlock *Backend::IndexedBins::findBlock(int nativeBin, BackendSync *sync, size_t size,
750         bool needAlignedBlock, bool alignedBin, int *numOfLockedBins)
751 {
752     for (int i=getMinNonemptyBin(nativeBin); i<(int)freeBinsNum; i=getMinNonemptyBin(i+1))
753         if (FreeBlock *block = getFromBin(i, sync, size, needAlignedBlock, alignedBin, /*wait=*/false, numOfLockedBins))
754             return block;
755 
756     return nullptr;
757 }
758 
759 void Backend::requestBootstrapMem()
760 {
761     if (bootsrapMemDone == bootsrapMemStatus.load(std::memory_order_acquire))
762         return;
763     MallocMutex::scoped_lock lock( bootsrapMemStatusMutex );
764     if (bootsrapMemDone == bootsrapMemStatus)
765         return;
766     MALLOC_ASSERT(bootsrapMemNotDone == bootsrapMemStatus, ASSERT_TEXT);
767     bootsrapMemStatus = bootsrapMemInitializing;
768     // request a rather big region in advance during bootstrap
769     // ok to get nullptr here, as later we re-do the request with a more modest size
770     addNewRegion(2*1024*1024, MEMREG_SLAB_BLOCKS, /*addToBin=*/true);
771     bootsrapMemStatus = bootsrapMemDone;
772 }
773 
774 // try to allocate a block of num*size bytes from the available bins
775 // needAlignedBlock is true if the result must be slab-aligned
776 FreeBlock *Backend::genericGetBlock(int num, size_t size, bool needAlignedBlock)
777 {
778     FreeBlock *block = nullptr;
779     const size_t totalReqSize = num*size;
780     // no splitting after requesting new region, asks exact size
781     const int nativeBin = sizeToBin(totalReqSize);
782 
783     requestBootstrapMem();
784     // If we found 2 or fewer locked bins, it's time to ask the OS for more memory.
785     // But nothing can be asked from a fixed pool. And we prefer to wait rather than
786     // ask for more memory if the block is quite large.
787     int lockedBinsThreshold = extMemPool->fixedPool || size>=maxBinned_SmallPage? 0 : 2;
788 
789     // Find maximal requested size limited by getMaxBinnedSize()
790     AtomicUpdate(maxRequestedSize, totalReqSize, MaxRequestComparator(this));
791     scanCoalescQ(/*forceCoalescQDrop=*/false);
792 
793     bool splittable = true;
794     for (;;) {
795         const intptr_t startModifiedCnt = bkndSync.getNumOfMods();
796         int numOfLockedBins;
797 
798         do {
799             numOfLockedBins = 0;
800             if (needAlignedBlock) {
801                 block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
802                                                         /*alignedBin=*/true, &numOfLockedBins);
803                 if (!block && extMemPool->fixedPool)
804                     block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
805                                                         /*alignedBin=*/false, &numOfLockedBins);
806             } else {
807                 block = freeLargeBlockBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
808                                                         /*alignedBin=*/false, &numOfLockedBins);
809                 if (!block && extMemPool->fixedPool)
810                     block = freeSlabAlignedBins.findBlock(nativeBin, &bkndSync, num*size, needAlignedBlock,
811                                                         /*alignedBin=*/true, &numOfLockedBins);
812             }
813         } while (!block && numOfLockedBins>lockedBinsThreshold);
814 
815         if (block)
816             break;
817 
818         if (!(scanCoalescQ(/*forceCoalescQDrop=*/true) | extMemPool->softCachesCleanup())) {
819             // the bins were not updated,
820             // so the only remaining possibility is to ask for more memory
821             block = askMemFromOS(totalReqSize, startModifiedCnt, &lockedBinsThreshold,
822                         numOfLockedBins, &splittable, needAlignedBlock);
823             if (!block)
824                 return nullptr;
825             if (block != (FreeBlock*)VALID_BLOCK_IN_BIN) {
826                 // size can be increased in askMemFromOS, that's why >=
827                 MALLOC_ASSERT(block->sizeTmp >= size, ASSERT_TEXT);
828                 break;
829             }
830             // valid block somewhere in bins, let's find it
831             block = nullptr;
832         }
833     }
834     MALLOC_ASSERT(block, ASSERT_TEXT);
835     if (splittable) {
836         // At this point we have to be sure that slabAligned attribute describes the right block state
837         block = splitBlock(block, num, size, block->slabAligned, needAlignedBlock);
838     }
839     // matched blockConsumed() from startUseBlock()
840     bkndSync.blockReleased();
841 
842     return block;
843 }
844 
845 LargeMemoryBlock *Backend::getLargeBlock(size_t size)
846 {
847     LargeMemoryBlock *lmb =
848         (LargeMemoryBlock*)genericGetBlock(1, size, /*needAlignedRes=*/false);
849     if (lmb) {
850         lmb->unalignedSize = size;
851         if (extMemPool->userPool())
852             extMemPool->lmbList.add(lmb);
853     }
854     return lmb;
855 }
856 
857 BlockI *Backend::getSlabBlock(int num) {
858     BlockI *b = (BlockI*)genericGetBlock(num, slabSize, /*slabAligned=*/true);
859     MALLOC_ASSERT(isAligned(b, slabSize), ASSERT_TEXT);
860     return b;
861 }
862 
863 void Backend::putSlabBlock(BlockI *block) {
864     genericPutBlock((FreeBlock *)block, slabSize, /*slabAligned=*/true);
865 }
866 
867 void *Backend::getBackRefSpace(size_t size, bool *rawMemUsed)
868 {
869     // This block is released only at shutdown, so if taken from the backend
870     // it could prevent an entire region from being released;
871     // therefore prefer using getRawMemory.
872     if (void *ret = getRawMemory(size, REGULAR)) {
873         *rawMemUsed = true;
874         return ret;
875     }
876     void *ret = genericGetBlock(1, size, /*needAlignedRes=*/false);
877     if (ret) *rawMemUsed = false;
878     return ret;
879 }
880 
881 void Backend::putBackRefSpace(void *b, size_t size, bool rawMemUsed)
882 {
883     if (rawMemUsed)
884         freeRawMemory(b, size);
885     // non-raw memory is ignored, as it is released when its region is released
886 }
887 
888 void Backend::removeBlockFromBin(FreeBlock *fBlock)
889 {
890     if (fBlock->myBin != Backend::NO_BIN) {
891         if (fBlock->slabAligned)
892             freeSlabAlignedBins.lockRemoveBlock(fBlock->myBin, fBlock);
893         else
894             freeLargeBlockBins.lockRemoveBlock(fBlock->myBin, fBlock);
895     }
896 }
897 
898 void Backend::genericPutBlock(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
899 {
900     bkndSync.blockConsumed();
901     coalescAndPut(fBlock, blockSz, slabAligned);
902     bkndSync.blockReleased();
903 }
904 
905 void AllLargeBlocksList::add(LargeMemoryBlock *lmb)
906 {
907     MallocMutex::scoped_lock scoped_cs(largeObjLock);
908     lmb->gPrev = nullptr;
909     lmb->gNext = loHead;
910     if (lmb->gNext)
911         lmb->gNext->gPrev = lmb;
912     loHead = lmb;
913 }
914 
915 void AllLargeBlocksList::remove(LargeMemoryBlock *lmb)
916 {
917     MallocMutex::scoped_lock scoped_cs(largeObjLock);
918     if (loHead == lmb)
919         loHead = lmb->gNext;
920     if (lmb->gNext)
921         lmb->gNext->gPrev = lmb->gPrev;
922     if (lmb->gPrev)
923         lmb->gPrev->gNext = lmb->gNext;
924 }
925 
926 void Backend::putLargeBlock(LargeMemoryBlock *lmb)
927 {
928     if (extMemPool->userPool())
929         extMemPool->lmbList.remove(lmb);
930     genericPutBlock((FreeBlock *)lmb, lmb->unalignedSize, false);
931 }
932 
933 void Backend::returnLargeObject(LargeMemoryBlock *lmb)
934 {
935     removeBackRef(lmb->backRefIdx);
936     putLargeBlock(lmb);
937     STAT_increment(getThreadId(), ThreadCommonCounters, freeLargeObj);
938 }
939 
940 #if BACKEND_HAS_MREMAP
941 void *Backend::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
942 {
943     // no remap for user pools, nor for objects small enough to live in bins
944     if (inUserPool() || min(oldSize, newSize)<maxBinned_SmallPage
945         // during remap, we can't guarantee alignment stricter than the current one or
946         // stricter than page alignment
947         || !isAligned(ptr, alignment) || alignment>extMemPool->granularity)
948         return nullptr;
949     const LargeMemoryBlock* lmbOld = ((LargeObjectHdr *)ptr - 1)->memoryBlock;
950     const size_t oldUnalignedSize = lmbOld->unalignedSize;
951     FreeBlock *oldFBlock = (FreeBlock *)lmbOld;
952     FreeBlock *right = oldFBlock->rightNeig(oldUnalignedSize);
953     // in every region only one block can have LAST_REGION_BLOCK on its right,
954     // so no synchronization is needed
955     if (!right->isLastRegionBlock())
956         return nullptr;
957 
958     MemRegion *oldRegion = static_cast<LastFreeBlock*>(right)->memRegion;
959     MALLOC_ASSERT( oldRegion < ptr, ASSERT_TEXT );
960     const size_t oldRegionSize = oldRegion->allocSz;
961     if (oldRegion->type != MEMREG_ONE_BLOCK)
962         return nullptr;  // we are not single in the region
963     const size_t userOffset = (uintptr_t)ptr - (uintptr_t)oldRegion;
964     const size_t alignedSize = LargeObjectCache::alignToBin(newSize + userOffset);
965     const size_t requestSize =
966         alignUp(sizeof(MemRegion) + alignedSize + sizeof(LastFreeBlock), extMemPool->granularity);
967     if (requestSize < alignedSize) // has the size computation wrapped around?
968         return nullptr;
969     regionList.remove(oldRegion);
970 
971     // The deallocation should be registered in address range before mremap to
972     // prevent a race condition with allocation on another thread.
973     // (OS can reuse the memory and registerAlloc will be missed on another thread)
974     usedAddrRange.registerFree((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize);
975 
976     void *ret = mremap(oldRegion, oldRegion->allocSz, requestSize, MREMAP_MAYMOVE);
977     if (MAP_FAILED == ret) { // can't remap, revert and leave
978         regionList.add(oldRegion);
979         usedAddrRange.registerAlloc((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize);
980         return nullptr;
981     }
982     MemRegion *region = (MemRegion*)ret;
983     MALLOC_ASSERT(region->type == MEMREG_ONE_BLOCK, ASSERT_TEXT);
984     region->allocSz = requestSize;
985     region->blockSz = alignedSize;
986 
987     FreeBlock *fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion),
988                                              largeObjectAlignment);
989 
990     regionList.add(region);
991     startUseBlock(region, fBlock, /*addToBin=*/false);
992     MALLOC_ASSERT(fBlock->sizeTmp == region->blockSz, ASSERT_TEXT);
993     // matched blockConsumed() in startUseBlock().
994     // TODO: get rid of useless pair blockConsumed()/blockReleased()
995     bkndSync.blockReleased();
996 
997     // object must start at same offset from region's start
998     void *object = (void*)((uintptr_t)region + userOffset);
999     MALLOC_ASSERT(isAligned(object, alignment), ASSERT_TEXT);
1000     LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
1001     setBackRef(header->backRefIdx, header);
1002 
1003     LargeMemoryBlock *lmb = (LargeMemoryBlock*)fBlock;
1004     lmb->unalignedSize = region->blockSz;
1005     lmb->objectSize = newSize;
1006     lmb->backRefIdx = header->backRefIdx;
1007     header->memoryBlock = lmb;
1008     MALLOC_ASSERT((uintptr_t)lmb + lmb->unalignedSize >=
1009                   (uintptr_t)object + lmb->objectSize, "An object must fit to the block.");
1010 
1011     usedAddrRange.registerAlloc((uintptr_t)region, (uintptr_t)region + requestSize);
1012     totalMemSize.fetch_add(region->allocSz - oldRegionSize);
1013 
1014     return object;
1015 }
1016 #endif /* BACKEND_HAS_MREMAP */
1017 
1018 void Backend::releaseRegion(MemRegion *memRegion)
1019 {
1020     regionList.remove(memRegion);
1021     freeRawMem(memRegion, memRegion->allocSz);
1022 }
1023 
1024 // coalesce fBlock with its neighborhood
1025 FreeBlock *Backend::doCoalesc(FreeBlock *fBlock, MemRegion **mRegion)
1026 {
1027     FreeBlock *resBlock = fBlock;
1028     size_t resSize = fBlock->sizeTmp;
1029     MemRegion *memRegion = nullptr;
1030 
1031     fBlock->markCoalescing(resSize);
1032     resBlock->blockInBin = false;
1033 
1034     // coalescing with left neighbor
1035     size_t leftSz = fBlock->trySetLeftUsed(GuardedSize::COAL_BLOCK);
1036     if (leftSz != GuardedSize::LOCKED) {
1037         if (leftSz == GuardedSize::COAL_BLOCK) {
1038             coalescQ.putBlock(fBlock);
1039             return nullptr;
1040         } else {
1041             FreeBlock *left = fBlock->leftNeig(leftSz);
1042             size_t lSz = left->trySetMeUsed(GuardedSize::COAL_BLOCK);
1043             if (lSz <= GuardedSize::MAX_LOCKED_VAL) {
1044                 fBlock->setLeftFree(leftSz); // rollback
1045                 coalescQ.putBlock(fBlock);
1046                 return nullptr;
1047             } else {
1048                 MALLOC_ASSERT(lSz == leftSz, "Invalid header");
1049                 left->blockInBin = true;
1050                 resBlock = left;
1051                 resSize += leftSz;
1052                 resBlock->sizeTmp = resSize;
1053             }
1054         }
1055     }
1056     // coalescing with right neighbor
1057     FreeBlock *right = fBlock->rightNeig(fBlock->sizeTmp);
1058     size_t rightSz = right->trySetMeUsed(GuardedSize::COAL_BLOCK);
1059     if (rightSz != GuardedSize::LOCKED) {
1060         // LastFreeBlock is on the right side
1061         if (GuardedSize::LAST_REGION_BLOCK == rightSz) {
1062             right->setMeFree(GuardedSize::LAST_REGION_BLOCK);
1063             memRegion = static_cast<LastFreeBlock*>(right)->memRegion;
1064         } else if (GuardedSize::COAL_BLOCK == rightSz) {
1065             if (resBlock->blockInBin) {
1066                 resBlock->blockInBin = false;
1067                 removeBlockFromBin(resBlock);
1068             }
1069             coalescQ.putBlock(resBlock);
1070             return nullptr;
1071         } else {
1072             size_t rSz = right->rightNeig(rightSz)->
1073                 trySetLeftUsed(GuardedSize::COAL_BLOCK);
1074             if (rSz <= GuardedSize::MAX_LOCKED_VAL) {
1075                 right->setMeFree(rightSz);  // rollback
1076                 if (resBlock->blockInBin) {
1077                     resBlock->blockInBin = false;
1078                     removeBlockFromBin(resBlock);
1079                 }
1080                 coalescQ.putBlock(resBlock);
1081                 return nullptr;
1082             } else {
1083                 MALLOC_ASSERT(rSz == rightSz, "Invalid header");
1084                 removeBlockFromBin(right);
1085                 resSize += rightSz;
1086 
1087                 // Is LastFreeBlock on the right side of right?
1088                 FreeBlock *nextRight = right->rightNeig(rightSz);
1089                 size_t nextRightSz = nextRight->
1090                     trySetMeUsed(GuardedSize::COAL_BLOCK);
1091                 if (nextRightSz > GuardedSize::MAX_LOCKED_VAL) {
1092                     if (nextRightSz == GuardedSize::LAST_REGION_BLOCK)
1093                         memRegion = static_cast<LastFreeBlock*>(nextRight)->memRegion;
1094 
1095                     nextRight->setMeFree(nextRightSz);
1096                 }
1097             }
1098         }
1099     }
1100     if (memRegion) {
1101         MALLOC_ASSERT((uintptr_t)memRegion + memRegion->allocSz >=
1102                       (uintptr_t)right + sizeof(LastFreeBlock), ASSERT_TEXT);
1103         MALLOC_ASSERT((uintptr_t)memRegion < (uintptr_t)resBlock, ASSERT_TEXT);
1104         *mRegion = memRegion;
1105     } else
1106         *mRegion = nullptr;
1107     resBlock->sizeTmp = resSize;
1108     return resBlock;
1109 }
1110 
1111 bool Backend::coalescAndPutList(FreeBlock *list, bool forceCoalescQDrop, bool reportBlocksProcessed)
1112 {
1113     bool regionReleased = false;
1114 
1115     for (FreeBlock *helper; list;
1116          list = helper,
1117              // matches block enqueue in CoalRequestQ::putBlock()
1118              reportBlocksProcessed? coalescQ.blockWasProcessed() : (void)0) {
1119         MemRegion *memRegion;
1120         bool addToTail = false;
1121 
1122         helper = list->nextToFree;
1123         FreeBlock *toRet = doCoalesc(list, &memRegion);
1124         if (!toRet)
1125             continue;
1126 
1127         if (memRegion && memRegion->blockSz == toRet->sizeTmp
1128             && !extMemPool->fixedPool) {
1129             if (extMemPool->regionsAreReleaseable()) {
1130                 // release the region, because there are no used blocks in it
1131                 if (toRet->blockInBin)
1132                     removeBlockFromBin(toRet);
1133                 releaseRegion(memRegion);
1134                 regionReleased = true;
1135                 continue;
1136             } else // add block from empty region to end of bin,
1137                 addToTail = true; // preserving for exact fit
1138         }
1139         size_t currSz = toRet->sizeTmp;
1140         int bin = sizeToBin(currSz);
1141         bool toAligned = extMemPool->fixedPool ? toAlignedBin(toRet, currSz) : toRet->slabAligned;
1142         bool needAddToBin = true;
1143 
1144         if (toRet->blockInBin) {
1145             // Does it stay in same bin?
1146             if (toRet->myBin == bin && toRet->slabAligned == toAligned)
1147                 needAddToBin = false;
1148             else {
1149                 toRet->blockInBin = false;
1150                 removeBlockFromBin(toRet);
1151             }
1152         }
1153 
1154         // Does not stay in same bin, or bin-less; add it
1155         if (needAddToBin) {
1156             toRet->prev = toRet->next = toRet->nextToFree = nullptr;
1157             toRet->myBin = NO_BIN;
1158             toRet->slabAligned = toAligned;
1159 
1160             // If the block is too small to fit in any bin, keep it bin-less.
1161             // It's not a leak because the block later can be coalesced.
1162             if (currSz >= minBinnedSize) {
1163                 toRet->sizeTmp = currSz;
1164                 IndexedBins *target = toRet->slabAligned ? &freeSlabAlignedBins : &freeLargeBlockBins;
1165                 if (forceCoalescQDrop) {
1166                     target->addBlock(bin, toRet, toRet->sizeTmp, addToTail);
1167                 } else if (!target->tryAddBlock(bin, toRet, addToTail)) {
1168                     coalescQ.putBlock(toRet);
1169                     continue;
1170                 }
1171             }
1172             toRet->sizeTmp = 0;
1173         }
1174         // Release the (possibly coalesced) free block.
1175         // Adding to a bin must be done before this point,
1176         // because after a block is freed it can be coalesced, and
1177         // using its pointer becomes unsafe.
1178         // Remember that coalescing is not done under any global lock.
1179         toRet->setMeFree(currSz);
1180         toRet->rightNeig(currSz)->setLeftFree(currSz);
1181     }
1182     return regionReleased;
1183 }
1184 
1185 // Coalesce fBlock and add it back to a bin;
1186 // processing delayed coalescing requests.
1187 void Backend::coalescAndPut(FreeBlock *fBlock, size_t blockSz, bool slabAligned)
1188 {
1189     fBlock->sizeTmp = blockSz;
1190     fBlock->nextToFree = nullptr;
1191     fBlock->slabAligned = slabAligned;
1192 
1193     coalescAndPutList(fBlock, /*forceCoalescQDrop=*/false, /*reportBlocksProcessed=*/false);
1194 }
1195 
1196 bool Backend::scanCoalescQ(bool forceCoalescQDrop)
1197 {
1198     FreeBlock *currCoalescList = coalescQ.getAll();
1199 
1200     if (currCoalescList)
1201         // reportBlocksProcessed=true informs that the blocks leave coalescQ,
1202         // matches blockConsumed() from CoalRequestQ::putBlock()
1203         coalescAndPutList(currCoalescList, forceCoalescQDrop,
1204                           /*reportBlocksProcessed=*/true);
1205     // returns status of coalescQ.getAll(), as an indication of possible changes in backend
1206     // TODO: coalescAndPutList() may report whether some new free blocks became available
1207     return currCoalescList;
1208 }
1209 
1210 FreeBlock *Backend::findBlockInRegion(MemRegion *region, size_t exactBlockSize)
1211 {
1212     FreeBlock *fBlock;
1213     size_t blockSz;
1214     uintptr_t fBlockEnd,
1215         lastFreeBlock = (uintptr_t)region + region->allocSz - sizeof(LastFreeBlock);
1216 
1217     static_assert(sizeof(LastFreeBlock) % sizeof(uintptr_t) == 0,
1218         "Atomic applied on LastFreeBlock, and we put it at the end of region, that"
1219         " is uintptr_t-aligned, so no unaligned atomic operations are possible.");
1220      // right bound is slab-aligned, keep LastFreeBlock after it
1221     if (region->type == MEMREG_SLAB_BLOCKS) {
1222         fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), sizeof(uintptr_t));
1223         fBlockEnd = alignDown(lastFreeBlock, slabSize);
1224     } else {
1225         fBlock = (FreeBlock *)alignUp((uintptr_t)region + sizeof(MemRegion), largeObjectAlignment);
1226         fBlockEnd = (uintptr_t)fBlock + exactBlockSize;
1227         MALLOC_ASSERT(fBlockEnd <= lastFreeBlock, ASSERT_TEXT);
1228     }
1229     if (fBlockEnd <= (uintptr_t)fBlock)
1230         return nullptr; // allocSz is too small
1231     blockSz = fBlockEnd - (uintptr_t)fBlock;
1232     // TODO: extend getSlabBlock to support degradation, i.e. getting fewer blocks
1233     // than requested, and then relax this check
1234     // (currently all-or-nothing is implemented; the check reflects this)
1235     if (blockSz < numOfSlabAllocOnMiss*slabSize)
1236         return nullptr;
1237 
1238     region->blockSz = blockSz;
1239     return fBlock;
1240 }
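// Illustrative memory layout of a region as carved out by findBlockInRegion()
// and startUseBlock() below (comment only):
//
//   [ MemRegion | pad | fBlock ... region->blockSz ... | LastFreeBlock | unused tail ]
//   ^ region            ^ aligned to uintptr_t (slab regions) or largeObjectAlignment
//
// For MEMREG_SLAB_BLOCKS regions the right edge of fBlock is additionally rounded
// down to slabSize, so the carved block gets a slab-aligned right side.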
1241 
1242 // startUseBlock may add the free block to a bin; the block can be used and
1243 // even released after this, so the region must already be added to regionList
1244 void Backend::startUseBlock(MemRegion *region, FreeBlock *fBlock, bool addToBin)
1245 {
1246     size_t blockSz = region->blockSz;
1247     fBlock->initHeader();
1248     fBlock->setMeFree(blockSz);
1249 
1250     LastFreeBlock *lastBl = static_cast<LastFreeBlock*>(fBlock->rightNeig(blockSz));
1251     // to not get unaligned atomics during LastFreeBlock access
1252     MALLOC_ASSERT(isAligned(lastBl, sizeof(uintptr_t)), nullptr);
1253     lastBl->initHeader();
1254     lastBl->setMeFree(GuardedSize::LAST_REGION_BLOCK);
1255     lastBl->setLeftFree(blockSz);
1256     lastBl->myBin = NO_BIN;
1257     lastBl->memRegion = region;
1258 
1259     if (addToBin) {
1260         unsigned targetBin = sizeToBin(blockSz);
1261         // while adding advance regions, register the bin for the largest block in the region
1262         advRegBins.registerBin(targetBin);
1263         if (region->type == MEMREG_SLAB_BLOCKS) {
1264             fBlock->slabAligned = true;
1265             freeSlabAlignedBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
1266         } else {
1267             fBlock->slabAligned = false;
1268             freeLargeBlockBins.addBlock(targetBin, fBlock, blockSz, /*addToTail=*/false);
1269         }
1270     } else {
1271         // to match with blockReleased() in genericGetBlock
1272         bkndSync.blockConsumed();
1273         // Record our alignment for a correct splitBlock operation
1274         fBlock->slabAligned = region->type == MEMREG_SLAB_BLOCKS ? true : false;
1275         fBlock->sizeTmp = fBlock->tryLockBlock();
1276         MALLOC_ASSERT(fBlock->sizeTmp >= FreeBlock::minBlockSize, "Locking must be successful");
1277     }
1278 }
1279 
1280 void MemRegionList::add(MemRegion *r)
1281 {
1282     r->prev = nullptr;
1283     MallocMutex::scoped_lock lock(regionListLock);
1284     r->next = head;
1285     head = r;
1286     if (head->next)
1287         head->next->prev = head;
1288 }
1289 
1290 void MemRegionList::remove(MemRegion *r)
1291 {
1292     MallocMutex::scoped_lock lock(regionListLock);
1293     if (head == r)
1294         head = head->next;
1295     if (r->next)
1296         r->next->prev = r->prev;
1297     if (r->prev)
1298         r->prev->next = r->next;
1299 }
1300 
1301 #if __TBB_MALLOC_BACKEND_STAT
1302 int MemRegionList::reportStat(FILE *f)
1303 {
1304     int regNum = 0;
1305     MallocMutex::scoped_lock lock(regionListLock);
1306     for (MemRegion *curr = head; curr; curr = curr->next) {
1307         fprintf(f, "%p: max block %lu B, ", curr, curr->blockSz);
1308         regNum++;
1309     }
1310     return regNum;
1311 }
1312 #endif
1313 
1314 FreeBlock *Backend::addNewRegion(size_t size, MemRegionType memRegType, bool addToBin)
1315 {
1316     static_assert(sizeof(BlockMutexes) <= sizeof(BlockI), "Header must be not overwritten in used blocks");
1317     MALLOC_ASSERT(FreeBlock::minBlockSize > GuardedSize::MAX_SPEC_VAL,
1318           "Block length must not conflict with special values of GuardedSize");
1319     // If the region is not "for slabs" we should reserve some space for
1320     // a region header, the worst case alignment and the last block mark.
1321     const size_t requestSize = memRegType == MEMREG_SLAB_BLOCKS ? size :
1322         size + sizeof(MemRegion) + largeObjectAlignment
1323              +  FreeBlock::minBlockSize + sizeof(LastFreeBlock);
1324 
1325     size_t rawSize = requestSize;
1326     MemRegion *region = (MemRegion*)allocRawMem(rawSize);
1327     if (!region) {
1328         MALLOC_ASSERT(rawSize==requestSize, "getRawMem has not allocated memory but changed the allocated size.");
1329         return nullptr;
1330     }
1331     if (rawSize < sizeof(MemRegion)) {
1332         if (!extMemPool->fixedPool)
1333             freeRawMem(region, rawSize);
1334         return nullptr;
1335     }
1336 
1337     region->type = memRegType;
1338     region->allocSz = rawSize;
1339     FreeBlock *fBlock = findBlockInRegion(region, size);
1340     if (!fBlock) {
1341         if (!extMemPool->fixedPool)
1342             freeRawMem(region, rawSize);
1343         return nullptr;
1344     }
1345     regionList.add(region);
1346     startUseBlock(region, fBlock, addToBin);
1347     bkndSync.binsModified();
1348     return addToBin? (FreeBlock*)VALID_BLOCK_IN_BIN : fBlock;
1349 }
1350 
1351 void Backend::init(ExtMemoryPool *extMemoryPool)
1352 {
1353     extMemPool = extMemoryPool;
1354     usedAddrRange.init();
1355     coalescQ.init(&bkndSync);
1356     bkndSync.init(this);
1357 }
1358 
1359 void Backend::reset()
1360 {
1361     MALLOC_ASSERT(extMemPool->userPool(), "Only user pool can be reset.");
1362     // no active threads are allowed in backend while reset() called
1363     verify();
1364 
1365     freeLargeBlockBins.reset();
1366     freeSlabAlignedBins.reset();
1367     advRegBins.reset();
1368 
1369     for (MemRegion *curr = regionList.head; curr; curr = curr->next) {
1370         FreeBlock *fBlock = findBlockInRegion(curr, curr->blockSz);
1371         MALLOC_ASSERT(fBlock, "A memory region unexpectedly got smaller");
1372         startUseBlock(curr, fBlock, /*addToBin=*/true);
1373     }
1374 }
1375 
1376 bool Backend::destroy()
1377 {
1378     bool noError = true;
1379     // no active threads are allowed in backend while destroy() called
1380     verify();
1381     if (!inUserPool()) {
1382         freeLargeBlockBins.reset();
1383         freeSlabAlignedBins.reset();
1384     }
1385     while (regionList.head) {
1386         MemRegion *helper = regionList.head->next;
1387         noError &= freeRawMem(regionList.head, regionList.head->allocSz);
1388         regionList.head = helper;
1389     }
1390     return noError;
1391 }
1392 
1393 bool Backend::clean()
1394 {
1395     scanCoalescQ(/*forceCoalescQDrop=*/false);
1396 
1397     bool res = false;
1398     // We can have several free blocks, each occupying a whole region,
1399     // because such regions are added in advance (see askMemFromOS() and reset()),
1400     // and never used. Release them all.
1401     for (int i = advRegBins.getMinUsedBin(0); i != -1; i = advRegBins.getMinUsedBin(i+1)) {
1402         if (i == freeSlabAlignedBins.getMinNonemptyBin(i))
1403             res |= freeSlabAlignedBins.tryReleaseRegions(i, this);
1404         if (i == freeLargeBlockBins.getMinNonemptyBin(i))
1405             res |= freeLargeBlockBins.tryReleaseRegions(i, this);
1406     }
1407 
1408     return res;
1409 }
1410 
1411 void Backend::IndexedBins::verify()
1412 {
1413 #if MALLOC_DEBUG
1414     for (int i=0; i<(int)freeBinsNum; i++) {
1415         for (FreeBlock *fb = freeBins[i].head.load(std::memory_order_relaxed); fb; fb=fb->next) {
1416             uintptr_t mySz = fb->myL.value;
1417             MALLOC_ASSERT(mySz>GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
1418             FreeBlock *right = (FreeBlock*)((uintptr_t)fb + mySz);
1419             suppress_unused_warning(right);
1420             MALLOC_ASSERT(right->myL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
1421             MALLOC_ASSERT(right->leftL.value==mySz, ASSERT_TEXT);
1422             MALLOC_ASSERT(fb->leftL.value<=GuardedSize::MAX_SPEC_VAL, ASSERT_TEXT);
1423         }
1424     }
1425 #endif
1426 }
1427 
1428 // For correct operation, it must be called when no other thread
1429 // is changing the backend.
1430 void Backend::verify()
1431 {
1432 #if MALLOC_DEBUG
1433     scanCoalescQ(/*forceCoalescQDrop=*/false);
1434 #endif // MALLOC_DEBUG
1435 
1436     freeLargeBlockBins.verify();
1437     freeSlabAlignedBins.verify();
1438 }
1439 
1440 #if __TBB_MALLOC_BACKEND_STAT
1441 size_t Backend::Bin::countFreeBlocks()
1442 {
1443     size_t cnt = 0;
1444     {
1445         MallocMutex::scoped_lock lock(tLock);
1446         for (FreeBlock *fb = head; fb; fb = fb->next)
1447             cnt++;
1448     }
1449     return cnt;
1450 }
1451 
1452 size_t Backend::Bin::reportFreeBlocks(FILE *f)
1453 {
1454     size_t totalSz = 0;
1455     MallocMutex::scoped_lock lock(tLock);
1456     for (FreeBlock *fb = head; fb; fb = fb->next) {
1457         size_t sz = fb->tryLockBlock();
1458         fb->setMeFree(sz);
1459         fprintf(f, " [%p;%p]", fb, (void*)((uintptr_t)fb+sz));
1460         totalSz += sz;
1461     }
1462     return totalSz;
1463 }
1464 
1465 void Backend::IndexedBins::reportStat(FILE *f)
1466 {
1467     size_t totalSize = 0;
1468 
1469     for (int i=0; i<Backend::freeBinsNum; i++)
1470         if (size_t cnt = freeBins[i].countFreeBlocks()) {
1471             totalSize += freeBins[i].reportFreeBlocks(f);
1472             fprintf(f, " %d:%lu, ", i, cnt);
1473         }
1474     fprintf(f, "\ttotal size %lu KB", totalSize/1024);
1475 }
1476 
1477 void Backend::reportStat(FILE *f)
1478 {
1479     scanCoalescQ(/*forceCoalescQDrop=*/false);
1480 
1481     fprintf(f, "\n  regions:\n");
1482     int regNum = regionList.reportStat(f);
1483     fprintf(f, "\n%d regions, %lu KB in all regions\n  free bins:\nlarge bins: ",
1484             regNum, totalMemSize/1024);
1485     freeLargeBlockBins.reportStat(f);
1486     fprintf(f, "\naligned bins: ");
1487     freeSlabAlignedBins.reportStat(f);
1488     fprintf(f, "\n");
1489 }
1490 #endif // __TBB_MALLOC_BACKEND_STAT
1491 
1492 } } // namespaces
1493