xref: /oneTBB/src/tbbmalloc/backend.h (revision 32d5ec1f)
/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_tbbmalloc_internal_H
    #error tbbmalloc_internal.h must be included at this point
#endif

#ifndef __TBB_backend_H
#define __TBB_backend_H

// Included from namespace rml::internal

// Global state of blocks currently being processed
class BackendSync {
    // Class instances should reside in zero-initialized memory!
    // The number of blocks currently removed from a bin and not yet returned to another bin
    std::atomic<intptr_t> inFlyBlocks;
    std::atomic<intptr_t> binsModifications;  // incremented on every bin modification
    Backend *backend;
public:
    void init(Backend *b) { backend = b; }
    void blockConsumed() { inFlyBlocks++; }
    void binsModified() { binsModifications++; }
    void blockReleased() {
#if __TBB_MALLOC_BACKEND_STAT
        MALLOC_ITT_SYNC_RELEASING(&inFlyBlocks);
#endif
        binsModifications++;
        intptr_t prev = inFlyBlocks.fetch_sub(1);
        MALLOC_ASSERT(prev > 0, ASSERT_TEXT);
        suppress_unused_warning(prev);
    }
    intptr_t getNumOfMods() const { return binsModifications.load(std::memory_order_acquire); }
    // returns true if the block search needs to be redone
    inline bool waitTillBlockReleased(intptr_t startModifiedCnt);
};
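// Rough reading of the protocol above (inferred from the names and comments, not
// normative): a thread that takes a block out of a bin calls blockConsumed(), and
// blockReleased() once the block is no longer "in flight"; every bin change bumps
// binsModifications. A searching thread records getNumOfMods() as startModifiedCnt
// before scanning the bins; if the scan fails, waitTillBlockReleased(startModifiedCnt)
// tells it whether the bins changed in the meantime, i.e. whether repeating the
// search could succeed.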

class CoalRequestQ { // queue of free blocks whose coalescing was delayed
private:
    std::atomic<FreeBlock*> blocksToFree;
    BackendSync *bkndSync;
    // counts blocks that are in blocksToFree as well as blocks that have
    // left blocksToFree but are still being actively coalesced
    std::atomic<intptr_t> inFlyBlocks;
public:
    void init(BackendSync *bSync) { bkndSync = bSync; }
    FreeBlock *getAll(); // returns the current list of blocks and empties the queue
    void putBlock(FreeBlock *fBlock);
    inline void blockWasProcessed();
    intptr_t blocksInFly() const { return inFlyBlocks.load(std::memory_order_acquire); }
};
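// Sketch of the intended flow (an interpretation, not authoritative): when coalescing
// of a free block has to be delayed (see the "conflicted blocks" note at
// Backend::scanCoalescQ below), the block is deferred via putBlock(); the backend
// later drains the queue with getAll() and retries coalescing, while blocksInFly()
// lets cleanup code check whether deferred work is still pending.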

class MemExtendingSema {
    std::atomic<intptr_t>    active;
public:
    bool wait() {
        bool rescanBins = false;
        // up to 3 threads can add more memory from the OS simultaneously,
        // the rest of the threads have to wait
        intptr_t prevCnt = active.load(std::memory_order_acquire);
        for (;;) {
            if (prevCnt < 3) {
                if (active.compare_exchange_strong(prevCnt, prevCnt + 1)) {
                    break;
                }
            } else {
                SpinWaitWhileEq(active, prevCnt);
                rescanBins = true;
                break;
            }
        }
        return rescanBins;
    }
    void signal() { active.fetch_sub(1); }
};
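// Semantics visible in the code above: wait() returns false when the caller took one
// of the three "extend memory" slots (active was incremented), and returns true when
// it had to wait for a slot holder instead, in which case memory may have just been
// added by another thread and the caller should rescan the bins. A sketch of the
// pairing this implies (an assumption about intended use, not taken from the
// implementation):
//
//     if (!memExtendingSema.wait()) {
//         /* ... ask the OS for more memory ... */
//         memExtendingSema.signal();   // release the slot taken by wait()
//     } else {
//         /* ... rescan the bins before trying to extend memory again ... */
//     }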

enum MemRegionType {
    // The region holds only slabs
    MEMREG_SLAB_BLOCKS = 0,
    // The region can hold several large object blocks
    MEMREG_LARGE_BLOCKS,
    // The region holds only one block with a requested size
    MEMREG_ONE_BLOCK
};

class MemRegionList {
    MallocMutex regionListLock;
public:
    MemRegion  *head;
    void add(MemRegion *r);
    void remove(MemRegion *r);
    int reportStat(FILE *f);
};

class Backend {
private:
/* Blocks in the range [minBinnedSize; getMaxBinnedSize()] are kept in bins,
   and one region can contain several blocks. Larger blocks are allocated directly,
   and one region always contains exactly one block.
*/
    enum {
        minBinnedSize = 8*1024UL,
        /*   If huge pages are available, maxBinned_HugePage is used as the threshold.
             If not, maxBinned_SmallPage is the threshold.
             TODO: use the pool's granularity for setting the upper bound. */
        maxBinned_SmallPage = 1024*1024UL,
        // TODO: support other page sizes
        maxBinned_HugePage = 4*1024*1024UL
    };
    enum {
        VALID_BLOCK_IN_BIN = 1 // valid block added to a bin, not returned as a result
    };
public:
    // The backend bin step is the same as CacheStep for the large object cache
    static const size_t   freeBinsStep = LargeObjectCache::LargeBSProps::CacheStep;
    static const unsigned freeBinsNum = (maxBinned_HugePage-minBinnedSize)/freeBinsStep + 1;

    // if the previous access missed the per-thread slab pool,
    // allocate numOfSlabAllocOnMiss blocks in advance
    static const int numOfSlabAllocOnMiss = 2;

    enum {
        NO_BIN = -1,
        // special bin for blocks >= maxBinned_HugePage; blocks go to this bin
        // when the pool is created with the keepAllMemory policy
        // TODO: currently this bin is scanned using "first fit"; as it accumulates
        // blocks of different sizes, "best fit" would be preferable in terms of fragmentation
        HUGE_BIN = freeBinsNum-1
    };
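    // Illustration only (the real numbers depend on LargeObjectCache::LargeBSProps::CacheStep,
    // which is defined elsewhere): if freeBinsStep were 8 KB, then
    // freeBinsNum = (4 MB - 8 KB) / 8 KB + 1 = 512, bin 0 would hold blocks starting at
    // 8 KB, each following bin would start 8 KB higher, and HUGE_BIN would be bin 511.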

    // A Bin keeps a doubly-linked list of free blocks. It must be doubly-linked
    // because during coalescing a block is removed from the middle of the list.
    struct Bin {
        std::atomic<FreeBlock*> head;
        FreeBlock*              tail;
        MallocMutex             tLock;

        void removeBlock(FreeBlock *fBlock);
        void reset() {
            head.store(nullptr, std::memory_order_relaxed);
            tail = nullptr;
        }
        bool empty() const { return !head.load(std::memory_order_relaxed); }

        size_t countFreeBlocks();
        size_t reportFreeBlocks(FILE *f);
        void reportStat(FILE *f);
    };

    typedef BitMaskMin<Backend::freeBinsNum> BitMaskBins;

    // array of bins supplemented with a bitmask for fast lookup of non-empty bins
    class IndexedBins {
        BitMaskBins bitMask;
        Bin         freeBins[Backend::freeBinsNum];
        FreeBlock *getFromBin(int binIdx, BackendSync *sync, size_t size,
                bool needAlignedBlock, bool alignedBin, bool wait, int *resLocked);
    public:
        FreeBlock *findBlock(int nativeBin, BackendSync *sync, size_t size,
                bool needAlignedBlock, bool alignedBin, int *numOfLockedBins);
        bool tryReleaseRegions(int binIdx, Backend *backend);
        void lockRemoveBlock(int binIdx, FreeBlock *fBlock);
        void addBlock(int binIdx, FreeBlock *fBlock, size_t blockSz, bool addToTail);
        bool tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail);
        int  getMinNonemptyBin(unsigned startBin) const;
        void verify();
        void reset();
        void reportStat(FILE *f);
    };
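    // The bitmask mirrors which bins are currently non-empty, so callers can use
    // getMinNonemptyBin() to skip empty bins instead of probing each Bin in turn.
    // Hypothetical illustration (not taken from the implementation):
    //
    //     int bin = freeLargeBlockBins.getMinNonemptyBin(sizeToBin(size));
    //     // bin is the first bin at or above sizeToBin(size) that holds a free block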

private:
    class AdvRegionsBins {
        BitMaskBins bins;
    public:
        void registerBin(int regBin) { bins.set(regBin, 1); }
        int getMinUsedBin(int start) const { return bins.getMinTrue(start); }
        void reset() { bins.reset(); }
    };
    // auxiliary class for atomically maintaining the maximal request size
    class MaxRequestComparator {
        const Backend *backend;
    public:
        MaxRequestComparator(const Backend *be) : backend(be) {}
        inline bool operator()(size_t oldMaxReq, size_t requestSize) const;
    };

#if CHECK_ALLOCATION_RANGE
    // Keeps the min and max of all addresses requested from the OS;
    // used for checking memory possibly allocated by replaced allocators
    // and for debugging purposes. Valid only for the default memory pool.
    class UsedAddressRange {
        static const uintptr_t ADDRESS_UPPER_BOUND = UINTPTR_MAX;

        std::atomic<uintptr_t> leftBound,
                               rightBound;
        MallocMutex mutex;
    public:
        // rightBound is zero-initialized
        void init() { leftBound.store(ADDRESS_UPPER_BOUND, std::memory_order_relaxed); }
        void registerAlloc(uintptr_t left, uintptr_t right);
        void registerFree(uintptr_t left, uintptr_t right);
        // as only the left and right bounds are kept, we can return true
        // for a pointer not allocated by us if more than a single region
        // was requested from the OS
        bool inRange(void *ptr) const {
            const uintptr_t p = (uintptr_t)ptr;
            return leftBound.load(std::memory_order_relaxed)<=p &&
                   p<=rightBound.load(std::memory_order_relaxed);
        }
    };
#else
    class UsedAddressRange {
    public:
        void init() { }
        void registerAlloc(uintptr_t, uintptr_t) {}
        void registerFree(uintptr_t, uintptr_t) {}
        bool inRange(void *) const { return true; }
    };
#endif

    ExtMemoryPool   *extMemPool;
    // used to release every region when the pool is destroyed
    MemRegionList    regionList;

    CoalRequestQ     coalescQ; // queue of coalescing requests
    BackendSync      bkndSync;
    // semaphore protecting the addition of more memory from the OS
    MemExtendingSema memExtendingSema;
    //size_t           totalMemSize,
    //                 memSoftLimit;
    std::atomic<size_t> totalMemSize;
    std::atomic<size_t> memSoftLimit;
    UsedAddressRange usedAddrRange;
    // to keep the 1st allocation larger than requested, keep the bootstrapping status
    enum {
        bootsrapMemNotDone = 0,
        bootsrapMemInitializing,
        bootsrapMemDone
    };
    std::atomic<intptr_t> bootsrapMemStatus;
    MallocMutex      bootsrapMemStatusMutex;

    // Using the maximal observed requested size allows decreasing
    // memory consumption for small requests and decreasing fragmentation
    // in workloads where small and large allocation requests are mixed.
    // TODO: decrease it as well, not only increase it
    std::atomic<size_t> maxRequestedSize;

    // registers bins related to regions allocated in advance
    AdvRegionsBins advRegBins;
    // Storage for split FreeBlocks
    IndexedBins freeLargeBlockBins,
                freeSlabAlignedBins;

    std::atomic<intptr_t> backendCleanCnt;
    // Our friends
    friend class BackendSync;

    /******************************** Backend methods ******************************/

    /*--------------------------- Coalescing functions ----------------------------*/
    void coalescAndPut(FreeBlock *fBlock, size_t blockSz, bool slabAligned);
    bool coalescAndPutList(FreeBlock *head, bool forceCoalescQDrop, bool reportBlocksProcessed);

    // Main coalescing operation
    FreeBlock *doCoalesc(FreeBlock *fBlock, MemRegion **memRegion);

    // Queue for conflicted blocks during coalescing
    bool scanCoalescQ(bool forceCoalescQDrop);
    intptr_t blocksInCoalescing() const { return coalescQ.blocksInFly(); }

    /*--------------------- FreeBlock backend accessors ---------------------------*/
    FreeBlock *genericGetBlock(int num, size_t size, bool slabAligned);
    void genericPutBlock(FreeBlock *fBlock, size_t blockSz, bool slabAligned);

    // Split the block and return remaining parts to backend if possible
    FreeBlock *splitBlock(FreeBlock *fBlock, int num, size_t size, bool isAligned, bool needAlignedBlock);

    void removeBlockFromBin(FreeBlock *fBlock);

    // TODO: combine with returnLargeObject
    void putLargeBlock(LargeMemoryBlock *lmb);

    /*------------------- Starting point for OS allocation ------------------------*/
    void requestBootstrapMem();
    FreeBlock *askMemFromOS(size_t totalReqSize, intptr_t startModifiedCnt,
                            int *lockedBinsThreshold, int numOfLockedBins,
                            bool *splittable, bool needSlabRegion);

    /*---------------------- Memory regions allocation ----------------------------*/
    FreeBlock *addNewRegion(size_t size, MemRegionType type, bool addToBin);
    void releaseRegion(MemRegion *region);

    // TODO: combine in one initMemoryRegion function
    FreeBlock *findBlockInRegion(MemRegion *region, size_t exactBlockSize);
    void startUseBlock(MemRegion *region, FreeBlock *fBlock, bool addToBin);

    /*------------------------- Raw memory accessors ------------------------------*/
    void *allocRawMem(size_t &size);
    bool freeRawMem(void *object, size_t size);

    /*------------------------------ Cleanup functions ----------------------------*/
    // Clean all memory from all caches (extMemPool hard cleanup)
    FreeBlock *releaseMemInCaches(intptr_t startModifiedCnt, int *lockedBinsThreshold, int numOfLockedBins);
    // Soft heap limit (regular cleanup, then maybe hard cleanup)
    void releaseCachesToLimit();

    /*---------------------------------- Utility ----------------------------------*/
    // TODO: move inside IndexedBins class
    static int sizeToBin(size_t size) {
        if (size >= maxBinned_HugePage)
            return HUGE_BIN;
        else if (size < minBinnedSize)
            return NO_BIN;

        int bin = (size - minBinnedSize)/freeBinsStep;

        MALLOC_ASSERT(bin < HUGE_BIN, "Invalid size.");
        return bin;
    }
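    // Worked example of the mapping implemented by sizeToBin() (derived directly from
    // the code above): a request smaller than minBinnedSize (8 KB) gets NO_BIN, a
    // request of exactly minBinnedSize maps to bin 0, a request of
    // minBinnedSize + freeBinsStep maps to bin 1, and anything of maxBinned_HugePage
    // (4 MB) or more falls into HUGE_BIN.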
    static bool toAlignedBin(FreeBlock *block, size_t size) {
        return isAligned((char*)block + size, slabSize) && size >= slabSize;
    }
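    // Reading of the predicate above (not an extra guarantee): a block qualifies for the
    // slab-aligned bins only when its end address is slab-aligned and it spans at least
    // one slab, i.e. a slabSize-aligned slab can be carved out of its tail.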

public:
    /*--------------------- Init, reset, destroy, verify  -------------------------*/
    void init(ExtMemoryPool *extMemoryPool);
    bool destroy();

    void verify();
    void reset();
    bool clean(); // clean on caches cleanup

    /*------------------------- Slab block request --------------------------------*/
    BlockI *getSlabBlock(int num);
    void putSlabBlock(BlockI *block);

    /*-------------------------- Large object request -----------------------------*/
    LargeMemoryBlock *getLargeBlock(size_t size);
    // TODO: make consistent with getLargeBlock
    void returnLargeObject(LargeMemoryBlock *lmb);

    /*-------------------------- Backreference memory request ----------------------*/
    void *getBackRefSpace(size_t size, bool *rawMemUsed);
    void putBackRefSpace(void *b, size_t size, bool rawMemUsed);

    /*----------------------------- Remap object ----------------------------------*/
    void *remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment);

    /*---------------------------- Validation -------------------------------------*/
    bool inUserPool() const;
    bool ptrCanBeValid(void *ptr) const { return usedAddrRange.inRange(ptr); }

    /*-------------------------- Configuration API --------------------------------*/
    // Soft heap limit
    void setRecommendedMaxSize(size_t softLimit) {
        memSoftLimit = softLimit;
        releaseCachesToLimit();
    }
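    // Note (interpretation of the two lines above, not additional documented behavior):
    // the new soft limit takes effect immediately, because releaseCachesToLimit() is
    // invoked right after storing it, so lowering the limit triggers a cleanup pass
    // toward the new value right away.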

    /*------------------------------- Info ----------------------------------------*/
    size_t getMaxBinnedSize() const;

    /*-------------------------- Testing, statistics ------------------------------*/
#if __TBB_MALLOC_WHITEBOX_TEST
    size_t getTotalMemSize() const { return totalMemSize.load(std::memory_order_relaxed); }
#endif
#if __TBB_MALLOC_BACKEND_STAT
    void reportStat(FILE *f);
private:
    static size_t binToSize(int bin) {
        MALLOC_ASSERT(bin <= HUGE_BIN, "Invalid bin.");

        return bin*freeBinsStep + minBinnedSize;
    }
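    // Reading of the helper above: for the regular bins, binToSize() returns the smallest
    // size that sizeToBin() places in that bin, so binToSize(sizeToBin(s)) <= s for any
    // binned size s. It exists only for the __TBB_MALLOC_BACKEND_STAT reporting path.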
#endif
};

#endif // __TBB_backend_H