/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_tbbmalloc_internal_H
#error tbbmalloc_internal.h must be included at this point
#endif

#ifndef __TBB_backend_H
#define __TBB_backend_H

// Included from namespace rml::internal

// global state of blocks currently being processed
class BackendSync {
    // Class instances should reside in zero-initialized memory!
    // The number of blocks currently removed from a bin and not yet returned back to another bin
    std::atomic<intptr_t> inFlyBlocks;
    std::atomic<intptr_t> binsModifications; // incremented on every bin modification
    Backend *backend;
public:
    void init(Backend *b) { backend = b; }
    void blockConsumed() { inFlyBlocks++; }
    void binsModified() { binsModifications++; }
    void blockReleased() {
#if __TBB_MALLOC_BACKEND_STAT
        MALLOC_ITT_SYNC_RELEASING(&inFlyBlocks);
#endif
        binsModifications++;
        intptr_t prev = inFlyBlocks.fetch_sub(1);
        MALLOC_ASSERT(prev > 0, ASSERT_TEXT);
        suppress_unused_warning(prev);
    }
    intptr_t getNumOfMods() const { return binsModifications.load(std::memory_order_acquire); }
    // return true if the block search needs to be re-done
    inline bool waitTillBlockReleased(intptr_t startModifiedCnt);
};

class CoalRequestQ { // queue of free blocks whose coalescing was delayed
private:
    std::atomic<FreeBlock*> blocksToFree;
    BackendSync *bkndSync;
    // counts blocks that are in blocksToFree, plus those that have left it
    // but are still being coalesced
    std::atomic<intptr_t> inFlyBlocks;
public:
    void init(BackendSync *bSync) { bkndSync = bSync; }
    FreeBlock *getAll(); // return the current list of blocks and make the queue empty
    void putBlock(FreeBlock *fBlock);
    inline void blockWasProcessed();
    intptr_t blocksInFly() const { return inFlyBlocks.load(std::memory_order_acquire); }
};

class MemExtendingSema {
    std::atomic<intptr_t> active;
public:
    bool wait() {
        bool rescanBins = false;
        // up to 3 threads can add more memory from the OS simultaneously,
        // the remaining threads have to wait
        intptr_t prevCnt = active.load(std::memory_order_acquire);
        for (;;) {
            if (prevCnt < 3) {
                if (active.compare_exchange_strong(prevCnt, prevCnt + 1)) {
                    break;
                }
            } else {
                SpinWaitWhileEq(active, prevCnt);
                rescanBins = true;
                break;
            }
        }
        return rescanBins;
    }
    void signal() { active.fetch_sub(1); }
};

enum MemRegionType {
    // The region holds only slabs
    MEMREG_SLAB_BLOCKS = 0,
    // The region can hold several large object blocks
    MEMREG_LARGE_BLOCKS,
    // The region holds only one block with a requested size
    MEMREG_ONE_BLOCK
};

class MemRegionList {
    MallocMutex regionListLock;
public:
    MemRegion *head;
    void add(MemRegion *r);
    void remove(MemRegion *r);
    int reportStat(FILE *f);
};
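
/* Illustrative sketch, not part of the allocator: MemExtendingSema above acts as a
   counting semaphore capped at 3 concurrent "memory extenders", built from a CAS loop
   plus a spin-wait. A standalone analogue (names such as BoundedEntrySema are invented
   for this sketch, and std::this_thread::yield() stands in for SpinWaitWhileEq) could
   look like this:

       #include <atomic>
       #include <thread>

       class BoundedEntrySema {
           std::atomic<long> active{0};
           static const long limit = 3;
       public:
           // Returns true if the caller waited instead of taking a slot;
           // in that case signal() must not be called.
           bool wait() {
               long prev = active.load(std::memory_order_acquire);
               for (;;) {
                   if (prev < limit) {
                       if (active.compare_exchange_strong(prev, prev + 1))
                           return false;          // slot taken, no waiting happened
                       // CAS failure refreshed prev; retry
                   } else {
                       // all slots busy: wait until the count changes, then report it
                       while (active.load(std::memory_order_acquire) == prev)
                           std::this_thread::yield();
                       return true;
                   }
               }
           }
           void signal() { active.fetch_sub(1, std::memory_order_release); }
       };
*/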

class Backend {
private:
    /* Blocks in the range [minBinnedSize; getMaxBinnedSize()] are kept in bins,
       and one region can contain several blocks. Larger blocks are allocated directly,
       and their region always contains exactly one block.
    */
    enum {
        minBinnedSize = 8*1024UL,
        /* If huge pages are available, maxBinned_HugePage is used as the threshold.
           If not, maxBinned_SmallPage is the threshold.
           TODO: use pool's granularity for upper bound setting.*/
        maxBinned_SmallPage = 1024*1024UL,
        // TODO: support other page sizes
        maxBinned_HugePage = 4*1024*1024UL
    };
    enum {
        VALID_BLOCK_IN_BIN = 1 // valid block added to bin, not returned as result
    };
public:
    // Backend bin step is the same as CacheStep for the large object cache
    static const size_t freeBinsStep = LargeObjectCache::LargeBSProps::CacheStep;
    static const unsigned freeBinsNum = (maxBinned_HugePage-minBinnedSize)/freeBinsStep + 1;

    // if the previous access missed the per-thread slab pool,
    // allocate numOfSlabAllocOnMiss blocks in advance
    static const int numOfSlabAllocOnMiss = 2;

    enum {
        NO_BIN = -1,
        // special bin for blocks >= maxBinned_HugePage; blocks go to this bin
        // when the pool is created with the keepAllMemory policy
        // TODO: currently this bin is scanned using "1st fit"; as it accumulates
        // blocks of different sizes, "best fit" is preferable in terms of fragmentation
        HUGE_BIN = freeBinsNum-1
    };

    // Bin keeps a doubly linked list of free blocks. It must be doubly linked
    // because during coalescing a block may be removed from the middle of the list.
    struct Bin {
        std::atomic<FreeBlock*> head;
        FreeBlock*              tail;
        MallocMutex             tLock;

        void removeBlock(FreeBlock *fBlock);
        void reset() {
            head.store(nullptr, std::memory_order_relaxed);
            tail = nullptr;
        }
        bool empty() const { return !head.load(std::memory_order_relaxed); }

        size_t countFreeBlocks();
        size_t reportFreeBlocks(FILE *f);
        void reportStat(FILE *f);
    };

    typedef BitMaskMin<Backend::freeBinsNum> BitMaskBins;

    // array of bins supplemented with a bitmask for fast lookup of non-empty bins
    class IndexedBins {
        BitMaskBins bitMask;
        Bin         freeBins[Backend::freeBinsNum];
        FreeBlock *getFromBin(int binIdx, BackendSync *sync, size_t size,
                              bool needAlignedBlock, bool alignedBin, bool wait, int *resLocked);
    public:
        FreeBlock *findBlock(int nativeBin, BackendSync *sync, size_t size,
                             bool needAlignedBlock, bool alignedBin, int *numOfLockedBins);
        bool tryReleaseRegions(int binIdx, Backend *backend);
        void lockRemoveBlock(int binIdx, FreeBlock *fBlock);
        void addBlock(int binIdx, FreeBlock *fBlock, size_t blockSz, bool addToTail);
        bool tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail);
        int  getMinNonemptyBin(unsigned startBin) const;
        void verify();
        void reset();
        void reportStat(FILE *f);
    };

private:
    class AdvRegionsBins {
        BitMaskBins bins;
    public:
        void registerBin(int regBin) { bins.set(regBin, 1); }
        int getMinUsedBin(int start) const { return bins.getMinTrue(start); }
        void reset() { bins.reset(); }
    };
    // auxiliary class for atomically tracking the maximal request size
    class MaxRequestComparator {
        const Backend *backend;
    public:
        MaxRequestComparator(const Backend *be) : backend(be) {}
        inline bool operator()(size_t oldMaxReq, size_t requestSize) const;
    };
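
    /* Illustrative sketch, not part of the allocator: IndexedBins above pairs the bin
       array with a bitmask so that finding the smallest non-empty bin at or above a start
       index is a scan of the mask rather than of the bins themselves (the real class adds
       per-bin locking on top of this). A minimal single-threaded analogue, assuming at
       most 64 bins and a hypothetical FreeChunk type:

           #include <cstdint>

           struct FreeChunk { FreeChunk *next; };

           class SmallIndexedBins {
               static const int numBins = 64;
               std::uint64_t nonEmptyMask = 0;      // bit i set <=> bins[i] != nullptr
               FreeChunk *bins[numBins] = {};
           public:
               void push(int bin, FreeChunk *c) {
                   c->next = bins[bin];
                   bins[bin] = c;
                   nonEmptyMask |= std::uint64_t(1) << bin;
               }
               // Smallest non-empty bin with index >= start, or -1 if none.
               int minNonEmpty(int start) const {
                   if (start >= numBins) return -1;
                   std::uint64_t m = nonEmptyMask >> start;
                   for (int idx = start; m; m >>= 1, ++idx)
                       if (m & 1) return idx;
                   return -1;
               }
               FreeChunk *pop(int bin) {
                   FreeChunk *c = bins[bin];
                   if (c) {
                       bins[bin] = c->next;
                       if (!bins[bin])
                           nonEmptyMask &= ~(std::uint64_t(1) << bin);
                   }
                   return c;
               }
           };
    */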

#if CHECK_ALLOCATION_RANGE
    // Keep the min and max of all addresses requested from the OS;
    // used for checking memory possibly allocated by replaced allocators
    // and for debugging purposes. Valid only for the default memory pool.
    class UsedAddressRange {
        static const uintptr_t ADDRESS_UPPER_BOUND = UINTPTR_MAX;

        std::atomic<uintptr_t> leftBound,
                               rightBound;
        MallocMutex mutex;
    public:
        // rightBound is zero-initialized
        void init() { leftBound.store(ADDRESS_UPPER_BOUND, std::memory_order_relaxed); }
        void registerAlloc(uintptr_t left, uintptr_t right);
        void registerFree(uintptr_t left, uintptr_t right);
        // as only the left and right bounds are kept, we can return true
        // for a pointer not allocated by us, if more than a single region
        // was requested from the OS
        bool inRange(void *ptr) const {
            const uintptr_t p = (uintptr_t)ptr;
            return leftBound.load(std::memory_order_relaxed)<=p &&
                   p<=rightBound.load(std::memory_order_relaxed);
        }
    };
#else
    class UsedAddressRange {
    public:
        void init() { }
        void registerAlloc(uintptr_t, uintptr_t) {}
        void registerFree(uintptr_t, uintptr_t) {}
        bool inRange(void *) const { return true; }
    };
#endif

    ExtMemoryPool *extMemPool;
    // used to release every region on pool destruction
    MemRegionList regionList;

    CoalRequestQ coalescQ; // queue of coalescing requests
    BackendSync  bkndSync;
    // semaphore protecting the addition of more memory from the OS
    MemExtendingSema memExtendingSema;
    //size_t totalMemSize,
    //       memSoftLimit;
    std::atomic<size_t> totalMemSize;
    std::atomic<size_t> memSoftLimit;
    UsedAddressRange usedAddrRange;
    // to make the 1st allocation larger than requested, keep the bootstrapping status
    enum {
        bootsrapMemNotDone = 0,
        bootsrapMemInitializing,
        bootsrapMemDone
    };
    std::atomic<intptr_t> bootsrapMemStatus;
    MallocMutex bootsrapMemStatusMutex;
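
    /* Illustrative sketch, not part of the allocator: UsedAddressRange above tracks only
       the minimum and maximum addresses ever obtained from the OS, so inRange() may report
       false positives for gaps between regions but never false negatives. Growing the
       bounds can be done with lock-free atomic min/max updates; a standalone analogue
       (AddressBounds is an invented name) might be:

           #include <atomic>
           #include <cstdint>

           class AddressBounds {
               std::atomic<std::uintptr_t> left{UINTPTR_MAX};
               std::atomic<std::uintptr_t> right{0};
           public:
               void registerAlloc(std::uintptr_t lo, std::uintptr_t hi) {
                   std::uintptr_t cur = left.load(std::memory_order_relaxed);
                   while (lo < cur && !left.compare_exchange_weak(cur, lo)) {}   // atomic min
                   cur = right.load(std::memory_order_relaxed);
                   while (hi > cur && !right.compare_exchange_weak(cur, hi)) {}  // atomic max
               }
               bool mayContain(const void *p) const {
                   const std::uintptr_t v = (std::uintptr_t)p;
                   return left.load(std::memory_order_relaxed) <= v &&
                          v <= right.load(std::memory_order_relaxed);
               }
           };
    */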

    // Using the maximal observed requested size allows us to decrease
    // memory consumption for small requests and to reduce fragmentation
    // in workloads where small and large allocation requests are mixed.
    // TODO: decrease it as well, not only increase it
    std::atomic<size_t> maxRequestedSize;

    // registers bins related to regions allocated in advance
    AdvRegionsBins advRegBins;
    // Storage for split FreeBlocks
    IndexedBins freeLargeBlockBins,
                freeSlabAlignedBins;

    std::atomic<intptr_t> backendCleanCnt;
    // Our friends
    friend class BackendSync;

    /******************************** Backend methods ******************************/

    /*--------------------------- Coalescing functions ----------------------------*/
    void coalescAndPut(FreeBlock *fBlock, size_t blockSz, bool slabAligned);
    bool coalescAndPutList(FreeBlock *head, bool forceCoalescQDrop, bool reportBlocksProcessed);

    // Main coalescing operation
    FreeBlock *doCoalesc(FreeBlock *fBlock, MemRegion **memRegion);

    // Queue for conflicted blocks during coalescing
    bool scanCoalescQ(bool forceCoalescQDrop);
    intptr_t blocksInCoalescing() const { return coalescQ.blocksInFly(); }

    /*--------------------- FreeBlock backend accessors ---------------------------*/
    FreeBlock *genericGetBlock(int num, size_t size, bool slabAligned);
    void genericPutBlock(FreeBlock *fBlock, size_t blockSz, bool slabAligned);

    // Split the block and return the remaining parts to the backend if possible
    FreeBlock *splitBlock(FreeBlock *fBlock, int num, size_t size, bool isAligned, bool needAlignedBlock);

    void removeBlockFromBin(FreeBlock *fBlock);

    // TODO: combine with returnLargeObject
    void putLargeBlock(LargeMemoryBlock *lmb);

    /*------------------- Starting point for OS allocation ------------------------*/
    void requestBootstrapMem();
    FreeBlock *askMemFromOS(size_t totalReqSize, intptr_t startModifiedCnt,
                            int *lockedBinsThreshold, int numOfLockedBins,
                            bool *splittable, bool needSlabRegion);

    /*---------------------- Memory regions allocation ----------------------------*/
    FreeBlock *addNewRegion(size_t size, MemRegionType type, bool addToBin);
    void releaseRegion(MemRegion *region);

    // TODO: combine into one initMemoryRegion function
    FreeBlock *findBlockInRegion(MemRegion *region, size_t exactBlockSize);
    void startUseBlock(MemRegion *region, FreeBlock *fBlock, bool addToBin);

    /*------------------------- Raw memory accessors ------------------------------*/
    void *allocRawMem(size_t &size);
    bool freeRawMem(void *object, size_t size);

    /*------------------------------ Cleanup functions ----------------------------*/
    // Clean all memory from all caches (extMemPool hard cleanup)
    FreeBlock *releaseMemInCaches(intptr_t startModifiedCnt, int *lockedBinsThreshold, int numOfLockedBins);
    // Soft heap limit (regular cleanup, then maybe hard cleanup)
    void releaseCachesToLimit();

    /*---------------------------------- Utility ----------------------------------*/
    // TODO: move inside the IndexedBins class
    static int sizeToBin(size_t size) {
        if (size >= maxBinned_HugePage)
            return HUGE_BIN;
        else if (size < minBinnedSize)
            return NO_BIN;

        int bin = (size - minBinnedSize)/freeBinsStep;

        MALLOC_ASSERT(bin < HUGE_BIN, "Invalid size.");
        return bin;
    }
    static bool toAlignedBin(FreeBlock *block, size_t size) {
        return isAligned((char*)block + size, slabSize) && size >= slabSize;
    }
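
    /* Worked example for sizeToBin() above, assuming minBinnedSize is 8K and freeBinsStep
       is 8K (the value of LargeObjectCache::LargeBSProps::CacheStep on typical
       configurations):

           size =  4K  ->  NO_BIN   (below minBinnedSize, not binned here)
           size =  8K  ->  bin ( 8K - 8K) / 8K = 0
           size = 24K  ->  bin (24K - 8K) / 8K = 2
           size =  1M  ->  bin ( 1M - 8K) / 8K = 127
           size =  4M  ->  HUGE_BIN (>= maxBinned_HugePage)

       binToSize() below (statistics only) is the inverse for binned sizes: it returns
       bin*freeBinsStep + minBinnedSize, i.e. the smallest size that maps to that bin. */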

public:
    /*--------------------- Init, reset, destroy, verify --------------------------*/
    void init(ExtMemoryPool *extMemoryPool);
    bool destroy();

    void verify();
    void reset();
    bool clean(); // clean on cache cleanup

    /*------------------------- Slab block request --------------------------------*/
    BlockI *getSlabBlock(int num);
    void putSlabBlock(BlockI *block);

    /*-------------------------- Large object request -----------------------------*/
    LargeMemoryBlock *getLargeBlock(size_t size);
    // TODO: make consistent with getLargeBlock
    void returnLargeObject(LargeMemoryBlock *lmb);

    /*-------------------------- Backreference memory request ---------------------*/
    void *getBackRefSpace(size_t size, bool *rawMemUsed);
    void putBackRefSpace(void *b, size_t size, bool rawMemUsed);

    /*----------------------------- Remap object ----------------------------------*/
    void *remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment);

    /*---------------------------- Validation -------------------------------------*/
    bool inUserPool() const;
    bool ptrCanBeValid(void *ptr) const { return usedAddrRange.inRange(ptr); }

    /*-------------------------- Configuration API --------------------------------*/
    // Soft heap limit
    void setRecommendedMaxSize(size_t softLimit) {
        memSoftLimit = softLimit;
        releaseCachesToLimit();
    }

    /*------------------------------- Info ----------------------------------------*/
    size_t getMaxBinnedSize() const;

    /*-------------------------- Testing, statistics ------------------------------*/
#if __TBB_MALLOC_WHITEBOX_TEST
    size_t getTotalMemSize() const { return totalMemSize.load(std::memory_order_relaxed); }
#endif
#if __TBB_MALLOC_BACKEND_STAT
    void reportStat(FILE *f);
private:
    static size_t binToSize(int bin) {
        MALLOC_ASSERT(bin <= HUGE_BIN, "Invalid bin.");

        return bin*freeBinsStep + minBinnedSize;
    }
#endif
};

#endif // __TBB_backend_H
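
/* Illustrative usage sketch, not part of the allocator and not a public API: within the
   allocator the frontend pairs the Backend entry points roughly as follows (the `backend`
   pointer is hypothetical here; in reality the Backend lives inside an ExtMemoryPool and
   is set up via init()):

       // Slab path: a miss in the per-thread slab pool falls through to the backend.
       BlockI *slab = backend->getSlabBlock(Backend::numOfSlabAllocOnMiss);
       if (slab) {
           // ... carve the slab into small objects ...
           backend->putSlabBlock(slab);               // return it once it is empty again
       }

       // Large-object path: requests above the slab threshold bypass the slab machinery.
       LargeMemoryBlock *lmb = backend->getLargeBlock(2*1024*1024);
       if (lmb) {
           // ... hand the memory to the caller; later ...
           backend->returnLargeObject(lmb);
       }

       // Soft heap limit: ask the backend to release cached memory above this size.
       backend->setRecommendedMaxSize(256*1024*1024);
*/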