1 /* 2 Copyright (c) 2005-2023 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 #ifndef __TBB_tbbmalloc_internal_H 18 #error tbbmalloc_internal.h must be included at this point 19 #endif 20 21 #ifndef __TBB_large_objects_H 22 #define __TBB_large_objects_H 23 24 //! The list of possible Cache Bin Aggregator operations. 25 /* Declared here to avoid Solaris Studio* 12.2 "multiple definitions" error */ 26 enum CacheBinOperationType { 27 CBOP_INVALID = 0, 28 CBOP_GET, 29 CBOP_PUT_LIST, 30 CBOP_CLEAN_TO_THRESHOLD, 31 CBOP_CLEAN_ALL, 32 CBOP_UPDATE_USED_SIZE 33 }; 34 35 // The Cache Bin Aggregator operation status list. 36 // CBST_NOWAIT can be specified for non-blocking operations. 37 enum CacheBinOperationStatus { 38 CBST_WAIT = 0, 39 CBST_NOWAIT, 40 CBST_DONE 41 }; 42 43 /* 44 * Bins that grow with arithmetic step 45 */ 46 template<size_t MIN_SIZE, size_t MAX_SIZE> 47 struct LargeBinStructureProps { 48 public: 49 static const size_t MinSize = MIN_SIZE, MaxSize = MAX_SIZE; 50 static const size_t CacheStep = 8 * 1024; 51 static const unsigned NumBins = (MaxSize - MinSize) / CacheStep; 52 53 static size_t alignToBin(size_t size) { 54 return alignUp(size, CacheStep); 55 } 56 57 static int sizeToIdx(size_t size) { 58 MALLOC_ASSERT(MinSize <= size && size < MaxSize, ASSERT_TEXT); 59 MALLOC_ASSERT(size % CacheStep == 0, ASSERT_TEXT); 60 return (size - MinSize) / CacheStep; 61 } 62 }; 63 64 /* 65 * Bins that grow with special geometric progression. 66 */ 67 template<size_t MIN_SIZE, size_t MAX_SIZE> 68 struct HugeBinStructureProps { 69 70 private: 71 // Sizes grow with the following formula: Size = MinSize * (2 ^ (Index / StepFactor)) 72 // There are StepFactor bins (8 be default) between each power of 2 bin 73 static const int MaxSizeExp = Log2<MAX_SIZE>::value; 74 static const int MinSizeExp = Log2<MIN_SIZE>::value; 75 static const int StepFactor = 8; 76 static const int StepFactorExp = Log2<StepFactor>::value; 77 78 public: 79 static const size_t MinSize = MIN_SIZE, MaxSize = MAX_SIZE; 80 static const unsigned NumBins = (MaxSizeExp - MinSizeExp) * StepFactor; 81 82 static size_t alignToBin(size_t size) { 83 MALLOC_ASSERT(size >= StepFactor, "Size must not be less than the StepFactor"); 84 size_t minorStepExp = BitScanRev(size) - StepFactorExp; 85 return alignUp(size, 1ULL << minorStepExp); 86 } 87 88 // Sizes between the power of 2 values are aproximated to StepFactor. 89 static int sizeToIdx(size_t size) { 90 MALLOC_ASSERT(MinSize <= size && size <= MaxSize, ASSERT_TEXT); 91 int sizeExp = (int)BitScanRev(size); // same as __TBB_Log2 92 size_t majorStepSize = 1ULL << sizeExp; 93 int minorStepExp = sizeExp - StepFactorExp; 94 int minorIdx = (size - majorStepSize) >> minorStepExp; 95 MALLOC_ASSERT(size == majorStepSize + ((size_t)minorIdx << minorStepExp), 96 "Size is not aligned on the bin"); 97 return StepFactor * (sizeExp - MinSizeExp) + minorIdx; 98 } 99 }; 100 101 /* 102 * Cache properties accessor 103 * 104 * TooLargeFactor -- when cache size treated "too large" in comparison to user data size 105 * OnMissFactor -- If cache miss occurred and cache was cleaned, 106 * set ageThreshold to OnMissFactor * the difference 107 * between current time and last time cache was cleaned. 108 * LongWaitFactor -- to detect rarely-used bins and forget about their usage history 109 */ 110 template<typename StructureProps, int TOO_LARGE, int ON_MISS, int LONG_WAIT> 111 struct LargeObjectCacheProps : public StructureProps { 112 static const int TooLargeFactor = TOO_LARGE, OnMissFactor = ON_MISS, LongWaitFactor = LONG_WAIT; 113 }; 114 115 template<typename Props> 116 class LargeObjectCacheImpl { 117 private: 118 119 // Current sizes of used and cached objects. It's calculated while we are 120 // traversing bins, and used for isLOCTooLarge() check at the same time. 121 class BinsSummary { 122 size_t usedSz; 123 size_t cachedSz; 124 public: 125 BinsSummary() : usedSz(0), cachedSz(0) {} 126 // "too large" criteria 127 bool isLOCTooLarge() const { return cachedSz > Props::TooLargeFactor * usedSz; } 128 void update(size_t usedSize, size_t cachedSize) { 129 usedSz += usedSize; 130 cachedSz += cachedSize; 131 } 132 void reset() { usedSz = cachedSz = 0; } 133 }; 134 135 public: 136 // The number of bins to cache large/huge objects. 137 static const uint32_t numBins = Props::NumBins; 138 139 typedef BitMaskMax<numBins> BinBitMask; 140 141 // 2-linked list of same-size cached blocks ordered by age (oldest on top) 142 // TODO: are we really want the list to be 2-linked? This allows us 143 // reduce memory consumption and do less operations under lock. 144 // TODO: try to switch to 32-bit logical time to save space in CacheBin 145 // and move bins to different cache lines. 146 class CacheBin { 147 private: 148 LargeMemoryBlock* first; 149 std::atomic<LargeMemoryBlock*> last; 150 /* age of an oldest block in the list; equal to last->age, if last defined, 151 used for quick checking it without acquiring the lock. */ 152 std::atomic<uintptr_t> oldest; 153 /* currAge when something was excluded out of list because of the age, 154 not because of cache hit */ 155 uintptr_t lastCleanedAge; 156 /* Current threshold value for the blocks of a particular size. 157 Set on cache miss. */ 158 std::atomic<intptr_t> ageThreshold; 159 160 /* total size of all objects corresponding to the bin and allocated by user */ 161 std::atomic<size_t> usedSize; 162 /* total size of all objects cached in the bin */ 163 std::atomic<size_t> cachedSize; 164 /* mean time of presence of block in the bin before successful reuse */ 165 std::atomic<intptr_t> meanHitRange; 166 /* time of last get called for the bin */ 167 uintptr_t lastGet; 168 169 typename MallocAggregator<CacheBinOperation>::type aggregator; 170 171 void ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime = true); 172 173 /* should be placed in zero-initialized memory, ctor not needed. */ 174 CacheBin(); 175 176 public: 177 void init() { 178 memset(static_cast<void*>(this), 0, sizeof(CacheBin)); 179 } 180 181 /* ---------- Cache accessors ---------- */ 182 void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx); 183 LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx); 184 185 /* ---------- Cleanup functions -------- */ 186 bool cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx); 187 bool releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx); 188 /* ------------------------------------- */ 189 190 void updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx); 191 void decreaseThreshold() { 192 intptr_t threshold = ageThreshold.load(std::memory_order_relaxed); 193 if (threshold) 194 ageThreshold.store((threshold + meanHitRange.load(std::memory_order_relaxed)) / 2, std::memory_order_relaxed); 195 } 196 void updateBinsSummary(BinsSummary *binsSummary) const { 197 binsSummary->update(usedSize.load(std::memory_order_relaxed), cachedSize.load(std::memory_order_relaxed)); 198 } 199 size_t getSize() const { return cachedSize.load(std::memory_order_relaxed); } 200 size_t getUsedSize() const { return usedSize.load(std::memory_order_relaxed); } 201 size_t reportStat(int num, FILE *f); 202 203 /* --------- Unsafe methods used with the aggregator ------- */ 204 void forgetOutdatedState(uintptr_t currTime); 205 LargeMemoryBlock *putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask, 206 int idx, int num, size_t hugeObjectThreshold); 207 LargeMemoryBlock *get(); 208 LargeMemoryBlock *cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx); 209 LargeMemoryBlock *cleanAll(BinBitMask *bitMask, int idx); 210 void updateUsedSize(size_t size, BinBitMask *bitMask, int idx) { 211 if (!usedSize.load(std::memory_order_relaxed)) bitMask->set(idx, true); 212 usedSize.store(usedSize.load(std::memory_order_relaxed) + size, std::memory_order_relaxed); 213 if (!usedSize.load(std::memory_order_relaxed) && !first) bitMask->set(idx, false); 214 } 215 void updateMeanHitRange( intptr_t hitRange ) { 216 hitRange = hitRange >= 0 ? hitRange : 0; 217 intptr_t mean = meanHitRange.load(std::memory_order_relaxed); 218 mean = mean ? (mean + hitRange) / 2 : hitRange; 219 meanHitRange.store(mean, std::memory_order_relaxed); 220 } 221 void updateAgeThreshold( uintptr_t currTime ) { 222 if (lastCleanedAge) 223 ageThreshold.store(Props::OnMissFactor * (currTime - lastCleanedAge), std::memory_order_relaxed); 224 } 225 void updateCachedSize(size_t size) { 226 cachedSize.store(cachedSize.load(std::memory_order_relaxed) + size, std::memory_order_relaxed); 227 } 228 void setLastGet( uintptr_t newLastGet ) { 229 lastGet = newLastGet; 230 } 231 /* -------------------------------------------------------- */ 232 }; 233 234 // Huge bins index for fast regular cleanup searching in case of 235 // the "huge size threshold" setting defined 236 intptr_t hugeSizeThresholdIdx; 237 238 private: 239 // How many times LOC was "too large" 240 std::atomic<intptr_t> tooLargeLOC; 241 // for fast finding of used bins and bins with non-zero usedSize; 242 // indexed from the end, as we need largest 1st 243 BinBitMask bitMask; 244 // bins with lists of recently freed large blocks cached for re-use 245 CacheBin bin[numBins]; 246 247 public: 248 /* ------------ CacheBin structure dependent stuff ------------ */ 249 static size_t alignToBin(size_t size) { 250 return Props::alignToBin(size); 251 } 252 static int sizeToIdx(size_t size) { 253 return Props::sizeToIdx(size); 254 } 255 256 /* --------- Main cache functions (put, get object) ------------ */ 257 void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *largeBlock); 258 LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size); 259 260 /* ------------------------ Cleanup ---------------------------- */ 261 bool regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currAge, bool doThreshDecr); 262 bool cleanAll(ExtMemoryPool *extMemPool); 263 264 /* -------------------------- Other ---------------------------- */ 265 void updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size); 266 267 void reset(); 268 void reportStat(FILE *f); 269 #if __TBB_MALLOC_WHITEBOX_TEST 270 size_t getLOCSize() const; 271 size_t getUsedSize() const; 272 #endif 273 }; 274 275 class LargeObjectCache { 276 private: 277 // Large bins [minLargeSize, maxLargeSize) 278 // Huge bins [maxLargeSize, maxHugeSize) 279 static const size_t minLargeSize = 8 * 1024, 280 maxLargeSize = 8 * 1024 * 1024, 281 // Cache memory up to 1TB (or 2GB for 32-bit arch), but sieve objects from the special threshold 282 maxHugeSize = tbb::detail::select_size_t_constant<2147483648U, 1099511627776ULL>::value; 283 284 public: 285 // Upper bound threshold for caching size. After that size all objects sieve through cache 286 // By default - 64MB, previous value was 129MB (needed by some Intel(R) Math Kernel Library (Intel(R) MKL) benchmarks) 287 static const size_t defaultMaxHugeSize = 64UL * 1024UL * 1024UL; 288 // After that size large object interpreted as huge and does not participate in regular cleanup. 289 // Can be changed during the program execution. 290 size_t hugeSizeThreshold; 291 292 private: 293 // Large objects cache properties 294 typedef LargeBinStructureProps<minLargeSize, maxLargeSize> LargeBSProps; 295 typedef LargeObjectCacheProps<LargeBSProps, 2, 2, 16> LargeCacheTypeProps; 296 297 // Huge objects cache properties 298 typedef HugeBinStructureProps<maxLargeSize, maxHugeSize> HugeBSProps; 299 typedef LargeObjectCacheProps<HugeBSProps, 1, 1, 4> HugeCacheTypeProps; 300 301 // Cache implementation type with properties 302 typedef LargeObjectCacheImpl< LargeCacheTypeProps > LargeCacheType; 303 typedef LargeObjectCacheImpl< HugeCacheTypeProps > HugeCacheType; 304 305 // Beginning of largeCache is more actively used and smaller than hugeCache, 306 // so put hugeCache first to prevent false sharing 307 // with LargeObjectCache's predecessor 308 HugeCacheType hugeCache; 309 LargeCacheType largeCache; 310 311 /* logical time, incremented on each put/get operation 312 To prevent starvation between pools, keep separately for each pool. 313 Overflow is OK, as we only want difference between 314 its current value and some recent. 315 316 Both malloc and free should increment logical time, as in 317 a different case multiple cached blocks would have same age, 318 and accuracy of predictors suffers. 319 */ 320 std::atomic<uintptr_t> cacheCurrTime; 321 322 // Memory pool that owns this LargeObjectCache. 323 // strict 1:1 relation, never changed 324 ExtMemoryPool *extMemPool; 325 326 // Returns artificial bin index, 327 // it's used only during sorting and never saved 328 static int sizeToIdx(size_t size); 329 330 // Our friends 331 friend class Backend; 332 333 public: 334 void init(ExtMemoryPool *memPool); 335 336 // Item accessors 337 void put(LargeMemoryBlock *largeBlock); 338 void putList(LargeMemoryBlock *head); 339 LargeMemoryBlock *get(size_t size); 340 341 void updateCacheState(DecreaseOrIncrease op, size_t size); 342 bool isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime); 343 344 // Cleanup operations 345 bool doCleanup(uintptr_t currTime, bool doThreshDecr); 346 bool decreasingCleanup(); 347 bool regularCleanup(); 348 bool cleanAll(); 349 void reset(); 350 351 void reportStat(FILE *f); 352 #if __TBB_MALLOC_WHITEBOX_TEST 353 size_t getLOCSize() const; 354 size_t getUsedSize() const; 355 #endif 356 357 // Cache deals with exact-fit sizes, so need to align each size 358 // to the specific bin when put object to cache 359 static size_t alignToBin(size_t size); 360 361 void setHugeSizeThreshold(size_t value); 362 363 // Check if we should cache or sieve this size 364 bool sizeInCacheRange(size_t size); 365 366 uintptr_t getCurrTimeRange(uintptr_t range); 367 void registerRealloc(size_t oldSize, size_t newSize); 368 }; 369 370 #endif // __TBB_large_objects_H 371 372