/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_tbbmalloc_internal_H
    #error tbbmalloc_internal.h must be included at this point
#endif

#ifndef __TBB_large_objects_H
#define __TBB_large_objects_H

//! The list of possible Cache Bin Aggregator operations.
/*  Declared here to avoid Solaris Studio* 12.2 "multiple definitions" error */
enum CacheBinOperationType {
    CBOP_INVALID = 0,
    CBOP_GET,
    CBOP_PUT_LIST,
    CBOP_CLEAN_TO_THRESHOLD,
    CBOP_CLEAN_ALL,
    CBOP_UPDATE_USED_SIZE
};

// The Cache Bin Aggregator operation status list.
// CBST_NOWAIT can be specified for non-blocking operations.
enum CacheBinOperationStatus {
    CBST_WAIT = 0,
    CBST_NOWAIT,
    CBST_DONE
};

/*
 * Bins that grow with an arithmetic step
 */
template<size_t MIN_SIZE, size_t MAX_SIZE>
struct LargeBinStructureProps {
public:
    static const size_t   MinSize = MIN_SIZE, MaxSize = MAX_SIZE;
    static const size_t   CacheStep = 8 * 1024;
    static const unsigned NumBins = (MaxSize - MinSize) / CacheStep;

    static size_t alignToBin(size_t size) {
        return alignUp(size, CacheStep);
    }

    static int sizeToIdx(size_t size) {
        MALLOC_ASSERT(MinSize <= size && size < MaxSize, ASSERT_TEXT);
        MALLOC_ASSERT(size % CacheStep == 0, ASSERT_TEXT);
        return (size - MinSize) / CacheStep;
    }
};

/*
 * Bins that grow with a special geometric progression.
 */
template<size_t MIN_SIZE, size_t MAX_SIZE>
struct HugeBinStructureProps {

private:
    // Sizes grow with the following formula: Size = MinSize * (2 ^ (Index / StepFactor)).
    // There are StepFactor bins (8 by default) between each pair of adjacent power-of-2 bins.
    static const int MaxSizeExp    = Log2<MAX_SIZE>::value;
    static const int MinSizeExp    = Log2<MIN_SIZE>::value;
    static const int StepFactor    = 8;
    static const int StepFactorExp = Log2<StepFactor>::value;

public:
    static const size_t   MinSize = MIN_SIZE, MaxSize = MAX_SIZE;
    static const unsigned NumBins = (MaxSizeExp - MinSizeExp) * StepFactor;

    static size_t alignToBin(size_t size) {
        MALLOC_ASSERT(size >= StepFactor, "Size must not be less than the StepFactor");
        size_t minorStepExp = BitScanRev(size) - StepFactorExp;
        return alignUp(size, 1ULL << minorStepExp);
    }

    // Sizes between the power-of-2 values are approximated to StepFactor bins.
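    // Illustrative walkthrough (an assumed example, using the HugeBSProps
    // instantiation below where MinSize = 8MB, so MinSizeExp = 23,
    // StepFactor = 8, StepFactorExp = 3):
    //   size = 12MB  ->  sizeExp = BitScanRev(size) = 23, majorStepSize = 8MB,
    //   minorStepExp = 23 - 3 = 20, minorIdx = (12MB - 8MB) >> 20 = 4,
    //   so sizeToIdx returns 8 * (23 - 23) + 4 = 4.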
    static int sizeToIdx(size_t size) {
        MALLOC_ASSERT(MinSize <= size && size <= MaxSize, ASSERT_TEXT);

        int sizeExp = (int)BitScanRev(size); // same as __TBB_Log2
        MALLOC_ASSERT(sizeExp >= 0, "A shift amount (sizeExp) must not be negative");
        size_t majorStepSize = 1ULL << sizeExp;
        int minorStepExp = sizeExp - StepFactorExp;
        MALLOC_ASSERT(minorStepExp >= 0, "A shift amount (minorStepExp) must not be negative");
        int minorIdx = (size - majorStepSize) >> minorStepExp;
        MALLOC_ASSERT(size == majorStepSize + ((size_t)minorIdx << minorStepExp),
            "Size is not aligned on the bin");
        return StepFactor * (sizeExp - MinSizeExp) + minorIdx;
    }
};

/*
 * Cache properties accessor
 *
 * TooLargeFactor -- when the cache size is treated as "too large" compared to the user data size
 * OnMissFactor   -- if a cache miss occurred and the cache was cleaned,
 *                   set ageThreshold to OnMissFactor * the difference
 *                   between the current time and the last time the cache was cleaned
 * LongWaitFactor -- to detect rarely-used bins and forget their usage history
 */
template<typename StructureProps, int TOO_LARGE, int ON_MISS, int LONG_WAIT>
struct LargeObjectCacheProps : public StructureProps {
    static const int TooLargeFactor = TOO_LARGE, OnMissFactor = ON_MISS, LongWaitFactor = LONG_WAIT;
};

template<typename Props>
class LargeObjectCacheImpl {
private:

    // Current sizes of used and cached objects. They are calculated while
    // traversing bins and are used for the isLOCTooLarge() check at the same time.
    class BinsSummary {
        size_t usedSz;
        size_t cachedSz;
    public:
        BinsSummary() : usedSz(0), cachedSz(0) {}
        // "too large" criterion
        bool isLOCTooLarge() const { return cachedSz > Props::TooLargeFactor * usedSz; }
        void update(size_t usedSize, size_t cachedSize) {
            usedSz += usedSize;
            cachedSz += cachedSize;
        }
        void reset() { usedSz = cachedSz = 0; }
    };

public:
    // The number of bins used to cache large/huge objects.
    static const uint32_t numBins = Props::NumBins;

    typedef BitMaskMax<numBins> BinBitMask;

    // Doubly-linked list of same-size cached blocks ordered by age (oldest on top).
    // TODO: do we really want the list to be doubly-linked? A singly-linked list
    // would reduce memory consumption and require fewer operations under the lock.
    // TODO: try to switch to a 32-bit logical time to save space in CacheBin
    // and move bins to different cache lines.
    class CacheBin {
    private:
        LargeMemoryBlock*              first;
        std::atomic<LargeMemoryBlock*> last;
        /* Age of the oldest block in the list; equal to last->age, if last is defined;
           used for checking the age quickly, without acquiring the lock. */
        std::atomic<uintptr_t> oldest;
        /* currAge when something was excluded from the list because of its age,
           not because of a cache hit */
        uintptr_t lastCleanedAge;
        /* Current threshold value for the blocks of a particular size.
           Set on a cache miss. */
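        /* See updateAgeThreshold() below: on a miss the threshold is set to
           OnMissFactor * (currTime - lastCleanedAge); decreaseThreshold()
           later averages it with meanHitRange during regular cleanup. */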
        std::atomic<intptr_t> ageThreshold;

        /* Total size of all objects corresponding to the bin and allocated by the user */
        std::atomic<size_t> usedSize;
        /* Total size of all objects cached in the bin */
        std::atomic<size_t> cachedSize;
        /* Mean time of presence of a block in the bin before its successful reuse */
        std::atomic<intptr_t> meanHitRange;
        /* Time of the last get() call for the bin */
        uintptr_t lastGet;

        typename MallocAggregator<CacheBinOperation>::type aggregator;

        void ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool,
                              BinBitMask *bitMask, int idx, bool longLifeTime = true);

        /* Should be placed in zero-initialized memory, ctor not needed. */
        CacheBin();

    public:
        void init() {
            memset(static_cast<void*>(this), 0, sizeof(CacheBin));
        }

        /* ---------- Cache accessors ---------- */
        void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx);
        LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx);

        /* ---------- Cleanup functions -------- */
        bool cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx);
        bool releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx);
        /* ------------------------------------- */

        void updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx);
        void decreaseThreshold() {
            intptr_t threshold = ageThreshold.load(std::memory_order_relaxed);
            if (threshold)
                ageThreshold.store((threshold + meanHitRange.load(std::memory_order_relaxed)) / 2,
                                   std::memory_order_relaxed);
        }
        void updateBinsSummary(BinsSummary *binsSummary) const {
            binsSummary->update(usedSize.load(std::memory_order_relaxed),
                                cachedSize.load(std::memory_order_relaxed));
        }
        size_t getSize() const { return cachedSize.load(std::memory_order_relaxed); }
        size_t getUsedSize() const { return usedSize.load(std::memory_order_relaxed); }
        size_t reportStat(int num, FILE *f);

        /* --------- Unsafe methods used with the aggregator ------- */
        void forgetOutdatedState(uintptr_t currTime);
        LargeMemoryBlock *putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask,
                                  int idx, int num, size_t hugeObjectThreshold);
        LargeMemoryBlock *get();
        LargeMemoryBlock *cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx);
        LargeMemoryBlock *cleanAll(BinBitMask *bitMask, int idx);
        void updateUsedSize(size_t size, BinBitMask *bitMask, int idx) {
            if (!usedSize.load(std::memory_order_relaxed)) bitMask->set(idx, true);
            usedSize.store(usedSize.load(std::memory_order_relaxed) + size, std::memory_order_relaxed);
            if (!usedSize.load(std::memory_order_relaxed) && !first) bitMask->set(idx, false);
        }
        void updateMeanHitRange( intptr_t hitRange ) {
            hitRange = hitRange >= 0 ? hitRange : 0;
            intptr_t mean = meanHitRange.load(std::memory_order_relaxed);
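            // Running average: each new sample is averaged with the previous
            // mean (or taken as-is for the first sample), so recent reuse
            // intervals quickly dominate the estimate.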
            mean = mean ? (mean + hitRange) / 2 : hitRange;
            meanHitRange.store(mean, std::memory_order_relaxed);
        }
        void updateAgeThreshold( uintptr_t currTime ) {
            if (lastCleanedAge)
                ageThreshold.store(Props::OnMissFactor * (currTime - lastCleanedAge),
                                   std::memory_order_relaxed);
        }
        void updateCachedSize(size_t size) {
            cachedSize.store(cachedSize.load(std::memory_order_relaxed) + size, std::memory_order_relaxed);
        }
        void setLastGet( uintptr_t newLastGet ) {
            lastGet = newLastGet;
        }
        /* -------------------------------------------------------- */
    };

    // Huge bins index for fast regular-cleanup search in case
    // the "huge size threshold" setting is defined
    intptr_t hugeSizeThresholdIdx;

private:
    // How many times the LOC was "too large"
    std::atomic<intptr_t> tooLargeLOC;
    // For fast finding of used bins and bins with non-zero usedSize;
    // indexed from the end, as we need the largest bins first
    BinBitMask bitMask;
    // Bins with lists of recently freed large blocks cached for reuse
    CacheBin bin[numBins];

public:
    /* ------------ CacheBin structure dependent stuff ------------ */
    static size_t alignToBin(size_t size) {
        return Props::alignToBin(size);
    }
    static int sizeToIdx(size_t size) {
        return Props::sizeToIdx(size);
    }

    /* --------- Main cache functions (put, get object) ------------ */
    void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *largeBlock);
    LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size);

    /* ------------------------ Cleanup ---------------------------- */
    bool regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currAge, bool doThreshDecr);
    bool cleanAll(ExtMemoryPool *extMemPool);

    /* -------------------------- Other ---------------------------- */
    void updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size);

    void reset();
    void reportStat(FILE *f);
#if __TBB_MALLOC_WHITEBOX_TEST
    size_t getLOCSize() const;
    size_t getUsedSize() const;
#endif
};

class LargeObjectCache {
private:
    // Large bins: [minLargeSize, maxLargeSize)
    // Huge bins:  [maxLargeSize, maxHugeSize)
    static const size_t minLargeSize = 8 * 1024,
                        maxLargeSize = 8 * 1024 * 1024,
                        // Cache memory of up to 1TB (or 2GB for a 32-bit arch),
                        // but sieve objects starting from the special threshold
                        maxHugeSize = tbb::detail::select_size_t_constant<2147483648U, 1099511627776ULL>::value;

public:
    // Upper bound threshold for the caching size. Above that size, all objects sieve through the cache.
    // 64MB by default; the previous value was 129MB (needed by some
    // Intel(R) Math Kernel Library (Intel(R) MKL) benchmarks)
    static const size_t defaultMaxHugeSize = 64UL * 1024UL * 1024UL;
    // Above that size a large object is interpreted as huge and does not participate in regular cleanup.
    // Can be changed during program execution.
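    // E.g., with the default 64MB threshold, a 100MB object sieves through
    // (is not cached), while a 32MB one is still eligible for caching.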
    size_t hugeSizeThreshold;

private:
    // Large objects cache properties
    typedef LargeBinStructureProps<minLargeSize, maxLargeSize> LargeBSProps;
    typedef LargeObjectCacheProps<LargeBSProps, 2, 2, 16> LargeCacheTypeProps;

    // Huge objects cache properties
    typedef HugeBinStructureProps<maxLargeSize, maxHugeSize> HugeBSProps;
    typedef LargeObjectCacheProps<HugeBSProps, 1, 1, 4> HugeCacheTypeProps;

    // Cache implementation types with properties
    typedef LargeObjectCacheImpl< LargeCacheTypeProps > LargeCacheType;
    typedef LargeObjectCacheImpl< HugeCacheTypeProps > HugeCacheType;

    // The beginning of largeCache is used more actively and is smaller than hugeCache,
    // so hugeCache is put first to prevent false sharing
    // with LargeObjectCache's predecessor.
    HugeCacheType hugeCache;
    LargeCacheType largeCache;

    /* Logical time, incremented on each put/get operation.
       To prevent starvation between pools, it is kept separately for each pool.
       Overflow is OK, as we only want the difference between
       its current value and some recent one.

       Both malloc and free should increment the logical time, as otherwise
       multiple cached blocks would have the same age,
       and the accuracy of the predictors would suffer.
    */
    std::atomic<uintptr_t> cacheCurrTime;

    // Memory pool that owns this LargeObjectCache.
    // Strict 1:1 relation, never changed
    ExtMemoryPool *extMemPool;

    // Returns an artificial bin index;
    // it's used only during sorting and is never saved
    static int sizeToIdx(size_t size);

    // Our friends
    friend class Backend;

public:
    void init(ExtMemoryPool *memPool);

    // Item accessors
    void put(LargeMemoryBlock *largeBlock);
    void putList(LargeMemoryBlock *head);
    LargeMemoryBlock *get(size_t size);

    void updateCacheState(DecreaseOrIncrease op, size_t size);
    bool isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime);

    // Cleanup operations
    bool doCleanup(uintptr_t currTime, bool doThreshDecr);
    bool decreasingCleanup();
    bool regularCleanup();
    bool cleanAll();
    void reset();

    void reportStat(FILE *f);
#if __TBB_MALLOC_WHITEBOX_TEST
    size_t getLOCSize() const;
    size_t getUsedSize() const;
#endif

    // The cache deals with exact-fit sizes, so each size needs to be aligned
    // to a specific bin when an object is put into the cache
    static size_t alignToBin(size_t size);

    void setHugeSizeThreshold(size_t value);

    // Check whether we should cache or sieve this size
    bool sizeInCacheRange(size_t size);

    uintptr_t getCurrTimeRange(uintptr_t range);
    void registerRealloc(size_t oldSize, size_t newSize);
};

#endif // __TBB_large_objects_H
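/* A minimal standalone sketch (illustrative only, not part of the allocator):
   it mirrors the HugeBinStructureProps indexing math in plain C++ so the bin
   layout can be printed and sanity-checked. log2floor() is a stand-in for the
   internal Log2/BitScanRev helpers, and the constants match the HugeBSProps
   instantiation above (MinSize = 8MB, StepFactor = 8).

       #include <cstdio>
       #include <cstddef>

       // Index of the highest set bit, i.e. floor(log2(v)); v must be non-zero.
       static int log2floor(std::size_t v) { int e = -1; while (v) { v >>= 1; ++e; } return e; }

       int main() {
           const int minSizeExp = 23, stepFactor = 8, stepFactorExp = 3;
           // Walk the bin-aligned sizes between 8MB and 16MB in 1MB steps.
           for (std::size_t size = 8u << 20; size <= (16u << 20); size += 1u << 20) {
               int sizeExp = log2floor(size);
               std::size_t majorStepSize = std::size_t(1) << sizeExp;
               int minorStepExp = sizeExp - stepFactorExp;
               int minorIdx = int((size - majorStepSize) >> minorStepExp);
               std::printf("%zu MB -> idx %d\n", size >> 20,
                           stepFactor * (sizeExp - minSizeExp) + minorIdx);
           }
           return 0;  // prints indices 0..7 for 8..15MB, then 8 for 16MB
       }
*/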