/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_tbbmalloc_internal_H
    #error tbbmalloc_internal.h must be included at this point
#endif

#ifndef __TBB_large_objects_H
#define __TBB_large_objects_H

//! The list of possible Cache Bin Aggregator operations.
/* Declared here to avoid Solaris Studio* 12.2 "multiple definitions" error */
enum CacheBinOperationType {
    CBOP_INVALID = 0,
    CBOP_GET,
    CBOP_PUT_LIST,
    CBOP_CLEAN_TO_THRESHOLD,
    CBOP_CLEAN_ALL,
    CBOP_UPDATE_USED_SIZE
};

// The Cache Bin Aggregator operation status list.
// CBST_NOWAIT can be specified for non-blocking operations.
enum CacheBinOperationStatus {
    CBST_WAIT = 0,
    CBST_NOWAIT,
    CBST_DONE
};

/*
 * Bins that grow with an arithmetic step
 */
template<size_t MIN_SIZE, size_t MAX_SIZE>
struct LargeBinStructureProps {
public:
    static const size_t MinSize = MIN_SIZE, MaxSize = MAX_SIZE;
    static const size_t CacheStep = 8 * 1024;
    static const unsigned NumBins = (MaxSize - MinSize) / CacheStep;

    static size_t alignToBin(size_t size) {
        return alignUp(size, CacheStep);
    }

    static int sizeToIdx(size_t size) {
        MALLOC_ASSERT(MinSize <= size && size < MaxSize, ASSERT_TEXT);
        MALLOC_ASSERT(size % CacheStep == 0, ASSERT_TEXT);
        return (size - MinSize) / CacheStep;
    }
};

/*
 * Bins that grow with a special geometric progression.
 */
template<size_t MIN_SIZE, size_t MAX_SIZE>
struct HugeBinStructureProps {

private:
    // Sizes grow with the following formula: Size = MinSize * (2 ^ (Index / StepFactor))
    // There are StepFactor bins (8 by default) between each pair of adjacent power-of-2 bins
    static const int MaxSizeExp = Log2<MAX_SIZE>::value;
    static const int MinSizeExp = Log2<MIN_SIZE>::value;
    static const int StepFactor = 8;
    static const int StepFactorExp = Log2<StepFactor>::value;

public:
    static const size_t MinSize = MIN_SIZE, MaxSize = MAX_SIZE;
    static const unsigned NumBins = (MaxSizeExp - MinSizeExp) * StepFactor;

    static size_t alignToBin(size_t size) {
        size_t minorStepExp = BitScanRev(size) - StepFactorExp;
        return alignUp(size, 1ULL << minorStepExp);
    }

    // Sizes between the power-of-2 values are approximated to StepFactor bins.
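    // A minimal worked example (not part of the allocator), assuming the
    // instantiation used by LargeObjectCache below: MinSize = 8MB (2^23),
    // StepFactor = 8, so bin boundaries between 8MB and 16MB advance in 1MB steps:
    //
    //     alignToBin(9*1024*1024 + 1)  -> 10MB   (aligned up to 1ULL << (23 - 3))
    //     sizeToIdx (8*1024*1024)      -> 0
    //     sizeToIdx (12*1024*1024)     -> 4      (8 * (23 - 23) + 4MB / 1MB)
    //     sizeToIdx (16*1024*1024)     -> 8      (first bin of the next power of 2)
    //
    // Any power-of-2 MIN_SIZE/MAX_SIZE bounds behave the same way.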
    static int sizeToIdx(size_t size) {
        MALLOC_ASSERT(MinSize <= size && size <= MaxSize, ASSERT_TEXT);
        int sizeExp = (int)BitScanRev(size); // same as __TBB_Log2
        size_t majorStepSize = 1ULL << sizeExp;
        int minorStepExp = sizeExp - StepFactorExp;
        int minorIdx = (size - majorStepSize) >> minorStepExp;
        MALLOC_ASSERT(size == majorStepSize + ((size_t)minorIdx << minorStepExp),
            "Size is not aligned on the bin");
        return StepFactor * (sizeExp - MinSizeExp) + minorIdx;
    }
};

/*
 * Cache properties accessor
 *
 * TooLargeFactor -- when the cache size is treated as "too large" in comparison to the user data size
 * OnMissFactor -- if a cache miss occurred and the cache was cleaned,
 *                 set ageThreshold to OnMissFactor * the difference
 *                 between the current time and the last time the cache was cleaned.
 * LongWaitFactor -- used to detect rarely used bins and forget their usage history
 */
template<typename StructureProps, int TOO_LARGE, int ON_MISS, int LONG_WAIT>
struct LargeObjectCacheProps : public StructureProps {
    static const int TooLargeFactor = TOO_LARGE, OnMissFactor = ON_MISS, LongWaitFactor = LONG_WAIT;
};

template<typename Props>
class LargeObjectCacheImpl {
private:
    // Current sizes of used and cached objects. They are calculated while
    // traversing the bins and used for the isLOCTooLarge() check at the same time.
    class BinsSummary {
        size_t usedSz;
        size_t cachedSz;
    public:
        BinsSummary() : usedSz(0), cachedSz(0) {}
        // "too large" criterion
        bool isLOCTooLarge() const { return cachedSz > Props::TooLargeFactor * usedSz; }
        void update(size_t usedSize, size_t cachedSize) {
            usedSz += usedSize;
            cachedSz += cachedSize;
        }
        void reset() { usedSz = cachedSz = 0; }
    };
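
    // A minimal worked example (not part of the allocator), assuming
    // TooLargeFactor == 2 (the value the large-object cache below is
    // instantiated with):
    //
    //     BinsSummary s;
    //     s.update(/*usedSize*/ 40u << 20, /*cachedSize*/ 100u << 20);
    //     s.isLOCTooLarge();   // true: 100MB cached > 2 * 40MB in use
    //
    // Such a traversal result counts the whole cache as "too large"
    // (see the tooLargeLOC counter below).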

public:
    // The number of bins to cache large/huge objects.
    static const uint32_t numBins = Props::NumBins;

    typedef BitMaskMax<numBins> BinBitMask;

    // Doubly linked list of same-size cached blocks ordered by age (oldest on top)
    // TODO: do we really want the list to be doubly linked? Making it singly
    // linked would reduce memory consumption and require fewer operations under the lock.
    // TODO: try to switch to 32-bit logical time to save space in CacheBin
    // and move bins to different cache lines.
    class CacheBin {
    private:
        LargeMemoryBlock* first;
        std::atomic<LargeMemoryBlock*> last;
        /* Age of the oldest block in the list; equal to last->age, if last is defined.
           Used for quick checks without acquiring the lock. */
        std::atomic<uintptr_t> oldest;
        /* currAge when something was excluded from the list because of its age,
           not because of a cache hit */
        uintptr_t lastCleanedAge;
        /* Current threshold value for the blocks of a particular size.
           Set on cache miss. */
        std::atomic<intptr_t> ageThreshold;

        /* total size of all objects corresponding to the bin and allocated by the user */
        std::atomic<size_t> usedSize;
        /* total size of all objects cached in the bin */
        std::atomic<size_t> cachedSize;
        /* mean time a block stays in the bin before successful reuse */
        std::atomic<intptr_t> meanHitRange;
        /* time of the last get() call for the bin */
        uintptr_t lastGet;

        typename MallocAggregator<CacheBinOperation>::type aggregator;

        void ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime = true);

        /* The bin should be placed in zero-initialized memory, so a ctor is not needed. */
        CacheBin();

    public:
        void init() {
            memset(this, 0, sizeof(CacheBin));
        }

        /* ---------- Cache accessors ---------- */
        void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx);
        LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx);

        /* ---------- Cleanup functions -------- */
        bool cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx);
        bool releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx);
        /* ------------------------------------- */

        void updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx);
        void decreaseThreshold() {
            intptr_t threshold = ageThreshold.load(std::memory_order_relaxed);
            if (threshold)
                ageThreshold.store((threshold + meanHitRange.load(std::memory_order_relaxed)) / 2, std::memory_order_relaxed);
        }
        void updateBinsSummary(BinsSummary *binsSummary) const {
            binsSummary->update(usedSize.load(std::memory_order_relaxed), cachedSize.load(std::memory_order_relaxed));
        }
        size_t getSize() const { return cachedSize.load(std::memory_order_relaxed); }
        size_t getUsedSize() const { return usedSize.load(std::memory_order_relaxed); }
        size_t reportStat(int num, FILE *f);

        /* --------- Unsafe methods used with the aggregator ------- */
        void forgetOutdatedState(uintptr_t currTime);
        LargeMemoryBlock *putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask,
                                  int idx, int num, size_t hugeObjectThreshold);
        LargeMemoryBlock *get();
        LargeMemoryBlock *cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx);
        LargeMemoryBlock *cleanAll(BinBitMask *bitMask, int idx);
        void updateUsedSize(size_t size, BinBitMask *bitMask, int idx) {
            if (!usedSize.load(std::memory_order_relaxed)) bitMask->set(idx, true);
            usedSize.store(usedSize.load(std::memory_order_relaxed) + size, std::memory_order_relaxed);
            if (!usedSize.load(std::memory_order_relaxed) && !first) bitMask->set(idx, false);
        }
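
        // A minimal worked example (not part of the allocator) of how the age
        // predictors evolve, using arbitrary logical-time units:
        //
        //     updateMeanHitRange(30);   // meanHitRange: 0  -> 30   (first hit)
        //     updateMeanHitRange(10);   // meanHitRange: 30 -> (30 + 10) / 2 = 20
        //     updateAgeThreshold(t);    // ageThreshold  = OnMissFactor * (t - lastCleanedAge),
        //                               //   applied only once lastCleanedAge is set
        //     decreaseThreshold();      // ageThreshold  = (ageThreshold + meanHitRange) / 2
        //
        // Hits pull the threshold toward the observed reuse time, while misses
        // raise it in proportion to the time since the last cleanup.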
        void updateMeanHitRange( intptr_t hitRange ) {
            hitRange = hitRange >= 0 ? hitRange : 0;
            intptr_t mean = meanHitRange.load(std::memory_order_relaxed);
            mean = mean ? (mean + hitRange) / 2 : hitRange;
            meanHitRange.store(mean, std::memory_order_relaxed);
        }
        void updateAgeThreshold( uintptr_t currTime ) {
            if (lastCleanedAge)
                ageThreshold.store(Props::OnMissFactor * (currTime - lastCleanedAge), std::memory_order_relaxed);
        }
        void updateCachedSize(size_t size) {
            cachedSize.store(cachedSize.load(std::memory_order_relaxed) + size, std::memory_order_relaxed);
        }
        void setLastGet( uintptr_t newLastGet ) {
            lastGet = newLastGet;
        }
        /* -------------------------------------------------------- */
    };

    // Bin index corresponding to the huge size threshold; used for fast regular-cleanup
    // search when the "huge size threshold" setting is defined
    intptr_t hugeSizeThresholdIdx;

private:
    // How many times the LOC was "too large"
    std::atomic<intptr_t> tooLargeLOC;
    // For fast finding of used bins and bins with non-zero usedSize;
    // indexed from the end, as we need the largest first
    BinBitMask bitMask;
    // Bins with lists of recently freed large blocks cached for reuse
    CacheBin bin[numBins];

public:
    /* ------------ CacheBin structure dependent stuff ------------ */
    static size_t alignToBin(size_t size) {
        return Props::alignToBin(size);
    }
    static int sizeToIdx(size_t size) {
        return Props::sizeToIdx(size);
    }

    /* --------- Main cache functions (put, get object) ------------ */
    void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *largeBlock);
    LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size);

    /* ------------------------ Cleanup ---------------------------- */
    bool regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currAge, bool doThreshDecr);
    bool cleanAll(ExtMemoryPool *extMemPool);

    /* -------------------------- Other ---------------------------- */
    void updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size);

    void reset();
    void reportStat(FILE *f);
#if __TBB_MALLOC_WHITEBOX_TEST
    size_t getLOCSize() const;
    size_t getUsedSize() const;
#endif
};

class LargeObjectCache {
private:
    // Large bins [minLargeSize, maxLargeSize)
    // Huge bins  [maxLargeSize, maxHugeSize)
    static const size_t minLargeSize = 8 * 1024,
                        maxLargeSize = 8 * 1024 * 1024,
                        // Cache memory up to 1TB (or 2GB for 32-bit arch),
                        // but sieve objects above the special threshold
                        maxHugeSize = tbb::detail::select_size_t_constant<2147483648U, 1099511627776ULL>::value;

public:
    // Upper bound threshold for the cached object size. Above that size, all objects sieve through the cache.
    // By default 64MB; the previous value was 129MB (needed by some Intel(R) Math Kernel Library (Intel(R) MKL) benchmarks)
    static const size_t defaultMaxHugeSize = 64UL * 1024UL * 1024UL;
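
    // A rough sketch (not normative) of how request sizes map onto the two
    // cache implementations, using the constants defined in this class:
    //
    //     [ 8KB, 8MB )  ->  large bins: arithmetic step of 8KB          (LargeBinStructureProps)
    //     [ 8MB, 1TB )  ->  huge bins:  8 geometric bins per power of 2 (HugeBinStructureProps)
    //     >= hugeSizeThreshold (64MB by default)  ->  sieved through the cache, not retained
    //
    // On 32-bit architectures the huge range ends at 2GB instead of 1TB.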

    // Above this size a large object is treated as huge and does not participate in regular cleanup.
    // Can be changed during program execution.
    size_t hugeSizeThreshold;

private:
    // Large objects cache properties
    typedef LargeBinStructureProps<minLargeSize, maxLargeSize> LargeBSProps;
    typedef LargeObjectCacheProps<LargeBSProps, 2, 2, 16> LargeCacheTypeProps;

    // Huge objects cache properties
    typedef HugeBinStructureProps<maxLargeSize, maxHugeSize> HugeBSProps;
    typedef LargeObjectCacheProps<HugeBSProps, 1, 1, 4> HugeCacheTypeProps;

    // Cache implementation type with properties
    typedef LargeObjectCacheImpl< LargeCacheTypeProps > LargeCacheType;
    typedef LargeObjectCacheImpl< HugeCacheTypeProps > HugeCacheType;

    // The beginning of largeCache is more actively used and smaller than hugeCache,
    // so put hugeCache first to prevent false sharing
    // with LargeObjectCache's predecessor
    HugeCacheType hugeCache;
    LargeCacheType largeCache;

    /* Logical time, incremented on each put/get operation.
       To prevent starvation between pools, it is kept separately for each pool.
       Overflow is OK, as we only want the difference between
       its current value and some recent one.

       Both malloc and free should increment the logical time, as otherwise
       multiple cached blocks would have the same age,
       and the accuracy of the predictors would suffer.
    */
    std::atomic<uintptr_t> cacheCurrTime;

    // Memory pool that owns this LargeObjectCache.
    // Strict 1:1 relation, never changed
    ExtMemoryPool *extMemPool;

    // Returns an artificial bin index;
    // it is used only during sorting and is never saved
    static int sizeToIdx(size_t size);

    // Our friends
    friend class Backend;

public:
    void init(ExtMemoryPool *memPool);

    // Item accessors
    void put(LargeMemoryBlock *largeBlock);
    void putList(LargeMemoryBlock *head);
    LargeMemoryBlock *get(size_t size);

    void updateCacheState(DecreaseOrIncrease op, size_t size);
    bool isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime);

    // Cleanup operations
    bool doCleanup(uintptr_t currTime, bool doThreshDecr);
    bool decreasingCleanup();
    bool regularCleanup();
    bool cleanAll();
    void reset();

    void reportStat(FILE *f);
#if __TBB_MALLOC_WHITEBOX_TEST
    size_t getLOCSize() const;
    size_t getUsedSize() const;
#endif

    // The cache deals with exact-fit sizes, so each size needs to be aligned
    // to the specific bin when an object is put to the cache
    static size_t alignToBin(size_t size);

    void setHugeSizeThreshold(size_t value);

    // Check if we should cache or sieve this size
    bool sizeInCacheRange(size_t size);

    uintptr_t getCurrTimeRange(uintptr_t range);
    void registerRealloc(size_t oldSize, size_t newSize);
};

#endif // __TBB_large_objects_H
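
// A minimal usage sketch (not part of this header): a plausible call flow for
// the memory pool that owns a LargeObjectCache. The exact call sites live in
// the tbbmalloc sources, so the surrounding wiring here is an assumption.
//
//     LargeObjectCache loc;
//     loc.init(&extMemPool);                    // strict 1:1 with the owning pool
//     loc.setHugeSizeThreshold(64*1024*1024);   // larger objects sieve through
//
//     // Allocation path: try the cache before asking the backend.
//     size_t binned = LargeObjectCache::alignToBin(requestedSize);
//     LargeMemoryBlock *lmb = loc.get(binned);  // assumed to return NULL on a miss
//
//     // Deallocation path: return the block to the cache for possible reuse.
//     loc.put(lmb);
//
//     // Periodic housekeeping driven by the logical time counter.
//     loc.doCleanup(currTime, /*doThreshDecr*/ false);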