xref: /oneTBB/src/tbbmalloc/large_objects.h (revision c4a799df)
/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_tbbmalloc_internal_H
    #error tbbmalloc_internal.h must be included at this point
#endif

#ifndef __TBB_large_objects_H
#define __TBB_large_objects_H

//! The list of possible Cache Bin Aggregator operations.
/*  Declared here to avoid Solaris Studio* 12.2 "multiple definitions" error */
enum CacheBinOperationType {
    CBOP_INVALID = 0,
    CBOP_GET,
    CBOP_PUT_LIST,
    CBOP_CLEAN_TO_THRESHOLD,
    CBOP_CLEAN_ALL,
    CBOP_UPDATE_USED_SIZE
};

// The Cache Bin Aggregator operation status list.
// CBST_NOWAIT can be specified for non-blocking operations.
enum CacheBinOperationStatus {
    CBST_WAIT = 0,
    CBST_NOWAIT,
    CBST_DONE
};

/*
 * Bins that grow with an arithmetic step
 */
template<size_t MIN_SIZE, size_t MAX_SIZE>
struct LargeBinStructureProps {
public:
    static const size_t   MinSize = MIN_SIZE, MaxSize = MAX_SIZE;
    static const size_t   CacheStep = 8 * 1024;
    static const unsigned NumBins = (MaxSize - MinSize) / CacheStep;

    static size_t alignToBin(size_t size) {
        return alignUp(size, CacheStep);
    }

    static int sizeToIdx(size_t size) {
        MALLOC_ASSERT(MinSize <= size && size < MaxSize, ASSERT_TEXT);
        MALLOC_ASSERT(size % CacheStep == 0, ASSERT_TEXT);
        return (size - MinSize) / CacheStep;
    }
};
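
// Example (assuming the 8K..8M instantiation used further below): with
// MinSize = 8K, MaxSize = 8M and CacheStep = 8K, NumBins = (8M - 8K) / 8K = 1023;
// alignToBin(10000) rounds up to 16384, and sizeToIdx(16384) = (16384 - 8192) / 8192 = 1.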

/*
 * Bins that grow with a special geometric progression.
 */
template<size_t MIN_SIZE, size_t MAX_SIZE>
struct HugeBinStructureProps {

private:
    // Sizes grow with the following formula: Size = MinSize * (2 ^ (Index / StepFactor))
    // There are StepFactor bins (8 by default) between each pair of power-of-2 sizes
    static const int MaxSizeExp    = Log2<MAX_SIZE>::value;
    static const int MinSizeExp    = Log2<MIN_SIZE>::value;
    static const int StepFactor    = 8;
    static const int StepFactorExp = Log2<StepFactor>::value;

public:
    static const size_t   MinSize = MIN_SIZE, MaxSize = MAX_SIZE;
    static const unsigned NumBins = (MaxSizeExp - MinSizeExp) * StepFactor;

    static size_t alignToBin(size_t size) {
        MALLOC_ASSERT(size >= StepFactor, "Size must not be less than the StepFactor");
        size_t minorStepExp = BitScanRev(size) - StepFactorExp;
        return alignUp(size, 1ULL << minorStepExp);
    }

    // Sizes between the power-of-2 values are approximated to StepFactor bins.
    static int sizeToIdx(size_t size) {
        MALLOC_ASSERT(MinSize <= size && size <= MaxSize, ASSERT_TEXT);
        int sizeExp = (int)BitScanRev(size); // same as __TBB_Log2
        MALLOC_ASSERT(sizeExp >= 0, "A shift amount (sizeExp) must not be negative");
        size_t majorStepSize = 1ULL << sizeExp;
        int minorStepExp = sizeExp - StepFactorExp;
        MALLOC_ASSERT(minorStepExp >= 0, "A shift amount (minorStepExp) must not be negative");
        int minorIdx = (size - majorStepSize) >> minorStepExp;
        MALLOC_ASSERT(size == majorStepSize + ((size_t)minorIdx << minorStepExp),
            "Size is not aligned on the bin");
        return StepFactor * (sizeExp - MinSizeExp) + minorIdx;
    }
};
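
// Example (assuming the instantiation used below, MinSize = 8M, i.e. MinSizeExp = 23,
// and MaxSize = 1TB, i.e. MaxSizeExp = 40, giving NumBins = (40 - 23) * 8 = 136):
// for size = 12M, BitScanRev yields sizeExp = 23, so majorStepSize = 8M,
// minorStepExp = 23 - 3 = 20 (a 1M minor step) and minorIdx = (12M - 8M) >> 20 = 4;
// the resulting index is 8 * (23 - 23) + 4 = 4. Similarly, alignToBin(12M + 1)
// rounds up to the next 1M boundary, i.e. 13M.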

/*
 * Cache properties accessor
 *
 * TooLargeFactor -- when the cached size is treated as "too large" in comparison
 *                   to the size of the user data
 * OnMissFactor   -- if a cache miss occurred and the cache was cleaned,
 *                   ageThreshold is set to OnMissFactor * the difference
 *                   between the current time and the last time the cache was cleaned
 * LongWaitFactor -- used to detect rarely-used bins and forget their usage history
 */
template<typename StructureProps, int TOO_LARGE, int ON_MISS, int LONG_WAIT>
struct LargeObjectCacheProps : public StructureProps {
    static const int TooLargeFactor = TOO_LARGE, OnMissFactor = ON_MISS, LongWaitFactor = LONG_WAIT;
};
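
// Example (see the typedefs further below): the large-object cache instantiates
// this as LargeObjectCacheProps<LargeBSProps, 2, 2, 16>, i.e. the cached size is
// considered "too large" once it exceeds 2x the size of the objects in use.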

template<typename Props>
class LargeObjectCacheImpl {
private:

    // Current sizes of used and cached objects. Calculated while the bins are
    // traversed, and used for the isLOCTooLarge() check at the same time.
    class BinsSummary {
        size_t usedSz;
        size_t cachedSz;
    public:
        BinsSummary() : usedSz(0), cachedSz(0) {}
        // "too large" criterion
        bool isLOCTooLarge() const { return cachedSz > Props::TooLargeFactor * usedSz; }
        void update(size_t usedSize, size_t cachedSize) {
            usedSz += usedSize;
            cachedSz += cachedSize;
        }
        void reset() { usedSz = cachedSz = 0; }
    };
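
    // Example (assuming TooLargeFactor == 2, as in the large cache below): with
    // 100MB cached and 40MB in use, 100 > 2 * 40, so isLOCTooLarge() returns
    // true and the occurrence is counted via tooLargeLOC below.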

public:
    // The number of bins to cache large/huge objects.
    static const uint32_t numBins = Props::NumBins;

    typedef BitMaskMax<numBins> BinBitMask;

    // Doubly linked list of same-size cached blocks ordered by age (oldest on top)
    // TODO: do we really want the list to be doubly linked? A singly linked list
    // would reduce memory consumption and need fewer operations under the lock.
    // TODO: try to switch to 32-bit logical time to save space in CacheBin
    // and move bins to different cache lines.
    class CacheBin {
    private:
        LargeMemoryBlock* first;
        std::atomic<LargeMemoryBlock*> last;
        /* age of the oldest block in the list; equal to last->age, if last is defined;
            used to check the age quickly without acquiring the lock. */
        std::atomic<uintptr_t> oldest;
        /* currAge when something was evicted from the list because of its age,
         not because of a cache hit */
        uintptr_t         lastCleanedAge;
        /* Current threshold value for the blocks of a particular size.
         Set on a cache miss. */
        std::atomic<intptr_t> ageThreshold;

        /* total size of all objects corresponding to the bin and allocated by the user */
        std::atomic<size_t> usedSize;
        /* total size of all objects cached in the bin */
        std::atomic<size_t> cachedSize;
        /* mean time of presence of a block in the bin before successful reuse */
        std::atomic<intptr_t> meanHitRange;
        /* time of the last get() called for the bin */
        uintptr_t         lastGet;

        typename MallocAggregator<CacheBinOperation>::type aggregator;

        void ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime = true);

        /* should be placed in zero-initialized memory, ctor not needed. */
        CacheBin();

    public:
        void init() {
            memset(static_cast<void*>(this), 0, sizeof(CacheBin));
        }

        /* ---------- Cache accessors ---------- */
        void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx);
        LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx);

        /* ---------- Cleanup functions -------- */
        bool cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx);
        bool releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx);
        /* ------------------------------------- */

        void updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx);
        void decreaseThreshold() {
            intptr_t threshold = ageThreshold.load(std::memory_order_relaxed);
            if (threshold)
                ageThreshold.store((threshold + meanHitRange.load(std::memory_order_relaxed)) / 2, std::memory_order_relaxed);
        }
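        // Example: with ageThreshold == 1000 and meanHitRange == 200, a call to
        // decreaseThreshold() moves the threshold to (1000 + 200) / 2 == 600,
        // i.e. the threshold decays toward the observed mean hit range.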
        void updateBinsSummary(BinsSummary *binsSummary) const {
            binsSummary->update(usedSize.load(std::memory_order_relaxed), cachedSize.load(std::memory_order_relaxed));
        }
        size_t getSize() const { return cachedSize.load(std::memory_order_relaxed); }
        size_t getUsedSize() const { return usedSize.load(std::memory_order_relaxed); }
        size_t reportStat(int num, FILE *f);

        /* --------- Unsafe methods used with the aggregator ------- */
        void forgetOutdatedState(uintptr_t currTime);
        LargeMemoryBlock *putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask,
                int idx, int num, size_t hugeObjectThreshold);
        LargeMemoryBlock *get();
        LargeMemoryBlock *cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx);
        LargeMemoryBlock *cleanAll(BinBitMask *bitMask, int idx);
        void updateUsedSize(size_t size, BinBitMask *bitMask, int idx) {
            if (!usedSize.load(std::memory_order_relaxed)) bitMask->set(idx, true);
            usedSize.store(usedSize.load(std::memory_order_relaxed) + size, std::memory_order_relaxed);
            if (!usedSize.load(std::memory_order_relaxed) && !first) bitMask->set(idx, false);
        }
        void updateMeanHitRange( intptr_t hitRange ) {
            hitRange = hitRange >= 0 ? hitRange : 0;
            intptr_t mean = meanHitRange.load(std::memory_order_relaxed);
            mean = mean ? (mean + hitRange) / 2 : hitRange;
            meanHitRange.store(mean, std::memory_order_relaxed);
        }
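        // Example: starting from mean == 0, a hit with range 100 sets the mean
        // to 100; a subsequent hit with range 50 averages to (100 + 50) / 2 == 75.
        // This keeps a cheap running estimate without storing any history.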
        void updateAgeThreshold( uintptr_t currTime ) {
            if (lastCleanedAge)
                ageThreshold.store(Props::OnMissFactor * (currTime - lastCleanedAge), std::memory_order_relaxed);
        }
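        // Example (assuming OnMissFactor == 2, as in the large cache below): if
        // the bin was last cleaned at logical time 500 and a miss happens at
        // time 600, the new ageThreshold is 2 * (600 - 500) == 200 ticks.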
        void updateCachedSize(size_t size) {
            cachedSize.store(cachedSize.load(std::memory_order_relaxed) + size, std::memory_order_relaxed);
        }
        void setLastGet( uintptr_t newLastGet ) {
            lastGet = newLastGet;
        }
        /* -------------------------------------------------------- */
    };

    // Index into the huge bins used to speed up the regular cleanup search
    // when the "huge size threshold" setting is defined
    intptr_t     hugeSizeThresholdIdx;

private:
    // How many times the LOC was "too large"
    std::atomic<intptr_t> tooLargeLOC;
    // for fast finding of used bins and bins with non-zero usedSize;
    // indexed from the end, as we need the largest first
    BinBitMask   bitMask;
    // bins with lists of recently freed large blocks cached for reuse
    CacheBin bin[numBins];

public:
    /* ------------ CacheBin structure dependent stuff ------------ */
    static size_t alignToBin(size_t size) {
        return Props::alignToBin(size);
    }
    static int sizeToIdx(size_t size) {
        return Props::sizeToIdx(size);
    }

    /* --------- Main cache functions (put, get object) ------------ */
    void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *largeBlock);
    LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size);

    /* ------------------------ Cleanup ---------------------------- */
    bool regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currAge, bool doThreshDecr);
    bool cleanAll(ExtMemoryPool *extMemPool);

    /* -------------------------- Other ---------------------------- */
    void updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size);

    void reset();
    void reportStat(FILE *f);
#if __TBB_MALLOC_WHITEBOX_TEST
    size_t getLOCSize() const;
    size_t getUsedSize() const;
#endif
};

class LargeObjectCache {
private:
    // Large bins [minLargeSize, maxLargeSize)
    // Huge bins [maxLargeSize, maxHugeSize)
    static const size_t minLargeSize = 8 * 1024,
                        maxLargeSize = 8 * 1024 * 1024,
                        // Cache memory up to 1TB (or 2GB for a 32-bit arch), but sieve objects above the special threshold
                        maxHugeSize = tbb::detail::select_size_t_constant<2147483648U, 1099511627776ULL>::value;

public:
    // Upper bound threshold for the caching size. Above that size all objects sieve through the cache.
    // By default 64MB; the previous value was 129MB (needed by some Intel(R) Math Kernel Library (Intel(R) MKL) benchmarks)
    static const size_t defaultMaxHugeSize = 64UL * 1024UL * 1024UL;
    // Above this size a large object is interpreted as huge and does not participate in the regular cleanup.
    // Can be changed during program execution.
    size_t hugeSizeThreshold;

private:
    // Large objects cache properties
    typedef LargeBinStructureProps<minLargeSize, maxLargeSize> LargeBSProps;
    typedef LargeObjectCacheProps<LargeBSProps, 2, 2, 16> LargeCacheTypeProps;

    // Huge objects cache properties
    typedef HugeBinStructureProps<maxLargeSize, maxHugeSize> HugeBSProps;
    typedef LargeObjectCacheProps<HugeBSProps, 1, 1, 4> HugeCacheTypeProps;

    // Cache implementation type with properties
    typedef LargeObjectCacheImpl< LargeCacheTypeProps > LargeCacheType;
    typedef LargeObjectCacheImpl< HugeCacheTypeProps > HugeCacheType;

    // The beginning of largeCache is used more actively and is smaller than hugeCache,
    // so put hugeCache first to prevent false sharing
    // with LargeObjectCache's predecessor
    HugeCacheType hugeCache;
    LargeCacheType largeCache;

    /* Logical time, incremented on each put/get operation.
       To prevent starvation between pools, it is kept separately for each pool.
       Overflow is OK, as we only want the difference between
       its current value and some recent one.

       Both malloc and free should increment the logical time, as otherwise
       multiple cached blocks would have the same age,
       and the accuracy of the predictors would suffer.
    */
    std::atomic<uintptr_t> cacheCurrTime;

    // Memory pool that owns this LargeObjectCache.
    // Strict 1:1 relation, never changed
    ExtMemoryPool *extMemPool;

    // Returns an artificial bin index;
    // used only during sorting and never saved
    static int sizeToIdx(size_t size);
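    // A sketch of the likely mapping (the definition lives in the .cpp): sizes
    // below maxLargeSize map to a large-cache index, while huge sizes continue
    // the numbering after the large bins, so indices from both caches stay
    // comparable during sorting.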

    // Our friends
    friend class Backend;

public:
    void init(ExtMemoryPool *memPool);

    // Item accessors
    void put(LargeMemoryBlock *largeBlock);
    void putList(LargeMemoryBlock *head);
    LargeMemoryBlock *get(size_t size);

    void updateCacheState(DecreaseOrIncrease op, size_t size);
    bool isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime);

    // Cleanup operations
    bool doCleanup(uintptr_t currTime, bool doThreshDecr);
    bool decreasingCleanup();
    bool regularCleanup();
    bool cleanAll();
    void reset();

    void reportStat(FILE *f);
#if __TBB_MALLOC_WHITEBOX_TEST
    size_t getLOCSize() const;
    size_t getUsedSize() const;
#endif

    // The cache deals with exact-fit sizes, so each size needs to be aligned
    // to a specific bin when an object is put into the cache
    static size_t alignToBin(size_t size);

    void setHugeSizeThreshold(size_t value);

    // Check if we should cache or sieve this size
    bool sizeInCacheRange(size_t size);

    uintptr_t getCurrTimeRange(uintptr_t range);
    void registerRealloc(size_t oldSize, size_t newSize);
};
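
/* Illustrative usage (a hypothetical caller; the member name loCache and the
   helper allocateFromBackend below are assumptions, not part of this header):

       size_t sz = LargeObjectCache::alignToBin(neededSize);
       LargeMemoryBlock *lmb = pool->loCache.get(sz);   // try a cached block first
       if (!lmb)
           lmb = allocateFromBackend(pool, sz);         // miss: go to the backend
       ...
       pool->loCache.put(lmb);                          // on free: cache for reuse,
                                                        // or sieve if out of range
*/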

#endif // __TBB_large_objects_H