/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_tbbmalloc_internal_H
    #error tbbmalloc_internal.h must be included at this point
#endif

#ifndef __TBB_large_objects_H
#define __TBB_large_objects_H

//! The list of possible Cache Bin Aggregator operations.
/*  Declared here to avoid Solaris Studio* 12.2 "multiple definitions" error */
enum CacheBinOperationType {
    CBOP_INVALID = 0,
    CBOP_GET,
    CBOP_PUT_LIST,
    CBOP_CLEAN_TO_THRESHOLD,
    CBOP_CLEAN_ALL,
    CBOP_UPDATE_USED_SIZE
};

// The Cache Bin Aggregator operation status list.
// CBST_NOWAIT can be specified for non-blocking operations.
enum CacheBinOperationStatus {
    CBST_WAIT = 0,
    CBST_NOWAIT,
    CBST_DONE
};
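
/* How these enums are used (a hedged sketch; the actual implementation lives
   in the .cpp file): a thread packs its request into an operation record and
   hands it to the per-bin aggregator instead of taking a lock; one thread then
   executes the whole pending batch. Roughly, with field and function names
   here being illustrative assumptions:

       CacheBinOperation op;
       op.type   = CBOP_GET;       // which operation to perform
       op.status = CBST_WAIT;      // blocking: wait until the result is ready
       aggregator.execute(&op);    // hypothetical aggregator entry point
       // on return, op holds the result; ops issued with CBST_NOWAIT
       // (e.g. CBOP_UPDATE_USED_SIZE) may return before being executed
*/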

/*
 * Bins that grow with an arithmetic step
 */
template<size_t MIN_SIZE, size_t MAX_SIZE>
struct LargeBinStructureProps {
public:
    static const size_t   MinSize = MIN_SIZE, MaxSize = MAX_SIZE;
    static const size_t   CacheStep = 8 * 1024;
    static const unsigned NumBins = (MaxSize - MinSize) / CacheStep;

    static size_t alignToBin(size_t size) {
        return alignUp(size, CacheStep);
    }

    static int sizeToIdx(size_t size) {
        MALLOC_ASSERT(MinSize <= size && size < MaxSize, ASSERT_TEXT);
        MALLOC_ASSERT(size % CacheStep == 0, ASSERT_TEXT);
        return (size - MinSize) / CacheStep;
    }
};
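
/* Worked example (illustrative): with the instantiation used further below,
   MIN_SIZE = 8KB and MAX_SIZE = 8MB, so CacheStep = 8KB and
   NumBins = (8MB - 8KB) / 8KB = 1023.
       alignToBin(20000)    -> alignUp(20000, 8192) = 24576 (3 * 8KB)
       sizeToIdx(24 * 1024) -> (24KB - 8KB) / 8KB   = 2
   i.e. each bin covers exactly one 8KB-aligned size. */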

/*
 * Bins that grow with a special geometric progression.
 */
template<size_t MIN_SIZE, size_t MAX_SIZE>
struct HugeBinStructureProps {

private:
    // Sizes grow with the following formula: Size = MinSize * (2 ^ (Index / StepFactor))
    // There are StepFactor bins (8 by default) between each pair of adjacent power-of-2 bins
    static const int MaxSizeExp    = Log2<MAX_SIZE>::value;
    static const int MinSizeExp    = Log2<MIN_SIZE>::value;
    static const int StepFactor    = 8;
    static const int StepFactorExp = Log2<StepFactor>::value;

public:
    static const size_t   MinSize = MIN_SIZE, MaxSize = MAX_SIZE;
    static const unsigned NumBins = (MaxSizeExp - MinSizeExp) * StepFactor;

    static size_t alignToBin(size_t size) {
        size_t minorStepExp = BitScanRev(size) - StepFactorExp;
        return alignUp(size, 1ULL << minorStepExp);
    }

    // Sizes between powers of 2 are approximated to one of the StepFactor steps.
    static int sizeToIdx(size_t size) {
        MALLOC_ASSERT(MinSize <= size && size <= MaxSize, ASSERT_TEXT);
        int sizeExp = (int)BitScanRev(size); // same as __TBB_Log2
        size_t majorStepSize = 1ULL << sizeExp;
        int minorStepExp = sizeExp - StepFactorExp;
        int minorIdx = (size - majorStepSize) >> minorStepExp;
        MALLOC_ASSERT(size == majorStepSize + ((size_t)minorIdx << minorStepExp),
            "Size is not aligned on the bin");
        return StepFactor * (sizeExp - MinSizeExp) + minorIdx;
    }
};
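
/* Worked example (illustrative): with the instantiation used further below,
   MIN_SIZE = 8MB (2^23) and StepFactor = 8, so bins between 8MB and 16MB are
   spaced 1MB apart (minorStepExp = 23 - 3 = 20). For size = 12MB:
       sizeExp       = BitScanRev(12MB) = 23
       majorStepSize = 2^23 = 8MB
       minorIdx      = (12MB - 8MB) >> 20 = 4
       index         = 8 * (23 - 23) + 4 = 4
   alignToBin(9000000) rounds up to the next 1MB boundary: 9437184 (9MB). */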

/*
 * Cache properties accessor
 *
 * TooLargeFactor -- the cache size is treated as "too large" when it exceeds
 *                   TooLargeFactor times the user data size
 * OnMissFactor   -- if a cache miss occurred and the cache was cleaned,
 *                   set ageThreshold to OnMissFactor * the difference
 *                   between the current time and the last time the cache was cleaned
 * LongWaitFactor -- used to detect rarely used bins and forget their usage history
 */
template<typename StructureProps, int TOO_LARGE, int ON_MISS, int LONG_WAIT>
struct LargeObjectCacheProps : public StructureProps {
    static const int TooLargeFactor = TOO_LARGE, OnMissFactor = ON_MISS, LongWaitFactor = LONG_WAIT;
};

template<typename Props>
class LargeObjectCacheImpl {
private:

    // Current sizes of used and cached objects. They are accumulated while
    // traversing the bins and used for the isLOCTooLarge() check at the same time.
    class BinsSummary {
        size_t usedSz;
        size_t cachedSz;
    public:
        BinsSummary() : usedSz(0), cachedSz(0) {}
        // "too large" criteria
        bool isLOCTooLarge() const { return cachedSz > Props::TooLargeFactor * usedSz; }
        void update(size_t usedSize, size_t cachedSize) {
            usedSz += usedSize;
            cachedSz += cachedSize;
        }
        void reset() { usedSz = cachedSz = 0; }
    };
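
    /* Illustrative numbers (assuming TooLargeFactor == 2, as the large cache
       is instantiated below): with usedSz = 10MB and cachedSz = 25MB,
       25MB > 2 * 10MB, so isLOCTooLarge() returns true and the cleanup logic
       may react, e.g. by decreasing bin age thresholds. */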

public:
    // The number of bins to cache large/huge objects.
    static const uint32_t numBins = Props::NumBins;

    typedef BitMaskMax<numBins> BinBitMask;

    // Doubly linked list of same-size cached blocks ordered by age (oldest on top)
    // TODO: do we really want the list to be doubly linked? Making it singly linked
    // would let us reduce memory consumption and do fewer operations under the lock.
    // TODO: try to switch to 32-bit logical time to save space in CacheBin
    // and move bins to different cache lines.
    class CacheBin {
    private:
        LargeMemoryBlock* first;
        std::atomic<LargeMemoryBlock*> last;
        /* age of the oldest block in the list; equal to last->age, if last is defined;
            used to check the age quickly without acquiring the lock. */
        std::atomic<uintptr_t> oldest;
        /* currAge when something was removed from the list because of its age,
         not because of a cache hit */
        uintptr_t         lastCleanedAge;
        /* Current threshold value for the blocks of a particular size.
         Set on cache miss. */
        std::atomic<intptr_t> ageThreshold;

        /* total size of all objects corresponding to the bin and allocated by the user */
        std::atomic<size_t> usedSize;
        /* total size of all objects cached in the bin */
        std::atomic<size_t> cachedSize;
        /* mean time a block stays in the bin before successful reuse */
        std::atomic<intptr_t> meanHitRange;
        /* time of the last get() call for the bin */
        uintptr_t         lastGet;

        typename MallocAggregator<CacheBinOperation>::type aggregator;

        void ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime = true);

        /* should be placed in zero-initialized memory; a ctor is not needed. */
        CacheBin();

    public:
        void init() {
            memset(this, 0, sizeof(CacheBin));
        }

        /* ---------- Cache accessors ---------- */
        void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx);
        LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx);

        /* ---------- Cleanup functions -------- */
        bool cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx);
        bool releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx);
        /* ------------------------------------- */

        void updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx);
        void decreaseThreshold() {
            intptr_t threshold = ageThreshold.load(std::memory_order_relaxed);
            if (threshold)
                ageThreshold.store((threshold + meanHitRange.load(std::memory_order_relaxed)) / 2, std::memory_order_relaxed);
        }
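
        /* Illustrative arithmetic: with ageThreshold == 1000 and
           meanHitRange == 200, decreaseThreshold() stores (1000 + 200) / 2 = 600;
           repeated calls converge towards meanHitRange, so blocks are kept
           no longer than they tend to be reused. */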
        void updateBinsSummary(BinsSummary *binsSummary) const {
            binsSummary->update(usedSize.load(std::memory_order_relaxed), cachedSize.load(std::memory_order_relaxed));
        }
        size_t getSize() const { return cachedSize.load(std::memory_order_relaxed); }
        size_t getUsedSize() const { return usedSize.load(std::memory_order_relaxed); }
        size_t reportStat(int num, FILE *f);

        /* --------- Unsafe methods used with the aggregator ------- */
        void forgetOutdatedState(uintptr_t currTime);
        LargeMemoryBlock *putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask,
                int idx, int num, size_t hugeObjectThreshold);
        LargeMemoryBlock *get();
        LargeMemoryBlock *cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx);
        LargeMemoryBlock *cleanAll(BinBitMask *bitMask, int idx);
        void updateUsedSize(size_t size, BinBitMask *bitMask, int idx) {
            if (!usedSize.load(std::memory_order_relaxed)) bitMask->set(idx, true);
            usedSize.store(usedSize.load(std::memory_order_relaxed) + size, std::memory_order_relaxed);
            if (!usedSize.load(std::memory_order_relaxed) && !first) bitMask->set(idx, false);
        }
        void updateMeanHitRange( intptr_t hitRange ) {
            hitRange = hitRange >= 0 ? hitRange : 0;
            intptr_t mean = meanHitRange.load(std::memory_order_relaxed);
            mean = mean ? (mean + hitRange) / 2 : hitRange;
            meanHitRange.store(mean, std::memory_order_relaxed);
        }
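
        /* Illustrative: the first sample stores hitRange itself; afterwards each
           call halves the distance to the new sample, i.e. a crude moving average:
           mean = 0, samples 100 then 300 -> mean becomes 100, then (100 + 300) / 2 = 200. */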
        void updateAgeThreshold( uintptr_t currTime ) {
            if (lastCleanedAge)
                ageThreshold.store(Props::OnMissFactor * (currTime - lastCleanedAge), std::memory_order_relaxed);
        }
        void updateCachedSize(size_t size) {
            cachedSize.store(cachedSize.load(std::memory_order_relaxed) + size, std::memory_order_relaxed);
        }
        void setLastGet( uintptr_t newLastGet ) {
            lastGet = newLastGet;
        }
        /* -------------------------------------------------------- */
    };

    // Huge-bin index used for fast regular-cleanup searching
    // when the "huge size threshold" setting is defined
    intptr_t     hugeSizeThresholdIdx;

private:
    // How many times the LOC was "too large"
    std::atomic<intptr_t> tooLargeLOC;
    // for fast finding of used bins and bins with non-zero usedSize;
    // indexed from the end, as we need the largest first
    BinBitMask   bitMask;
    // bins with lists of recently freed large blocks cached for reuse
    CacheBin bin[numBins];

public:
    /* ------------ CacheBin structure dependent stuff ------------ */
    static size_t alignToBin(size_t size) {
        return Props::alignToBin(size);
    }
    static int sizeToIdx(size_t size) {
        return Props::sizeToIdx(size);
    }

    /* --------- Main cache functions (put, get object) ------------ */
    void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *largeBlock);
    LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size);

    /* ------------------------ Cleanup ---------------------------- */
    bool regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currAge, bool doThreshDecr);
    bool cleanAll(ExtMemoryPool *extMemPool);

    /* -------------------------- Other ---------------------------- */
    void updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size);

    void reset();
    void reportStat(FILE *f);
#if __TBB_MALLOC_WHITEBOX_TEST
    size_t getLOCSize() const;
    size_t getUsedSize() const;
#endif
};

class LargeObjectCache {
private:
    // Large bins [minLargeSize, maxLargeSize)
    // Huge bins [maxLargeSize, maxHugeSize)
    static const size_t minLargeSize = 8 * 1024,
                        maxLargeSize = 8 * 1024 * 1024,
                        // Cache memory of up to 1TB (or 2GB on a 32-bit arch), but sieve objects above the special threshold
                        maxHugeSize = tbb::detail::select_size_t_constant<2147483648U, 1099511627776ULL>::value;
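
    /* Resulting bin layout (illustrative, for a 64-bit build):
       - large cache: [8KB, 8MB) in 8KB steps -> (8MB - 8KB) / 8KB = 1023 bins
       - huge cache:  [8MB, 1TB] with 8 bins per power of two
                      -> (40 - 23) * 8 = 136 bins */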

public:
    // Upper bound threshold for the cached size. Above this size, all objects sieve through the cache.
    // The default is 64MB; the previous value was 129MB (needed by some Intel(R) Math Kernel Library (Intel(R) MKL) benchmarks)
    static const size_t defaultMaxHugeSize = 64UL * 1024UL * 1024UL;
    // Above this size, a large object is interpreted as huge and does not participate in regular cleanup.
    // Can be changed during program execution.
    size_t hugeSizeThreshold;

private:
    // Large objects cache properties
    typedef LargeBinStructureProps<minLargeSize, maxLargeSize> LargeBSProps;
    typedef LargeObjectCacheProps<LargeBSProps, 2, 2, 16> LargeCacheTypeProps;

    // Huge objects cache properties
    typedef HugeBinStructureProps<maxLargeSize, maxHugeSize> HugeBSProps;
    typedef LargeObjectCacheProps<HugeBSProps, 1, 1, 4> HugeCacheTypeProps;

    // Cache implementation type with properties
    typedef LargeObjectCacheImpl< LargeCacheTypeProps > LargeCacheType;
    typedef LargeObjectCacheImpl< HugeCacheTypeProps > HugeCacheType;

    // The beginning of largeCache is more actively used and smaller than hugeCache,
    // so hugeCache is put first to prevent false sharing
    // with LargeObjectCache's predecessor
    HugeCacheType hugeCache;
    LargeCacheType largeCache;

    /* Logical time, incremented on each put/get operation.
       To prevent starvation between pools, it is kept separately for each pool.
       Overflow is OK, as we only want the difference between
       its current value and some recent one.

       Both malloc and free should increment the logical time, as otherwise
       multiple cached blocks would have the same age,
       and the accuracy of the predictors would suffer.
    */
    std::atomic<uintptr_t> cacheCurrTime;

    // Memory pool that owns this LargeObjectCache.
    // Strict 1:1 relation, never changed
    ExtMemoryPool *extMemPool;

    // Returns an artificial bin index;
    // it's used only during sorting and is never saved
    static int sizeToIdx(size_t size);

    // Our friends
    friend class Backend;

public:
    void init(ExtMemoryPool *memPool);

    // Item accessors
    void put(LargeMemoryBlock *largeBlock);
    void putList(LargeMemoryBlock *head);
    LargeMemoryBlock *get(size_t size);

    void updateCacheState(DecreaseOrIncrease op, size_t size);
    bool isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime);

    // Cleanup operations
    bool doCleanup(uintptr_t currTime, bool doThreshDecr);
    bool decreasingCleanup();
    bool regularCleanup();
    bool cleanAll();
    void reset();

    void reportStat(FILE *f);
#if __TBB_MALLOC_WHITEBOX_TEST
    size_t getLOCSize() const;
    size_t getUsedSize() const;
#endif

    // The cache deals with exact-fit sizes, so each size needs to be aligned
    // to the specific bin when an object is put into the cache
    static size_t alignToBin(size_t size);

    void setHugeSizeThreshold(size_t value);

    // Check if we should cache or sieve this size
    bool sizeInCacheRange(size_t size);

    uintptr_t getCurrTimeRange(uintptr_t range);
    void registerRealloc(size_t oldSize, size_t newSize);
};
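
/* Typical flow (a hedged sketch; the real call sites are in the allocator's
   implementation files, and allocateFromBackend() below is a hypothetical
   placeholder for the backend request):

       size_t binnedSize = LargeObjectCache::alignToBin(neededSize);
       LargeMemoryBlock *lmb = cache->get(binnedSize);  // try a cached block first
       if (!lmb)
           lmb = allocateFromBackend(binnedSize);       // miss: go to the backend

       // on free, the block is returned to the cache rather than the backend,
       // unless its size is outside the cache range (see sizeInCacheRange())
       cache->put(lmb);
*/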

#endif // __TBB_large_objects_H