xref: /oneTBB/src/tbbmalloc/large_objects.h (revision 20514c0e)
/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_tbbmalloc_internal_H
    #error tbbmalloc_internal.h must be included at this point
#endif

#ifndef __TBB_large_objects_H
#define __TBB_large_objects_H

//! The list of possible Cache Bin Aggregator operations.
/*  Declared here to avoid Solaris Studio* 12.2 "multiple definitions" error */
enum CacheBinOperationType {
    CBOP_INVALID = 0,
    CBOP_GET,
    CBOP_PUT_LIST,
    CBOP_CLEAN_TO_THRESHOLD,
    CBOP_CLEAN_ALL,
    CBOP_UPDATE_USED_SIZE
};

// The Cache Bin Aggregator operation status list.
// CBST_NOWAIT can be specified for non-blocking operations.
enum CacheBinOperationStatus {
    CBST_WAIT = 0,
    CBST_NOWAIT,
    CBST_DONE
};

/*
 * Bins that grow with an arithmetic step
 */
template<size_t MIN_SIZE, size_t MAX_SIZE>
struct LargeBinStructureProps {
public:
    static const size_t   MinSize = MIN_SIZE, MaxSize = MAX_SIZE;
    static const size_t   CacheStep = 8 * 1024;
    static const unsigned NumBins = (MaxSize - MinSize) / CacheStep;

    static size_t alignToBin(size_t size) {
        return alignUp(size, CacheStep);
    }

    static int sizeToIdx(size_t size) {
        MALLOC_ASSERT(MinSize <= size && size < MaxSize, ASSERT_TEXT);
        MALLOC_ASSERT(size % CacheStep == 0, ASSERT_TEXT);
        return (size - MinSize) / CacheStep;
    }
};
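
// A worked example of the arithmetic binning above (illustrative only, using
// the parameters the LargeObjectCache below instantiates this with:
// MIN_SIZE = 8KB and MAX_SIZE = 8MB, so CacheStep = 8KB and NumBins = 1023):
//
//   alignToBin(20*1024) == 24*1024  // rounded up to the next 8KB step
//   sizeToIdx(24*1024)  == (24*1024 - 8*1024) / (8*1024) == 2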

/*
 * Bins that grow with a special geometric progression.
 */
template<size_t MIN_SIZE, size_t MAX_SIZE>
struct HugeBinStructureProps {

private:
    // Sizes grow with the following formula: Size = MinSize * (2 ^ (Index / StepFactor))
    // There are StepFactor bins (8 by default) between each pair of adjacent power-of-2 bins
    static const int MaxSizeExp    = Log2<MAX_SIZE>::value;
    static const int MinSizeExp    = Log2<MIN_SIZE>::value;
    static const int StepFactor    = 8;
    static const int StepFactorExp = Log2<StepFactor>::value;

public:
    static const size_t   MinSize = MIN_SIZE, MaxSize = MAX_SIZE;
    static const unsigned NumBins = (MaxSizeExp - MinSizeExp) * StepFactor;

    static size_t alignToBin(size_t size) {
        MALLOC_ASSERT(size >= StepFactor, "Size must not be less than the StepFactor");
        size_t minorStepExp = BitScanRev(size) - StepFactorExp;
        return alignUp(size, 1ULL << minorStepExp);
    }

    // Sizes between the power-of-2 values are approximated to StepFactor bins.
    static int sizeToIdx(size_t size) {
        MALLOC_ASSERT(MinSize <= size && size <= MaxSize, ASSERT_TEXT);
        int sizeExp = (int)BitScanRev(size); // same as __TBB_Log2
        size_t majorStepSize = 1ULL << sizeExp;
        int minorStepExp = sizeExp - StepFactorExp;
        int minorIdx = (size - majorStepSize) >> minorStepExp;
        MALLOC_ASSERT(size == majorStepSize + ((size_t)minorIdx << minorStepExp),
            "Size is not aligned on the bin");
        return StepFactor * (sizeExp - MinSizeExp) + minorIdx;
    }
};
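
// A worked example of the geometric binning above (illustrative only, using
// the parameters the LargeObjectCache below instantiates this with:
// MIN_SIZE = 8MB, so MinSizeExp = 23, with StepFactor = 8, StepFactorExp = 3):
//
//   Sizes in [8MB, 16MB) use a minor step of 2^(23 - 3) = 1MB,
//   i.e. there are 8 bins per power-of-2 range.
//   sizeToIdx(12MB): sizeExp = 23, majorStepSize = 8MB,
//                    minorIdx = (12MB - 8MB) >> 20 = 4,
//                    so idx = 8 * (23 - 23) + 4 = 4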

/*
 * Cache properties accessor
 *
 * TooLargeFactor -- determines when the cache size is treated as "too large"
 *                   compared to the user data size
 * OnMissFactor -- if a cache miss occurred and the cache was cleaned,
 *                 ageThreshold is set to OnMissFactor times the difference
 *                 between the current time and the last time the cache was cleaned
 * LongWaitFactor -- used to detect rarely-used bins and forget their usage history
 */
template<typename StructureProps, int TOO_LARGE, int ON_MISS, int LONG_WAIT>
struct LargeObjectCacheProps : public StructureProps {
    static const int TooLargeFactor = TOO_LARGE, OnMissFactor = ON_MISS, LongWaitFactor = LONG_WAIT;
};
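
// For reference (see the typedefs in LargeObjectCache below): large objects
// use LargeObjectCacheProps<LargeBSProps, 2, 2, 16> and huge objects use
// LargeObjectCacheProps<HugeBSProps, 1, 1, 4>, i.e. the large-object cache may
// hold up to twice the currently used size before being considered "too
// large", while the huge-object cache is cleaned more aggressively.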

template<typename Props>
class LargeObjectCacheImpl {
private:

    // Current sizes of used and cached objects. Calculated while we are
    // traversing the bins and used for the isLOCTooLarge() check at the same time.
    class BinsSummary {
        size_t usedSz;
        size_t cachedSz;
    public:
        BinsSummary() : usedSz(0), cachedSz(0) {}
        // "too large" criterion
        bool isLOCTooLarge() const { return cachedSz > Props::TooLargeFactor * usedSz; }
        void update(size_t usedSize, size_t cachedSize) {
            usedSz += usedSize;
            cachedSz += cachedSize;
        }
        void reset() { usedSz = cachedSz = 0; }
    };

public:
    // The number of bins to cache large/huge objects.
    static const uint32_t numBins = Props::NumBins;

    typedef BitMaskMax<numBins> BinBitMask;

    // Doubly linked list of same-size cached blocks ordered by age (oldest on top)
    // TODO: do we really want the list to be doubly linked? A singly linked list
    // could reduce memory consumption and require fewer operations under the lock.
    // TODO: try to switch to 32-bit logical time to save space in CacheBin
    // and move bins to different cache lines.
    class CacheBin {
    private:
        LargeMemoryBlock* first;
        std::atomic<LargeMemoryBlock*> last;
        /* age of the oldest block in the list; equal to last->age, if last is defined;
            used to check the age quickly, without acquiring the lock. */
        std::atomic<uintptr_t> oldest;
        /* currAge when something was excluded from the list because of its age,
         not because of a cache hit */
        uintptr_t         lastCleanedAge;
        /* Current threshold value for the blocks of a particular size.
         Set on cache miss. */
        std::atomic<intptr_t> ageThreshold;

        /* total size of all objects corresponding to the bin and allocated by the user */
        std::atomic<size_t> usedSize;
        /* total size of all objects cached in the bin */
        std::atomic<size_t> cachedSize;
        /* mean residence time of a block in the bin before successful reuse */
        std::atomic<intptr_t> meanHitRange;
        /* time of the last get() call for the bin */
        uintptr_t         lastGet;

        typename MallocAggregator<CacheBinOperation>::type aggregator;

        void ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime = true);

        /* should be placed in zero-initialized memory; a ctor is not needed */
        CacheBin();

    public:
        void init() {
            memset(static_cast<void*>(this), 0, sizeof(CacheBin));
        }

        /* ---------- Cache accessors ---------- */
        void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx);
        LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx);

        /* ---------- Cleanup functions -------- */
        bool cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx);
        bool releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx);
        /* ------------------------------------- */

        void updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx);
        void decreaseThreshold() {
            intptr_t threshold = ageThreshold.load(std::memory_order_relaxed);
            if (threshold)
                ageThreshold.store((threshold + meanHitRange.load(std::memory_order_relaxed)) / 2, std::memory_order_relaxed);
        }
        void updateBinsSummary(BinsSummary *binsSummary) const {
            binsSummary->update(usedSize.load(std::memory_order_relaxed), cachedSize.load(std::memory_order_relaxed));
        }
        size_t getSize() const { return cachedSize.load(std::memory_order_relaxed); }
        size_t getUsedSize() const { return usedSize.load(std::memory_order_relaxed); }
        size_t reportStat(int num, FILE *f);

        /* --------- Unsafe methods used with the aggregator ------- */
        void forgetOutdatedState(uintptr_t currTime);
        LargeMemoryBlock *putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask,
                int idx, int num, size_t hugeObjectThreshold);
        LargeMemoryBlock *get();
        LargeMemoryBlock *cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx);
        LargeMemoryBlock *cleanAll(BinBitMask *bitMask, int idx);
        void updateUsedSize(size_t size, BinBitMask *bitMask, int idx) {
            if (!usedSize.load(std::memory_order_relaxed)) bitMask->set(idx, true);
            usedSize.store(usedSize.load(std::memory_order_relaxed) + size, std::memory_order_relaxed);
            if (!usedSize.load(std::memory_order_relaxed) && !first) bitMask->set(idx, false);
        }
        void updateMeanHitRange( intptr_t hitRange ) {
            hitRange = hitRange >= 0 ? hitRange : 0;
            intptr_t mean = meanHitRange.load(std::memory_order_relaxed);
            mean = mean ? (mean + hitRange) / 2 : hitRange;
            meanHitRange.store(mean, std::memory_order_relaxed);
        }
        void updateAgeThreshold( uintptr_t currTime ) {
            if (lastCleanedAge)
                ageThreshold.store(Props::OnMissFactor * (currTime - lastCleanedAge), std::memory_order_relaxed);
        }
        void updateCachedSize(size_t size) {
            cachedSize.store(cachedSize.load(std::memory_order_relaxed) + size, std::memory_order_relaxed);
        }
        void setLastGet( uintptr_t newLastGet ) {
            lastGet = newLastGet;
        }
        /* -------------------------------------------------------- */
    };

    // Index into the huge bins, used for fast regular-cleanup search when
    // the "huge size threshold" setting is defined
    intptr_t     hugeSizeThresholdIdx;

private:
    // How many times the LOC was "too large"
    std::atomic<intptr_t> tooLargeLOC;
    // for quickly finding used bins and bins with non-zero usedSize;
    // indexed from the end, as we need the largest first
    BinBitMask   bitMask;
    // bins with lists of recently freed large blocks cached for reuse
    CacheBin bin[numBins];

public:
    /* ------------ CacheBin structure dependent stuff ------------ */
    static size_t alignToBin(size_t size) {
        return Props::alignToBin(size);
    }
    static int sizeToIdx(size_t size) {
        return Props::sizeToIdx(size);
    }

    /* --------- Main cache functions (put, get object) ------------ */
    void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *largeBlock);
    LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size);

    /* ------------------------ Cleanup ---------------------------- */
    bool regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currAge, bool doThreshDecr);
    bool cleanAll(ExtMemoryPool *extMemPool);

    /* -------------------------- Other ---------------------------- */
    void updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size);

    void reset();
    void reportStat(FILE *f);
#if __TBB_MALLOC_WHITEBOX_TEST
    size_t getLOCSize() const;
    size_t getUsedSize() const;
#endif
};

class LargeObjectCache {
private:
    // Large bins: [minLargeSize, maxLargeSize)
    // Huge bins:  [maxLargeSize, maxHugeSize)
    static const size_t minLargeSize = 8 * 1024,
                        maxLargeSize = 8 * 1024 * 1024,
                        // Cache memory up to 1TB (or 2GB on 32-bit architectures),
                        // but sieve objects above a special threshold
                        maxHugeSize = tbb::detail::select_size_t_constant<2147483648U, 1099511627776ULL>::value;
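
    // For example (illustrative only): a 64KB block is served by the large
    // bins, a 16MB block by the huge bins, and a 128MB request, being above
    // the default 64MB hugeSizeThreshold (see below), sieves through the
    // cache entirely.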

public:
    // Upper bound threshold for the caching size. Above that size, all objects sieve through the cache.
    // 64MB by default; the previous value was 129MB (needed by some Intel(R) Math Kernel Library (Intel(R) MKL) benchmarks)
    static const size_t defaultMaxHugeSize = 64UL * 1024UL * 1024UL;
    // Above this size, a large object is interpreted as huge and does not participate in regular cleanup.
    // Can be changed during program execution.
    size_t hugeSizeThreshold;

private:
    // Large objects cache properties
    typedef LargeBinStructureProps<minLargeSize, maxLargeSize> LargeBSProps;
    typedef LargeObjectCacheProps<LargeBSProps, 2, 2, 16> LargeCacheTypeProps;

    // Huge objects cache properties
    typedef HugeBinStructureProps<maxLargeSize, maxHugeSize> HugeBSProps;
    typedef LargeObjectCacheProps<HugeBSProps, 1, 1, 4> HugeCacheTypeProps;

    // Cache implementation type with properties
    typedef LargeObjectCacheImpl< LargeCacheTypeProps > LargeCacheType;
    typedef LargeObjectCacheImpl< HugeCacheTypeProps > HugeCacheType;

    // The beginning of largeCache is more actively used and smaller than
    // hugeCache, so hugeCache is placed first to prevent false sharing
    // with LargeObjectCache's predecessor
    HugeCacheType hugeCache;
    LargeCacheType largeCache;

    /* Logical time, incremented on each put/get operation.
       To prevent starvation between pools, it is kept separately for each pool.
       Overflow is OK, as we only want the difference between
       its current value and some recent one.

       Both malloc and free should increment the logical time, as otherwise
       multiple cached blocks would have the same age,
       and the accuracy of the predictors would suffer.
    */
    std::atomic<uintptr_t> cacheCurrTime;
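
    // Illustrative note: because the age arithmetic is unsigned, a distance
    // such as (currTime - blockAge) stays correct even across wraparound,
    // e.g. with 8-bit arithmetic (uint8_t)(2 - 250) == 8.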

    // Memory pool that owns this LargeObjectCache.
    // Strict 1:1 relation, never changed
    ExtMemoryPool *extMemPool;

    // Returns an artificial bin index;
    // it is used only during sorting and never saved
    static int sizeToIdx(size_t size);

    // Our friends
    friend class Backend;

public:
    void init(ExtMemoryPool *memPool);

    // Item accessors
    void put(LargeMemoryBlock *largeBlock);
    void putList(LargeMemoryBlock *head);
    LargeMemoryBlock *get(size_t size);

    void updateCacheState(DecreaseOrIncrease op, size_t size);
    bool isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime);

    // Cleanup operations
    bool doCleanup(uintptr_t currTime, bool doThreshDecr);
    bool decreasingCleanup();
    bool regularCleanup();
    bool cleanAll();
    void reset();

    void reportStat(FILE *f);
#if __TBB_MALLOC_WHITEBOX_TEST
    size_t getLOCSize() const;
    size_t getUsedSize() const;
#endif

    // The cache deals with exact-fit sizes, so each size needs to be aligned
    // to a specific bin when an object is put into the cache
    static size_t alignToBin(size_t size);

    void setHugeSizeThreshold(size_t value);

    // Check if we should cache or sieve this size
    bool sizeInCacheRange(size_t size);

    uintptr_t getCurrTimeRange(uintptr_t range);
    void registerRealloc(size_t oldSize, size_t newSize);
};
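
// A minimal usage sketch (illustrative only; the real call sites live
// elsewhere in tbbmalloc, and the pool accessor below is hypothetical):
//
//   LargeObjectCache &loc = extMemPool->loc;
//   size_t binned = LargeObjectCache::alignToBin(allocationSize);
//   LargeMemoryBlock *lmb = loc.get(binned);  // expected nullptr on cache miss
//   if (!lmb) {
//       /* cache miss: allocate the block from the backend instead */
//   }
//   ...
//   /* on free, return the block to the cache for possible reuse: */
//   loc.put(lmb);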

#endif // __TBB_large_objects_H