xref: /oneTBB/src/tbbmalloc/large_objects.cpp (revision b15aabb3)
/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include "../tbb/environment.h"

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    // Suppress warning: unary minus operator applied to unsigned type, result still unsigned
    // TBB_REVAMP_TODO: review this warning
    #pragma warning(push)
    #pragma warning(disable:4146)
#endif

/******************************* Allocation of large objects *********************************************/

namespace rml {
namespace internal {

/* ---------------------------- Large Object cache init section ---------------------------------------- */

void LargeObjectCache::init(ExtMemoryPool *memPool)
{
    extMemPool = memPool;
    // scalable_allocation_mode can be called before allocator initialization; respect such a manual request
    if (hugeSizeThreshold == 0) {
        // Initialize the huge size threshold if the environment variable was set
        long requestedThreshold = tbb::detail::r1::GetIntegralEnvironmentVariable("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD");
        // Use a valid environment value, or fall back to the maximum possible value
        if (requestedThreshold != -1) {
            setHugeSizeThreshold(requestedThreshold);
        } else {
            setHugeSizeThreshold(maxHugeSize);
        }
    }
}
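
/* Usage sketch (illustrative, not part of this file): the threshold can be requested
   before the allocator is first used, either from the shell or through the public
   scalable_allocation_mode API, assuming the TBBMALLOC_SET_HUGE_SIZE_THRESHOLD mode
   declared in scalable_allocator.h:

       // shell: TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD=1073741824 ./app
       //
       // or, in code, before the first allocation:
       #include "oneapi/tbb/scalable_allocator.h"
       int enableHugeCaching() {
           // cache blocks of up to 1 GiB in the huge object cache
           return scalable_allocation_mode(TBBMALLOC_SET_HUGE_SIZE_THRESHOLD, 1024*1024*1024);
       }
*/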

/* ----------------------------- Huge size threshold settings ----------------------------------------- */

void LargeObjectCache::setHugeSizeThreshold(size_t value)
{
    // Valid settings fall in the huge cache range: [maxLargeSize, maxHugeSize].
    if (value <= maxHugeSize) {
        hugeSizeThreshold = value >= maxLargeSize ? alignToBin(value) : maxLargeSize;

        // Calculate local indexes for the global threshold size (for fast search inside a regular cleanup)
        largeCache.hugeSizeThresholdIdx = LargeCacheType::numBins;
        hugeCache.hugeSizeThresholdIdx = HugeCacheType::sizeToIdx(hugeSizeThreshold);
    }
}

bool LargeObjectCache::sizeInCacheRange(size_t size)
{
    return size <= maxHugeSize && (size <= defaultMaxHugeSize || size >= hugeSizeThreshold);
}
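
/* Worked example (illustrative) of the predicate above: sizes up to defaultMaxHugeSize
   are always cacheable; sizes in (defaultMaxHugeSize, hugeSizeThreshold) bypass the
   cache and go straight to the backend; sizes in [hugeSizeThreshold, maxHugeSize] are
   cacheable again; sizes above maxHugeSize are never cached. */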

/* ----------------------------------------------------------------------------------------------------- */

/* The functor called by the aggregator for the operation list */
template<typename Props>
class CacheBinFunctor {
    typename LargeObjectCacheImpl<Props>::CacheBin *const bin;
    ExtMemoryPool *const extMemPool;
    typename LargeObjectCacheImpl<Props>::BinBitMask *const bitMask;
    const int idx;

    LargeMemoryBlock *toRelease;
    bool needCleanup;
    uintptr_t currTime;

    /* Do preprocessing over the operation list. */
    /* All OP_PUT_LIST operations are merged into one operation.
       OP_GET operations are merged with the OP_PUT_LIST operations where possible,
       which requires updating the moving average value in the bin.
       Only the most recent OP_CLEAN_TO_THRESHOLD operation matters.
       The OP_CLEAN_ALL operation should also be performed only once;
       moreover, it cancels any OP_CLEAN_TO_THRESHOLD operation. */
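    /* For example (an illustrative batch), the aggregated list
           GET(a), PUT_LIST(x->y), GET(b), CLEAN_TO_THRESHOLD(t1), CLEAN_TO_THRESHOLD(t2)
       reduces to: both GETs are satisfied directly from the put list {x, y},
       and a single cleanup runs at max(t1, t2). */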
    class OperationPreprocessor {
        // TODO: remove the dependency on CacheBin.
        typename LargeObjectCacheImpl<Props>::CacheBin *const  bin;

        /* Contains the relative time in the operation list.
           It counts in reverse order since the aggregator also
           provides operations in reverse order. */
        uintptr_t lclTime;

        /* opGet contains only OP_GET operations that cannot be merged with OP_PUT operations.
           opClean contains all OP_CLEAN_TO_THRESHOLD and OP_CLEAN_ALL operations. */
        CacheBinOperation *opGet, *opClean;
        /* The time of the last OP_CLEAN_TO_THRESHOLD operation. */
        uintptr_t cleanTime;

        /* lastGetOpTime - the time of the last OP_GET operation.
           lastGet - the same meaning as CacheBin::lastGet */
        uintptr_t lastGetOpTime, lastGet;

        /* The total sum of all usedSize changes requested with CBOP_UPDATE_USED_SIZE operations. */
        size_t updateUsedSize;

        /* The list of blocks for the OP_PUT_LIST operation. */
        LargeMemoryBlock *head, *tail;
        int putListNum;

        /* Whether OP_CLEAN_ALL is requested. */
        bool isCleanAll;

        inline void commitOperation(CacheBinOperation *op) const;
        inline void addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const;
        bool getFromPutList(CacheBinOperation* opGet, uintptr_t currTime);
        void addToPutList( LargeMemoryBlock *head, LargeMemoryBlock *tail, int num );

    public:
        OperationPreprocessor(typename LargeObjectCacheImpl<Props>::CacheBin *bin) :
            bin(bin), lclTime(0), opGet(NULL), opClean(NULL), cleanTime(0),
            lastGetOpTime(0), updateUsedSize(0), head(NULL), isCleanAll(false)  {}
        void operator()(CacheBinOperation* opList);
        uintptr_t getTimeRange() const { return -lclTime; }

        friend class CacheBinFunctor;
    };

public:
    CacheBinFunctor(typename LargeObjectCacheImpl<Props>::CacheBin *bin, ExtMemoryPool *extMemPool,
                    typename LargeObjectCacheImpl<Props>::BinBitMask *bitMask, int idx) :
        bin(bin), extMemPool(extMemPool), bitMask(bitMask), idx(idx), toRelease(NULL), needCleanup(false) {}
    void operator()(CacheBinOperation* opList);

    bool isCleanupNeeded() const { return needCleanup; }
    LargeMemoryBlock *getToRelease() const { return toRelease; }
    uintptr_t getCurrTime() const { return currTime; }
};

/* ---------------- Cache Bin Aggregator Operation Helpers ---------------- */

// The list of structures that describe the operation data
struct OpGet {
    static const CacheBinOperationType type = CBOP_GET;
    LargeMemoryBlock **res;
    size_t size;
    uintptr_t currTime;
};

struct OpPutList {
    static const CacheBinOperationType type = CBOP_PUT_LIST;
    LargeMemoryBlock *head;
};

struct OpCleanToThreshold {
    static const CacheBinOperationType type = CBOP_CLEAN_TO_THRESHOLD;
    LargeMemoryBlock **res;
    uintptr_t currTime;
};

struct OpCleanAll {
    static const CacheBinOperationType type = CBOP_CLEAN_ALL;
    LargeMemoryBlock **res;
};

struct OpUpdateUsedSize {
    static const CacheBinOperationType type = CBOP_UPDATE_USED_SIZE;
    size_t size;
};

union CacheBinOperationData {
private:
    OpGet opGet;
    OpPutList opPutList;
    OpCleanToThreshold opCleanToThreshold;
    OpCleanAll opCleanAll;
    OpUpdateUsedSize opUpdateUsedSize;
};

// Forward declarations
template <typename OpTypeData> OpTypeData& opCast(CacheBinOperation &op);

// Describes the aggregator operation
struct CacheBinOperation : public MallocAggregatedOperation<CacheBinOperation>::type {
    CacheBinOperationType type;

    template <typename OpTypeData>
    CacheBinOperation(OpTypeData &d, CacheBinOperationStatus st = CBST_WAIT) {
        opCast<OpTypeData>(*this) = d;
        type = OpTypeData::type;
        MallocAggregatedOperation<CacheBinOperation>::type::status = st;
    }
private:
    CacheBinOperationData data;

    template <typename OpTypeData>
    friend OpTypeData& opCast(CacheBinOperation &op);
};

// The opCast function could be a member of CacheBinOperation, but that would
// introduce a slight stylistic ambiguity: it would look like a getter (with a
// cast) for the CacheBinOperation::data member, yet it must return a reference
// to keep the code free of getter/setter call chains. A global cast in the
// style of static_cast (or reinterpret_cast) is therefore more readable and
// has more explicit semantics.
template <typename OpTypeData>
OpTypeData& opCast(CacheBinOperation &op) {
    return *reinterpret_cast<OpTypeData*>(&op.data);
}
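
/* Usage sketch (illustrative): constructing an operation and reading its data back
   through opCast, as the processing methods below do:

       LargeMemoryBlock *lmb = NULL;
       OpGet data = {&lmb, 16*1024};
       CacheBinOperation op(data);
       size_t requested = opCast<OpGet>(op).size; // == 16*1024
*/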

/* ------------------------------------------------------------------------ */

#if __TBB_MALLOC_LOCACHE_STAT
std::atomic<intptr_t> mallocCalls, cacheHits;
std::atomic<intptr_t> memAllocKB, memHitKB;
#endif

inline bool lessThanWithOverflow(intptr_t a, intptr_t b)
{
    return (a < b && (b - a < UINTPTR_MAX/2)) ||
           (a > b && (a - b > UINTPTR_MAX/2));
}
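
/* Worked example (illustrative): cache ages come from a free-running counter, so the
   comparison must survive wraparound. On a two's-complement machine, take
   a = INTPTR_MAX and b = INTPTR_MIN, where b was stamped one tick after a, across the
   wrap. Numerically a > b, but a - b converts to UINTPTR_MAX > UINTPTR_MAX/2, so the
   second clause still reports a as older than b. */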

/* ----------------------------------- Operation processing methods ------------------------------------ */

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::commitOperation(CacheBinOperation *op) const
{
    op->status.store(CBST_DONE, std::memory_order_release);
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const
{
    op->next = *opList;
    *opList = op;
}

template<typename Props> bool CacheBinFunctor<Props>::
    OperationPreprocessor::getFromPutList(CacheBinOperation *opGet, uintptr_t currTime)
{
    if ( head ) {
        uintptr_t age = head->age;
        LargeMemoryBlock *next = head->next;
        *opCast<OpGet>(*opGet).res = head;
        commitOperation( opGet );
        head = next;
        putListNum--;
        MALLOC_ASSERT( putListNum>=0, ASSERT_TEXT );

        // use moving average with current hit interval
        bin->updateMeanHitRange( currTime - age );
        return true;
    }
    return false;
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::addToPutList(LargeMemoryBlock *h, LargeMemoryBlock *t, int num)
{
    if ( head ) {
        MALLOC_ASSERT( tail, ASSERT_TEXT );
        tail->next = h;
        h->prev = tail;
        tail = t;
        putListNum += num;
    } else {
        head = h;
        tail = t;
        putListNum = num;
    }
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::operator()(CacheBinOperation* opList)
{
    for ( CacheBinOperation *op = opList, *opNext; op; op = opNext ) {
        opNext = op->next;
        switch ( op->type ) {
        case CBOP_GET:
            {
                lclTime--;
                if ( !lastGetOpTime ) {
                    lastGetOpTime = lclTime;
                    lastGet = 0;
                } else if ( !lastGet ) lastGet = lclTime;

                if ( !getFromPutList(op,lclTime) ) {
                    opCast<OpGet>(*op).currTime = lclTime;
                    addOpToOpList( op, &opGet );
                }
            }
            break;

        case CBOP_PUT_LIST:
            {
                LargeMemoryBlock *head = opCast<OpPutList>(*op).head;
                LargeMemoryBlock *curr = head, *prev = NULL;

                int num = 0;
                do {
                    // prev pointers are not kept while blocks are assigned to bins; set them now
                    curr->prev = prev;

                    // Save the local times to the memory blocks. Local times are necessary
                    // for the getFromPutList function which updates the hit range value in
                    // CacheBin when OP_GET and OP_PUT_LIST operations are merged successfully.
                    // The age will be updated to the correct global time after preprocessing,
                    // when the global cache time is updated.
                    curr->age = --lclTime;

                    prev = curr;
                    num += 1;

                    STAT_increment(getThreadId(), ThreadCommonCounters, cacheLargeObj);
                } while ((curr = curr->next) != NULL);

                LargeMemoryBlock *tail = prev;
                addToPutList(head, tail, num);

                while ( opGet ) {
                    CacheBinOperation *next = opGet->next;
                    if ( !getFromPutList(opGet, opCast<OpGet>(*opGet).currTime) )
                        break;
                    opGet = next;
                }
            }
            break;

        case CBOP_UPDATE_USED_SIZE:
            updateUsedSize += opCast<OpUpdateUsedSize>(*op).size;
            commitOperation( op );
            break;

        case CBOP_CLEAN_ALL:
            isCleanAll = true;
            addOpToOpList( op, &opClean );
            break;

        case CBOP_CLEAN_TO_THRESHOLD:
            {
                uintptr_t currTime = opCast<OpCleanToThreshold>(*op).currTime;
                // We don't worry about currTime overflow since it is a rare
                // occurrence and doesn't affect correctness
                cleanTime = cleanTime < currTime ? currTime : cleanTime;
                addOpToOpList( op, &opClean );
            }
            break;

        default:
            MALLOC_ASSERT( false, "Unknown operation." );
        }
    }
    MALLOC_ASSERT( !( opGet && head ), "Not all put/get pairs are processed!" );
}

template<typename Props> void CacheBinFunctor<Props>::operator()(CacheBinOperation* opList)
{
    MALLOC_ASSERT( opList, "Empty operation list is passed into operation handler." );

    OperationPreprocessor prep(bin);
    prep(opList);

    if ( uintptr_t timeRange = prep.getTimeRange() ) {
        uintptr_t startTime = extMemPool->loc.getCurrTimeRange(timeRange);
        // endTime is used as the current (base) time since the local time is negative.
        uintptr_t endTime = startTime + timeRange;

        if ( prep.lastGetOpTime && prep.lastGet ) bin->setLastGet(prep.lastGet+endTime);

        if ( CacheBinOperation *opGet = prep.opGet ) {
            bool isEmpty = false;
            do {
#if __TBB_MALLOC_WHITEBOX_TEST
                tbbmalloc_whitebox::locGetProcessed++;
#endif
                const OpGet &opGetData = opCast<OpGet>(*opGet);
                if ( !isEmpty ) {
                    if ( LargeMemoryBlock *res = bin->get() ) {
                        uintptr_t getTime = opGetData.currTime + endTime;
                        // use moving average with current hit interval
                        bin->updateMeanHitRange( getTime - res->age);
                        bin->updateCachedSize( -opGetData.size );
                        *opGetData.res = res;
                    } else {
                        isEmpty = true;
                        uintptr_t lastGetOpTime = prep.lastGetOpTime+endTime;
                        bin->forgetOutdatedState(lastGetOpTime);
                        bin->updateAgeThreshold(lastGetOpTime);
                    }
                }

                CacheBinOperation *opNext = opGet->next;
                bin->updateUsedSize( opGetData.size, bitMask, idx );
                prep.commitOperation( opGet );
                opGet = opNext;
            } while ( opGet );
            if ( prep.lastGetOpTime )
                bin->setLastGet( prep.lastGetOpTime + endTime );
        } else if ( LargeMemoryBlock *curr = prep.head ) {
            curr->prev = NULL;
            while ( curr ) {
                // Update local times to global times
                curr->age += endTime;
                curr=curr->next;
            }
#if __TBB_MALLOC_WHITEBOX_TEST
            tbbmalloc_whitebox::locPutProcessed+=prep.putListNum;
#endif
            toRelease = bin->putList(prep.head, prep.tail, bitMask, idx, prep.putListNum, extMemPool->loc.hugeSizeThreshold);
        }
        needCleanup = extMemPool->loc.isCleanupNeededOnRange(timeRange, startTime);
        currTime = endTime - 1;
    }

    if ( CacheBinOperation *opClean = prep.opClean ) {
        if ( prep.isCleanAll )
            *opCast<OpCleanAll>(*opClean).res = bin->cleanAll(bitMask, idx);
        else
            *opCast<OpCleanToThreshold>(*opClean).res = bin->cleanToThreshold(prep.cleanTime, bitMask, idx);

        CacheBinOperation *opNext = opClean->next;
        prep.commitOperation( opClean );

        while ((opClean = opNext) != NULL) {
            opNext = opClean->next;
            prep.commitOperation(opClean);
        }
    }

    if ( size_t size = prep.updateUsedSize )
        bin->updateUsedSize(size, bitMask, idx);
}
/* ----------------------------------------------------------------------------------------------------- */
/* --------------------------- Methods for creating and executing operations --------------------------- */
template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime)
{
    CacheBinFunctor<Props> func( this, extMemPool, bitMask, idx );
    aggregator.execute( op, func, longLifeTime );

    if ( LargeMemoryBlock *toRelease = func.getToRelease()) {
        extMemPool->backend.returnLargeObject(toRelease);
    }

    if ( func.isCleanupNeeded() ) {
        extMemPool->loc.doCleanup( func.getCurrTime(), /*doThreshDecr=*/false);
    }
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *lmb=NULL;
    OpGet data = {&lmb, size};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
    return lmb;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx)
{
    MALLOC_ASSERT(sizeof(LargeMemoryBlock)+sizeof(CacheBinOperation)<=head->unalignedSize, "CacheBinOperation is too large to be placed in LargeMemoryBlock!");

    OpPutList data = {head};
    CacheBinOperation *op = new (head+1) CacheBinOperation(data, CBST_NOWAIT);
    ExecuteOperation( op, extMemPool, bitMask, idx, false );
}

template<typename Props> bool LargeObjectCacheImpl<Props>::
    CacheBin::cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx)
{
    LargeMemoryBlock *toRelease = NULL;

    /* oldest may be more recent than age, which is why the cast to a signed
       type is used; age overflow is also handled correctly. */
    if (last && (intptr_t)(currTime - oldest) > ageThreshold) {
        OpCleanToThreshold data = {&toRelease, currTime};
        CacheBinOperation op(data);
        ExecuteOperation( &op, extMemPool, bitMask, idx );
    }
    bool released = toRelease;

    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}

template<typename Props> bool LargeObjectCacheImpl<Props>::
    CacheBin::releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *toRelease = NULL;

    if (last) {
        OpCleanAll data = {&toRelease};
        CacheBinOperation op(data);
        ExecuteOperation(&op, extMemPool, bitMask, idx);
    }
    bool released = toRelease;

    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        MALLOC_ASSERT(!helper || lessThanWithOverflow(helper->age, toRelease->age),
                      ASSERT_TEXT);
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    OpUpdateUsedSize data = {size};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
}

/* ------------------------------ Unsafe methods used with the aggregator ------------------------------ */

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask, int idx, int num, size_t hugeSizeThreshold)
{
    size_t size = head->unalignedSize;
    usedSize -= num*size;
    MALLOC_ASSERT( !last || (last->age != 0 && last->age != -1U), ASSERT_TEXT );
    MALLOC_ASSERT( (tail==head && num==1) || (tail!=head && num>1), ASSERT_TEXT );
    LargeMemoryBlock *toRelease = NULL;
    if (size < hugeSizeThreshold && !lastCleanedAge) {
        // The first object of this size was released.
        // Do not cache it, but remember when this occurred
        // so it can be taken into account on a cache miss.
        lastCleanedAge = tail->age;
        toRelease = tail;
        tail = tail->prev;
        if (tail)
            tail->next = NULL;
        else
            head = NULL;
        num--;
    }
    if (num) {
        // add the [head;tail] list to the cache
        MALLOC_ASSERT( tail, ASSERT_TEXT );
        tail->next = first;
        if (first)
            first->prev = tail;
        first = head;
        if (!last) {
            MALLOC_ASSERT(0 == oldest, ASSERT_TEXT);
            oldest = tail->age;
            last = tail;
        }

        cachedSize += num*size;
    }

    // No used objects and nothing in the bin: mark the bin as empty
    if (!usedSize && !first)
        bitMask->set(idx, false);

    return toRelease;
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::get()
{
    LargeMemoryBlock *result=first;
    if (result) {
        first = result->next;
        if (first)
            first->prev = NULL;
        else {
            last = NULL;
            oldest = 0;
        }
    }

    return result;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::forgetOutdatedState(uintptr_t currTime)
{
    // If the time since the last get is LongWaitFactor times more than ageThreshold
    // for the bin, treat the bin as rarely used and forget everything we know
    // about it.
    // If LongWaitFactor is too small, we forget too early, which prevents
    // good caching; if it is too high, blocks with unrelated usage patterns
    // get cached.
    const uintptr_t sinceLastGet = currTime - lastGet;
    bool doCleanup = false;

    if (ageThreshold)
        doCleanup = sinceLastGet > Props::LongWaitFactor * ageThreshold;
    else if (lastCleanedAge)
        doCleanup = sinceLastGet > Props::LongWaitFactor * (lastCleanedAge - lastGet);

    if (doCleanup) {
        lastCleanedAge = 0;
        ageThreshold = 0;
    }
}
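
/* Worked example (illustrative): suppose Props::LongWaitFactor == 1000 and the bin's
   ageThreshold == 100 ticks. If no get has been observed for more than 100000 ticks,
   the bin's lastCleanedAge and ageThreshold are reset, so the next miss is judged from
   a clean slate rather than against an obsolete access pattern. */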

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx)
{
    /* oldest may be more recent than age, which is why the cast to a signed
       type is used; age overflow is also handled correctly. */
    if ( !last || (intptr_t)(currTime - last->age) < ageThreshold ) return NULL;

#if MALLOC_DEBUG
    uintptr_t nextAge = 0;
#endif
    do {
#if MALLOC_DEBUG
        // check that the list is ordered
        MALLOC_ASSERT(!nextAge || lessThanWithOverflow(nextAge, last->age),
            ASSERT_TEXT);
        nextAge = last->age;
#endif
        cachedSize -= last->unalignedSize;
        last = last->prev;
    } while (last && (intptr_t)(currTime - last->age) > ageThreshold);

    LargeMemoryBlock *toRelease = NULL;
    if (last) {
        toRelease = last->next;
        oldest = last->age;
        last->next = NULL;
    } else {
        toRelease = first;
        first = NULL;
        oldest = 0;
        if (!usedSize)
            bitMask->set(idx, false);
    }
    MALLOC_ASSERT( toRelease, ASSERT_TEXT );
    lastCleanedAge = toRelease->age;

    return toRelease;
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::cleanAll(BinBitMask *bitMask, int idx)
{
    if (!last) return NULL;

    LargeMemoryBlock *toRelease = first;
    last = NULL;
    first = NULL;
    oldest = 0;
    cachedSize = 0;
    if (!usedSize)
        bitMask->set(idx, false);

    return toRelease;
}

/* ----------------------------------------------------------------------------------------------------- */

template<typename Props> size_t LargeObjectCacheImpl<Props>::
    CacheBin::reportStat(int num, FILE *f)
{
#if __TBB_MALLOC_LOCACHE_STAT
    if (first)
        printf("%d(%lu): total %lu KB thr %ld lastCln %lu oldest %lu\n",
               num, num*Props::CacheStep+Props::MinSize,
               cachedSize/1024, ageThreshold, lastCleanedAge, oldest);
#else
    suppress_unused_warning(num);
    suppress_unused_warning(f);
#endif
    return cachedSize;
}

// Release objects from cached blocks that are older than ageThreshold
template<typename Props>
bool LargeObjectCacheImpl<Props>::regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currTime, bool doThreshDecr)
{
    bool released = false;
    BinsSummary binsSummary;

    // The threshold is set below this cache's range, or the huge bins start from index zero
    if (hugeSizeThresholdIdx == 0) return false;

    // Start searching from the bin just below the huge size threshold (those bins can be cleaned up)
    int startSearchIdx = hugeSizeThresholdIdx - 1;

    for (int i = bitMask.getMaxTrue(startSearchIdx); i >= 0; i = bitMask.getMaxTrue(i-1)) {
        bin[i].updateBinsSummary(&binsSummary);
        if (!doThreshDecr && tooLargeLOC.load(std::memory_order_relaxed) > 2 && binsSummary.isLOCTooLarge()) {
            // If the LOC has been too large for quite a long time, decrease
            // the threshold based on bin hit statistics.
            // For this, redo the cleanup from the beginning.
            // Note: on this iteration the total usedSz may not look too large
            // compared to the total cachedSz, as it has been computed only
            // partially; that is acceptable.
            i = bitMask.getMaxTrue(startSearchIdx)+1;
            doThreshDecr = true;
            binsSummary.reset();
            continue;
        }
        if (doThreshDecr)
            bin[i].decreaseThreshold();

        if (bin[i].cleanToThreshold(extMemPool, &bitMask, currTime, i)) {
            released = true;
        }
    }
    // We want to detect whether the LOC was too large continuously for some
    // time, so races between incrementing and zeroing are acceptable, but the
    // increment itself must be atomic.
    if (binsSummary.isLOCTooLarge()) {
        tooLargeLOC++;
    } else {
        tooLargeLOC.store(0, std::memory_order_relaxed);
    }
    return released;
}

template<typename Props>
bool LargeObjectCacheImpl<Props>::cleanAll(ExtMemoryPool *extMemPool)
{
    bool released = false;
    for (int i = numBins-1; i >= 0; i--) {
        released |= bin[i].releaseAllToBackend(extMemPool, &bitMask, i);
    }
    return released;
}

template<typename Props>
void LargeObjectCacheImpl<Props>::reset() {
    tooLargeLOC.store(0, std::memory_order_relaxed);
    for (int i = numBins-1; i >= 0; i--)
        bin[i].init();
    bitMask.reset();
}

#if __TBB_MALLOC_WHITEBOX_TEST
template<typename Props>
size_t LargeObjectCacheImpl<Props>::getLOCSize() const
{
    size_t size = 0;
    for (int i = numBins-1; i >= 0; i--)
        size += bin[i].getSize();
    return size;
}

size_t LargeObjectCache::getLOCSize() const
{
    return largeCache.getLOCSize() + hugeCache.getLOCSize();
}

template<typename Props>
size_t LargeObjectCacheImpl<Props>::getUsedSize() const
{
    size_t size = 0;
    for (int i = numBins-1; i >= 0; i--)
        size += bin[i].getUsedSize();
    return size;
}

size_t LargeObjectCache::getUsedSize() const
{
    return largeCache.getUsedSize() + hugeCache.getUsedSize();
}
#endif // __TBB_MALLOC_WHITEBOX_TEST

inline bool LargeObjectCache::isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime)
{
    return range >= cacheCleanupFreq
        || currTime+range < currTime-1 // overflow: the wrap past 0 is itself a cleanup boundary
        // (prev;prev+range] contains n*cacheCleanupFreq
        || alignUp(currTime, cacheCleanupFreq)<currTime+range;
}
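
/* Worked example (illustrative): suppose cacheCleanupFreq == 256 and currTime == 1000.
   For a batch with range == 10, alignUp(1000, 256) == 1024 > 1010, so no multiple of
   cacheCleanupFreq falls inside (1000; 1010] and no cleanup is needed. For range == 30,
   1024 < 1030, the boundary 1024 lies inside the interval, and cleanup is triggered. */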

bool LargeObjectCache::doCleanup(uintptr_t currTime, bool doThreshDecr)
{
    if (!doThreshDecr)
        extMemPool->allLocalCaches.markUnused();
    return largeCache.regularCleanup(extMemPool, currTime, doThreshDecr)
        | hugeCache.regularCleanup(extMemPool, currTime, doThreshDecr);
}

bool LargeObjectCache::decreasingCleanup()
{
    return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/true);
}

bool LargeObjectCache::regularCleanup()
{
    return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/false);
}

bool LargeObjectCache::cleanAll()
{
    return largeCache.cleanAll(extMemPool) | hugeCache.cleanAll(extMemPool);
}

void LargeObjectCache::reset()
{
    largeCache.reset();
    hugeCache.reset();
}

template<typename Props>
LargeMemoryBlock *LargeObjectCacheImpl<Props>::get(ExtMemoryPool *extMemoryPool, size_t size)
{
    int idx = Props::sizeToIdx(size);

    LargeMemoryBlock *lmb = bin[idx].get(extMemoryPool, size, &bitMask, idx);

    if (lmb) {
        MALLOC_ITT_SYNC_ACQUIRED(bin+idx);
        STAT_increment(getThreadId(), ThreadCommonCounters, allocCachedLargeObj);
    }
    return lmb;
}

template<typename Props>
void LargeObjectCacheImpl<Props>::updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size)
{
    int idx = Props::sizeToIdx(size);
    MALLOC_ASSERT(idx<numBins, ASSERT_TEXT);
    bin[idx].updateUsedSize(extMemPool, op==decrease? -size : size, &bitMask, idx);
}

#if __TBB_MALLOC_LOCACHE_STAT
template<typename Props>
void LargeObjectCacheImpl<Props>::reportStat(FILE *f)
{
    size_t cachedSize = 0;
    for (int i=0; i<numBins; i++)
        cachedSize += bin[i].reportStat(i, f);
    fprintf(f, "total LOC size %lu MB\n", cachedSize/1024/1024);
}

void LargeObjectCache::reportStat(FILE *f)
{
    largeCache.reportStat(f);
    hugeCache.reportStat(f);
    fprintf(f, "cache time %lu\n", cacheCurrTime.load(std::memory_order_relaxed));
}
#endif

template<typename Props>
void LargeObjectCacheImpl<Props>::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *toCache)
{
    int toBinIdx = Props::sizeToIdx(toCache->unalignedSize);

    MALLOC_ITT_SYNC_RELEASING(bin+toBinIdx);
    bin[toBinIdx].putList(extMemPool, toCache, &bitMask, toBinIdx);
}

void LargeObjectCache::updateCacheState(DecreaseOrIncrease op, size_t size)
{
    if (size < maxLargeSize)
        largeCache.updateCacheState(extMemPool, op, size);
    else if (size < maxHugeSize)
        hugeCache.updateCacheState(extMemPool, op, size);
}

uintptr_t LargeObjectCache::getCurrTime()
{
    return ++cacheCurrTime;
}

uintptr_t LargeObjectCache::getCurrTimeRange(uintptr_t range)
{
    return (cacheCurrTime.fetch_add(range) + 1);
}
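
/* Worked example (illustrative): getCurrTimeRange reserves a contiguous batch of
   timestamps for an aggregated operation list. If cacheCurrTime == 10, then
   getCurrTimeRange(3) advances the clock to 13 and returns 11, so ticks 11, 12, and 13
   belong exclusively to that batch. */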

void LargeObjectCache::registerRealloc(size_t oldSize, size_t newSize)
{
    updateCacheState(decrease, oldSize);
    updateCacheState(increase, alignToBin(newSize));
}

size_t LargeObjectCache::alignToBin(size_t size) {
    return size < maxLargeSize ? LargeCacheType::alignToBin(size) : HugeCacheType::alignToBin(size);
}

// Used for internal purposes
int LargeObjectCache::sizeToIdx(size_t size)
{
    MALLOC_ASSERT(size <= maxHugeSize, ASSERT_TEXT);
    return size < maxLargeSize ?
        LargeCacheType::sizeToIdx(size) :
        LargeCacheType::numBins + HugeCacheType::sizeToIdx(size);
}
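
/* Worked example (illustrative): the combined index space is the large cache's bins
   followed by the huge cache's bins. A size below maxLargeSize maps into
   [0, LargeCacheType::numBins), while a size in [maxLargeSize, maxHugeSize] maps to
   LargeCacheType::numBins + HugeCacheType::sizeToIdx(size), giving one flat index over
   both caches. */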

void LargeObjectCache::putList(LargeMemoryBlock *list)
{
    LargeMemoryBlock *toProcess, *n;

    for (LargeMemoryBlock *curr = list; curr; curr = toProcess) {
        LargeMemoryBlock *tail = curr;
        toProcess = curr->next;
        if (!sizeInCacheRange(curr->unalignedSize)) {
            extMemPool->backend.returnLargeObject(curr);
            continue;
        }
        int currIdx = sizeToIdx(curr->unalignedSize);

        // Find all blocks that fit the same bin. A more efficient sorting
        // algorithm is not used because the list is short (commonly,
        // LocalLOC's HIGH_MARK-LOW_MARK, i.e. 24 items).
        for (LargeMemoryBlock *b = toProcess; b; b = n) {
            n = b->next;
            if (sizeToIdx(b->unalignedSize) == currIdx) {
                tail->next = b;
                tail = b;
                if (toProcess == b)
                    toProcess = toProcess->next;
                else {
                    b->prev->next = b->next;
                    if (b->next)
                        b->next->prev = b->prev;
                }
            }
        }
        tail->next = NULL;
        if (curr->unalignedSize < maxLargeSize)
            largeCache.putList(extMemPool, curr);
        else
            hugeCache.putList(extMemPool, curr);
    }
}

void LargeObjectCache::put(LargeMemoryBlock *largeBlock)
{
    size_t blockSize = largeBlock->unalignedSize;
    if (sizeInCacheRange(blockSize)) {
        largeBlock->next = NULL;
        if (blockSize < maxLargeSize)
            largeCache.putList(extMemPool, largeBlock);
        else
            hugeCache.putList(extMemPool, largeBlock);
    } else {
        extMemPool->backend.returnLargeObject(largeBlock);
    }
}

LargeMemoryBlock *LargeObjectCache::get(size_t size)
{
    MALLOC_ASSERT( size >= minLargeSize, ASSERT_TEXT );
    if (sizeInCacheRange(size)) {
        return size < maxLargeSize ?
            largeCache.get(extMemPool, size) : hugeCache.get(extMemPool, size);
    }
    return NULL;
}

LargeMemoryBlock *ExtMemoryPool::mallocLargeObject(MemoryPool *pool, size_t allocationSize)
{
#if __TBB_MALLOC_LOCACHE_STAT
    mallocCalls++;
    memAllocKB.fetch_add(allocationSize/1024);
#endif
    LargeMemoryBlock* lmb = loc.get(allocationSize);
    if (!lmb) {
        BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true);
        if (backRefIdx.isInvalid())
            return NULL;

        // unalignedSize is set in getLargeBlock
        lmb = backend.getLargeBlock(allocationSize);
        if (!lmb) {
            removeBackRef(backRefIdx);
            loc.updateCacheState(decrease, allocationSize);
            return NULL;
        }
        lmb->backRefIdx = backRefIdx;
        lmb->pool = pool;
        STAT_increment(getThreadId(), ThreadCommonCounters, allocNewLargeObj);
    } else {
#if __TBB_MALLOC_LOCACHE_STAT
        cacheHits++;
        memHitKB.fetch_add(allocationSize/1024);
#endif
    }
    return lmb;
}

void ExtMemoryPool::freeLargeObject(LargeMemoryBlock *mBlock)
{
    loc.put(mBlock);
}

void ExtMemoryPool::freeLargeObjectList(LargeMemoryBlock *head)
{
    loc.putList(head);
}

bool ExtMemoryPool::softCachesCleanup()
{
    return loc.regularCleanup();
}

bool ExtMemoryPool::hardCachesCleanup()
{
    // Thread-local caches must be cleaned before the LOC,
    // because objects from thread-local caches can be released to the LOC.
    bool ret = releaseAllLocalCaches();
    ret |= orphanedBlocks.cleanup(&backend);
    ret |= loc.cleanAll();
    ret |= backend.clean();
    return ret;
}

#if BACKEND_HAS_MREMAP
void *ExtMemoryPool::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
{
    const size_t oldUnalignedSize = ((LargeObjectHdr*)ptr - 1)->memoryBlock->unalignedSize;
    void *o = backend.remap(ptr, oldSize, newSize, alignment);
    if (o) {
        LargeMemoryBlock *lmb = ((LargeObjectHdr*)o - 1)->memoryBlock;
        loc.registerRealloc(oldUnalignedSize, lmb->unalignedSize);
    }
    return o;
}
#endif /* BACKEND_HAS_MREMAP */

/*********** End allocation of large objects **********/

} // namespace internal
} // namespace rml

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    #pragma warning(pop)
#endif