xref: /oneTBB/src/tbbmalloc/large_objects.cpp (revision e6684b22)
/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include "../src/tbb/environment.h"

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    // Suppress warning: unary minus operator applied to unsigned type, result still unsigned
    // TBB_REVAMP_TODO: review this warning
    #pragma warning(push)
    #pragma warning(disable:4146)
#endif

/******************************* Allocation of large objects *********************************************/

namespace rml {
namespace internal {

/* ---------------------------- Large Object cache init section ---------------------------------------- */

void LargeObjectCache::init(ExtMemoryPool *memPool)
{
    extMemPool = memPool;
    // scalable_allocation_mode can be called before allocator initialization; respect that manual request
    if (hugeSizeThreshold == 0) {
        // Initialize the huge size threshold from the environment variable, if it was set
        long requestedThreshold = tbb::detail::r1::GetIntegralEnvironmentVariable("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD");
        // Use a valid environment value, or fall back to the maximum possible value
        if (requestedThreshold != -1) {
            setHugeSizeThreshold(requestedThreshold);
        } else {
            setHugeSizeThreshold(maxHugeSize);
        }
    }
}
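
// Illustrative note (not part of the build): the threshold can come from the
// environment, e.g.
//     TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD=4194304 ./app
// or from an earlier scalable_allocation_mode(TBBMALLOC_SET_HUGE_SIZE_THRESHOLD, ...)
// call, which stores hugeSizeThreshold before init() runs and is respected above.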

/* ----------------------------- Huge size threshold settings ----------------------------------------- */

void LargeObjectCache::setHugeSizeThreshold(size_t value)
{
    // Valid in the huge cache range: [maxLargeSize, maxHugeSize].
    if (value <= maxHugeSize) {
        hugeSizeThreshold = value >= maxLargeSize ? alignToBin(value) : maxLargeSize;

        // Calculate local indices for the global threshold size (for fast search inside a regular cleanup)
        largeCache.hugeSizeThresholdIdx = LargeCacheType::numBins;
        hugeCache.hugeSizeThresholdIdx = HugeCacheType::sizeToIdx(hugeSizeThreshold);
    }
}

bool LargeObjectCache::sizeInCacheRange(size_t size)
{
    return size < maxHugeSize && (size <= defaultMaxHugeSize || size >= hugeSizeThreshold);
}
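
// In effect, two disjoint ranges are cacheable: regular large sizes up to
// defaultMaxHugeSize, and sizes at or above the user-raised hugeSizeThreshold;
// anything in between goes straight to the backend. For instance (illustrative),
// raising the threshold to 1 GB leaves sizes in (defaultMaxHugeSize, 1 GB) uncached.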

/* ----------------------------------------------------------------------------------------------------- */

/* The functor called by the aggregator for the operation list */
template<typename Props>
class CacheBinFunctor {
    typename LargeObjectCacheImpl<Props>::CacheBin *const bin;
    ExtMemoryPool *const extMemPool;
    typename LargeObjectCacheImpl<Props>::BinBitMask *const bitMask;
    const int idx;

    LargeMemoryBlock *toRelease;
    bool needCleanup;
    uintptr_t currTime;

    /* Does preprocessing of the operation list. */
    /* All OP_PUT_LIST operations are merged into one operation.
       OP_GET operations are merged with OP_PUT_LIST operations where possible,
       which requires updating the moving average value in the bin.
       Only the last OP_CLEAN_TO_THRESHOLD operation takes effect.
       The OP_CLEAN_ALL operation should also be performed only once;
       moreover, it cancels any OP_CLEAN_TO_THRESHOLD operation. */
    class OperationPreprocessor {
        // TODO: remove the dependency on CacheBin.
        typename LargeObjectCacheImpl<Props>::CacheBin *const  bin;

        /* Contains the relative time in the operation list.
           It counts in reverse order since the aggregator also
           provides operations in reverse order. */
        uintptr_t lclTime;

        /* opGet contains only the OP_GET operations that cannot be merged with OP_PUT operations;
           opClean contains all OP_CLEAN_TO_THRESHOLD and OP_CLEAN_ALL operations. */
        CacheBinOperation *opGet, *opClean;
        /* The time of the last OP_CLEAN_TO_THRESHOLD operation */
        uintptr_t cleanTime;

        /* lastGetOpTime - the time of the last OP_GET operation.
           lastGet - the same meaning as CacheBin::lastGet */
        uintptr_t lastGetOpTime, lastGet;

        /* The total sum of all usedSize changes requested with CBOP_UPDATE_USED_SIZE operations. */
        size_t updateUsedSize;

        /* The list of blocks for the OP_PUT_LIST operation. */
        LargeMemoryBlock *head, *tail;
        int putListNum;

        /* Whether OP_CLEAN_ALL is requested. */
        bool isCleanAll;

        inline void commitOperation(CacheBinOperation *op) const;
        inline void addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const;
        bool getFromPutList(CacheBinOperation* opGet, uintptr_t currTime);
        void addToPutList( LargeMemoryBlock *head, LargeMemoryBlock *tail, int num );

    public:
        OperationPreprocessor(typename LargeObjectCacheImpl<Props>::CacheBin *bin) :
            bin(bin), lclTime(0), opGet(nullptr), opClean(nullptr), cleanTime(0),
            lastGetOpTime(0), lastGet(0), updateUsedSize(0), head(nullptr), tail(nullptr), putListNum(0), isCleanAll(false)  {}
        void operator()(CacheBinOperation* opList);
        uintptr_t getTimeRange() const { return -lclTime; }

        friend class CacheBinFunctor;
    };

public:
    CacheBinFunctor(typename LargeObjectCacheImpl<Props>::CacheBin *bin, ExtMemoryPool *extMemPool,
                    typename LargeObjectCacheImpl<Props>::BinBitMask *bitMask, int idx) :
        bin(bin), extMemPool(extMemPool), bitMask(bitMask), idx(idx), toRelease(nullptr), needCleanup(false) {}
    void operator()(CacheBinOperation* opList);

    bool isCleanupNeeded() const { return needCleanup; }
    LargeMemoryBlock *getToRelease() const { return toRelease; }
    uintptr_t getCurrTime() const { return currTime; }
};
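
// Flow sketch (illustrative): threads enqueue CacheBinOperation records through the
// malloc aggregator; one thread becomes the handler and runs this functor over the
// whole batch. OperationPreprocessor first folds the batch (merging puts, pairing
// gets with queued puts), then operator() applies the net effect to the bin without
// any lock being held by the requesting threads.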

/* ---------------- Cache Bin Aggregator Operation Helpers ---------------- */

// The list of structures which describe the operation data
struct OpGet {
    static const CacheBinOperationType type = CBOP_GET;
    LargeMemoryBlock **res;
    size_t size;
    uintptr_t currTime;
};

struct OpPutList {
    static const CacheBinOperationType type = CBOP_PUT_LIST;
    LargeMemoryBlock *head;
};

struct OpCleanToThreshold {
    static const CacheBinOperationType type = CBOP_CLEAN_TO_THRESHOLD;
    LargeMemoryBlock **res;
    uintptr_t currTime;
};

struct OpCleanAll {
    static const CacheBinOperationType type = CBOP_CLEAN_ALL;
    LargeMemoryBlock **res;
};

struct OpUpdateUsedSize {
    static const CacheBinOperationType type = CBOP_UPDATE_USED_SIZE;
    size_t size;
};

union CacheBinOperationData {
private:
    OpGet opGet;
    OpPutList opPutList;
    OpCleanToThreshold opCleanToThreshold;
    OpCleanAll opCleanAll;
    OpUpdateUsedSize opUpdateUsedSize;
};

// Forward declarations
template <typename OpTypeData> OpTypeData& opCast(CacheBinOperation &op);

// Describes the aggregator operation
struct CacheBinOperation : public MallocAggregatedOperation<CacheBinOperation>::type {
    CacheBinOperationType type;

    template <typename OpTypeData>
    CacheBinOperation(OpTypeData &d, CacheBinOperationStatus st = CBST_WAIT) {
        opCast<OpTypeData>(*this) = d;
        type = OpTypeData::type;
        MallocAggregatedOperation<CacheBinOperation>::type::status = st;
    }
private:
    CacheBinOperationData data;

    template <typename OpTypeData>
    friend OpTypeData& opCast(CacheBinOperation &op);
};

// The opCast function could be a member of CacheBinOperation, but that would carry a
// small stylistic ambiguity: it would look like a getter (with a cast) for the
// CacheBinOperation::data member, while it must return a reference to avoid lots of
// getter/setter calls. So a global cast in the style of static_cast
// (or reinterpret_cast) seems more readable and has more explicit semantics.
template <typename OpTypeData>
OpTypeData& opCast(CacheBinOperation &op) {
    return *reinterpret_cast<OpTypeData*>(&op.data);
}
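
// Usage sketch (illustrative): build a typed payload, wrap it in an operation, and
// read it back through the typed view over the union:
//     OpGet data = { &lmb, size, static_cast<uintptr_t>(0) };
//     CacheBinOperation op(data);
//     size_t sz = opCast<OpGet>(op).size; // typed access to the same bytes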

/* ------------------------------------------------------------------------ */

#if __TBB_MALLOC_LOCACHE_STAT
std::atomic<intptr_t> mallocCalls, cacheHits;
std::atomic<intptr_t> memAllocKB, memHitKB;
#endif

#if MALLOC_DEBUG
inline bool lessThanWithOverflow(intptr_t a, intptr_t b)
{
    return (a < b && (b - a < static_cast<intptr_t>(UINTPTR_MAX/2))) ||
           (a > b && (a - b > static_cast<intptr_t>(UINTPTR_MAX/2)));
}
#endif
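
// lessThanWithOverflow answers "is a older than b on a wrapping clock?": either a < b
// by less than half of the clock range, or a > b by more than half of it (i.e., b has
// already wrapped past a). Illustrative 8-bit analogy: 250 is older than 3 after a
// wrap-around, since 250 - 3 = 247 exceeds half the range (128).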

/* ----------------------------------- Operation processing methods ------------------------------------ */

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::commitOperation(CacheBinOperation *op) const
{
    // Make the completed operation visible to the waiting thread
    op->status.store(CBST_DONE, std::memory_order_release);
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const
{
    op->next = *opList;
    *opList = op;
}

template<typename Props> bool CacheBinFunctor<Props>::
    OperationPreprocessor::getFromPutList(CacheBinOperation *opGet, uintptr_t currTime)
{
    if ( head ) {
        uintptr_t age = head->age;
        LargeMemoryBlock *next = head->next;
        *opCast<OpGet>(*opGet).res = head;
        commitOperation( opGet );
        head = next;
        putListNum--;
        MALLOC_ASSERT( putListNum>=0, ASSERT_TEXT );

        // use moving average with current hit interval
        bin->updateMeanHitRange( currTime - age );
        return true;
    }
    return false;
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::addToPutList(LargeMemoryBlock *h, LargeMemoryBlock *t, int num)
{
    if ( head ) {
        MALLOC_ASSERT( tail, ASSERT_TEXT );
        tail->next = h;
        h->prev = tail;
        tail = t;
        putListNum += num;
    } else {
        head = h;
        tail = t;
        putListNum = num;
    }
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::operator()(CacheBinOperation* opList)
{
    for ( CacheBinOperation *op = opList, *opNext; op; op = opNext ) {
        opNext = op->next;
        switch ( op->type ) {
        case CBOP_GET:
            {
                lclTime--;
                if ( !lastGetOpTime ) {
                    lastGetOpTime = lclTime;
                    lastGet = 0;
                } else if ( !lastGet ) lastGet = lclTime;

                if ( !getFromPutList(op,lclTime) ) {
                    opCast<OpGet>(*op).currTime = lclTime;
                    addOpToOpList( op, &opGet );
                }
            }
            break;

        case CBOP_PUT_LIST:
            {
                LargeMemoryBlock *head = opCast<OpPutList>(*op).head;
                LargeMemoryBlock *curr = head, *prev = nullptr;

                int num = 0;
                do {
                    // prev pointers were not kept while assigning blocks to bins; set them now
                    curr->prev = prev;

                    // Save the local times to the memory blocks. Local times are necessary
                    // for the getFromPutList function, which updates the hit range value in
                    // CacheBin when OP_GET and OP_PUT_LIST operations are merged successfully.
                    // The age will be updated to the correct global time after preprocessing,
                    // when the global cache time is updated.
                    curr->age = --lclTime;

                    prev = curr;
                    num += 1;

                    STAT_increment(getThreadId(), ThreadCommonCounters, cacheLargeObj);
                } while ((curr = curr->next) != nullptr);

                LargeMemoryBlock *tail = prev;
                addToPutList(head, tail, num);

                while ( opGet ) {
                    CacheBinOperation *next = opGet->next;
                    if ( !getFromPutList(opGet, opCast<OpGet>(*opGet).currTime) )
                        break;
                    opGet = next;
                }
            }
            break;

        case CBOP_UPDATE_USED_SIZE:
            updateUsedSize += opCast<OpUpdateUsedSize>(*op).size;
            commitOperation( op );
            break;

        case CBOP_CLEAN_ALL:
            isCleanAll = true;
            addOpToOpList( op, &opClean );
            break;

        case CBOP_CLEAN_TO_THRESHOLD:
            {
                uintptr_t currTime = opCast<OpCleanToThreshold>(*op).currTime;
                // We don't worry about currTime overflow since it is a rare
                // occurrence and doesn't affect correctness
                cleanTime = cleanTime < currTime ? currTime : cleanTime;
                addOpToOpList( op, &opClean );
            }
            break;

        default:
            MALLOC_ASSERT( false, "Unknown operation." );
        }
    }
    MALLOC_ASSERT( !( opGet && head ), "Not all put/get pairs are processed!" );
}
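
// Worked example (illustrative): preprocessing assigns decreasing negative local times
// -1, -2, ... to GET operations and PUT blocks in the order the aggregator delivers
// them (reverse submission order). getTimeRange() then reports how many ticks the
// batch consumed; once the global clock is advanced by that range, every stored local
// time is converted to a global one by adding endTime.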

template<typename Props> void CacheBinFunctor<Props>::operator()(CacheBinOperation* opList)
{
    MALLOC_ASSERT( opList, "Empty operation list is passed into operation handler." );

    OperationPreprocessor prep(bin);
    prep(opList);

    if ( uintptr_t timeRange = prep.getTimeRange() ) {
        uintptr_t startTime = extMemPool->loc.getCurrTimeRange(timeRange);
        // endTime is used as the current (base) time since the local time is negative.
        uintptr_t endTime = startTime + timeRange;

        if ( prep.lastGetOpTime && prep.lastGet ) bin->setLastGet(prep.lastGet+endTime);

        if ( CacheBinOperation *opGet = prep.opGet ) {
            bool isEmpty = false;
            do {
#if __TBB_MALLOC_WHITEBOX_TEST
                tbbmalloc_whitebox::locGetProcessed++;
#endif
                const OpGet &opGetData = opCast<OpGet>(*opGet);
                if ( !isEmpty ) {
                    if ( LargeMemoryBlock *res = bin->get() ) {
                        uintptr_t getTime = opGetData.currTime + endTime;
                        // use moving average with current hit interval
                        bin->updateMeanHitRange( getTime - res->age);
                        bin->updateCachedSize( -opGetData.size );
                        *opGetData.res = res;
                    } else {
                        isEmpty = true;
                        uintptr_t lastGetOpTime = prep.lastGetOpTime+endTime;
                        bin->forgetOutdatedState(lastGetOpTime);
                        bin->updateAgeThreshold(lastGetOpTime);
                    }
                }

                CacheBinOperation *opNext = opGet->next;
                bin->updateUsedSize( opGetData.size, bitMask, idx );
                prep.commitOperation( opGet );
                opGet = opNext;
            } while ( opGet );
            if ( prep.lastGetOpTime )
                bin->setLastGet( prep.lastGetOpTime + endTime );
        } else if ( LargeMemoryBlock *curr = prep.head ) {
            curr->prev = nullptr;
            while ( curr ) {
                // Update local times to global times
                curr->age += endTime;
                curr=curr->next;
            }
#if __TBB_MALLOC_WHITEBOX_TEST
            tbbmalloc_whitebox::locPutProcessed+=prep.putListNum;
#endif
            toRelease = bin->putList(prep.head, prep.tail, bitMask, idx, prep.putListNum, extMemPool->loc.hugeSizeThreshold);
        }
        needCleanup = extMemPool->loc.isCleanupNeededOnRange(timeRange, startTime);
        currTime = endTime - 1;
    }

    if ( CacheBinOperation *opClean = prep.opClean ) {
        if ( prep.isCleanAll )
            *opCast<OpCleanAll>(*opClean).res = bin->cleanAll(bitMask, idx);
        else
            *opCast<OpCleanToThreshold>(*opClean).res = bin->cleanToThreshold(prep.cleanTime, bitMask, idx);

        CacheBinOperation *opNext = opClean->next;
        prep.commitOperation( opClean );

        while ((opClean = opNext) != nullptr) {
            opNext = opClean->next;
            prep.commitOperation(opClean);
        }
    }

    if ( size_t size = prep.updateUsedSize )
        bin->updateUsedSize(size, bitMask, idx);
}
/* ----------------------------------------------------------------------------------------------------- */
/* --------------------------- Methods for creating and executing operations --------------------------- */
template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime)
{
    CacheBinFunctor<Props> func( this, extMemPool, bitMask, idx );
    aggregator.execute( op, func, longLifeTime );

    if ( LargeMemoryBlock *toRelease = func.getToRelease()) {
        extMemPool->backend.returnLargeObject(toRelease);
    }

    if ( func.isCleanupNeeded() ) {
        extMemPool->loc.doCleanup( func.getCurrTime(), /*doThreshDecr=*/false);
    }
}
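
// Note on lifetimes (a sketch of the contract as used below): operations created on
// the caller's stack are executed with longLifeTime=true and waited on (CBST_WAIT),
// while putList passes false because its operation is placement-new'ed inside the
// block being cached (CBST_NOWAIT) and must not be touched after commitOperation.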

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *lmb=nullptr;
    OpGet data = {&lmb, size, static_cast<uintptr_t>(0)};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
    return lmb;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx)
{
    MALLOC_ASSERT(sizeof(LargeMemoryBlock)+sizeof(CacheBinOperation)<=head->unalignedSize, "CacheBinOperation is too large to be placed in LargeMemoryBlock!");

    OpPutList data = {head};
    CacheBinOperation *op = new (head+1) CacheBinOperation(data, CBST_NOWAIT);
    ExecuteOperation( op, extMemPool, bitMask, idx, false );
}
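
// The operation record is placed right after the LargeMemoryBlock header of the first
// cached block (head+1), so the put path needs no extra allocation; the assert above
// guarantees the block is large enough to host both structures.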

template<typename Props> bool LargeObjectCacheImpl<Props>::
    CacheBin::cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx)
{
    LargeMemoryBlock *toRelease = nullptr;

    /* oldest may be more recent than age; that is why the cast to a signed type
       is used. age overflow is also processed correctly. */
    if (last.load(std::memory_order_relaxed) &&
        (intptr_t)(currTime - oldest.load(std::memory_order_relaxed)) > ageThreshold.load(std::memory_order_relaxed)) {
        OpCleanToThreshold data = {&toRelease, currTime};
        CacheBinOperation op(data);
        ExecuteOperation( &op, extMemPool, bitMask, idx );
    }
    bool released = toRelease;

    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}

template<typename Props> bool LargeObjectCacheImpl<Props>::
    CacheBin::releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *toRelease = nullptr;

    if (last.load(std::memory_order_relaxed)) {
        OpCleanAll data = {&toRelease};
        CacheBinOperation op(data);
        ExecuteOperation(&op, extMemPool, bitMask, idx);
    }
    bool released = toRelease;

    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        MALLOC_ASSERT(!helper || lessThanWithOverflow(helper->age, toRelease->age),
                      ASSERT_TEXT);
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    OpUpdateUsedSize data = {size};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
}

/* ------------------------------ Unsafe methods used with the aggregator ------------------------------ */

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask, int idx, int num, size_t hugeSizeThreshold)
{
    size_t size = head->unalignedSize;
    usedSize.store(usedSize.load(std::memory_order_relaxed) - num * size, std::memory_order_relaxed);
    MALLOC_ASSERT( !last.load(std::memory_order_relaxed) ||
        (last.load(std::memory_order_relaxed)->age != 0 && last.load(std::memory_order_relaxed)->age != -1U), ASSERT_TEXT );
    MALLOC_ASSERT( (tail==head && num==1) || (tail!=head && num>1), ASSERT_TEXT );
    MALLOC_ASSERT( tail, ASSERT_TEXT );
    LargeMemoryBlock *toRelease = nullptr;
    if (size < hugeSizeThreshold && !lastCleanedAge) {
        // The first object of this size was released.
        // Do not cache it; remember when this occurred,
        // to take it into account on a cache miss.
        lastCleanedAge = tail->age;
        toRelease = tail;
        tail = tail->prev;
        if (tail)
            tail->next = nullptr;
        else
            head = nullptr;
        num--;
    }
    if (num) {
        // add the [head;tail] list to the cache
        tail->next = first;
        if (first)
            first->prev = tail;
        first = head;
        if (!last.load(std::memory_order_relaxed)) {
            MALLOC_ASSERT(0 == oldest.load(std::memory_order_relaxed), ASSERT_TEXT);
            oldest.store(tail->age, std::memory_order_relaxed);
            last.store(tail, std::memory_order_relaxed);
        }

        cachedSize.store(cachedSize.load(std::memory_order_relaxed) + num * size, std::memory_order_relaxed);
    }

    // No used object and nothing in the bin: mark the bin as empty
    if (!usedSize.load(std::memory_order_relaxed) && !first)
        bitMask->set(idx, false);

    return toRelease;
}
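
// Invariant maintained by putList/get/clean* (summary): the bin is a doubly linked
// list ordered from the most recently cached block (first) to the oldest (last), so
// ages decrease from head to tail; that is exactly what the lessThanWithOverflow
// asserts verify when the list is drained.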

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::get()
{
    LargeMemoryBlock *result=first;
    if (result) {
        first = result->next;
        if (first)
            first->prev = nullptr;
        else {
            last.store(nullptr, std::memory_order_relaxed);
            oldest.store(0, std::memory_order_relaxed);
        }
    }

    return result;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::forgetOutdatedState(uintptr_t currTime)
{
    // If the time since the last get is more than LongWaitFactor times the bin's
    // ageThreshold, treat the bin as rarely used and forget everything we know
    // about it.
    // If LongWaitFactor is too small, we forget too early, which prevents
    // good caching; if it is too high, we end up caching blocks with
    // unrelated usage patterns.
    const uintptr_t sinceLastGet = currTime - lastGet;
    bool doCleanup = false;

    intptr_t threshold = ageThreshold.load(std::memory_order_relaxed);
    if (threshold)
        doCleanup = sinceLastGet > static_cast<uintptr_t>(Props::LongWaitFactor * threshold);
    else if (lastCleanedAge)
        doCleanup = sinceLastGet > static_cast<uintptr_t>(Props::LongWaitFactor * (lastCleanedAge - lastGet));

    if (doCleanup) {
        lastCleanedAge = 0;
        ageThreshold.store(0, std::memory_order_relaxed);
    }
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx)
{
    /* oldest may be more recent than age; that is why the cast to a signed type
       is used. age overflow is also processed correctly. */
    if ( !last.load(std::memory_order_relaxed) ||
        (intptr_t)(currTime - last.load(std::memory_order_relaxed)->age) < ageThreshold.load(std::memory_order_relaxed) )
        return nullptr;

#if MALLOC_DEBUG
    uintptr_t nextAge = 0;
#endif
    do {
#if MALLOC_DEBUG
        // check that the list is ordered
        MALLOC_ASSERT(!nextAge || lessThanWithOverflow(nextAge, last.load(std::memory_order_relaxed)->age),
            ASSERT_TEXT);
        nextAge = last.load(std::memory_order_relaxed)->age;
#endif
        cachedSize.store(cachedSize.load(std::memory_order_relaxed) - last.load(std::memory_order_relaxed)->unalignedSize, std::memory_order_relaxed);
        last.store(last.load(std::memory_order_relaxed)->prev, std::memory_order_relaxed);
    } while (last.load(std::memory_order_relaxed) &&
        (intptr_t)(currTime - last.load(std::memory_order_relaxed)->age) > ageThreshold.load(std::memory_order_relaxed));

    LargeMemoryBlock *toRelease = nullptr;
    if (last.load(std::memory_order_relaxed)) {
        toRelease = last.load(std::memory_order_relaxed)->next;
        oldest.store(last.load(std::memory_order_relaxed)->age, std::memory_order_relaxed);
        last.load(std::memory_order_relaxed)->next = nullptr;
    } else {
        toRelease = first;
        first = nullptr;
        oldest.store(0, std::memory_order_relaxed);
        if (!usedSize.load(std::memory_order_relaxed))
            bitMask->set(idx, false);
    }
    MALLOC_ASSERT( toRelease, ASSERT_TEXT );
    lastCleanedAge = toRelease->age;

    return toRelease;
}
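
// Signed-difference idiom (illustrative): even if the clock has wrapped, e.g.
// currTime == 5 and age == UINTPTR_MAX - 2, the unsigned subtraction yields 8 and the
// (intptr_t) cast keeps it a small positive distance, so the comparison against
// ageThreshold still reads as "time elapsed since the block was cached".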

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::cleanAll(BinBitMask *bitMask, int idx)
{
    if (!last.load(std::memory_order_relaxed)) return nullptr;

    LargeMemoryBlock *toRelease = first;
    last.store(nullptr, std::memory_order_relaxed);
    first = nullptr;
    oldest.store(0, std::memory_order_relaxed);
    cachedSize.store(0, std::memory_order_relaxed);
    if (!usedSize.load(std::memory_order_relaxed))
        bitMask->set(idx, false);

    return toRelease;
}

/* ----------------------------------------------------------------------------------------------------- */

#if __TBB_MALLOC_BACKEND_STAT
template<typename Props> size_t LargeObjectCacheImpl<Props>::
    CacheBin::reportStat(int num, FILE *f)
{
#if __TBB_MALLOC_LOCACHE_STAT
    if (first)
        fprintf(f, "%d(%lu): total %lu KB thr %ld lastCln %lu oldest %lu\n",
                num, num*Props::CacheStep+Props::MinSize,
                cachedSize.load(std::memory_order_relaxed)/1024, ageThreshold.load(std::memory_order_relaxed), lastCleanedAge, oldest.load(std::memory_order_relaxed));
#else
    suppress_unused_warning(num);
    suppress_unused_warning(f);
#endif
    return cachedSize.load(std::memory_order_relaxed);
}
#endif

// Release objects from cached blocks that are older than ageThreshold
template<typename Props>
bool LargeObjectCacheImpl<Props>::regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currTime, bool doThreshDecr)
{
    bool released = false;
    BinsSummary binsSummary;

    // The threshold is below this cache, or it starts from the zero index
    if (hugeSizeThresholdIdx == 0) return false;

    // Start searching from the bins below the huge size threshold (they can be cleaned up)
    int startSearchIdx = hugeSizeThresholdIdx - 1;

    for (int i = bitMask.getMaxTrue(startSearchIdx); i >= 0; i = bitMask.getMaxTrue(i-1)) {
        bin[i].updateBinsSummary(&binsSummary);
        if (!doThreshDecr && tooLargeLOC.load(std::memory_order_relaxed) > 2 && binsSummary.isLOCTooLarge()) {
            // If the LOC has been too large for quite a long time, decrease the
            // threshold based on bin hit statistics.
            // For this, redo the cleanup from the beginning.
            // Note: on this iteration the total usedSz may not be too large
            // in comparison to the total cachedSz, as we calculated it only
            // partially. We are OK with that.
            i = bitMask.getMaxTrue(startSearchIdx)+1;
            doThreshDecr = true;
            binsSummary.reset();
            continue;
        }
        if (doThreshDecr)
            bin[i].decreaseThreshold();

        if (bin[i].cleanToThreshold(extMemPool, &bitMask, currTime, i)) {
            released = true;
        }
    }
    // We want to detect whether the LOC was too large for some time continuously,
    // so we are OK with races between incrementing and zeroing, but incrementing
    // must be atomic.
    if (binsSummary.isLOCTooLarge()) {
        tooLargeLOC++;
    } else {
        tooLargeLOC.store(0, std::memory_order_relaxed);
    }
    return released;
}

template<typename Props>
bool LargeObjectCacheImpl<Props>::cleanAll(ExtMemoryPool *extMemPool)
{
    bool released = false;
    for (int i = numBins-1; i >= 0; i--) {
        released |= bin[i].releaseAllToBackend(extMemPool, &bitMask, i);
    }
    return released;
}

template<typename Props>
void LargeObjectCacheImpl<Props>::reset() {
    tooLargeLOC.store(0, std::memory_order_relaxed);
    for (int i = numBins-1; i >= 0; i--)
        bin[i].init();
    bitMask.reset();
}

#if __TBB_MALLOC_WHITEBOX_TEST
template<typename Props>
size_t LargeObjectCacheImpl<Props>::getLOCSize() const
{
    size_t size = 0;
    for (int i = numBins-1; i >= 0; i--)
        size += bin[i].getSize();
    return size;
}

size_t LargeObjectCache::getLOCSize() const
{
    return largeCache.getLOCSize() + hugeCache.getLOCSize();
}

template<typename Props>
size_t LargeObjectCacheImpl<Props>::getUsedSize() const
{
    size_t size = 0;
    for (int i = numBins-1; i >= 0; i--)
        size += bin[i].getUsedSize();
    return size;
}

size_t LargeObjectCache::getUsedSize() const
{
    return largeCache.getUsedSize() + hugeCache.getUsedSize();
}
#endif // __TBB_MALLOC_WHITEBOX_TEST

inline bool LargeObjectCache::isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime)
{
    return range >= cacheCleanupFreq
        || currTime+range < currTime-1 // overflow: the range crosses 0, which is a multiple of cacheCleanupFreq, so do cleanup
        // (prev;prev+range] contains n*cacheCleanupFreq
        || alignUp(currTime, cacheCleanupFreq)<currTime+range;
}
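
// Worked example (illustrative, assuming cacheCleanupFreq == 256): for currTime == 300
// and range == 10, alignUp(300, 256) == 512 >= 310, so no cleanup is needed; for
// currTime == 500 and range == 20, alignUp(500, 256) == 512 < 520 -- the interval
// (500;520] contains the multiple 512, so a cleanup is triggered.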

bool LargeObjectCache::doCleanup(uintptr_t currTime, bool doThreshDecr)
{
    if (!doThreshDecr)
        extMemPool->allLocalCaches.markUnused();

    bool large_cache_cleaned = largeCache.regularCleanup(extMemPool, currTime, doThreshDecr);
    bool huge_cache_cleaned = hugeCache.regularCleanup(extMemPool, currTime, doThreshDecr);
    return large_cache_cleaned || huge_cache_cleaned;
}

bool LargeObjectCache::decreasingCleanup()
{
    return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/true);
}

bool LargeObjectCache::regularCleanup()
{
    return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/false);
}

bool LargeObjectCache::cleanAll()
{
    bool large_cache_cleaned = largeCache.cleanAll(extMemPool);
    bool huge_cache_cleaned = hugeCache.cleanAll(extMemPool);
    return large_cache_cleaned || huge_cache_cleaned;
}

void LargeObjectCache::reset()
{
    largeCache.reset();
    hugeCache.reset();
}

template<typename Props>
LargeMemoryBlock *LargeObjectCacheImpl<Props>::get(ExtMemoryPool *extMemoryPool, size_t size)
{
    int idx = Props::sizeToIdx(size);

    LargeMemoryBlock *lmb = bin[idx].get(extMemoryPool, size, &bitMask, idx);

    if (lmb) {
        MALLOC_ITT_SYNC_ACQUIRED(bin+idx);
        STAT_increment(getThreadId(), ThreadCommonCounters, allocCachedLargeObj);
    }
    return lmb;
}

template<typename Props>
void LargeObjectCacheImpl<Props>::updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size)
{
    int idx = Props::sizeToIdx(size);
    MALLOC_ASSERT(idx < static_cast<int>(numBins), ASSERT_TEXT);
    bin[idx].updateUsedSize(extMemPool, op==decrease? -size : size, &bitMask, idx);
}

#if __TBB_MALLOC_LOCACHE_STAT
template<typename Props>
void LargeObjectCacheImpl<Props>::reportStat(FILE *f)
{
    size_t cachedSize = 0;
    for (int i=0; i<numBins; i++)
        cachedSize += bin[i].reportStat(i, f);
    fprintf(f, "total LOC size %lu MB\n", cachedSize/1024/1024);
}

void LargeObjectCache::reportStat(FILE *f)
{
    largeCache.reportStat(f);
    hugeCache.reportStat(f);
    fprintf(f, "cache time %lu\n", cacheCurrTime.load(std::memory_order_relaxed));
}
#endif

template<typename Props>
void LargeObjectCacheImpl<Props>::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *toCache)
{
    int toBinIdx = Props::sizeToIdx(toCache->unalignedSize);

    MALLOC_ITT_SYNC_RELEASING(bin+toBinIdx);
    bin[toBinIdx].putList(extMemPool, toCache, &bitMask, toBinIdx);
}

void LargeObjectCache::updateCacheState(DecreaseOrIncrease op, size_t size)
{
    if (size < maxLargeSize)
        largeCache.updateCacheState(extMemPool, op, size);
    else if (size < maxHugeSize)
        hugeCache.updateCacheState(extMemPool, op, size);
}

uintptr_t LargeObjectCache::getCurrTimeRange(uintptr_t range)
{
    return (cacheCurrTime.fetch_add(range) + 1);
}
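
// fetch_add hands each operation batch a disjoint slice of the global clock: the
// returned value is the first tick of the slice [start, start+range-1], and the
// caller's endTime (start + range) is the base used to convert the preprocessor's
// negative local times into global ones.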

void LargeObjectCache::registerRealloc(size_t oldSize, size_t newSize)
{
    updateCacheState(decrease, oldSize);
    updateCacheState(increase, alignToBin(newSize));
}

size_t LargeObjectCache::alignToBin(size_t size) {
    return size < maxLargeSize ? LargeCacheType::alignToBin(size) : HugeCacheType::alignToBin(size);
}

// Used for internal purposes
int LargeObjectCache::sizeToIdx(size_t size)
{
    MALLOC_ASSERT(size <= maxHugeSize, ASSERT_TEXT);
    return size < maxLargeSize ?
        LargeCacheType::sizeToIdx(size) :
        LargeCacheType::numBins + HugeCacheType::sizeToIdx(size);
}
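
// The combined index space is [0, LargeCacheType::numBins + HugeCacheType::numBins):
// large-cache bins come first and huge-cache bins are offset by LargeCacheType::numBins,
// so a single integer can address a bin in either cache.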

void LargeObjectCache::putList(LargeMemoryBlock *list)
{
    LargeMemoryBlock *toProcess, *n;

    for (LargeMemoryBlock *curr = list; curr; curr = toProcess) {
        LargeMemoryBlock *tail = curr;
        toProcess = curr->next;
        if (!sizeInCacheRange(curr->unalignedSize)) {
            extMemPool->backend.returnLargeObject(curr);
            continue;
        }
        int currIdx = sizeToIdx(curr->unalignedSize);

        // Find all blocks that fit the same bin. A more efficient sorting
        // algorithm is not used because the list is short (commonly,
        // LocalLOC's HIGH_MARK-LOW_MARK, i.e., 24 items).
        for (LargeMemoryBlock *b = toProcess; b; b = n) {
            n = b->next;
            if (sizeToIdx(b->unalignedSize) == currIdx) {
                tail->next = b;
                tail = b;
                if (toProcess == b)
                    toProcess = toProcess->next;
                else {
                    b->prev->next = b->next;
                    if (b->next)
                        b->next->prev = b->prev;
                }
            }
        }
        tail->next = nullptr;
        if (curr->unalignedSize < maxLargeSize)
            largeCache.putList(extMemPool, curr);
        else
            hugeCache.putList(extMemPool, curr);
    }
}
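
// Example (illustrative): for an input list with bin indices [3, 7, 3, 7], the outer
// loop first extracts and caches the chain of both 3s, then the chain of both 7s; each
// putList call therefore receives blocks of a single bin size, which is what
// CacheBin::putList assumes when it computes num * size.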

void LargeObjectCache::put(LargeMemoryBlock *largeBlock)
{
    size_t blockSize = largeBlock->unalignedSize;
    if (sizeInCacheRange(blockSize)) {
        largeBlock->next = nullptr;
        if (blockSize < maxLargeSize)
            largeCache.putList(extMemPool, largeBlock);
        else
            hugeCache.putList(extMemPool, largeBlock);
    } else {
        extMemPool->backend.returnLargeObject(largeBlock);
    }
}

LargeMemoryBlock *LargeObjectCache::get(size_t size)
{
    MALLOC_ASSERT( size >= minLargeSize, ASSERT_TEXT );
    if (sizeInCacheRange(size)) {
        return size < maxLargeSize ?
            largeCache.get(extMemPool, size) : hugeCache.get(extMemPool, size);
    }
    return nullptr;
}

LargeMemoryBlock *ExtMemoryPool::mallocLargeObject(MemoryPool *pool, size_t allocationSize)
{
#if __TBB_MALLOC_LOCACHE_STAT
    mallocCalls++;
    memAllocKB.fetch_add(allocationSize/1024);
#endif
    LargeMemoryBlock* lmb = loc.get(allocationSize);
    if (!lmb) {
        BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true);
        if (backRefIdx.isInvalid())
            return nullptr;

        // unalignedSize is set in getLargeBlock
        lmb = backend.getLargeBlock(allocationSize);
        if (!lmb) {
            removeBackRef(backRefIdx);
            // the cache miss above already accounted allocationSize as used; undo that
            loc.updateCacheState(decrease, allocationSize);
            return nullptr;
        }
        lmb->backRefIdx = backRefIdx;
        lmb->pool = pool;
        STAT_increment(getThreadId(), ThreadCommonCounters, allocNewLargeObj);
    } else {
#if __TBB_MALLOC_LOCACHE_STAT
        cacheHits++;
        memHitKB.fetch_add(allocationSize/1024);
#endif
    }
    return lmb;
}

void ExtMemoryPool::freeLargeObject(LargeMemoryBlock *mBlock)
{
    loc.put(mBlock);
}

void ExtMemoryPool::freeLargeObjectList(LargeMemoryBlock *head)
{
    loc.putList(head);
}

bool ExtMemoryPool::softCachesCleanup()
{
    return loc.regularCleanup();
}

bool ExtMemoryPool::hardCachesCleanup()
{
    // Thread-local caches must be cleaned before the LOC,
    // because objects from thread-local caches can be released to the LOC
    bool ret = releaseAllLocalCaches();
    ret |= orphanedBlocks.cleanup(&backend);
    ret |= loc.cleanAll();
    ret |= backend.clean();
    return ret;
}

#if BACKEND_HAS_MREMAP
void *ExtMemoryPool::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
{
    const size_t oldUnalignedSize = ((LargeObjectHdr*)ptr - 1)->memoryBlock->unalignedSize;
    void *o = backend.remap(ptr, oldSize, newSize, alignment);
    if (o) {
        LargeMemoryBlock *lmb = ((LargeObjectHdr*)o - 1)->memoryBlock;
        loc.registerRealloc(oldUnalignedSize, lmb->unalignedSize);
    }
    return o;
}
#endif /* BACKEND_HAS_MREMAP */

/*********** End allocation of large objects **********/

} // namespace internal
} // namespace rml

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    #pragma warning(pop)
#endif
1058