1 /*
2     Copyright (c) 2005-2022 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */

#include "tbbmalloc_internal.h"
#include "../src/tbb/environment.h"

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    // Suppress warning: unary minus operator applied to unsigned type, result still unsigned
    // TBB_REVAMP_TODO: review this warning
    #pragma warning(push)
    #pragma warning(disable:4146)
#endif

/******************************* Allocation of large objects *********************************************/

namespace rml {
namespace internal {

/* ---------------------------- Large Object cache init section ---------------------------------------- */

void LargeObjectCache::init(ExtMemoryPool *memPool)
{
    extMemPool = memPool;
    // scalable_allocation_mode can be called before allocator initialization; respect such a manual request
    if (hugeSizeThreshold == 0) {
        // Initialize the huge size threshold if the environment variable was set
        long requestedThreshold = tbb::detail::r1::GetIntegralEnvironmentVariable("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD");
        // Use a valid environment value, or initialize with the maximum possible value by default
        if (requestedThreshold != -1) {
            setHugeSizeThreshold(requestedThreshold);
        } else {
            setHugeSizeThreshold(maxHugeSize);
        }
    }
}
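
// Illustrative usage (an assumed invocation, not from the source): the
// environment variable takes a byte count, e.g.
//     TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD=4194304
// caches "huge" objects of 4 MiB and above, while an unset variable (reported
// as -1 by GetIntegralEnvironmentVariable) keeps the maximum default.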

/* ----------------------------- Huge size threshold settings ----------------------------------------- */

void LargeObjectCache::setHugeSizeThreshold(size_t value)
{
    // Valid only in the huge cache range: [maxLargeSize, maxHugeSize].
    if (value <= maxHugeSize) {
        hugeSizeThreshold = value >= maxLargeSize ? alignToBin(value) : maxLargeSize;

        // Calculate local indexes for the global threshold size (for fast search inside a regular cleanup)
        largeCache.hugeSizeThresholdIdx = LargeCacheType::numBins;
        hugeCache.hugeSizeThresholdIdx = HugeCacheType::sizeToIdx(hugeSizeThreshold);
    }
}

bool LargeObjectCache::sizeInCacheRange(size_t size)
{
    return size <= maxHugeSize && (size <= defaultMaxHugeSize || size >= hugeSizeThreshold);
}
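
// Note: the cacheable range is thus two intervals. Sizes up to
// defaultMaxHugeSize are always cacheable; sizes in
// [hugeSizeThreshold, maxHugeSize] become cacheable once the threshold is set.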

/* ----------------------------------------------------------------------------------------------------- */

/* The functor called by the aggregator for the operation list */
template<typename Props>
class CacheBinFunctor {
    typename LargeObjectCacheImpl<Props>::CacheBin *const bin;
    ExtMemoryPool *const extMemPool;
    typename LargeObjectCacheImpl<Props>::BinBitMask *const bitMask;
    const int idx;

    LargeMemoryBlock *toRelease;
    bool needCleanup;
    uintptr_t currTime;

    /* Do preprocessing on the operation list. */
    /* All OP_PUT_LIST operations are merged into one operation.
       All OP_GET operations are merged with the OP_PUT_LIST operations, but
       this requires updating the moving average value in the bin.
       Only the last OP_CLEAN_TO_THRESHOLD operation matters.
       The OP_CLEAN_ALL operation should also be performed only once;
       moreover, it cancels the OP_CLEAN_TO_THRESHOLD operation. */
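    /* Illustrative merge (an assumed trace, not from the source): a pending list
       [GET, PUT{b1,b2}, CLEAN_TO_THRESHOLD(t1), CLEAN_TO_THRESHOLD(t2), CLEAN_ALL]
       collapses so that the GET is satisfied directly from the put list, and a
       single CLEAN_ALL survives, cancelling both threshold cleanups. */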
    class OperationPreprocessor {
        // TODO: remove the dependency on CacheBin.
        typename LargeObjectCacheImpl<Props>::CacheBin *const  bin;

        /* Contains the relative time in the operation list.
           It counts in the reverse order since the aggregator also
           provides operations in the reverse order. */
        uintptr_t lclTime;

        /* opGet contains only the OP_GET operations that could not be merged with OP_PUT operations;
           opClean contains all OP_CLEAN_TO_THRESHOLD and OP_CLEAN_ALL operations. */
        CacheBinOperation *opGet, *opClean;
        /* The time of the last OP_CLEAN_TO_THRESHOLD operation */
        uintptr_t cleanTime;

        /* lastGetOpTime - the time of the last OP_GET operation.
           lastGet - the same meaning as CacheBin::lastGet */
        uintptr_t lastGetOpTime, lastGet;

        /* The total sum of all usedSize changes requested with CBOP_UPDATE_USED_SIZE operations. */
        size_t updateUsedSize;

        /* The list of blocks for the OP_PUT_LIST operation. */
        LargeMemoryBlock *head, *tail;
        int putListNum;

        /* Whether OP_CLEAN_ALL is requested. */
        bool isCleanAll;

        inline void commitOperation(CacheBinOperation *op) const;
        inline void addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const;
        bool getFromPutList(CacheBinOperation* opGet, uintptr_t currTime);
        void addToPutList( LargeMemoryBlock *head, LargeMemoryBlock *tail, int num );

    public:
        OperationPreprocessor(typename LargeObjectCacheImpl<Props>::CacheBin *bin) :
            bin(bin), lclTime(0), opGet(nullptr), opClean(nullptr), cleanTime(0),
            lastGetOpTime(0), updateUsedSize(0), head(nullptr), isCleanAll(false)  {}
        void operator()(CacheBinOperation* opList);
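        // Note: lclTime counts down from zero while operations are processed in
        // reverse order, so negating the unsigned value yields the length of the
        // consumed time range (this is the unary minus on an unsigned type that
        // the suppressed warning C4146 above refers to).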
        uintptr_t getTimeRange() const { return -lclTime; }

        friend class CacheBinFunctor;
    };

public:
    CacheBinFunctor(typename LargeObjectCacheImpl<Props>::CacheBin *bin, ExtMemoryPool *extMemPool,
                    typename LargeObjectCacheImpl<Props>::BinBitMask *bitMask, int idx) :
        bin(bin), extMemPool(extMemPool), bitMask(bitMask), idx(idx), toRelease(nullptr), needCleanup(false) {}
    void operator()(CacheBinOperation* opList);

    bool isCleanupNeeded() const { return needCleanup; }
    LargeMemoryBlock *getToRelease() const { return toRelease; }
    uintptr_t getCurrTime() const { return currTime; }
};

/* ---------------- Cache Bin Aggregator Operation Helpers ---------------- */

// The list of structures which describe the operation data
struct OpGet {
    static const CacheBinOperationType type = CBOP_GET;
    LargeMemoryBlock **res;
    size_t size;
    uintptr_t currTime;
};

struct OpPutList {
    static const CacheBinOperationType type = CBOP_PUT_LIST;
    LargeMemoryBlock *head;
};

struct OpCleanToThreshold {
    static const CacheBinOperationType type = CBOP_CLEAN_TO_THRESHOLD;
    LargeMemoryBlock **res;
    uintptr_t currTime;
};

struct OpCleanAll {
    static const CacheBinOperationType type = CBOP_CLEAN_ALL;
    LargeMemoryBlock **res;
};

struct OpUpdateUsedSize {
    static const CacheBinOperationType type = CBOP_UPDATE_USED_SIZE;
    size_t size;
};

union CacheBinOperationData {
private:
    OpGet opGet;
    OpPutList opPutList;
    OpCleanToThreshold opCleanToThreshold;
    OpCleanAll opCleanAll;
    OpUpdateUsedSize opUpdateUsedSize;
};

// Forward declarations
template <typename OpTypeData> OpTypeData& opCast(CacheBinOperation &op);

// Describes the aggregator operation
struct CacheBinOperation : public MallocAggregatedOperation<CacheBinOperation>::type {
    CacheBinOperationType type;

    template <typename OpTypeData>
    CacheBinOperation(OpTypeData &d, CacheBinOperationStatus st = CBST_WAIT) {
        opCast<OpTypeData>(*this) = d;
        type = OpTypeData::type;
        MallocAggregatedOperation<CacheBinOperation>::type::status = st;
    }
private:
    CacheBinOperationData data;

    template <typename OpTypeData>
    friend OpTypeData& opCast(CacheBinOperation &op);
};

// The opCast function could be a member of CacheBinOperation, but that would
// introduce a slight stylistic ambiguity: it would look like a getter (with a
// cast) for the CacheBinOperation::data member, while it must return a
// reference to avoid a proliferation of getter/setter calls. A global cast in
// the style of static_cast (or reinterpret_cast) is therefore more readable
// and has more explicit semantics.
template <typename OpTypeData>
OpTypeData& opCast(CacheBinOperation &op) {
    return *reinterpret_cast<OpTypeData*>(&op.data);
}
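
// Illustrative sketch (not part of the build): constructing an operation and
// reading its typed data back through opCast, assuming some request size `sz`:
//     LargeMemoryBlock *lmb = nullptr;
//     OpGet data = {&lmb, sz};
//     CacheBinOperation op(data);          // copies OpGet into the data union
//     size_t s = opCast<OpGet>(op).size;   // typed view of the same bytes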

/* ------------------------------------------------------------------------ */

#if __TBB_MALLOC_LOCACHE_STAT
std::atomic<intptr_t> mallocCalls, cacheHits;
std::atomic<intptr_t> memAllocKB, memHitKB;
#endif

#if MALLOC_DEBUG
inline bool lessThanWithOverflow(intptr_t a, intptr_t b)
{
    return (a < b && (b - a < UINTPTR_MAX/2)) ||
           (a > b && (a - b > UINTPTR_MAX/2));
}
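
// Illustrative behavior (assuming 32-bit uintptr_t for brevity):
//     lessThanWithOverflow(5, 10)            -> true  (plainly older)
//     lessThanWithOverflow(0xFFFFFFF0, 0x10) -> true  (timestamp wrapped, so the
//                                                      large value is the older one)
//     lessThanWithOverflow(0x10, 0xFFFFFFF0) -> false (0x10 is the newer time)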
#endif

/* ----------------------------------- Operation processing methods ------------------------------------ */

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::commitOperation(CacheBinOperation *op) const
{
    op->status.store(CBST_DONE, std::memory_order_release);
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const
{
    op->next = *opList;
    *opList = op;
}

template<typename Props> bool CacheBinFunctor<Props>::
    OperationPreprocessor::getFromPutList(CacheBinOperation *opGet, uintptr_t currTime)
{
    if ( head ) {
        uintptr_t age = head->age;
        LargeMemoryBlock *next = head->next;
        *opCast<OpGet>(*opGet).res = head;
        commitOperation( opGet );
        head = next;
        putListNum--;
        MALLOC_ASSERT( putListNum>=0, ASSERT_TEXT );

        // use moving average with current hit interval
        bin->updateMeanHitRange( currTime - age );
        return true;
    }
    return false;
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::addToPutList(LargeMemoryBlock *h, LargeMemoryBlock *t, int num)
{
    if ( head ) {
        MALLOC_ASSERT( tail, ASSERT_TEXT );
        tail->next = h;
        h->prev = tail;
        tail = t;
        putListNum += num;
    } else {
        head = h;
        tail = t;
        putListNum = num;
    }
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::operator()(CacheBinOperation* opList)
{
    for ( CacheBinOperation *op = opList, *opNext; op; op = opNext ) {
        opNext = op->next;
        switch ( op->type ) {
        case CBOP_GET:
            {
                lclTime--;
                if ( !lastGetOpTime ) {
                    lastGetOpTime = lclTime;
                    lastGet = 0;
                } else if ( !lastGet ) lastGet = lclTime;

                if ( !getFromPutList(op,lclTime) ) {
                    opCast<OpGet>(*op).currTime = lclTime;
                    addOpToOpList( op, &opGet );
                }
            }
            break;

        case CBOP_PUT_LIST:
            {
                LargeMemoryBlock *head = opCast<OpPutList>(*op).head;
                LargeMemoryBlock *curr = head, *prev = nullptr;

                int num = 0;
                do {
                    // prev pointers were not kept while assigning blocks to bins; set them now
                    curr->prev = prev;

                    // Save the local times to the memory blocks. Local times are necessary
                    // for the getFromPutList function, which updates the hit range value in
                    // CacheBin when OP_GET and OP_PUT_LIST operations are merged successfully.
                    // The age will be updated to the correct global time after preprocessing,
                    // when the global cache time is updated.
                    curr->age = --lclTime;

                    prev = curr;
                    num += 1;

                    STAT_increment(getThreadId(), ThreadCommonCounters, cacheLargeObj);
                } while ((curr = curr->next) != nullptr);

                LargeMemoryBlock *tail = prev;
                addToPutList(head, tail, num);

                while ( opGet ) {
                    CacheBinOperation *next = opGet->next;
                    if ( !getFromPutList(opGet, opCast<OpGet>(*opGet).currTime) )
                        break;
                    opGet = next;
                }
            }
            break;

        case CBOP_UPDATE_USED_SIZE:
            updateUsedSize += opCast<OpUpdateUsedSize>(*op).size;
            commitOperation( op );
            break;

        case CBOP_CLEAN_ALL:
            isCleanAll = true;
            addOpToOpList( op, &opClean );
            break;

        case CBOP_CLEAN_TO_THRESHOLD:
            {
                uintptr_t currTime = opCast<OpCleanToThreshold>(*op).currTime;
                // We don't worry about currTime overflow since it is a rare
                // occurrence and doesn't affect correctness
                cleanTime = cleanTime < currTime ? currTime : cleanTime;
                addOpToOpList( op, &opClean );
            }
            break;

        default:
            MALLOC_ASSERT( false, "Unknown operation." );
        }
    }
    MALLOC_ASSERT( !( opGet && head ), "Not all put/get pairs are processed!" );
}

template<typename Props> void CacheBinFunctor<Props>::operator()(CacheBinOperation* opList)
{
    MALLOC_ASSERT( opList, "Empty operation list is passed into operation handler." );

    OperationPreprocessor prep(bin);
    prep(opList);

    if ( uintptr_t timeRange = prep.getTimeRange() ) {
        uintptr_t startTime = extMemPool->loc.getCurrTimeRange(timeRange);
        // endTime is used as the current (base) time since the local time is negative.
        uintptr_t endTime = startTime + timeRange;

        if ( prep.lastGetOpTime && prep.lastGet ) bin->setLastGet(prep.lastGet+endTime);

        if ( CacheBinOperation *opGet = prep.opGet ) {
            bool isEmpty = false;
            do {
#if __TBB_MALLOC_WHITEBOX_TEST
                tbbmalloc_whitebox::locGetProcessed++;
#endif
                const OpGet &opGetData = opCast<OpGet>(*opGet);
                if ( !isEmpty ) {
                    if ( LargeMemoryBlock *res = bin->get() ) {
                        uintptr_t getTime = opGetData.currTime + endTime;
                        // use moving average with current hit interval
                        bin->updateMeanHitRange( getTime - res->age);
                        bin->updateCachedSize( -opGetData.size );
                        *opGetData.res = res;
                    } else {
                        isEmpty = true;
                        uintptr_t lastGetOpTime = prep.lastGetOpTime+endTime;
                        bin->forgetOutdatedState(lastGetOpTime);
                        bin->updateAgeThreshold(lastGetOpTime);
                    }
                }

                CacheBinOperation *opNext = opGet->next;
                bin->updateUsedSize( opGetData.size, bitMask, idx );
                prep.commitOperation( opGet );
                opGet = opNext;
            } while ( opGet );
            if ( prep.lastGetOpTime )
                bin->setLastGet( prep.lastGetOpTime + endTime );
        } else if ( LargeMemoryBlock *curr = prep.head ) {
            curr->prev = nullptr;
            while ( curr ) {
                // Update local times to global times
                curr->age += endTime;
                curr=curr->next;
            }
#if __TBB_MALLOC_WHITEBOX_TEST
            tbbmalloc_whitebox::locPutProcessed+=prep.putListNum;
#endif
            toRelease = bin->putList(prep.head, prep.tail, bitMask, idx, prep.putListNum, extMemPool->loc.hugeSizeThreshold);
        }
        needCleanup = extMemPool->loc.isCleanupNeededOnRange(timeRange, startTime);
        currTime = endTime - 1;
    }

    if ( CacheBinOperation *opClean = prep.opClean ) {
        if ( prep.isCleanAll )
            *opCast<OpCleanAll>(*opClean).res = bin->cleanAll(bitMask, idx);
        else
            *opCast<OpCleanToThreshold>(*opClean).res = bin->cleanToThreshold(prep.cleanTime, bitMask, idx);

        CacheBinOperation *opNext = opClean->next;
        prep.commitOperation( opClean );

        while ((opClean = opNext) != nullptr) {
            opNext = opClean->next;
            prep.commitOperation(opClean);
        }
    }

    if ( size_t size = prep.updateUsedSize )
        bin->updateUsedSize(size, bitMask, idx);
}
/* ----------------------------------------------------------------------------------------------------- */
/* --------------------------- Methods for creating and executing operations --------------------------- */
template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime)
{
    CacheBinFunctor<Props> func( this, extMemPool, bitMask, idx );
    aggregator.execute( op, func, longLifeTime );

    if ( LargeMemoryBlock *toRelease = func.getToRelease()) {
        extMemPool->backend.returnLargeObject(toRelease);
    }

    if ( func.isCleanupNeeded() ) {
        extMemPool->loc.doCleanup( func.getCurrTime(), /*doThreshDecr=*/false);
    }
}
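
// Note: all bin mutations funnel through the aggregator. One thread becomes the
// executor of the whole pending operation list, while the others merely wait for
// their op's status to become CBST_DONE (or do not wait at all for CBST_NOWAIT
// puts), so no per-bin lock is taken on this path.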

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *lmb=nullptr;
    OpGet data = {&lmb, size};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
    return lmb;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx)
{
    MALLOC_ASSERT(sizeof(LargeMemoryBlock)+sizeof(CacheBinOperation)<=head->unalignedSize, "CacheBinOperation is too large to be placed in LargeMemoryBlock!");

    OpPutList data = {head};
    CacheBinOperation *op = new (head+1) CacheBinOperation(data, CBST_NOWAIT);
    ExecuteOperation( op, extMemPool, bitMask, idx, false );
}
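
// Note: the operation record is placement-constructed inside the cached block
// itself (at head+1), so the put path needs no extra allocation; the assert
// above guarantees the block is large enough to host both the block header and
// the operation.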

template<typename Props> bool LargeObjectCacheImpl<Props>::
    CacheBin::cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx)
{
    LargeMemoryBlock *toRelease = nullptr;

    /* oldest may be more recent than age; that is why a cast to a signed type
       is used. Age overflow is also handled correctly. */
    if (last.load(std::memory_order_relaxed) &&
        (intptr_t)(currTime - oldest.load(std::memory_order_relaxed)) > ageThreshold.load(std::memory_order_relaxed)) {
        OpCleanToThreshold data = {&toRelease, currTime};
        CacheBinOperation op(data);
        ExecuteOperation( &op, extMemPool, bitMask, idx );
    }
    bool released = toRelease;

    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}

template<typename Props> bool LargeObjectCacheImpl<Props>::
    CacheBin::releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *toRelease = nullptr;

    if (last.load(std::memory_order_relaxed)) {
        OpCleanAll data = {&toRelease};
        CacheBinOperation op(data);
        ExecuteOperation(&op, extMemPool, bitMask, idx);
    }
    bool released = toRelease;

    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        MALLOC_ASSERT(!helper || lessThanWithOverflow(helper->age, toRelease->age),
                      ASSERT_TEXT);
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    OpUpdateUsedSize data = {size};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
}

/* ------------------------------ Unsafe methods used with the aggregator ------------------------------ */

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask, int idx, int num, size_t hugeSizeThreshold)
{
    size_t size = head->unalignedSize;
    usedSize.store(usedSize.load(std::memory_order_relaxed) - num * size, std::memory_order_relaxed);
    MALLOC_ASSERT( !last.load(std::memory_order_relaxed) ||
        (last.load(std::memory_order_relaxed)->age != 0 && last.load(std::memory_order_relaxed)->age != -1U), ASSERT_TEXT );
    MALLOC_ASSERT( (tail==head && num==1) || (tail!=head && num>1), ASSERT_TEXT );
    LargeMemoryBlock *toRelease = nullptr;
    if (size < hugeSizeThreshold && !lastCleanedAge) {
        // The first object of this size is being released.
        // Do not cache it, and remember when this occurred
        // so it can be taken into account on a cache miss.
        lastCleanedAge = tail->age;
        toRelease = tail;
        tail = tail->prev;
        if (tail)
            tail->next = nullptr;
        else
            head = nullptr;
        num--;
    }
    if (num) {
        // add the [head;tail] list to the cache
        MALLOC_ASSERT( tail, ASSERT_TEXT );
        tail->next = first;
        if (first)
            first->prev = tail;
        first = head;
        if (!last.load(std::memory_order_relaxed)) {
            MALLOC_ASSERT(0 == oldest.load(std::memory_order_relaxed), ASSERT_TEXT);
            oldest.store(tail->age, std::memory_order_relaxed);
            last.store(tail, std::memory_order_relaxed);
        }

        cachedSize.store(cachedSize.load(std::memory_order_relaxed) + num * size, std::memory_order_relaxed);
    }

    // No used objects and nothing in the bin: mark the bin as empty
    if (!usedSize.load(std::memory_order_relaxed) && !first)
        bitMask->set(idx, false);

    return toRelease;
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::get()
{
    LargeMemoryBlock *result=first;
    if (result) {
        first = result->next;
        if (first)
            first->prev = nullptr;
        else {
            last.store(nullptr, std::memory_order_relaxed);
            oldest.store(0, std::memory_order_relaxed);
        }
    }

    return result;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::forgetOutdatedState(uintptr_t currTime)
{
    // If the time since the last get is LongWaitFactor times more than ageThreshold
    // for the bin, treat the bin as rarely used and forget everything we know
    // about it.
    // If LongWaitFactor is too small, we forget too early, which prevents
    // good caching; if it is too high, we may cache blocks with unrelated
    // usage patterns.
    const uintptr_t sinceLastGet = currTime - lastGet;
    bool doCleanup = false;

    intptr_t threshold = ageThreshold.load(std::memory_order_relaxed);
    if (threshold)
        doCleanup = sinceLastGet > Props::LongWaitFactor * threshold;
    else if (lastCleanedAge)
        doCleanup = sinceLastGet > Props::LongWaitFactor * (lastCleanedAge - lastGet);

    if (doCleanup) {
        lastCleanedAge = 0;
        ageThreshold.store(0, std::memory_order_relaxed);
    }
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx)
{
    /* oldest may be more recent than age; that is why a cast to a signed type
       is used. Age overflow is also handled correctly. */
    if ( !last.load(std::memory_order_relaxed) ||
        (intptr_t)(currTime - last.load(std::memory_order_relaxed)->age) < ageThreshold.load(std::memory_order_relaxed) )
        return nullptr;

#if MALLOC_DEBUG
    uintptr_t nextAge = 0;
#endif
    do {
#if MALLOC_DEBUG
        // check that the list is ordered
        MALLOC_ASSERT(!nextAge || lessThanWithOverflow(nextAge, last.load(std::memory_order_relaxed)->age),
            ASSERT_TEXT);
        nextAge = last.load(std::memory_order_relaxed)->age;
#endif
        cachedSize.store(cachedSize.load(std::memory_order_relaxed) - last.load(std::memory_order_relaxed)->unalignedSize, std::memory_order_relaxed);
        last.store(last.load(std::memory_order_relaxed)->prev, std::memory_order_relaxed);
    } while (last.load(std::memory_order_relaxed) &&
        (intptr_t)(currTime - last.load(std::memory_order_relaxed)->age) > ageThreshold.load(std::memory_order_relaxed));

    LargeMemoryBlock *toRelease = nullptr;
    if (last.load(std::memory_order_relaxed)) {
        toRelease = last.load(std::memory_order_relaxed)->next;
        oldest.store(last.load(std::memory_order_relaxed)->age, std::memory_order_relaxed);
        last.load(std::memory_order_relaxed)->next = nullptr;
    } else {
        toRelease = first;
        first = nullptr;
        oldest.store(0, std::memory_order_relaxed);
        if (!usedSize.load(std::memory_order_relaxed))
            bitMask->set(idx, false);
    }
    MALLOC_ASSERT( toRelease, ASSERT_TEXT );
    lastCleanedAge = toRelease->age;

    return toRelease;
}
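
// Note: the bin list runs from `first` (most recently cached, newest age) to
// `last` (oldest), so the cleanup above walks backwards from `last` and
// detaches the stale tail of the list in one piece.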

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::cleanAll(BinBitMask *bitMask, int idx)
{
    if (!last.load(std::memory_order_relaxed)) return nullptr;

    LargeMemoryBlock *toRelease = first;
    last.store(nullptr, std::memory_order_relaxed);
    first = nullptr;
    oldest.store(0, std::memory_order_relaxed);
    cachedSize.store(0, std::memory_order_relaxed);
    if (!usedSize.load(std::memory_order_relaxed))
        bitMask->set(idx, false);

    return toRelease;
}

/* ----------------------------------------------------------------------------------------------------- */

#if __TBB_MALLOC_BACKEND_STAT
template<typename Props> size_t LargeObjectCacheImpl<Props>::
    CacheBin::reportStat(int num, FILE *f)
{
#if __TBB_MALLOC_LOCACHE_STAT
    if (first)
        fprintf(f, "%d(%lu): total %lu KB thr %ld lastCln %lu oldest %lu\n",
                num, num*Props::CacheStep+Props::MinSize,
                cachedSize.load(std::memory_order_relaxed)/1024,
                ageThreshold.load(std::memory_order_relaxed),
                lastCleanedAge, oldest.load(std::memory_order_relaxed));
#else
    suppress_unused_warning(num);
    suppress_unused_warning(f);
#endif
    return cachedSize.load(std::memory_order_relaxed);
}
#endif

// Release objects from cache blocks that are older than ageThreshold
template<typename Props>
bool LargeObjectCacheImpl<Props>::regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currTime, bool doThreshDecr)
{
    bool released = false;
    BinsSummary binsSummary;

    // The huge size threshold is set below this cache's range (its local index is zero): nothing to clean
    if (hugeSizeThresholdIdx == 0) return false;

    // Start searching from the last bin below the huge size threshold (only such bins can be cleaned up)
    int startSearchIdx = hugeSizeThresholdIdx - 1;

    for (int i = bitMask.getMaxTrue(startSearchIdx); i >= 0; i = bitMask.getMaxTrue(i-1)) {
        bin[i].updateBinsSummary(&binsSummary);
        if (!doThreshDecr && tooLargeLOC.load(std::memory_order_relaxed) > 2 && binsSummary.isLOCTooLarge()) {
            // If the LOC has been too large for quite a long time, decrease the
            // threshold based on bin hit statistics.
            // For this, redo the cleanup from the beginning.
            // Note: on this iteration the total usedSz may not look too large
            // in comparison to the total cachedSz, as we have calculated it
            // only partially. We are OK with that.
            i = bitMask.getMaxTrue(startSearchIdx)+1;
            doThreshDecr = true;
            binsSummary.reset();
            continue;
        }
        if (doThreshDecr)
            bin[i].decreaseThreshold();

        if (bin[i].cleanToThreshold(extMemPool, &bitMask, currTime, i)) {
            released = true;
        }
    }
    // We want to detect whether the LOC stayed too large continuously for some
    // time, so races between incrementing and zeroing are acceptable, but the
    // increment itself must be atomic.
    if (binsSummary.isLOCTooLarge()) {
        tooLargeLOC++;
    } else {
        tooLargeLOC.store(0, std::memory_order_relaxed);
    }
    return released;
}

template<typename Props>
bool LargeObjectCacheImpl<Props>::cleanAll(ExtMemoryPool *extMemPool)
{
    bool released = false;
    for (int i = numBins-1; i >= 0; i--) {
        released |= bin[i].releaseAllToBackend(extMemPool, &bitMask, i);
    }
    return released;
}

template<typename Props>
void LargeObjectCacheImpl<Props>::reset() {
    tooLargeLOC.store(0, std::memory_order_relaxed);
    for (int i = numBins-1; i >= 0; i--)
        bin[i].init();
    bitMask.reset();
}

#if __TBB_MALLOC_WHITEBOX_TEST
template<typename Props>
size_t LargeObjectCacheImpl<Props>::getLOCSize() const
{
    size_t size = 0;
    for (int i = numBins-1; i >= 0; i--)
        size += bin[i].getSize();
    return size;
}

size_t LargeObjectCache::getLOCSize() const
{
    return largeCache.getLOCSize() + hugeCache.getLOCSize();
}

template<typename Props>
size_t LargeObjectCacheImpl<Props>::getUsedSize() const
{
    size_t size = 0;
    for (int i = numBins-1; i >= 0; i--)
        size += bin[i].getUsedSize();
    return size;
}

size_t LargeObjectCache::getUsedSize() const
{
    return largeCache.getUsedSize() + hugeCache.getUsedSize();
}
#endif // __TBB_MALLOC_WHITEBOX_TEST

inline bool LargeObjectCache::isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime)
{
    return range >= cacheCleanupFreq
        || currTime+range < currTime-1 // overflow: the interval crosses 0, a multiple of cacheCleanupFreq, so do cleanup
        // (prev;prev+range] contains n*cacheCleanupFreq
        || alignUp(currTime, cacheCleanupFreq)<currTime+range;
}
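
// Illustrative check (assuming cacheCleanupFreq == 256): for currTime = 1000 and
// range = 30, alignUp(1000, 256) = 1024 <= 1030, so the interval (1000; 1030]
// contains the multiple 1024 and a cleanup is triggered; with range = 20 it
// would not be.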

bool LargeObjectCache::doCleanup(uintptr_t currTime, bool doThreshDecr)
{
    if (!doThreshDecr)
        extMemPool->allLocalCaches.markUnused();
    return largeCache.regularCleanup(extMemPool, currTime, doThreshDecr)
        | hugeCache.regularCleanup(extMemPool, currTime, doThreshDecr);
}

bool LargeObjectCache::decreasingCleanup()
{
    return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/true);
}

bool LargeObjectCache::regularCleanup()
{
    return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/false);
}

bool LargeObjectCache::cleanAll()
{
    return largeCache.cleanAll(extMemPool) | hugeCache.cleanAll(extMemPool);
}

void LargeObjectCache::reset()
{
    largeCache.reset();
    hugeCache.reset();
}

template<typename Props>
LargeMemoryBlock *LargeObjectCacheImpl<Props>::get(ExtMemoryPool *extMemoryPool, size_t size)
{
    int idx = Props::sizeToIdx(size);

    LargeMemoryBlock *lmb = bin[idx].get(extMemoryPool, size, &bitMask, idx);

    if (lmb) {
        MALLOC_ITT_SYNC_ACQUIRED(bin+idx);
        STAT_increment(getThreadId(), ThreadCommonCounters, allocCachedLargeObj);
    }
    return lmb;
}

template<typename Props>
void LargeObjectCacheImpl<Props>::updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size)
{
    int idx = Props::sizeToIdx(size);
    MALLOC_ASSERT(idx<numBins, ASSERT_TEXT);
    bin[idx].updateUsedSize(extMemPool, op==decrease? -size : size, &bitMask, idx);
}

#if __TBB_MALLOC_LOCACHE_STAT
template<typename Props>
void LargeObjectCacheImpl<Props>::reportStat(FILE *f)
{
    size_t cachedSize = 0;
    for (int i=0; i<numBins; i++)
        cachedSize += bin[i].reportStat(i, f);
    fprintf(f, "total LOC size %lu MB\n", cachedSize/1024/1024);
}

void LargeObjectCache::reportStat(FILE *f)
{
    largeCache.reportStat(f);
    hugeCache.reportStat(f);
    fprintf(f, "cache time %lu\n", cacheCurrTime.load(std::memory_order_relaxed));
}
#endif

template<typename Props>
void LargeObjectCacheImpl<Props>::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *toCache)
{
    int toBinIdx = Props::sizeToIdx(toCache->unalignedSize);

    MALLOC_ITT_SYNC_RELEASING(bin+toBinIdx);
    bin[toBinIdx].putList(extMemPool, toCache, &bitMask, toBinIdx);
}

void LargeObjectCache::updateCacheState(DecreaseOrIncrease op, size_t size)
{
    if (size < maxLargeSize)
        largeCache.updateCacheState(extMemPool, op, size);
    else if (size < maxHugeSize)
        hugeCache.updateCacheState(extMemPool, op, size);
}

uintptr_t LargeObjectCache::getCurrTimeRange(uintptr_t range)
{
    return (cacheCurrTime.fetch_add(range) + 1);
}
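
// Note: fetch_add reserves a half-open interval of cache-time ticks for the
// caller: if cacheCurrTime was T before the call, getCurrTimeRange(r) returns
// T+1 and the caller may stamp block ages anywhere in [T+1, T+r].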

void LargeObjectCache::registerRealloc(size_t oldSize, size_t newSize)
{
    updateCacheState(decrease, oldSize);
    updateCacheState(increase, alignToBin(newSize));
}

size_t LargeObjectCache::alignToBin(size_t size) {
    return size < maxLargeSize ? LargeCacheType::alignToBin(size) : HugeCacheType::alignToBin(size);
}

// Used for internal purposes
int LargeObjectCache::sizeToIdx(size_t size)
{
    MALLOC_ASSERT(size <= maxHugeSize, ASSERT_TEXT);
    return size < maxLargeSize ?
        LargeCacheType::sizeToIdx(size) :
        LargeCacheType::numBins + HugeCacheType::sizeToIdx(size);
}

void LargeObjectCache::putList(LargeMemoryBlock *list)
{
    LargeMemoryBlock *toProcess, *n;

    for (LargeMemoryBlock *curr = list; curr; curr = toProcess) {
        LargeMemoryBlock *tail = curr;
        toProcess = curr->next;
        if (!sizeInCacheRange(curr->unalignedSize)) {
            extMemPool->backend.returnLargeObject(curr);
            continue;
        }
        int currIdx = sizeToIdx(curr->unalignedSize);

        // Find all blocks that fit the same bin. A more efficient sorting
        // algorithm is not used because the list is short (commonly,
        // LocalLOC's HIGH_MARK-LOW_MARK, i.e. 24 items).
        for (LargeMemoryBlock *b = toProcess; b; b = n) {
            n = b->next;
            if (sizeToIdx(b->unalignedSize) == currIdx) {
                tail->next = b;
                tail = b;
                if (toProcess == b)
                    toProcess = toProcess->next;
                else {
                    b->prev->next = b->next;
                    if (b->next)
                        b->next->prev = b->prev;
                }
            }
        }
        tail->next = nullptr;
        if (curr->unalignedSize < maxLargeSize)
            largeCache.putList(extMemPool, curr);
        else
            hugeCache.putList(extMemPool, curr);
    }
}

void LargeObjectCache::put(LargeMemoryBlock *largeBlock)
{
    size_t blockSize = largeBlock->unalignedSize;
    if (sizeInCacheRange(blockSize)) {
        largeBlock->next = nullptr;
        if (blockSize < maxLargeSize)
            largeCache.putList(extMemPool, largeBlock);
        else
            hugeCache.putList(extMemPool, largeBlock);
    } else {
        extMemPool->backend.returnLargeObject(largeBlock);
    }
}

LargeMemoryBlock *LargeObjectCache::get(size_t size)
{
    MALLOC_ASSERT( size >= minLargeSize, ASSERT_TEXT );
    if (sizeInCacheRange(size)) {
        return size < maxLargeSize ?
            largeCache.get(extMemPool, size) : hugeCache.get(extMemPool, size);
    }
    return nullptr;
}

LargeMemoryBlock *ExtMemoryPool::mallocLargeObject(MemoryPool *pool, size_t allocationSize)
{
#if __TBB_MALLOC_LOCACHE_STAT
    mallocCalls++;
    memAllocKB.fetch_add(allocationSize/1024);
#endif
    LargeMemoryBlock* lmb = loc.get(allocationSize);
    if (!lmb) {
        BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true);
        if (backRefIdx.isInvalid())
            return nullptr;

        // unalignedSize is set in getLargeBlock
        lmb = backend.getLargeBlock(allocationSize);
        if (!lmb) {
            removeBackRef(backRefIdx);
            loc.updateCacheState(decrease, allocationSize);
            return nullptr;
        }
        lmb->backRefIdx = backRefIdx;
        lmb->pool = pool;
        STAT_increment(getThreadId(), ThreadCommonCounters, allocNewLargeObj);
    } else {
#if __TBB_MALLOC_LOCACHE_STAT
        cacheHits++;
        memHitKB.fetch_add(allocationSize/1024);
#endif
    }
    return lmb;
}

void ExtMemoryPool::freeLargeObject(LargeMemoryBlock *mBlock)
{
    loc.put(mBlock);
}

void ExtMemoryPool::freeLargeObjectList(LargeMemoryBlock *head)
{
    loc.putList(head);
}

bool ExtMemoryPool::softCachesCleanup()
{
    return loc.regularCleanup();
}

bool ExtMemoryPool::hardCachesCleanup()
{
    // Thread-local caches must be cleaned before the LOC,
    // because objects from a thread-local cache can be released into the LOC
    bool ret = releaseAllLocalCaches();
    ret |= orphanedBlocks.cleanup(&backend);
    ret |= loc.cleanAll();
    ret |= backend.clean();
    return ret;
}

#if BACKEND_HAS_MREMAP
void *ExtMemoryPool::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
{
    const size_t oldUnalignedSize = ((LargeObjectHdr*)ptr - 1)->memoryBlock->unalignedSize;
    void *o = backend.remap(ptr, oldSize, newSize, alignment);
    if (o) {
        LargeMemoryBlock *lmb = ((LargeObjectHdr*)o - 1)->memoryBlock;
        loc.registerRealloc(oldUnalignedSize, lmb->unalignedSize);
    }
    return o;
}
#endif /* BACKEND_HAS_MREMAP */

/*********** End allocation of large objects **********/

} // namespace internal
} // namespace rml

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    #pragma warning(pop)
#endif