xref: /oneTBB/src/tbbmalloc/large_objects.cpp (revision b15aabb3)
151c0b2f7Stbbdev /*
2*b15aabb3Stbbdev     Copyright (c) 2005-2021 Intel Corporation
351c0b2f7Stbbdev 
451c0b2f7Stbbdev     Licensed under the Apache License, Version 2.0 (the "License");
551c0b2f7Stbbdev     you may not use this file except in compliance with the License.
651c0b2f7Stbbdev     You may obtain a copy of the License at
751c0b2f7Stbbdev 
851c0b2f7Stbbdev         http://www.apache.org/licenses/LICENSE-2.0
951c0b2f7Stbbdev 
1051c0b2f7Stbbdev     Unless required by applicable law or agreed to in writing, software
1151c0b2f7Stbbdev     distributed under the License is distributed on an "AS IS" BASIS,
1251c0b2f7Stbbdev     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1351c0b2f7Stbbdev     See the License for the specific language governing permissions and
1451c0b2f7Stbbdev     limitations under the License.
1551c0b2f7Stbbdev */
1651c0b2f7Stbbdev 
1751c0b2f7Stbbdev #include "tbbmalloc_internal.h"
1851c0b2f7Stbbdev #include "../tbb/environment.h"
1951c0b2f7Stbbdev 
2051c0b2f7Stbbdev #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
2151c0b2f7Stbbdev     // Suppress warning: unary minus operator applied to unsigned type, result still unsigned
2251c0b2f7Stbbdev     // TBB_REVAMP_TODO: review this warning
2351c0b2f7Stbbdev     #pragma warning(push)
2451c0b2f7Stbbdev     #pragma warning(disable:4146)
2551c0b2f7Stbbdev #endif
2651c0b2f7Stbbdev 
2751c0b2f7Stbbdev /******************************* Allocation of large objects *********************************************/
2851c0b2f7Stbbdev 
2951c0b2f7Stbbdev namespace rml {
3051c0b2f7Stbbdev namespace internal {
3151c0b2f7Stbbdev 
3251c0b2f7Stbbdev /* ---------------------------- Large Object cache init section ---------------------------------------- */
3351c0b2f7Stbbdev 
3451c0b2f7Stbbdev void LargeObjectCache::init(ExtMemoryPool *memPool)
3551c0b2f7Stbbdev {
3651c0b2f7Stbbdev     extMemPool = memPool;
3751c0b2f7Stbbdev     // scalable_allocation_mode can be called before allocator initialization, respect this manual request
3851c0b2f7Stbbdev     if (hugeSizeThreshold == 0) {
3951c0b2f7Stbbdev         // Huge size threshold initialization if environment variable was set
4051c0b2f7Stbbdev         long requestedThreshold = tbb::detail::r1::GetIntegralEnvironmentVariable("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD");
4151c0b2f7Stbbdev         // Read valid env or initialize by default with max possible values
4251c0b2f7Stbbdev         if (requestedThreshold != -1) {
4351c0b2f7Stbbdev             setHugeSizeThreshold(requestedThreshold);
4451c0b2f7Stbbdev         } else {
4551c0b2f7Stbbdev             setHugeSizeThreshold(maxHugeSize);
4651c0b2f7Stbbdev         }
4751c0b2f7Stbbdev     }
4851c0b2f7Stbbdev }
4951c0b2f7Stbbdev 
5051c0b2f7Stbbdev /* ----------------------------- Huge size threshold settings ----------------------------------------- */
5151c0b2f7Stbbdev 
5251c0b2f7Stbbdev void LargeObjectCache::setHugeSizeThreshold(size_t value)
5351c0b2f7Stbbdev {
5451c0b2f7Stbbdev     // Valid in the huge cache range: [MaxLargeSize, MaxHugeSize].
5551c0b2f7Stbbdev     if (value <= maxHugeSize) {
5651c0b2f7Stbbdev         hugeSizeThreshold = value >= maxLargeSize ? alignToBin(value) : maxLargeSize;
5751c0b2f7Stbbdev 
5851c0b2f7Stbbdev         // Calculate local indexes for the global threshold size (for fast search inside a regular cleanup)
5951c0b2f7Stbbdev         largeCache.hugeSizeThresholdIdx = LargeCacheType::numBins;
6051c0b2f7Stbbdev         hugeCache.hugeSizeThresholdIdx = HugeCacheType::sizeToIdx(hugeSizeThreshold);
6151c0b2f7Stbbdev     }
6251c0b2f7Stbbdev }
6351c0b2f7Stbbdev 
6451c0b2f7Stbbdev bool LargeObjectCache::sizeInCacheRange(size_t size)
6551c0b2f7Stbbdev {
6651c0b2f7Stbbdev     return size <= maxHugeSize && (size <= defaultMaxHugeSize || size >= hugeSizeThreshold);
6751c0b2f7Stbbdev }
6851c0b2f7Stbbdev 
6951c0b2f7Stbbdev /* ----------------------------------------------------------------------------------------------------- */
7051c0b2f7Stbbdev 
7151c0b2f7Stbbdev /* The functor called by the aggregator for the operation list */
7251c0b2f7Stbbdev template<typename Props>
7351c0b2f7Stbbdev class CacheBinFunctor {
7451c0b2f7Stbbdev     typename LargeObjectCacheImpl<Props>::CacheBin *const bin;
7551c0b2f7Stbbdev     ExtMemoryPool *const extMemPool;
7651c0b2f7Stbbdev     typename LargeObjectCacheImpl<Props>::BinBitMask *const bitMask;
7751c0b2f7Stbbdev     const int idx;
7851c0b2f7Stbbdev 
7951c0b2f7Stbbdev     LargeMemoryBlock *toRelease;
8051c0b2f7Stbbdev     bool needCleanup;
8151c0b2f7Stbbdev     uintptr_t currTime;
8251c0b2f7Stbbdev 
8351c0b2f7Stbbdev     /* Do preprocessing under the operation list. */
8451c0b2f7Stbbdev     /* All the OP_PUT_LIST operations are merged in the one operation.
8551c0b2f7Stbbdev        All OP_GET operations are merged with the OP_PUT_LIST operations but
8651c0b2f7Stbbdev        it demands the update of the moving average value in the bin.
8751c0b2f7Stbbdev        Only the last OP_CLEAN_TO_THRESHOLD operation has sense.
8851c0b2f7Stbbdev        The OP_CLEAN_ALL operation also should be performed only once.
8951c0b2f7Stbbdev        Moreover it cancels the OP_CLEAN_TO_THRESHOLD operation. */
9051c0b2f7Stbbdev     class OperationPreprocessor {
9151c0b2f7Stbbdev         // TODO: remove the dependency on CacheBin.
9251c0b2f7Stbbdev         typename LargeObjectCacheImpl<Props>::CacheBin *const  bin;
9351c0b2f7Stbbdev 
9451c0b2f7Stbbdev         /* Contains the relative time in the operation list.
9551c0b2f7Stbbdev            It counts in the reverse order since the aggregator also
9651c0b2f7Stbbdev            provides operations in the reverse order. */
9751c0b2f7Stbbdev         uintptr_t lclTime;
9851c0b2f7Stbbdev 
9951c0b2f7Stbbdev         /* opGet contains only OP_GET operations which cannot be merge with OP_PUT operations
10051c0b2f7Stbbdev            opClean contains all OP_CLEAN_TO_THRESHOLD and OP_CLEAN_ALL operations. */
10151c0b2f7Stbbdev         CacheBinOperation *opGet, *opClean;
10251c0b2f7Stbbdev         /* The time of the last OP_CLEAN_TO_THRESHOLD operations */
10351c0b2f7Stbbdev         uintptr_t cleanTime;
10451c0b2f7Stbbdev 
10551c0b2f7Stbbdev         /* lastGetOpTime - the time of the last OP_GET operation.
10651c0b2f7Stbbdev            lastGet - the same meaning as CacheBin::lastGet */
10751c0b2f7Stbbdev         uintptr_t lastGetOpTime, lastGet;
10851c0b2f7Stbbdev 
10951c0b2f7Stbbdev         /* The total sum of all usedSize changes requested with CBOP_UPDATE_USED_SIZE operations. */
11051c0b2f7Stbbdev         size_t updateUsedSize;
11151c0b2f7Stbbdev 
11251c0b2f7Stbbdev         /* The list of blocks for the OP_PUT_LIST operation. */
11351c0b2f7Stbbdev         LargeMemoryBlock *head, *tail;
11451c0b2f7Stbbdev         int putListNum;
11551c0b2f7Stbbdev 
11651c0b2f7Stbbdev         /* if the OP_CLEAN_ALL is requested. */
11751c0b2f7Stbbdev         bool isCleanAll;
11851c0b2f7Stbbdev 
11951c0b2f7Stbbdev         inline void commitOperation(CacheBinOperation *op) const;
12051c0b2f7Stbbdev         inline void addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const;
12151c0b2f7Stbbdev         bool getFromPutList(CacheBinOperation* opGet, uintptr_t currTime);
12251c0b2f7Stbbdev         void addToPutList( LargeMemoryBlock *head, LargeMemoryBlock *tail, int num );
12351c0b2f7Stbbdev 
12451c0b2f7Stbbdev     public:
12551c0b2f7Stbbdev         OperationPreprocessor(typename LargeObjectCacheImpl<Props>::CacheBin *bin) :
12651c0b2f7Stbbdev             bin(bin), lclTime(0), opGet(NULL), opClean(NULL), cleanTime(0),
12751c0b2f7Stbbdev             lastGetOpTime(0), updateUsedSize(0), head(NULL), isCleanAll(false)  {}
12851c0b2f7Stbbdev         void operator()(CacheBinOperation* opList);
12951c0b2f7Stbbdev         uintptr_t getTimeRange() const { return -lclTime; }
13051c0b2f7Stbbdev 
13151c0b2f7Stbbdev         friend class CacheBinFunctor;
13251c0b2f7Stbbdev     };
13351c0b2f7Stbbdev 
13451c0b2f7Stbbdev public:
13551c0b2f7Stbbdev     CacheBinFunctor(typename LargeObjectCacheImpl<Props>::CacheBin *bin, ExtMemoryPool *extMemPool,
13651c0b2f7Stbbdev                     typename LargeObjectCacheImpl<Props>::BinBitMask *bitMask, int idx) :
13751c0b2f7Stbbdev         bin(bin), extMemPool(extMemPool), bitMask(bitMask), idx(idx), toRelease(NULL), needCleanup(false) {}
13851c0b2f7Stbbdev     void operator()(CacheBinOperation* opList);
13951c0b2f7Stbbdev 
14051c0b2f7Stbbdev     bool isCleanupNeeded() const { return needCleanup; }
14151c0b2f7Stbbdev     LargeMemoryBlock *getToRelease() const { return toRelease; }
14251c0b2f7Stbbdev     uintptr_t getCurrTime() const { return currTime; }
14351c0b2f7Stbbdev };
14451c0b2f7Stbbdev 
14551c0b2f7Stbbdev /* ---------------- Cache Bin Aggregator Operation Helpers ---------------- */
14651c0b2f7Stbbdev 
14751c0b2f7Stbbdev // The list of structures which describe the operation data
14851c0b2f7Stbbdev struct OpGet {
14951c0b2f7Stbbdev     static const CacheBinOperationType type = CBOP_GET;
15051c0b2f7Stbbdev     LargeMemoryBlock **res;
15151c0b2f7Stbbdev     size_t size;
15251c0b2f7Stbbdev     uintptr_t currTime;
15351c0b2f7Stbbdev };
15451c0b2f7Stbbdev 
15551c0b2f7Stbbdev struct OpPutList {
15651c0b2f7Stbbdev     static const CacheBinOperationType type = CBOP_PUT_LIST;
15751c0b2f7Stbbdev     LargeMemoryBlock *head;
15851c0b2f7Stbbdev };
15951c0b2f7Stbbdev 
16051c0b2f7Stbbdev struct OpCleanToThreshold {
16151c0b2f7Stbbdev     static const CacheBinOperationType type = CBOP_CLEAN_TO_THRESHOLD;
16251c0b2f7Stbbdev     LargeMemoryBlock **res;
16351c0b2f7Stbbdev     uintptr_t currTime;
16451c0b2f7Stbbdev };
16551c0b2f7Stbbdev 
16651c0b2f7Stbbdev struct OpCleanAll {
16751c0b2f7Stbbdev     static const CacheBinOperationType type = CBOP_CLEAN_ALL;
16851c0b2f7Stbbdev     LargeMemoryBlock **res;
16951c0b2f7Stbbdev };
17051c0b2f7Stbbdev 
17151c0b2f7Stbbdev struct OpUpdateUsedSize {
17251c0b2f7Stbbdev     static const CacheBinOperationType type = CBOP_UPDATE_USED_SIZE;
17351c0b2f7Stbbdev     size_t size;
17451c0b2f7Stbbdev };
17551c0b2f7Stbbdev 
17651c0b2f7Stbbdev union CacheBinOperationData {
17751c0b2f7Stbbdev private:
17851c0b2f7Stbbdev     OpGet opGet;
17951c0b2f7Stbbdev     OpPutList opPutList;
18051c0b2f7Stbbdev     OpCleanToThreshold opCleanToThreshold;
18151c0b2f7Stbbdev     OpCleanAll opCleanAll;
18251c0b2f7Stbbdev     OpUpdateUsedSize opUpdateUsedSize;
18351c0b2f7Stbbdev };
18451c0b2f7Stbbdev 
18551c0b2f7Stbbdev // Forward declarations
18651c0b2f7Stbbdev template <typename OpTypeData> OpTypeData& opCast(CacheBinOperation &op);
18751c0b2f7Stbbdev 
18851c0b2f7Stbbdev // Describes the aggregator operation
18951c0b2f7Stbbdev struct CacheBinOperation : public MallocAggregatedOperation<CacheBinOperation>::type {
19051c0b2f7Stbbdev     CacheBinOperationType type;
19151c0b2f7Stbbdev 
19251c0b2f7Stbbdev     template <typename OpTypeData>
19351c0b2f7Stbbdev     CacheBinOperation(OpTypeData &d, CacheBinOperationStatus st = CBST_WAIT) {
19451c0b2f7Stbbdev         opCast<OpTypeData>(*this) = d;
19551c0b2f7Stbbdev         type = OpTypeData::type;
19651c0b2f7Stbbdev         MallocAggregatedOperation<CacheBinOperation>::type::status = st;
19751c0b2f7Stbbdev     }
19851c0b2f7Stbbdev private:
19951c0b2f7Stbbdev     CacheBinOperationData data;
20051c0b2f7Stbbdev 
20151c0b2f7Stbbdev     template <typename OpTypeData>
20251c0b2f7Stbbdev     friend OpTypeData& opCast(CacheBinOperation &op);
20351c0b2f7Stbbdev };
20451c0b2f7Stbbdev 
20551c0b2f7Stbbdev // The opCast function can be the member of CacheBinOperation but it will have
20651c0b2f7Stbbdev // small stylistic ambiguity: it will look like a getter (with a cast) for the
20751c0b2f7Stbbdev // CacheBinOperation::data data member but it should return a reference to
20851c0b2f7Stbbdev // simplify the code from a lot of getter/setter calls. So the global cast in
20951c0b2f7Stbbdev // the style of static_cast (or reinterpret_cast) seems to be more readable and
21051c0b2f7Stbbdev // have more explicit semantic.
21151c0b2f7Stbbdev template <typename OpTypeData>
21251c0b2f7Stbbdev OpTypeData& opCast(CacheBinOperation &op) {
21351c0b2f7Stbbdev     return *reinterpret_cast<OpTypeData*>(&op.data);
21451c0b2f7Stbbdev }
21551c0b2f7Stbbdev 
21651c0b2f7Stbbdev /* ------------------------------------------------------------------------ */
21751c0b2f7Stbbdev 
#if __TBB_MALLOC_LOCACHE_STAT
// Statistics counters, compiled in only when __TBB_MALLOC_LOCACHE_STAT is set.
std::atomic<intptr_t> mallocCalls;
std::atomic<intptr_t> cacheHits;
std::atomic<intptr_t> memAllocKB;
std::atomic<intptr_t> memHitKB;
#endif
22451c0b2f7Stbbdev 
/* Wrap-around aware "a < b".
   Returns true when a is below b and the distance b - a is less than
   UINTPTR_MAX/2, or when a is above b and the distance a - b, taken modulo
   2^N, is greater than UINTPTR_MAX/2 (b has wrapped past a).
   The distances are computed on uintptr_t: unsigned subtraction wraps by
   definition, whereas the same subtraction on intptr_t overflows, which is
   undefined behaviour. */
inline bool lessThanWithOverflow(intptr_t a, intptr_t b)
{
    const uintptr_t half = UINTPTR_MAX/2;
    const uintptr_t ua = static_cast<uintptr_t>(a);
    const uintptr_t ub = static_cast<uintptr_t>(b);

    return (a < b && (ub - ua < half)) ||
           (a > b && (ua - ub > half));
}
23051c0b2f7Stbbdev 
23151c0b2f7Stbbdev /* ----------------------------------- Operation processing methods ------------------------------------ */
23251c0b2f7Stbbdev 
23351c0b2f7Stbbdev template<typename Props> void CacheBinFunctor<Props>::
23451c0b2f7Stbbdev     OperationPreprocessor::commitOperation(CacheBinOperation *op) const
23551c0b2f7Stbbdev {
23651c0b2f7Stbbdev     // FencedStore( (intptr_t&)(op->status), CBST_DONE );
23751c0b2f7Stbbdev     op->status.store(CBST_DONE, std::memory_order_release);
23851c0b2f7Stbbdev }
23951c0b2f7Stbbdev 
24051c0b2f7Stbbdev template<typename Props> void CacheBinFunctor<Props>::
24151c0b2f7Stbbdev     OperationPreprocessor::addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const
24251c0b2f7Stbbdev {
24351c0b2f7Stbbdev     op->next = *opList;
24451c0b2f7Stbbdev     *opList = op;
24551c0b2f7Stbbdev }
24651c0b2f7Stbbdev 
24751c0b2f7Stbbdev template<typename Props> bool CacheBinFunctor<Props>::
24851c0b2f7Stbbdev     OperationPreprocessor::getFromPutList(CacheBinOperation *opGet, uintptr_t currTime)
24951c0b2f7Stbbdev {
25051c0b2f7Stbbdev     if ( head ) {
25151c0b2f7Stbbdev         uintptr_t age = head->age;
25251c0b2f7Stbbdev         LargeMemoryBlock *next = head->next;
25351c0b2f7Stbbdev         *opCast<OpGet>(*opGet).res = head;
25451c0b2f7Stbbdev         commitOperation( opGet );
25551c0b2f7Stbbdev         head = next;
25651c0b2f7Stbbdev         putListNum--;
25751c0b2f7Stbbdev         MALLOC_ASSERT( putListNum>=0, ASSERT_TEXT );
25851c0b2f7Stbbdev 
25951c0b2f7Stbbdev         // use moving average with current hit interval
26051c0b2f7Stbbdev         bin->updateMeanHitRange( currTime - age );
26151c0b2f7Stbbdev         return true;
26251c0b2f7Stbbdev     }
26351c0b2f7Stbbdev     return false;
26451c0b2f7Stbbdev }
26551c0b2f7Stbbdev 
26651c0b2f7Stbbdev template<typename Props> void CacheBinFunctor<Props>::
26751c0b2f7Stbbdev     OperationPreprocessor::addToPutList(LargeMemoryBlock *h, LargeMemoryBlock *t, int num)
26851c0b2f7Stbbdev {
26951c0b2f7Stbbdev     if ( head ) {
27051c0b2f7Stbbdev         MALLOC_ASSERT( tail, ASSERT_TEXT );
27151c0b2f7Stbbdev         tail->next = h;
27251c0b2f7Stbbdev         h->prev = tail;
27351c0b2f7Stbbdev         tail = t;
27451c0b2f7Stbbdev         putListNum += num;
27551c0b2f7Stbbdev     } else {
27651c0b2f7Stbbdev         head = h;
27751c0b2f7Stbbdev         tail = t;
27851c0b2f7Stbbdev         putListNum = num;
27951c0b2f7Stbbdev     }
28051c0b2f7Stbbdev }
28151c0b2f7Stbbdev 
28251c0b2f7Stbbdev template<typename Props> void CacheBinFunctor<Props>::
28351c0b2f7Stbbdev     OperationPreprocessor::operator()(CacheBinOperation* opList)
28451c0b2f7Stbbdev {
28551c0b2f7Stbbdev     for ( CacheBinOperation *op = opList, *opNext; op; op = opNext ) {
28651c0b2f7Stbbdev         opNext = op->next;
28751c0b2f7Stbbdev         switch ( op->type ) {
28851c0b2f7Stbbdev         case CBOP_GET:
28951c0b2f7Stbbdev             {
29051c0b2f7Stbbdev                 lclTime--;
29151c0b2f7Stbbdev                 if ( !lastGetOpTime ) {
29251c0b2f7Stbbdev                     lastGetOpTime = lclTime;
29351c0b2f7Stbbdev                     lastGet = 0;
29451c0b2f7Stbbdev                 } else if ( !lastGet ) lastGet = lclTime;
29551c0b2f7Stbbdev 
29651c0b2f7Stbbdev                 if ( !getFromPutList(op,lclTime) ) {
29751c0b2f7Stbbdev                     opCast<OpGet>(*op).currTime = lclTime;
29851c0b2f7Stbbdev                     addOpToOpList( op, &opGet );
29951c0b2f7Stbbdev                 }
30051c0b2f7Stbbdev             }
30151c0b2f7Stbbdev             break;
30251c0b2f7Stbbdev 
30351c0b2f7Stbbdev         case CBOP_PUT_LIST:
30451c0b2f7Stbbdev             {
30551c0b2f7Stbbdev                 LargeMemoryBlock *head = opCast<OpPutList>(*op).head;
30651c0b2f7Stbbdev                 LargeMemoryBlock *curr = head, *prev = NULL;
30751c0b2f7Stbbdev 
30851c0b2f7Stbbdev                 int num = 0;
30951c0b2f7Stbbdev                 do {
31051c0b2f7Stbbdev                     // we do not kept prev pointers during assigning blocks to bins, set them now
31151c0b2f7Stbbdev                     curr->prev = prev;
31251c0b2f7Stbbdev 
31351c0b2f7Stbbdev                     // Save the local times to the memory blocks. Local times are necessary
31451c0b2f7Stbbdev                     // for the getFromPutList function which updates the hit range value in
31551c0b2f7Stbbdev                     // CacheBin when OP_GET and OP_PUT_LIST operations are merged successfully.
31651c0b2f7Stbbdev                     // The age will be updated to the correct global time after preprocessing
31751c0b2f7Stbbdev                     // when global cache time is updated.
31851c0b2f7Stbbdev                     curr->age = --lclTime;
31951c0b2f7Stbbdev 
32051c0b2f7Stbbdev                     prev = curr;
32151c0b2f7Stbbdev                     num += 1;
32251c0b2f7Stbbdev 
32351c0b2f7Stbbdev                     STAT_increment(getThreadId(), ThreadCommonCounters, cacheLargeObj);
32451c0b2f7Stbbdev                 } while ((curr = curr->next) != NULL);
32551c0b2f7Stbbdev 
32651c0b2f7Stbbdev                 LargeMemoryBlock *tail = prev;
32751c0b2f7Stbbdev                 addToPutList(head, tail, num);
32851c0b2f7Stbbdev 
32951c0b2f7Stbbdev                 while ( opGet ) {
33051c0b2f7Stbbdev                     CacheBinOperation *next = opGet->next;
33151c0b2f7Stbbdev                     if ( !getFromPutList(opGet, opCast<OpGet>(*opGet).currTime) )
33251c0b2f7Stbbdev                         break;
33351c0b2f7Stbbdev                     opGet = next;
33451c0b2f7Stbbdev                 }
33551c0b2f7Stbbdev             }
33651c0b2f7Stbbdev             break;
33751c0b2f7Stbbdev 
33851c0b2f7Stbbdev         case CBOP_UPDATE_USED_SIZE:
33951c0b2f7Stbbdev             updateUsedSize += opCast<OpUpdateUsedSize>(*op).size;
34051c0b2f7Stbbdev             commitOperation( op );
34151c0b2f7Stbbdev             break;
34251c0b2f7Stbbdev 
34351c0b2f7Stbbdev         case CBOP_CLEAN_ALL:
34451c0b2f7Stbbdev             isCleanAll = true;
34551c0b2f7Stbbdev             addOpToOpList( op, &opClean );
34651c0b2f7Stbbdev             break;
34751c0b2f7Stbbdev 
34851c0b2f7Stbbdev         case CBOP_CLEAN_TO_THRESHOLD:
34951c0b2f7Stbbdev             {
35051c0b2f7Stbbdev                 uintptr_t currTime = opCast<OpCleanToThreshold>(*op).currTime;
35151c0b2f7Stbbdev                 // We don't worry about currTime overflow since it is a rare
35251c0b2f7Stbbdev                 // occurrence and doesn't affect correctness
35351c0b2f7Stbbdev                 cleanTime = cleanTime < currTime ? currTime : cleanTime;
35451c0b2f7Stbbdev                 addOpToOpList( op, &opClean );
35551c0b2f7Stbbdev             }
35651c0b2f7Stbbdev             break;
35751c0b2f7Stbbdev 
35851c0b2f7Stbbdev         default:
35951c0b2f7Stbbdev             MALLOC_ASSERT( false, "Unknown operation." );
36051c0b2f7Stbbdev         }
36151c0b2f7Stbbdev     }
36251c0b2f7Stbbdev     MALLOC_ASSERT( !( opGet && head ), "Not all put/get pairs are processed!" );
36351c0b2f7Stbbdev }
36451c0b2f7Stbbdev 
36551c0b2f7Stbbdev template<typename Props> void CacheBinFunctor<Props>::operator()(CacheBinOperation* opList)
36651c0b2f7Stbbdev {
36751c0b2f7Stbbdev     MALLOC_ASSERT( opList, "Empty operation list is passed into operation handler." );
36851c0b2f7Stbbdev 
36951c0b2f7Stbbdev     OperationPreprocessor prep(bin);
37051c0b2f7Stbbdev     prep(opList);
37151c0b2f7Stbbdev 
37251c0b2f7Stbbdev     if ( uintptr_t timeRange = prep.getTimeRange() ) {
37351c0b2f7Stbbdev         uintptr_t startTime = extMemPool->loc.getCurrTimeRange(timeRange);
37451c0b2f7Stbbdev         // endTime is used as the current (base) time since the local time is negative.
37551c0b2f7Stbbdev         uintptr_t endTime = startTime + timeRange;
37651c0b2f7Stbbdev 
37751c0b2f7Stbbdev         if ( prep.lastGetOpTime && prep.lastGet ) bin->setLastGet(prep.lastGet+endTime);
37851c0b2f7Stbbdev 
37951c0b2f7Stbbdev         if ( CacheBinOperation *opGet = prep.opGet ) {
38051c0b2f7Stbbdev             bool isEmpty = false;
38151c0b2f7Stbbdev             do {
38251c0b2f7Stbbdev #if __TBB_MALLOC_WHITEBOX_TEST
38351c0b2f7Stbbdev                 tbbmalloc_whitebox::locGetProcessed++;
38451c0b2f7Stbbdev #endif
38551c0b2f7Stbbdev                 const OpGet &opGetData = opCast<OpGet>(*opGet);
38651c0b2f7Stbbdev                 if ( !isEmpty ) {
38751c0b2f7Stbbdev                     if ( LargeMemoryBlock *res = bin->get() ) {
38851c0b2f7Stbbdev                         uintptr_t getTime = opGetData.currTime + endTime;
38951c0b2f7Stbbdev                         // use moving average with current hit interval
39051c0b2f7Stbbdev                         bin->updateMeanHitRange( getTime - res->age);
39151c0b2f7Stbbdev                         bin->updateCachedSize( -opGetData.size );
39251c0b2f7Stbbdev                         *opGetData.res = res;
39351c0b2f7Stbbdev                     } else {
39451c0b2f7Stbbdev                         isEmpty = true;
39551c0b2f7Stbbdev                         uintptr_t lastGetOpTime = prep.lastGetOpTime+endTime;
39651c0b2f7Stbbdev                         bin->forgetOutdatedState(lastGetOpTime);
39751c0b2f7Stbbdev                         bin->updateAgeThreshold(lastGetOpTime);
39851c0b2f7Stbbdev                     }
39951c0b2f7Stbbdev                 }
40051c0b2f7Stbbdev 
40151c0b2f7Stbbdev                 CacheBinOperation *opNext = opGet->next;
40251c0b2f7Stbbdev                 bin->updateUsedSize( opGetData.size, bitMask, idx );
40351c0b2f7Stbbdev                 prep.commitOperation( opGet );
40451c0b2f7Stbbdev                 opGet = opNext;
40551c0b2f7Stbbdev             } while ( opGet );
40651c0b2f7Stbbdev             if ( prep.lastGetOpTime )
40751c0b2f7Stbbdev                 bin->setLastGet( prep.lastGetOpTime + endTime );
40851c0b2f7Stbbdev         } else if ( LargeMemoryBlock *curr = prep.head ) {
40951c0b2f7Stbbdev             curr->prev = NULL;
41051c0b2f7Stbbdev             while ( curr ) {
41151c0b2f7Stbbdev                 // Update local times to global times
41251c0b2f7Stbbdev                 curr->age += endTime;
41351c0b2f7Stbbdev                 curr=curr->next;
41451c0b2f7Stbbdev             }
41551c0b2f7Stbbdev #if __TBB_MALLOC_WHITEBOX_TEST
41651c0b2f7Stbbdev             tbbmalloc_whitebox::locPutProcessed+=prep.putListNum;
41751c0b2f7Stbbdev #endif
41851c0b2f7Stbbdev             toRelease = bin->putList(prep.head, prep.tail, bitMask, idx, prep.putListNum, extMemPool->loc.hugeSizeThreshold);
41951c0b2f7Stbbdev         }
42051c0b2f7Stbbdev         needCleanup = extMemPool->loc.isCleanupNeededOnRange(timeRange, startTime);
42151c0b2f7Stbbdev         currTime = endTime - 1;
42251c0b2f7Stbbdev     }
42351c0b2f7Stbbdev 
42451c0b2f7Stbbdev     if ( CacheBinOperation *opClean = prep.opClean ) {
42551c0b2f7Stbbdev         if ( prep.isCleanAll )
42651c0b2f7Stbbdev             *opCast<OpCleanAll>(*opClean).res = bin->cleanAll(bitMask, idx);
42751c0b2f7Stbbdev         else
42851c0b2f7Stbbdev             *opCast<OpCleanToThreshold>(*opClean).res = bin->cleanToThreshold(prep.cleanTime, bitMask, idx);
42951c0b2f7Stbbdev 
43051c0b2f7Stbbdev         CacheBinOperation *opNext = opClean->next;
43151c0b2f7Stbbdev         prep.commitOperation( opClean );
43251c0b2f7Stbbdev 
43351c0b2f7Stbbdev         while ((opClean = opNext) != NULL) {
43451c0b2f7Stbbdev             opNext = opClean->next;
43551c0b2f7Stbbdev             prep.commitOperation(opClean);
43651c0b2f7Stbbdev         }
43751c0b2f7Stbbdev     }
43851c0b2f7Stbbdev 
43951c0b2f7Stbbdev     if ( size_t size = prep.updateUsedSize )
44051c0b2f7Stbbdev         bin->updateUsedSize(size, bitMask, idx);
44151c0b2f7Stbbdev }
44251c0b2f7Stbbdev /* ----------------------------------------------------------------------------------------------------- */
44351c0b2f7Stbbdev /* --------------------------- Methods for creating and executing operations --------------------------- */
44451c0b2f7Stbbdev template<typename Props> void LargeObjectCacheImpl<Props>::
44551c0b2f7Stbbdev     CacheBin::ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime)
44651c0b2f7Stbbdev {
44751c0b2f7Stbbdev     CacheBinFunctor<Props> func( this, extMemPool, bitMask, idx );
44851c0b2f7Stbbdev     aggregator.execute( op, func, longLifeTime );
44951c0b2f7Stbbdev 
45051c0b2f7Stbbdev     if ( LargeMemoryBlock *toRelease = func.getToRelease()) {
45151c0b2f7Stbbdev         extMemPool->backend.returnLargeObject(toRelease);
45251c0b2f7Stbbdev     }
45351c0b2f7Stbbdev 
45451c0b2f7Stbbdev     if ( func.isCleanupNeeded() ) {
45551c0b2f7Stbbdev         extMemPool->loc.doCleanup( func.getCurrTime(), /*doThreshDecr=*/false);
45651c0b2f7Stbbdev     }
45751c0b2f7Stbbdev }
45851c0b2f7Stbbdev 
45951c0b2f7Stbbdev template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
46051c0b2f7Stbbdev     CacheBin::get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
46151c0b2f7Stbbdev {
46251c0b2f7Stbbdev     LargeMemoryBlock *lmb=NULL;
46351c0b2f7Stbbdev     OpGet data = {&lmb, size};
46451c0b2f7Stbbdev     CacheBinOperation op(data);
46551c0b2f7Stbbdev     ExecuteOperation( &op, extMemPool, bitMask, idx );
46651c0b2f7Stbbdev     return lmb;
46751c0b2f7Stbbdev }
46851c0b2f7Stbbdev 
46951c0b2f7Stbbdev template<typename Props> void LargeObjectCacheImpl<Props>::
47051c0b2f7Stbbdev     CacheBin::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx)
47151c0b2f7Stbbdev {
47251c0b2f7Stbbdev     MALLOC_ASSERT(sizeof(LargeMemoryBlock)+sizeof(CacheBinOperation)<=head->unalignedSize, "CacheBinOperation is too large to be placed in LargeMemoryBlock!");
47351c0b2f7Stbbdev 
47451c0b2f7Stbbdev     OpPutList data = {head};
47551c0b2f7Stbbdev     CacheBinOperation *op = new (head+1) CacheBinOperation(data, CBST_NOWAIT);
47651c0b2f7Stbbdev     ExecuteOperation( op, extMemPool, bitMask, idx, false );
47751c0b2f7Stbbdev }
47851c0b2f7Stbbdev 
47951c0b2f7Stbbdev template<typename Props> bool LargeObjectCacheImpl<Props>::
48051c0b2f7Stbbdev     CacheBin::cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx)
48151c0b2f7Stbbdev {
48251c0b2f7Stbbdev     LargeMemoryBlock *toRelease = NULL;
48351c0b2f7Stbbdev 
48451c0b2f7Stbbdev     /* oldest may be more recent then age, that's why cast to signed type
48551c0b2f7Stbbdev        was used. age overflow is also processed correctly. */
48651c0b2f7Stbbdev     if (last && (intptr_t)(currTime - oldest) > ageThreshold) {
48751c0b2f7Stbbdev         OpCleanToThreshold data = {&toRelease, currTime};
48851c0b2f7Stbbdev         CacheBinOperation op(data);
48951c0b2f7Stbbdev         ExecuteOperation( &op, extMemPool, bitMask, idx );
49051c0b2f7Stbbdev     }
49151c0b2f7Stbbdev     bool released = toRelease;
49251c0b2f7Stbbdev 
49351c0b2f7Stbbdev     Backend *backend = &extMemPool->backend;
49451c0b2f7Stbbdev     while ( toRelease ) {
49551c0b2f7Stbbdev         LargeMemoryBlock *helper = toRelease->next;
49651c0b2f7Stbbdev         backend->returnLargeObject(toRelease);
49751c0b2f7Stbbdev         toRelease = helper;
49851c0b2f7Stbbdev     }
49951c0b2f7Stbbdev     return released;
50051c0b2f7Stbbdev }
50151c0b2f7Stbbdev 
50251c0b2f7Stbbdev template<typename Props> bool LargeObjectCacheImpl<Props>::
50351c0b2f7Stbbdev     CacheBin::releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx)
50451c0b2f7Stbbdev {
50551c0b2f7Stbbdev     LargeMemoryBlock *toRelease = NULL;
50651c0b2f7Stbbdev 
50751c0b2f7Stbbdev     if (last) {
50851c0b2f7Stbbdev         OpCleanAll data = {&toRelease};
50951c0b2f7Stbbdev         CacheBinOperation op(data);
51051c0b2f7Stbbdev         ExecuteOperation(&op, extMemPool, bitMask, idx);
51151c0b2f7Stbbdev     }
51251c0b2f7Stbbdev     bool released = toRelease;
51351c0b2f7Stbbdev 
51451c0b2f7Stbbdev     Backend *backend = &extMemPool->backend;
51551c0b2f7Stbbdev     while ( toRelease ) {
51651c0b2f7Stbbdev         LargeMemoryBlock *helper = toRelease->next;
51751c0b2f7Stbbdev         MALLOC_ASSERT(!helper || lessThanWithOverflow(helper->age, toRelease->age),
51851c0b2f7Stbbdev                       ASSERT_TEXT);
51951c0b2f7Stbbdev         backend->returnLargeObject(toRelease);
52051c0b2f7Stbbdev         toRelease = helper;
52151c0b2f7Stbbdev     }
52251c0b2f7Stbbdev     return released;
52351c0b2f7Stbbdev }
52451c0b2f7Stbbdev 
52551c0b2f7Stbbdev template<typename Props> void LargeObjectCacheImpl<Props>::
52651c0b2f7Stbbdev     CacheBin::updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
52751c0b2f7Stbbdev {
52851c0b2f7Stbbdev     OpUpdateUsedSize data = {size};
52951c0b2f7Stbbdev     CacheBinOperation op(data);
53051c0b2f7Stbbdev     ExecuteOperation( &op, extMemPool, bitMask, idx );
53151c0b2f7Stbbdev }
53251c0b2f7Stbbdev 
53351c0b2f7Stbbdev /* ------------------------------ Unsafe methods used with the aggregator ------------------------------ */
53451c0b2f7Stbbdev 
53551c0b2f7Stbbdev template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
53651c0b2f7Stbbdev     CacheBin::putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask, int idx, int num, size_t hugeSizeThreshold)
53751c0b2f7Stbbdev {
53851c0b2f7Stbbdev     size_t size = head->unalignedSize;
53951c0b2f7Stbbdev     usedSize -= num*size;
54051c0b2f7Stbbdev     MALLOC_ASSERT( !last || (last->age != 0 && last->age != -1U), ASSERT_TEXT );
54151c0b2f7Stbbdev     MALLOC_ASSERT( (tail==head && num==1) || (tail!=head && num>1), ASSERT_TEXT );
54251c0b2f7Stbbdev     LargeMemoryBlock *toRelease = NULL;
54351c0b2f7Stbbdev     if (size < hugeSizeThreshold && !lastCleanedAge) {
54451c0b2f7Stbbdev         // 1st object of such size was released.
54551c0b2f7Stbbdev         // Not cache it, and remember when this occurs
54651c0b2f7Stbbdev         // to take into account during cache miss.
54751c0b2f7Stbbdev         lastCleanedAge = tail->age;
54851c0b2f7Stbbdev         toRelease = tail;
54951c0b2f7Stbbdev         tail = tail->prev;
55051c0b2f7Stbbdev         if (tail)
55151c0b2f7Stbbdev             tail->next = NULL;
55251c0b2f7Stbbdev         else
55351c0b2f7Stbbdev             head = NULL;
55451c0b2f7Stbbdev         num--;
55551c0b2f7Stbbdev     }
55651c0b2f7Stbbdev     if (num) {
55751c0b2f7Stbbdev         // add [head;tail] list to cache
55851c0b2f7Stbbdev         MALLOC_ASSERT( tail, ASSERT_TEXT );
55951c0b2f7Stbbdev         tail->next = first;
56051c0b2f7Stbbdev         if (first)
56151c0b2f7Stbbdev             first->prev = tail;
56251c0b2f7Stbbdev         first = head;
56351c0b2f7Stbbdev         if (!last) {
56451c0b2f7Stbbdev             MALLOC_ASSERT(0 == oldest, ASSERT_TEXT);
56551c0b2f7Stbbdev             oldest = tail->age;
56651c0b2f7Stbbdev             last = tail;
56751c0b2f7Stbbdev         }
56851c0b2f7Stbbdev 
56951c0b2f7Stbbdev         cachedSize += num*size;
57051c0b2f7Stbbdev     }
57151c0b2f7Stbbdev 
57251c0b2f7Stbbdev     // No used object, and nothing in the bin, mark the bin as empty
57351c0b2f7Stbbdev     if (!usedSize && !first)
57451c0b2f7Stbbdev         bitMask->set(idx, false);
57551c0b2f7Stbbdev 
57651c0b2f7Stbbdev     return toRelease;
57751c0b2f7Stbbdev }
57851c0b2f7Stbbdev 
57951c0b2f7Stbbdev template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
58051c0b2f7Stbbdev     CacheBin::get()
58151c0b2f7Stbbdev {
58251c0b2f7Stbbdev     LargeMemoryBlock *result=first;
58351c0b2f7Stbbdev     if (result) {
58451c0b2f7Stbbdev         first = result->next;
58551c0b2f7Stbbdev         if (first)
58651c0b2f7Stbbdev             first->prev = NULL;
58751c0b2f7Stbbdev         else {
58851c0b2f7Stbbdev             last = NULL;
58951c0b2f7Stbbdev             oldest = 0;
59051c0b2f7Stbbdev         }
59151c0b2f7Stbbdev     }
59251c0b2f7Stbbdev 
59351c0b2f7Stbbdev     return result;
59451c0b2f7Stbbdev }
59551c0b2f7Stbbdev 
59651c0b2f7Stbbdev template<typename Props> void LargeObjectCacheImpl<Props>::
59751c0b2f7Stbbdev     CacheBin::forgetOutdatedState(uintptr_t currTime)
59851c0b2f7Stbbdev {
59951c0b2f7Stbbdev     // If the time since the last get is LongWaitFactor times more than ageThreshold
60051c0b2f7Stbbdev     // for the bin, treat the bin as rarely-used and forget everything we know
60151c0b2f7Stbbdev     // about it.
60251c0b2f7Stbbdev     // If LongWaitFactor is too small, we forget too early and
60351c0b2f7Stbbdev     // so prevents good caching, while if too high, caching blocks
60451c0b2f7Stbbdev     // with unrelated usage pattern occurs.
60551c0b2f7Stbbdev     const uintptr_t sinceLastGet = currTime - lastGet;
60651c0b2f7Stbbdev     bool doCleanup = false;
60751c0b2f7Stbbdev 
60851c0b2f7Stbbdev     if (ageThreshold)
60951c0b2f7Stbbdev         doCleanup = sinceLastGet > Props::LongWaitFactor * ageThreshold;
61051c0b2f7Stbbdev     else if (lastCleanedAge)
61151c0b2f7Stbbdev         doCleanup = sinceLastGet > Props::LongWaitFactor * (lastCleanedAge - lastGet);
61251c0b2f7Stbbdev 
61351c0b2f7Stbbdev     if (doCleanup) {
61451c0b2f7Stbbdev         lastCleanedAge = 0;
61551c0b2f7Stbbdev         ageThreshold = 0;
61651c0b2f7Stbbdev     }
61751c0b2f7Stbbdev 
61851c0b2f7Stbbdev }
61951c0b2f7Stbbdev 
62051c0b2f7Stbbdev template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
62151c0b2f7Stbbdev     CacheBin::cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx)
62251c0b2f7Stbbdev {
62351c0b2f7Stbbdev     /* oldest may be more recent then age, that's why cast to signed type
62451c0b2f7Stbbdev     was used. age overflow is also processed correctly. */
62551c0b2f7Stbbdev     if ( !last || (intptr_t)(currTime - last->age) < ageThreshold ) return NULL;
62651c0b2f7Stbbdev 
62751c0b2f7Stbbdev #if MALLOC_DEBUG
62851c0b2f7Stbbdev     uintptr_t nextAge = 0;
62951c0b2f7Stbbdev #endif
63051c0b2f7Stbbdev     do {
63151c0b2f7Stbbdev #if MALLOC_DEBUG
63251c0b2f7Stbbdev         // check that list ordered
63351c0b2f7Stbbdev         MALLOC_ASSERT(!nextAge || lessThanWithOverflow(nextAge, last->age),
63451c0b2f7Stbbdev             ASSERT_TEXT);
63551c0b2f7Stbbdev         nextAge = last->age;
63651c0b2f7Stbbdev #endif
63751c0b2f7Stbbdev         cachedSize -= last->unalignedSize;
63851c0b2f7Stbbdev         last = last->prev;
63951c0b2f7Stbbdev     } while (last && (intptr_t)(currTime - last->age) > ageThreshold);
64051c0b2f7Stbbdev 
64151c0b2f7Stbbdev     LargeMemoryBlock *toRelease = NULL;
64251c0b2f7Stbbdev     if (last) {
64351c0b2f7Stbbdev         toRelease = last->next;
64451c0b2f7Stbbdev         oldest = last->age;
64551c0b2f7Stbbdev         last->next = NULL;
64651c0b2f7Stbbdev     } else {
64751c0b2f7Stbbdev         toRelease = first;
64851c0b2f7Stbbdev         first = NULL;
64951c0b2f7Stbbdev         oldest = 0;
65051c0b2f7Stbbdev         if (!usedSize)
65151c0b2f7Stbbdev             bitMask->set(idx, false);
65251c0b2f7Stbbdev     }
65351c0b2f7Stbbdev     MALLOC_ASSERT( toRelease, ASSERT_TEXT );
65451c0b2f7Stbbdev     lastCleanedAge = toRelease->age;
65551c0b2f7Stbbdev 
65651c0b2f7Stbbdev     return toRelease;
65751c0b2f7Stbbdev }
65851c0b2f7Stbbdev 
65951c0b2f7Stbbdev template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
66051c0b2f7Stbbdev     CacheBin::cleanAll(BinBitMask *bitMask, int idx)
66151c0b2f7Stbbdev {
66251c0b2f7Stbbdev     if (!last) return NULL;
66351c0b2f7Stbbdev 
66451c0b2f7Stbbdev     LargeMemoryBlock *toRelease = first;
66551c0b2f7Stbbdev     last = NULL;
66651c0b2f7Stbbdev     first = NULL;
66751c0b2f7Stbbdev     oldest = 0;
66851c0b2f7Stbbdev     cachedSize = 0;
66951c0b2f7Stbbdev     if (!usedSize)
67051c0b2f7Stbbdev         bitMask->set(idx, false);
67151c0b2f7Stbbdev 
67251c0b2f7Stbbdev     return toRelease;
67351c0b2f7Stbbdev }
67451c0b2f7Stbbdev 
67551c0b2f7Stbbdev /* ----------------------------------------------------------------------------------------------------- */
67651c0b2f7Stbbdev 
67751c0b2f7Stbbdev template<typename Props> size_t LargeObjectCacheImpl<Props>::
67851c0b2f7Stbbdev     CacheBin::reportStat(int num, FILE *f)
67951c0b2f7Stbbdev {
68051c0b2f7Stbbdev #if __TBB_MALLOC_LOCACHE_STAT
68151c0b2f7Stbbdev     if (first)
68251c0b2f7Stbbdev         printf("%d(%lu): total %lu KB thr %ld lastCln %lu oldest %lu\n",
68351c0b2f7Stbbdev                num, num*Props::CacheStep+Props::MinSize,
68451c0b2f7Stbbdev                cachedSize/1024, ageThreshold, lastCleanedAge, oldest);
68551c0b2f7Stbbdev #else
68651c0b2f7Stbbdev     suppress_unused_warning(num);
68751c0b2f7Stbbdev     suppress_unused_warning(f);
68851c0b2f7Stbbdev #endif
68951c0b2f7Stbbdev     return cachedSize;
69051c0b2f7Stbbdev }
69151c0b2f7Stbbdev 
69251c0b2f7Stbbdev // Release objects from cache blocks that are older than ageThreshold
69351c0b2f7Stbbdev template<typename Props>
69451c0b2f7Stbbdev bool LargeObjectCacheImpl<Props>::regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currTime, bool doThreshDecr)
69551c0b2f7Stbbdev {
69651c0b2f7Stbbdev     bool released = false;
69751c0b2f7Stbbdev     BinsSummary binsSummary;
69851c0b2f7Stbbdev 
69951c0b2f7Stbbdev     // Threshold settings is below this cache or starts from zero index
70051c0b2f7Stbbdev     if (hugeSizeThresholdIdx == 0) return false;
70151c0b2f7Stbbdev 
70251c0b2f7Stbbdev     // Starting searching for bin that is less than huge size threshold (can be cleaned-up)
70351c0b2f7Stbbdev     int startSearchIdx = hugeSizeThresholdIdx - 1;
70451c0b2f7Stbbdev 
70551c0b2f7Stbbdev     for (int i = bitMask.getMaxTrue(startSearchIdx); i >= 0; i = bitMask.getMaxTrue(i-1)) {
70651c0b2f7Stbbdev         bin[i].updateBinsSummary(&binsSummary);
70751c0b2f7Stbbdev         if (!doThreshDecr && tooLargeLOC.load(std::memory_order_relaxed) > 2 && binsSummary.isLOCTooLarge()) {
70851c0b2f7Stbbdev             // if LOC is too large for quite long time, decrease the threshold
70951c0b2f7Stbbdev             // based on bin hit statistics.
71051c0b2f7Stbbdev             // For this, redo cleanup from the beginning.
71151c0b2f7Stbbdev             // Note: on this iteration total usedSz can be not too large
71251c0b2f7Stbbdev             // in comparison to total cachedSz, as we calculated it only
71351c0b2f7Stbbdev             // partially. We are ok with it.
71451c0b2f7Stbbdev             i = bitMask.getMaxTrue(startSearchIdx)+1;
71551c0b2f7Stbbdev             doThreshDecr = true;
71651c0b2f7Stbbdev             binsSummary.reset();
71751c0b2f7Stbbdev             continue;
71851c0b2f7Stbbdev         }
71951c0b2f7Stbbdev         if (doThreshDecr)
72051c0b2f7Stbbdev             bin[i].decreaseThreshold();
72151c0b2f7Stbbdev 
72251c0b2f7Stbbdev         if (bin[i].cleanToThreshold(extMemPool, &bitMask, currTime, i)) {
72351c0b2f7Stbbdev             released = true;
72451c0b2f7Stbbdev         }
72551c0b2f7Stbbdev     }
72651c0b2f7Stbbdev     // We want to find if LOC was too large for some time continuously,
72751c0b2f7Stbbdev     // so OK with races between incrementing and zeroing, but incrementing
72851c0b2f7Stbbdev     // must be atomic.
72951c0b2f7Stbbdev     if (binsSummary.isLOCTooLarge()) {
73051c0b2f7Stbbdev         tooLargeLOC++;
73151c0b2f7Stbbdev     } else {
73251c0b2f7Stbbdev         tooLargeLOC.store(0, std::memory_order_relaxed);
73351c0b2f7Stbbdev     }
73451c0b2f7Stbbdev     return released;
73551c0b2f7Stbbdev }
73651c0b2f7Stbbdev 
73751c0b2f7Stbbdev template<typename Props>
73851c0b2f7Stbbdev bool LargeObjectCacheImpl<Props>::cleanAll(ExtMemoryPool *extMemPool)
73951c0b2f7Stbbdev {
74051c0b2f7Stbbdev     bool released = false;
74151c0b2f7Stbbdev     for (int i = numBins-1; i >= 0; i--) {
74251c0b2f7Stbbdev         released |= bin[i].releaseAllToBackend(extMemPool, &bitMask, i);
74351c0b2f7Stbbdev     }
74451c0b2f7Stbbdev     return released;
74551c0b2f7Stbbdev }
74651c0b2f7Stbbdev 
74751c0b2f7Stbbdev template<typename Props>
74851c0b2f7Stbbdev void LargeObjectCacheImpl<Props>::reset() {
74951c0b2f7Stbbdev     tooLargeLOC.store(0, std::memory_order_relaxed);
75051c0b2f7Stbbdev     for (int i = numBins-1; i >= 0; i--)
75151c0b2f7Stbbdev         bin[i].init();
75251c0b2f7Stbbdev     bitMask.reset();
75351c0b2f7Stbbdev }
75451c0b2f7Stbbdev 
75551c0b2f7Stbbdev #if __TBB_MALLOC_WHITEBOX_TEST
75651c0b2f7Stbbdev template<typename Props>
75751c0b2f7Stbbdev size_t LargeObjectCacheImpl<Props>::getLOCSize() const
75851c0b2f7Stbbdev {
75951c0b2f7Stbbdev     size_t size = 0;
76051c0b2f7Stbbdev     for (int i = numBins-1; i >= 0; i--)
76151c0b2f7Stbbdev         size += bin[i].getSize();
76251c0b2f7Stbbdev     return size;
76351c0b2f7Stbbdev }
76451c0b2f7Stbbdev 
76551c0b2f7Stbbdev size_t LargeObjectCache::getLOCSize() const
76651c0b2f7Stbbdev {
76751c0b2f7Stbbdev     return largeCache.getLOCSize() + hugeCache.getLOCSize();
76851c0b2f7Stbbdev }
76951c0b2f7Stbbdev 
77051c0b2f7Stbbdev template<typename Props>
77151c0b2f7Stbbdev size_t LargeObjectCacheImpl<Props>::getUsedSize() const
77251c0b2f7Stbbdev {
77351c0b2f7Stbbdev     size_t size = 0;
77451c0b2f7Stbbdev     for (int i = numBins-1; i >= 0; i--)
77551c0b2f7Stbbdev         size += bin[i].getUsedSize();
77651c0b2f7Stbbdev     return size;
77751c0b2f7Stbbdev }
77851c0b2f7Stbbdev 
77951c0b2f7Stbbdev size_t LargeObjectCache::getUsedSize() const
78051c0b2f7Stbbdev {
78151c0b2f7Stbbdev     return largeCache.getUsedSize() + hugeCache.getUsedSize();
78251c0b2f7Stbbdev }
78351c0b2f7Stbbdev #endif // __TBB_MALLOC_WHITEBOX_TEST
78451c0b2f7Stbbdev 
78551c0b2f7Stbbdev inline bool LargeObjectCache::isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime)
78651c0b2f7Stbbdev {
78751c0b2f7Stbbdev     return range >= cacheCleanupFreq
78851c0b2f7Stbbdev         || currTime+range < currTime-1 // overflow, 0 is power of 2, do cleanup
78951c0b2f7Stbbdev         // (prev;prev+range] contains n*cacheCleanupFreq
79051c0b2f7Stbbdev         || alignUp(currTime, cacheCleanupFreq)<currTime+range;
79151c0b2f7Stbbdev }
79251c0b2f7Stbbdev 
79351c0b2f7Stbbdev bool LargeObjectCache::doCleanup(uintptr_t currTime, bool doThreshDecr)
79451c0b2f7Stbbdev {
79551c0b2f7Stbbdev     if (!doThreshDecr)
79651c0b2f7Stbbdev         extMemPool->allLocalCaches.markUnused();
79751c0b2f7Stbbdev     return largeCache.regularCleanup(extMemPool, currTime, doThreshDecr)
79851c0b2f7Stbbdev         | hugeCache.regularCleanup(extMemPool, currTime, doThreshDecr);
79951c0b2f7Stbbdev }
80051c0b2f7Stbbdev 
80151c0b2f7Stbbdev bool LargeObjectCache::decreasingCleanup()
80251c0b2f7Stbbdev {
80351c0b2f7Stbbdev     return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/true);
80451c0b2f7Stbbdev }
80551c0b2f7Stbbdev 
80651c0b2f7Stbbdev bool LargeObjectCache::regularCleanup()
80751c0b2f7Stbbdev {
80851c0b2f7Stbbdev     return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/false);
80951c0b2f7Stbbdev }
81051c0b2f7Stbbdev 
81151c0b2f7Stbbdev bool LargeObjectCache::cleanAll()
81251c0b2f7Stbbdev {
81351c0b2f7Stbbdev     return largeCache.cleanAll(extMemPool) | hugeCache.cleanAll(extMemPool);
81451c0b2f7Stbbdev }
81551c0b2f7Stbbdev 
81651c0b2f7Stbbdev void LargeObjectCache::reset()
81751c0b2f7Stbbdev {
81851c0b2f7Stbbdev     largeCache.reset();
81951c0b2f7Stbbdev     hugeCache.reset();
82051c0b2f7Stbbdev }
82151c0b2f7Stbbdev 
82251c0b2f7Stbbdev template<typename Props>
82351c0b2f7Stbbdev LargeMemoryBlock *LargeObjectCacheImpl<Props>::get(ExtMemoryPool *extMemoryPool, size_t size)
82451c0b2f7Stbbdev {
82551c0b2f7Stbbdev     int idx = Props::sizeToIdx(size);
82651c0b2f7Stbbdev 
82751c0b2f7Stbbdev     LargeMemoryBlock *lmb = bin[idx].get(extMemoryPool, size, &bitMask, idx);
82851c0b2f7Stbbdev 
82951c0b2f7Stbbdev     if (lmb) {
83051c0b2f7Stbbdev         MALLOC_ITT_SYNC_ACQUIRED(bin+idx);
83151c0b2f7Stbbdev         STAT_increment(getThreadId(), ThreadCommonCounters, allocCachedLargeObj);
83251c0b2f7Stbbdev     }
83351c0b2f7Stbbdev     return lmb;
83451c0b2f7Stbbdev }
83551c0b2f7Stbbdev 
83651c0b2f7Stbbdev template<typename Props>
83751c0b2f7Stbbdev void LargeObjectCacheImpl<Props>::updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size)
83851c0b2f7Stbbdev {
83951c0b2f7Stbbdev     int idx = Props::sizeToIdx(size);
84051c0b2f7Stbbdev     MALLOC_ASSERT(idx<numBins, ASSERT_TEXT);
84151c0b2f7Stbbdev     bin[idx].updateUsedSize(extMemPool, op==decrease? -size : size, &bitMask, idx);
84251c0b2f7Stbbdev }
84351c0b2f7Stbbdev 
84451c0b2f7Stbbdev #if __TBB_MALLOC_LOCACHE_STAT
84551c0b2f7Stbbdev template<typename Props>
84651c0b2f7Stbbdev void LargeObjectCacheImpl<Props>::reportStat(FILE *f)
84751c0b2f7Stbbdev {
84851c0b2f7Stbbdev     size_t cachedSize = 0;
84951c0b2f7Stbbdev     for (int i=0; i<numBins; i++)
85051c0b2f7Stbbdev         cachedSize += bin[i].reportStat(i, f);
85151c0b2f7Stbbdev     fprintf(f, "total LOC size %lu MB\n", cachedSize/1024/1024);
85251c0b2f7Stbbdev }
85351c0b2f7Stbbdev 
85451c0b2f7Stbbdev void LargeObjectCache::reportStat(FILE *f)
85551c0b2f7Stbbdev {
85651c0b2f7Stbbdev     largeCache.reportStat(f);
85751c0b2f7Stbbdev     hugeCache.reportStat(f);
85851c0b2f7Stbbdev     fprintf(f, "cache time %lu\n", cacheCurrTime.load(std::memory_order_relaxed));
85951c0b2f7Stbbdev }
86051c0b2f7Stbbdev #endif
86151c0b2f7Stbbdev 
86251c0b2f7Stbbdev template<typename Props>
86351c0b2f7Stbbdev void LargeObjectCacheImpl<Props>::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *toCache)
86451c0b2f7Stbbdev {
86551c0b2f7Stbbdev     int toBinIdx = Props::sizeToIdx(toCache->unalignedSize);
86651c0b2f7Stbbdev 
86751c0b2f7Stbbdev     MALLOC_ITT_SYNC_RELEASING(bin+toBinIdx);
86851c0b2f7Stbbdev     bin[toBinIdx].putList(extMemPool, toCache, &bitMask, toBinIdx);
86951c0b2f7Stbbdev }
87051c0b2f7Stbbdev 
87151c0b2f7Stbbdev void LargeObjectCache::updateCacheState(DecreaseOrIncrease op, size_t size)
87251c0b2f7Stbbdev {
87351c0b2f7Stbbdev     if (size < maxLargeSize)
87451c0b2f7Stbbdev         largeCache.updateCacheState(extMemPool, op, size);
87551c0b2f7Stbbdev     else if (size < maxHugeSize)
87651c0b2f7Stbbdev         hugeCache.updateCacheState(extMemPool, op, size);
87751c0b2f7Stbbdev }
87851c0b2f7Stbbdev 
87951c0b2f7Stbbdev uintptr_t LargeObjectCache::getCurrTime()
88051c0b2f7Stbbdev {
88151c0b2f7Stbbdev     return ++cacheCurrTime;
88251c0b2f7Stbbdev }
88351c0b2f7Stbbdev 
88451c0b2f7Stbbdev uintptr_t LargeObjectCache::getCurrTimeRange(uintptr_t range)
88551c0b2f7Stbbdev {
88651c0b2f7Stbbdev     return (cacheCurrTime.fetch_add(range) + 1);
88751c0b2f7Stbbdev }
88851c0b2f7Stbbdev 
88951c0b2f7Stbbdev void LargeObjectCache::registerRealloc(size_t oldSize, size_t newSize)
89051c0b2f7Stbbdev {
89151c0b2f7Stbbdev     updateCacheState(decrease, oldSize);
89251c0b2f7Stbbdev     updateCacheState(increase, alignToBin(newSize));
89351c0b2f7Stbbdev }
89451c0b2f7Stbbdev 
89551c0b2f7Stbbdev size_t LargeObjectCache::alignToBin(size_t size) {
89651c0b2f7Stbbdev     return size < maxLargeSize ? LargeCacheType::alignToBin(size) : HugeCacheType::alignToBin(size);
89751c0b2f7Stbbdev }
89851c0b2f7Stbbdev 
89951c0b2f7Stbbdev // Used for internal purpose
90051c0b2f7Stbbdev int LargeObjectCache::sizeToIdx(size_t size)
90151c0b2f7Stbbdev {
90251c0b2f7Stbbdev     MALLOC_ASSERT(size <= maxHugeSize, ASSERT_TEXT);
90351c0b2f7Stbbdev     return size < maxLargeSize ?
90451c0b2f7Stbbdev         LargeCacheType::sizeToIdx(size) :
90551c0b2f7Stbbdev         LargeCacheType::numBins + HugeCacheType::sizeToIdx(size);
90651c0b2f7Stbbdev }
90751c0b2f7Stbbdev 
90851c0b2f7Stbbdev void LargeObjectCache::putList(LargeMemoryBlock *list)
90951c0b2f7Stbbdev {
91051c0b2f7Stbbdev     LargeMemoryBlock *toProcess, *n;
91151c0b2f7Stbbdev 
91251c0b2f7Stbbdev     for (LargeMemoryBlock *curr = list; curr; curr = toProcess) {
91351c0b2f7Stbbdev         LargeMemoryBlock *tail = curr;
91451c0b2f7Stbbdev         toProcess = curr->next;
91551c0b2f7Stbbdev         if (!sizeInCacheRange(curr->unalignedSize)) {
91651c0b2f7Stbbdev             extMemPool->backend.returnLargeObject(curr);
91751c0b2f7Stbbdev             continue;
91851c0b2f7Stbbdev         }
91951c0b2f7Stbbdev         int currIdx = sizeToIdx(curr->unalignedSize);
92051c0b2f7Stbbdev 
92151c0b2f7Stbbdev         // Find all blocks fitting to same bin. Not use more efficient sorting
92251c0b2f7Stbbdev         // algorithm because list is short (commonly,
92351c0b2f7Stbbdev         // LocalLOC's HIGH_MARK-LOW_MARK, i.e. 24 items).
92451c0b2f7Stbbdev         for (LargeMemoryBlock *b = toProcess; b; b = n) {
92551c0b2f7Stbbdev             n = b->next;
92651c0b2f7Stbbdev             if (sizeToIdx(b->unalignedSize) == currIdx) {
92751c0b2f7Stbbdev                 tail->next = b;
92851c0b2f7Stbbdev                 tail = b;
92951c0b2f7Stbbdev                 if (toProcess == b)
93051c0b2f7Stbbdev                     toProcess = toProcess->next;
93151c0b2f7Stbbdev                 else {
93251c0b2f7Stbbdev                     b->prev->next = b->next;
93351c0b2f7Stbbdev                     if (b->next)
93451c0b2f7Stbbdev                         b->next->prev = b->prev;
93551c0b2f7Stbbdev                 }
93651c0b2f7Stbbdev             }
93751c0b2f7Stbbdev         }
93851c0b2f7Stbbdev         tail->next = NULL;
93951c0b2f7Stbbdev         if (curr->unalignedSize < maxLargeSize)
94051c0b2f7Stbbdev             largeCache.putList(extMemPool, curr);
94151c0b2f7Stbbdev         else
94251c0b2f7Stbbdev             hugeCache.putList(extMemPool, curr);
94351c0b2f7Stbbdev     }
94451c0b2f7Stbbdev }
94551c0b2f7Stbbdev 
94651c0b2f7Stbbdev void LargeObjectCache::put(LargeMemoryBlock *largeBlock)
94751c0b2f7Stbbdev {
94851c0b2f7Stbbdev     size_t blockSize = largeBlock->unalignedSize;
94951c0b2f7Stbbdev     if (sizeInCacheRange(blockSize)) {
95051c0b2f7Stbbdev         largeBlock->next = NULL;
95151c0b2f7Stbbdev         if (blockSize < maxLargeSize)
95251c0b2f7Stbbdev             largeCache.putList(extMemPool, largeBlock);
95351c0b2f7Stbbdev         else
95451c0b2f7Stbbdev             hugeCache.putList(extMemPool, largeBlock);
95551c0b2f7Stbbdev     } else {
95651c0b2f7Stbbdev         extMemPool->backend.returnLargeObject(largeBlock);
95751c0b2f7Stbbdev     }
95851c0b2f7Stbbdev }
95951c0b2f7Stbbdev 
96051c0b2f7Stbbdev LargeMemoryBlock *LargeObjectCache::get(size_t size)
96151c0b2f7Stbbdev {
96251c0b2f7Stbbdev     MALLOC_ASSERT( size >= minLargeSize, ASSERT_TEXT );
96351c0b2f7Stbbdev     if (sizeInCacheRange(size)) {
96451c0b2f7Stbbdev         return size < maxLargeSize ?
96551c0b2f7Stbbdev             largeCache.get(extMemPool, size) : hugeCache.get(extMemPool, size);
96651c0b2f7Stbbdev     }
96751c0b2f7Stbbdev     return NULL;
96851c0b2f7Stbbdev }
96951c0b2f7Stbbdev 
97051c0b2f7Stbbdev LargeMemoryBlock *ExtMemoryPool::mallocLargeObject(MemoryPool *pool, size_t allocationSize)
97151c0b2f7Stbbdev {
97251c0b2f7Stbbdev #if __TBB_MALLOC_LOCACHE_STAT
97351c0b2f7Stbbdev     mallocCalls++;
97451c0b2f7Stbbdev     memAllocKB.fetch_add(allocationSize/1024);
97551c0b2f7Stbbdev #endif
97651c0b2f7Stbbdev     LargeMemoryBlock* lmb = loc.get(allocationSize);
97751c0b2f7Stbbdev     if (!lmb) {
97851c0b2f7Stbbdev         BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true);
97951c0b2f7Stbbdev         if (backRefIdx.isInvalid())
98051c0b2f7Stbbdev             return NULL;
98151c0b2f7Stbbdev 
98251c0b2f7Stbbdev         // unalignedSize is set in getLargeBlock
98351c0b2f7Stbbdev         lmb = backend.getLargeBlock(allocationSize);
98451c0b2f7Stbbdev         if (!lmb) {
98551c0b2f7Stbbdev             removeBackRef(backRefIdx);
98651c0b2f7Stbbdev             loc.updateCacheState(decrease, allocationSize);
98751c0b2f7Stbbdev             return NULL;
98851c0b2f7Stbbdev         }
98951c0b2f7Stbbdev         lmb->backRefIdx = backRefIdx;
99051c0b2f7Stbbdev         lmb->pool = pool;
99151c0b2f7Stbbdev         STAT_increment(getThreadId(), ThreadCommonCounters, allocNewLargeObj);
99251c0b2f7Stbbdev     } else {
99351c0b2f7Stbbdev #if __TBB_MALLOC_LOCACHE_STAT
99451c0b2f7Stbbdev         cacheHits++;
99551c0b2f7Stbbdev         memHitKB.fetch_add(allocationSize/1024);
99651c0b2f7Stbbdev #endif
99751c0b2f7Stbbdev     }
99851c0b2f7Stbbdev     return lmb;
99951c0b2f7Stbbdev }
100051c0b2f7Stbbdev 
100151c0b2f7Stbbdev void ExtMemoryPool::freeLargeObject(LargeMemoryBlock *mBlock)
100251c0b2f7Stbbdev {
100351c0b2f7Stbbdev     loc.put(mBlock);
100451c0b2f7Stbbdev }
100551c0b2f7Stbbdev 
100651c0b2f7Stbbdev void ExtMemoryPool::freeLargeObjectList(LargeMemoryBlock *head)
100751c0b2f7Stbbdev {
100851c0b2f7Stbbdev     loc.putList(head);
100951c0b2f7Stbbdev }
101051c0b2f7Stbbdev 
101151c0b2f7Stbbdev bool ExtMemoryPool::softCachesCleanup()
101251c0b2f7Stbbdev {
101351c0b2f7Stbbdev     return loc.regularCleanup();
101451c0b2f7Stbbdev }
101551c0b2f7Stbbdev 
101651c0b2f7Stbbdev bool ExtMemoryPool::hardCachesCleanup()
101751c0b2f7Stbbdev {
101851c0b2f7Stbbdev     // thread-local caches must be cleaned before LOC,
101951c0b2f7Stbbdev     // because object from thread-local cache can be released to LOC
102051c0b2f7Stbbdev     bool ret = releaseAllLocalCaches();
102151c0b2f7Stbbdev     ret |= orphanedBlocks.cleanup(&backend);
102251c0b2f7Stbbdev     ret |= loc.cleanAll();
102351c0b2f7Stbbdev     ret |= backend.clean();
102451c0b2f7Stbbdev     return ret;
102551c0b2f7Stbbdev }
102651c0b2f7Stbbdev 
102751c0b2f7Stbbdev #if BACKEND_HAS_MREMAP
102851c0b2f7Stbbdev void *ExtMemoryPool::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
102951c0b2f7Stbbdev {
103051c0b2f7Stbbdev     const size_t oldUnalignedSize = ((LargeObjectHdr*)ptr - 1)->memoryBlock->unalignedSize;
103151c0b2f7Stbbdev     void *o = backend.remap(ptr, oldSize, newSize, alignment);
103251c0b2f7Stbbdev     if (o) {
103351c0b2f7Stbbdev         LargeMemoryBlock *lmb = ((LargeObjectHdr*)o - 1)->memoryBlock;
103451c0b2f7Stbbdev         loc.registerRealloc(oldUnalignedSize, lmb->unalignedSize);
103551c0b2f7Stbbdev     }
103651c0b2f7Stbbdev     return o;
103751c0b2f7Stbbdev }
103851c0b2f7Stbbdev #endif /* BACKEND_HAS_MREMAP */
103951c0b2f7Stbbdev 
104051c0b2f7Stbbdev /*********** End allocation of large objects **********/
104151c0b2f7Stbbdev 
104251c0b2f7Stbbdev } // namespace internal
104351c0b2f7Stbbdev } // namespace rml
104451c0b2f7Stbbdev 
104551c0b2f7Stbbdev #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
104651c0b2f7Stbbdev     #pragma warning(pop)
104751c0b2f7Stbbdev #endif
104851c0b2f7Stbbdev 
1049