/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include "../tbb/environment.h"

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
// Suppress warning: unary minus operator applied to unsigned type, result still unsigned
// TBB_REVAMP_TODO: review this warning
#pragma warning(push)
#pragma warning(disable:4146)
#endif

/******************************* Allocation of large objects *********************************************/

namespace rml {
namespace internal {

/* ---------------------------- Large Object cache init section ---------------------------------------- */

void LargeObjectCache::init(ExtMemoryPool *memPool)
{
    extMemPool = memPool;
    // scalable_allocation_mode can be called before allocator initialization; respect this manual request
    if (hugeSizeThreshold == 0) {
        // Initialize the huge size threshold if the environment variable was set
        long requestedThreshold = tbb::detail::r1::GetIntegralEnvironmentVariable("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD");
        // Use the valid value from the environment, or initialize with the maximum possible value by default
        if (requestedThreshold != -1) {
            setHugeSizeThreshold(requestedThreshold);
        } else {
            setHugeSizeThreshold(maxHugeSize);
        }
    }
}

/* ----------------------------- Huge size threshold settings ----------------------------------------- */

void LargeObjectCache::setHugeSizeThreshold(size_t value)
{
    // Valid in the huge cache range: [MaxLargeSize, MaxHugeSize].
    if (value <= maxHugeSize) {
        hugeSizeThreshold = value >= maxLargeSize ? alignToBin(value) : maxLargeSize;

        // Calculate local indexes for the global threshold size (for fast search inside a regular cleanup)
        largeCache.hugeSizeThresholdIdx = LargeCacheType::numBins;
        hugeCache.hugeSizeThresholdIdx = HugeCacheType::sizeToIdx(hugeSizeThreshold);
    }
}

bool LargeObjectCache::sizeInCacheRange(size_t size)
{
    return size <= maxHugeSize && (size <= defaultMaxHugeSize || size >= hugeSizeThreshold);
}
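// Note on the cacheable range (follows from setHugeSizeThreshold() and sizeInCacheRange() above):
// sizes up to defaultMaxHugeSize are always eligible for caching, while sizes in
// [hugeSizeThreshold, maxHugeSize] become eligible once a smaller threshold is requested via
// scalable_allocation_mode or the TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD environment variable.
// Sizes between defaultMaxHugeSize and the threshold, as well as sizes above maxHugeSize,
// bypass the cache and are served by the backend directly.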
/* ----------------------------------------------------------------------------------------------------- */

/* The functor called by the aggregator for the operation list */
template<typename Props>
class CacheBinFunctor {
    typename LargeObjectCacheImpl<Props>::CacheBin *const bin;
    ExtMemoryPool *const extMemPool;
    typename LargeObjectCacheImpl<Props>::BinBitMask *const bitMask;
    const int idx;

    LargeMemoryBlock *toRelease;
    bool needCleanup;
    uintptr_t currTime;

    /* Does preprocessing of the operation list. */
    /* All OP_PUT_LIST operations are merged into one operation.
       OP_GET operations are merged with the OP_PUT_LIST operations, but
       that requires updating the moving average value in the bin.
       Only the last OP_CLEAN_TO_THRESHOLD operation is meaningful.
       The OP_CLEAN_ALL operation should also be performed only once.
       Moreover, it cancels the OP_CLEAN_TO_THRESHOLD operation. */
    class OperationPreprocessor {
        // TODO: remove the dependency on CacheBin.
        typename LargeObjectCacheImpl<Props>::CacheBin *const bin;

        /* Contains the relative time in the operation list.
           It counts in reverse order since the aggregator also
           provides operations in reverse order. */
        uintptr_t lclTime;

        /* opGet contains only the OP_GET operations that cannot be merged with OP_PUT operations.
           opClean contains all OP_CLEAN_TO_THRESHOLD and OP_CLEAN_ALL operations. */
        CacheBinOperation *opGet, *opClean;
        /* The time of the last OP_CLEAN_TO_THRESHOLD operation. */
        uintptr_t cleanTime;

        /* lastGetOpTime - the time of the last OP_GET operation.
           lastGet - the same meaning as CacheBin::lastGet */
        uintptr_t lastGetOpTime, lastGet;

        /* The total sum of all usedSize changes requested with CBOP_UPDATE_USED_SIZE operations. */
        size_t updateUsedSize;

        /* The list of blocks for the OP_PUT_LIST operation. */
        LargeMemoryBlock *head, *tail;
        int putListNum;

        /* Whether OP_CLEAN_ALL was requested. */
        bool isCleanAll;

        inline void commitOperation(CacheBinOperation *op) const;
        inline void addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const;
        bool getFromPutList(CacheBinOperation* opGet, uintptr_t currTime);
        void addToPutList( LargeMemoryBlock *head, LargeMemoryBlock *tail, int num );

    public:
        OperationPreprocessor(typename LargeObjectCacheImpl<Props>::CacheBin *bin) :
            bin(bin), lclTime(0), opGet(NULL), opClean(NULL), cleanTime(0),
            lastGetOpTime(0), updateUsedSize(0), head(NULL), isCleanAll(false) {}
        void operator()(CacheBinOperation* opList);
        uintptr_t getTimeRange() const { return -lclTime; }

        friend class CacheBinFunctor;
    };

public:
    CacheBinFunctor(typename LargeObjectCacheImpl<Props>::CacheBin *bin, ExtMemoryPool *extMemPool,
                    typename LargeObjectCacheImpl<Props>::BinBitMask *bitMask, int idx) :
        bin(bin), extMemPool(extMemPool), bitMask(bitMask), idx(idx), toRelease(NULL), needCleanup(false) {}
    void operator()(CacheBinOperation* opList);

    bool isCleanupNeeded() const { return needCleanup; }
    LargeMemoryBlock *getToRelease() const { return toRelease; }
    uintptr_t getCurrTime() const { return currTime; }
};

/* ---------------- Cache Bin Aggregator Operation Helpers ---------------- */

// The list of structures which describe the operation data
struct OpGet {
    static const CacheBinOperationType type = CBOP_GET;
    LargeMemoryBlock **res;
    size_t size;
    uintptr_t currTime;
};

struct OpPutList {
    static const CacheBinOperationType type = CBOP_PUT_LIST;
    LargeMemoryBlock *head;
};

struct OpCleanToThreshold {
    static const CacheBinOperationType type = CBOP_CLEAN_TO_THRESHOLD;
    LargeMemoryBlock **res;
    uintptr_t currTime;
};

struct OpCleanAll {
    static const CacheBinOperationType type = CBOP_CLEAN_ALL;
    LargeMemoryBlock **res;
};

struct OpUpdateUsedSize {
    static const CacheBinOperationType type = CBOP_UPDATE_USED_SIZE;
    size_t size;
};

union CacheBinOperationData {
private:
    OpGet opGet;
    OpPutList opPutList;
    OpCleanToThreshold opCleanToThreshold;
    OpCleanAll opCleanAll;
    OpUpdateUsedSize opUpdateUsedSize;
};

// Forward declarations
template <typename OpTypeData> OpTypeData& opCast(CacheBinOperation &op);
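// Note: every mutation of a CacheBin goes through the malloc aggregator (see
// CacheBin::ExecuteOperation() below). A thread packs its request into one of the Op*
// structures above, wraps it in a CacheBinOperation, and enqueues it; whichever thread
// becomes the aggregator then applies the whole batch through CacheBinFunctor, so the bin
// fields are only ever touched by that single thread while the batch is processed.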
// Describes the aggregator operation
struct CacheBinOperation : public MallocAggregatedOperation<CacheBinOperation>::type {
    CacheBinOperationType type;

    template <typename OpTypeData>
    CacheBinOperation(OpTypeData &d, CacheBinOperationStatus st = CBST_WAIT) {
        opCast<OpTypeData>(*this) = d;
        type = OpTypeData::type;
        MallocAggregatedOperation<CacheBinOperation>::type::status = st;
    }
private:
    CacheBinOperationData data;

    template <typename OpTypeData>
    friend OpTypeData& opCast(CacheBinOperation &op);
};

// The opCast function could be a member of CacheBinOperation, but that would be slightly
// ambiguous stylistically: it would look like a getter (with a cast) for the
// CacheBinOperation::data member, yet it has to return a reference to avoid a lot of
// getter/setter calls. So a global cast in the style of static_cast (or reinterpret_cast)
// is more readable and has more explicit semantics.
template <typename OpTypeData>
OpTypeData& opCast(CacheBinOperation &op) {
    return *reinterpret_cast<OpTypeData*>(&op.data);
}

/* ------------------------------------------------------------------------ */

#if __TBB_MALLOC_LOCACHE_STAT
//intptr_t mallocCalls, cacheHits;
std::atomic<intptr_t> mallocCalls, cacheHits;
//intptr_t memAllocKB, memHitKB;
std::atomic<intptr_t> memAllocKB, memHitKB;
#endif

inline bool lessThanWithOverflow(intptr_t a, intptr_t b)
{
    return (a < b && (b - a < UINTPTR_MAX/2)) ||
           (a > b && (a - b > UINTPTR_MAX/2));
}
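// Illustration (not part of the original comments): cache timestamps are free-running
// counters that may wrap around, so ages are compared on a circle rather than on a line.
// lessThanWithOverflow(a, b) treats a as "less" when the forward distance from a to b is
// shorter than half of the value range. For example, with hypothetical 16-bit arithmetic,
// a = 0xFFF0 and b = 0x0010 gives a > b numerically, yet a - b > 0x8000, so a is treated
// as the older timestamp, which matches a value taken just before the counter wrapped.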
/* ----------------------------------- Operation processing methods ------------------------------------ */

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::commitOperation(CacheBinOperation *op) const
{
    // FencedStore( (intptr_t&)(op->status), CBST_DONE );
    op->status.store(CBST_DONE, std::memory_order_release);
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const
{
    op->next = *opList;
    *opList = op;
}

template<typename Props> bool CacheBinFunctor<Props>::
    OperationPreprocessor::getFromPutList(CacheBinOperation *opGet, uintptr_t currTime)
{
    if ( head ) {
        uintptr_t age = head->age;
        LargeMemoryBlock *next = head->next;
        *opCast<OpGet>(*opGet).res = head;
        commitOperation( opGet );
        head = next;
        putListNum--;
        MALLOC_ASSERT( putListNum>=0, ASSERT_TEXT );

        // use moving average with current hit interval
        bin->updateMeanHitRange( currTime - age );
        return true;
    }
    return false;
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::addToPutList(LargeMemoryBlock *h, LargeMemoryBlock *t, int num)
{
    if ( head ) {
        MALLOC_ASSERT( tail, ASSERT_TEXT );
        tail->next = h;
        h->prev = tail;
        tail = t;
        putListNum += num;
    } else {
        head = h;
        tail = t;
        putListNum = num;
    }
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::operator()(CacheBinOperation* opList)
{
    for ( CacheBinOperation *op = opList, *opNext; op; op = opNext ) {
        opNext = op->next;
        switch ( op->type ) {
        case CBOP_GET:
            {
                lclTime--;
                if ( !lastGetOpTime ) {
                    lastGetOpTime = lclTime;
                    lastGet = 0;
                } else if ( !lastGet ) lastGet = lclTime;

                if ( !getFromPutList(op,lclTime) ) {
                    opCast<OpGet>(*op).currTime = lclTime;
                    addOpToOpList( op, &opGet );
                }
            }
            break;

        case CBOP_PUT_LIST:
            {
                LargeMemoryBlock *head = opCast<OpPutList>(*op).head;
                LargeMemoryBlock *curr = head, *prev = NULL;

                int num = 0;
                do {
                    // prev pointers are not kept when blocks are assigned to bins, so set them now
                    curr->prev = prev;

                    // Save the local times to the memory blocks. Local times are necessary
                    // for the getFromPutList function, which updates the hit range value in
                    // CacheBin when OP_GET and OP_PUT_LIST operations are merged successfully.
                    // The age will be updated to the correct global time after preprocessing,
                    // when the global cache time is updated.
                    curr->age = --lclTime;

                    prev = curr;
                    num += 1;

                    STAT_increment(getThreadId(), ThreadCommonCounters, cacheLargeObj);
                } while ((curr = curr->next) != NULL);

                LargeMemoryBlock *tail = prev;
                addToPutList(head, tail, num);

                while ( opGet ) {
                    CacheBinOperation *next = opGet->next;
                    if ( !getFromPutList(opGet, opCast<OpGet>(*opGet).currTime) )
                        break;
                    opGet = next;
                }
            }
            break;

        case CBOP_UPDATE_USED_SIZE:
            updateUsedSize += opCast<OpUpdateUsedSize>(*op).size;
            commitOperation( op );
            break;

        case CBOP_CLEAN_ALL:
            isCleanAll = true;
            addOpToOpList( op, &opClean );
            break;

        case CBOP_CLEAN_TO_THRESHOLD:
            {
                uintptr_t currTime = opCast<OpCleanToThreshold>(*op).currTime;
                // We don't worry about currTime overflow since it is a rare
                // occurrence and doesn't affect correctness
                cleanTime = cleanTime < currTime ? currTime : cleanTime;
                addOpToOpList( op, &opClean );
            }
            break;

        default:
            MALLOC_ASSERT( false, "Unknown operation." );
        }
    }
    MALLOC_ASSERT( !( opGet && head ), "Not all put/get pairs are processed!" );
}

template<typename Props> void CacheBinFunctor<Props>::operator()(CacheBinOperation* opList)
{
    MALLOC_ASSERT( opList, "Empty operation list is passed into operation handler." );

    OperationPreprocessor prep(bin);
    prep(opList);

    if ( uintptr_t timeRange = prep.getTimeRange() ) {
        uintptr_t startTime = extMemPool->loc.getCurrTimeRange(timeRange);
        // endTime is used as the current (base) time since the local time is negative.
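        // Illustrative note: preprocessing assigned local times -1, -2, ..., -timeRange to the
        // batched operations, and getCurrTimeRange(timeRange) reserves exactly timeRange ticks
        // of global time, returning the first of them. A local time -k therefore maps to the
        // global time endTime - k; the newest reserved tick is endTime - 1, which is stored
        // into currTime below.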
        uintptr_t endTime = startTime + timeRange;

        if ( prep.lastGetOpTime && prep.lastGet ) bin->setLastGet(prep.lastGet+endTime);

        if ( CacheBinOperation *opGet = prep.opGet ) {
            bool isEmpty = false;
            do {
#if __TBB_MALLOC_WHITEBOX_TEST
                tbbmalloc_whitebox::locGetProcessed++;
#endif
                const OpGet &opGetData = opCast<OpGet>(*opGet);
                if ( !isEmpty ) {
                    if ( LargeMemoryBlock *res = bin->get() ) {
                        uintptr_t getTime = opGetData.currTime + endTime;
                        // use moving average with current hit interval
                        bin->updateMeanHitRange( getTime - res->age);
                        bin->updateCachedSize( -opGetData.size );
                        *opGetData.res = res;
                    } else {
                        isEmpty = true;
                        uintptr_t lastGetOpTime = prep.lastGetOpTime+endTime;
                        bin->forgetOutdatedState(lastGetOpTime);
                        bin->updateAgeThreshold(lastGetOpTime);
                    }
                }

                CacheBinOperation *opNext = opGet->next;
                bin->updateUsedSize( opGetData.size, bitMask, idx );
                prep.commitOperation( opGet );
                opGet = opNext;
            } while ( opGet );
            if ( prep.lastGetOpTime )
                bin->setLastGet( prep.lastGetOpTime + endTime );
        } else if ( LargeMemoryBlock *curr = prep.head ) {
            curr->prev = NULL;
            while ( curr ) {
                // Update local times to global times
                curr->age += endTime;
                curr=curr->next;
            }
#if __TBB_MALLOC_WHITEBOX_TEST
            tbbmalloc_whitebox::locPutProcessed+=prep.putListNum;
#endif
            toRelease = bin->putList(prep.head, prep.tail, bitMask, idx, prep.putListNum, extMemPool->loc.hugeSizeThreshold);
        }
        needCleanup = extMemPool->loc.isCleanupNeededOnRange(timeRange, startTime);
        currTime = endTime - 1;
    }

    if ( CacheBinOperation *opClean = prep.opClean ) {
        if ( prep.isCleanAll )
            *opCast<OpCleanAll>(*opClean).res = bin->cleanAll(bitMask, idx);
        else
            *opCast<OpCleanToThreshold>(*opClean).res = bin->cleanToThreshold(prep.cleanTime, bitMask, idx);

        CacheBinOperation *opNext = opClean->next;
        prep.commitOperation( opClean );

        while ((opClean = opNext) != NULL) {
            opNext = opClean->next;
            prep.commitOperation(opClean);
        }
    }

    if ( size_t size = prep.updateUsedSize )
        bin->updateUsedSize(size, bitMask, idx);
}

/* ----------------------------------------------------------------------------------------------------- */
/* --------------------------- Methods for creating and executing operations --------------------------- */
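// Note: ExecuteOperation() below is the single funnel for all cache bin requests. It runs
// CacheBinFunctor under the aggregator and then, already outside the aggregated section,
// returns any evicted blocks to the backend and, if the cleanup period elapsed during this
// batch, triggers a regular cleanup of the whole large object cache.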
template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime)
{
    CacheBinFunctor<Props> func( this, extMemPool, bitMask, idx );
    aggregator.execute( op, func, longLifeTime );

    if ( LargeMemoryBlock *toRelease = func.getToRelease()) {
        extMemPool->backend.returnLargeObject(toRelease);
    }

    if ( func.isCleanupNeeded() ) {
        extMemPool->loc.doCleanup( func.getCurrTime(), /*doThreshDecr=*/false);
    }
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *lmb=NULL;
    OpGet data = {&lmb, size};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
    return lmb;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx)
{
    MALLOC_ASSERT(sizeof(LargeMemoryBlock)+sizeof(CacheBinOperation)<=head->unalignedSize, "CacheBinOperation is too large to be placed in LargeMemoryBlock!");

    OpPutList data = {head};
    CacheBinOperation *op = new (head+1) CacheBinOperation(data, CBST_NOWAIT);
    ExecuteOperation( op, extMemPool, bitMask, idx, false );
}
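// Note: putList() above constructs the aggregator operation in place, right behind the
// LargeMemoryBlock header of the first block being cached (new (head+1)), so the release
// path needs no extra allocation; the assertion checks that the block is big enough to host
// both records. CBST_NOWAIT appears to mean the releasing thread does not wait for the
// operation to complete, unlike get(), which must wait for its result.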
template<typename Props> bool LargeObjectCacheImpl<Props>::
    CacheBin::cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx)
{
    LargeMemoryBlock *toRelease = NULL;

    /* oldest may be more recent than currTime, which is why the difference is cast
       to a signed type. Age overflow is also handled correctly. */
    if (last && (intptr_t)(currTime - oldest) > ageThreshold) {
        OpCleanToThreshold data = {&toRelease, currTime};
        CacheBinOperation op(data);
        ExecuteOperation( &op, extMemPool, bitMask, idx );
    }
    bool released = toRelease;

    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}

template<typename Props> bool LargeObjectCacheImpl<Props>::
    CacheBin::releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *toRelease = NULL;

    if (last) {
        OpCleanAll data = {&toRelease};
        CacheBinOperation op(data);
        ExecuteOperation(&op, extMemPool, bitMask, idx);
    }
    bool released = toRelease;

    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        MALLOC_ASSERT(!helper || lessThanWithOverflow(helper->age, toRelease->age),
                      ASSERT_TEXT);
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    OpUpdateUsedSize data = {size};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
}

/* ------------------------------ Unsafe methods used with the aggregator ------------------------------ */

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask, int idx, int num, size_t hugeSizeThreshold)
{
    size_t size = head->unalignedSize;
    usedSize -= num*size;
    MALLOC_ASSERT( !last || (last->age != 0 && last->age != -1U), ASSERT_TEXT );
    MALLOC_ASSERT( (tail==head && num==1) || (tail!=head && num>1), ASSERT_TEXT );
    LargeMemoryBlock *toRelease = NULL;
    if (size < hugeSizeThreshold && !lastCleanedAge) {
        // The first object of this size was released.
        // Do not cache it, and remember when this occurred
        // so it can be taken into account on a cache miss.
        lastCleanedAge = tail->age;
        toRelease = tail;
        tail = tail->prev;
        if (tail)
            tail->next = NULL;
        else
            head = NULL;
        num--;
    }
    if (num) {
        // add [head;tail] list to cache
        MALLOC_ASSERT( tail, ASSERT_TEXT );
        tail->next = first;
        if (first)
            first->prev = tail;
        first = head;
        if (!last) {
            MALLOC_ASSERT(0 == oldest, ASSERT_TEXT);
            oldest = tail->age;
            last = tail;
        }

        cachedSize += num*size;
    }

    // No object in use and nothing in the bin: mark the bin as empty
    if (!usedSize && !first)
        bitMask->set(idx, false);

    return toRelease;
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::get()
{
    LargeMemoryBlock *result=first;
    if (result) {
        first = result->next;
        if (first)
            first->prev = NULL;
        else {
            last = NULL;
            oldest = 0;
        }
    }

    return result;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::forgetOutdatedState(uintptr_t currTime)
{
    // If the time since the last get is LongWaitFactor times more than ageThreshold
    // for the bin, treat the bin as rarely used and forget everything we know about it.
    // If LongWaitFactor is too small, we forget too early and thus prevent good caching;
    // if it is too high, we end up caching blocks with unrelated usage patterns.
    const uintptr_t sinceLastGet = currTime - lastGet;
    bool doCleanup = false;

    if (ageThreshold)
        doCleanup = sinceLastGet > Props::LongWaitFactor * ageThreshold;
    else if (lastCleanedAge)
        doCleanup = sinceLastGet > Props::LongWaitFactor * (lastCleanedAge - lastGet);

    if (doCleanup) {
        lastCleanedAge = 0;
        ageThreshold = 0;
    }

}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx)
{
    /* last->age may be more recent than currTime, which is why the difference is cast
       to a signed type. Age overflow is also handled correctly. */
    if ( !last || (intptr_t)(currTime - last->age) < ageThreshold ) return NULL;

#if MALLOC_DEBUG
    uintptr_t nextAge = 0;
#endif
    do {
#if MALLOC_DEBUG
        // check that the list is ordered
        MALLOC_ASSERT(!nextAge || lessThanWithOverflow(nextAge, last->age),
                      ASSERT_TEXT);
        nextAge = last->age;
#endif
        cachedSize -= last->unalignedSize;
        last = last->prev;
    } while (last && (intptr_t)(currTime - last->age) > ageThreshold);

    LargeMemoryBlock *toRelease = NULL;
    if (last) {
        toRelease = last->next;
        oldest = last->age;
        last->next = NULL;
    } else {
        toRelease = first;
        first = NULL;
        oldest = 0;
        if (!usedSize)
            bitMask->set(idx, false);
    }
    MALLOC_ASSERT( toRelease, ASSERT_TEXT );
    lastCleanedAge = toRelease->age;

    return toRelease;
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::cleanAll(BinBitMask *bitMask, int idx)
{
    if (!last) return NULL;

    LargeMemoryBlock *toRelease = first;
    last = NULL;
    first = NULL;
    oldest = 0;
    cachedSize = 0;
    if (!usedSize)
        bitMask->set(idx, false);

    return toRelease;
}

/* ----------------------------------------------------------------------------------------------------- */

template<typename Props> size_t LargeObjectCacheImpl<Props>::
    CacheBin::reportStat(int num, FILE *f)
{
#if __TBB_MALLOC_LOCACHE_STAT
    if (first)
        printf("%d(%lu): total %lu KB thr %ld lastCln %lu oldest %lu\n",
               num, num*Props::CacheStep+Props::MinSize,
               cachedSize/1024, ageThreshold, lastCleanedAge, oldest);
#else
    suppress_unused_warning(num);
    suppress_unused_warning(f);
#endif
    return cachedSize;
}

// Release objects from cache blocks that are older than ageThreshold
template<typename Props>
bool LargeObjectCacheImpl<Props>::regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currTime, bool doThreshDecr)
{
    bool released = false;
    BinsSummary binsSummary;

    // The threshold setting is below this cache (or starts at the zero index): no bins to clean up here
    if (hugeSizeThresholdIdx == 0) return false;

    // Start searching from the bin just below the huge size threshold (those bins can be cleaned up)
    int startSearchIdx = hugeSizeThresholdIdx - 1;
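    // Note: bitMask tracks which bins currently hold cached or in-use objects. As used here,
    // getMaxTrue(i) returns the index of the highest set bit at or below i (negative when no
    // such bit remains), so the loop below visits only occupied bins, walking downwards from
    // the last bin that is below the huge size threshold.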
    for (int i = bitMask.getMaxTrue(startSearchIdx); i >= 0; i = bitMask.getMaxTrue(i-1)) {
        bin[i].updateBinsSummary(&binsSummary);
        if (!doThreshDecr && tooLargeLOC.load(std::memory_order_relaxed) > 2 && binsSummary.isLOCTooLarge()) {
            // If the LOC has been too large for quite a long time, decrease the threshold
            // based on bin hit statistics.
            // For this, redo the cleanup from the beginning.
            // Note: on this iteration the total usedSz may not look too large
            // in comparison to the total cachedSz, as we have computed it only
            // partially. We are OK with that.
            i = bitMask.getMaxTrue(startSearchIdx)+1;
            doThreshDecr = true;
            binsSummary.reset();
            continue;
        }
        if (doThreshDecr)
            bin[i].decreaseThreshold();

        if (bin[i].cleanToThreshold(extMemPool, &bitMask, currTime, i)) {
            released = true;
        }
    }
    // We want to detect whether the LOC has been too large continuously for some time,
    // so races between incrementing and zeroing are acceptable, but the increment
    // itself must be atomic.
    if (binsSummary.isLOCTooLarge()) {
        tooLargeLOC++;
    } else {
        tooLargeLOC.store(0, std::memory_order_relaxed);
    }
    return released;
}

template<typename Props>
bool LargeObjectCacheImpl<Props>::cleanAll(ExtMemoryPool *extMemPool)
{
    bool released = false;
    for (int i = numBins-1; i >= 0; i--) {
        released |= bin[i].releaseAllToBackend(extMemPool, &bitMask, i);
    }
    return released;
}

template<typename Props>
void LargeObjectCacheImpl<Props>::reset() {
    tooLargeLOC.store(0, std::memory_order_relaxed);
    for (int i = numBins-1; i >= 0; i--)
        bin[i].init();
    bitMask.reset();
}

#if __TBB_MALLOC_WHITEBOX_TEST
template<typename Props>
size_t LargeObjectCacheImpl<Props>::getLOCSize() const
{
    size_t size = 0;
    for (int i = numBins-1; i >= 0; i--)
        size += bin[i].getSize();
    return size;
}

size_t LargeObjectCache::getLOCSize() const
{
    return largeCache.getLOCSize() + hugeCache.getLOCSize();
}

template<typename Props>
size_t LargeObjectCacheImpl<Props>::getUsedSize() const
{
    size_t size = 0;
    for (int i = numBins-1; i >= 0; i--)
        size += bin[i].getUsedSize();
    return size;
}

size_t LargeObjectCache::getUsedSize() const
{
    return largeCache.getUsedSize() + hugeCache.getUsedSize();
}
#endif // __TBB_MALLOC_WHITEBOX_TEST
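// Note: isCleanupNeededOnRange() decides whether the interval of global time just consumed
// by a batch of operations, (currTime, currTime + range], crosses a cleanup point: either
// the range itself is at least cacheCleanupFreq long, or the time counter overflows inside
// the range, or some multiple of cacheCleanupFreq falls within it (the alignUp check).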
inline bool LargeObjectCache::isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime)
{
    return range >= cacheCleanupFreq
        || currTime+range < currTime-1 // overflow, 0 is power of 2, do cleanup
        // (prev;prev+range] contains n*cacheCleanupFreq
        || alignUp(currTime, cacheCleanupFreq)<currTime+range;
}

bool LargeObjectCache::doCleanup(uintptr_t currTime, bool doThreshDecr)
{
    if (!doThreshDecr)
        extMemPool->allLocalCaches.markUnused();
    return largeCache.regularCleanup(extMemPool, currTime, doThreshDecr)
           | hugeCache.regularCleanup(extMemPool, currTime, doThreshDecr);
}

bool LargeObjectCache::decreasingCleanup()
{
    return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/true);
}

bool LargeObjectCache::regularCleanup()
{
    return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/false);
}

bool LargeObjectCache::cleanAll()
{
    return largeCache.cleanAll(extMemPool) | hugeCache.cleanAll(extMemPool);
}

void LargeObjectCache::reset()
{
    largeCache.reset();
    hugeCache.reset();
}

template<typename Props>
LargeMemoryBlock *LargeObjectCacheImpl<Props>::get(ExtMemoryPool *extMemoryPool, size_t size)
{
    int idx = Props::sizeToIdx(size);

    LargeMemoryBlock *lmb = bin[idx].get(extMemoryPool, size, &bitMask, idx);

    if (lmb) {
        MALLOC_ITT_SYNC_ACQUIRED(bin+idx);
        STAT_increment(getThreadId(), ThreadCommonCounters, allocCachedLargeObj);
    }
    return lmb;
}

template<typename Props>
void LargeObjectCacheImpl<Props>::updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size)
{
    int idx = Props::sizeToIdx(size);
    MALLOC_ASSERT(idx<numBins, ASSERT_TEXT);
    bin[idx].updateUsedSize(extMemPool, op==decrease? -size : size, &bitMask, idx);
}
#if __TBB_MALLOC_LOCACHE_STAT
template<typename Props>
void LargeObjectCacheImpl<Props>::reportStat(FILE *f)
{
    size_t cachedSize = 0;
    for (int i=0; i<numBins; i++)
        cachedSize += bin[i].reportStat(i, f);
    fprintf(f, "total LOC size %lu MB\n", cachedSize/1024/1024);
}

void LargeObjectCache::reportStat(FILE *f)
{
    largeCache.reportStat(f);
    hugeCache.reportStat(f);
    fprintf(f, "cache time %lu\n", cacheCurrTime.load(std::memory_order_relaxed));
}
#endif

template<typename Props>
void LargeObjectCacheImpl<Props>::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *toCache)
{
    int toBinIdx = Props::sizeToIdx(toCache->unalignedSize);

    MALLOC_ITT_SYNC_RELEASING(bin+toBinIdx);
    bin[toBinIdx].putList(extMemPool, toCache, &bitMask, toBinIdx);
}

void LargeObjectCache::updateCacheState(DecreaseOrIncrease op, size_t size)
{
    if (size < maxLargeSize)
        largeCache.updateCacheState(extMemPool, op, size);
    else if (size < maxHugeSize)
        hugeCache.updateCacheState(extMemPool, op, size);
}

uintptr_t LargeObjectCache::getCurrTime()
{
    return ++cacheCurrTime;
}

uintptr_t LargeObjectCache::getCurrTimeRange(uintptr_t range)
{
    return (cacheCurrTime.fetch_add(range) + 1);
}

void LargeObjectCache::registerRealloc(size_t oldSize, size_t newSize)
{
    updateCacheState(decrease, oldSize);
    updateCacheState(increase, alignToBin(newSize));
}

size_t LargeObjectCache::alignToBin(size_t size) {
    return size < maxLargeSize ? LargeCacheType::alignToBin(size) : HugeCacheType::alignToBin(size);
}
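// Note: the cache keeps two sets of bins, largeCache for "large" objects (roughly
// [minLargeSize, maxLargeSize)) and hugeCache for "huge" objects ([maxLargeSize, maxHugeSize]).
// The combined index below simply concatenates the two bin ranges, which is why huge sizes
// are offset by LargeCacheType::numBins.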
// Used for internal purposes
int LargeObjectCache::sizeToIdx(size_t size)
{
    MALLOC_ASSERT(size <= maxHugeSize, ASSERT_TEXT);
    return size < maxLargeSize ?
        LargeCacheType::sizeToIdx(size) :
        LargeCacheType::numBins + HugeCacheType::sizeToIdx(size);
}

void LargeObjectCache::putList(LargeMemoryBlock *list)
{
    LargeMemoryBlock *toProcess, *n;

    for (LargeMemoryBlock *curr = list; curr; curr = toProcess) {
        LargeMemoryBlock *tail = curr;
        toProcess = curr->next;
        if (!sizeInCacheRange(curr->unalignedSize)) {
            extMemPool->backend.returnLargeObject(curr);
            continue;
        }
        int currIdx = sizeToIdx(curr->unalignedSize);

        // Find all blocks that fit the same bin. A more efficient sorting algorithm is not
        // used because the list is short (commonly, LocalLOC's HIGH_MARK-LOW_MARK, i.e. 24 items).
        for (LargeMemoryBlock *b = toProcess; b; b = n) {
            n = b->next;
            if (sizeToIdx(b->unalignedSize) == currIdx) {
                tail->next = b;
                tail = b;
                if (toProcess == b)
                    toProcess = toProcess->next;
                else {
                    b->prev->next = b->next;
                    if (b->next)
                        b->next->prev = b->prev;
                }
            }
        }
        tail->next = NULL;
        if (curr->unalignedSize < maxLargeSize)
            largeCache.putList(extMemPool, curr);
        else
            hugeCache.putList(extMemPool, curr);
    }
}

void LargeObjectCache::put(LargeMemoryBlock *largeBlock)
{
    size_t blockSize = largeBlock->unalignedSize;
    if (sizeInCacheRange(blockSize)) {
        largeBlock->next = NULL;
        if (blockSize < maxLargeSize)
            largeCache.putList(extMemPool, largeBlock);
        else
            hugeCache.putList(extMemPool, largeBlock);
    } else {
        extMemPool->backend.returnLargeObject(largeBlock);
    }
}

LargeMemoryBlock *LargeObjectCache::get(size_t size)
{
    MALLOC_ASSERT( size >= minLargeSize, ASSERT_TEXT );
    if (sizeInCacheRange(size)) {
        return size < maxLargeSize ?
            largeCache.get(extMemPool, size) : hugeCache.get(extMemPool, size);
    }
    return NULL;
}
LargeMemoryBlock *ExtMemoryPool::mallocLargeObject(MemoryPool *pool, size_t allocationSize)
{
#if __TBB_MALLOC_LOCACHE_STAT
    mallocCalls++;
    memAllocKB.fetch_add(allocationSize/1024);
#endif
    LargeMemoryBlock* lmb = loc.get(allocationSize);
    if (!lmb) {
        BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true);
        if (backRefIdx.isInvalid())
            return NULL;

        // unalignedSize is set in getLargeBlock
        lmb = backend.getLargeBlock(allocationSize);
        if (!lmb) {
            removeBackRef(backRefIdx);
            loc.updateCacheState(decrease, allocationSize);
            return NULL;
        }
        lmb->backRefIdx = backRefIdx;
        lmb->pool = pool;
        STAT_increment(getThreadId(), ThreadCommonCounters, allocNewLargeObj);
    } else {
#if __TBB_MALLOC_LOCACHE_STAT
        cacheHits++;
        memHitKB.fetch_add(allocationSize/1024);
#endif
    }
    return lmb;
}

void ExtMemoryPool::freeLargeObject(LargeMemoryBlock *mBlock)
{
    loc.put(mBlock);
}

void ExtMemoryPool::freeLargeObjectList(LargeMemoryBlock *head)
{
    loc.putList(head);
}

bool ExtMemoryPool::softCachesCleanup()
{
    return loc.regularCleanup();
}

bool ExtMemoryPool::hardCachesCleanup()
{
    // Thread-local caches must be cleaned before the LOC,
    // because an object from a thread-local cache can be released to the LOC
    bool ret = releaseAllLocalCaches();
    ret |= orphanedBlocks.cleanup(&backend);
    ret |= loc.cleanAll();
    ret |= backend.clean();
    return ret;
}

#if BACKEND_HAS_MREMAP
void *ExtMemoryPool::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
{
    const size_t oldUnalignedSize = ((LargeObjectHdr*)ptr - 1)->memoryBlock->unalignedSize;
    void *o = backend.remap(ptr, oldSize, newSize, alignment);
    if (o) {
        LargeMemoryBlock *lmb = ((LargeObjectHdr*)o - 1)->memoryBlock;
        loc.registerRealloc(oldUnalignedSize, lmb->unalignedSize);
    }
    return o;
}
#endif /* BACKEND_HAS_MREMAP */

/*********** End allocation of large objects **********/

} // namespace internal
} // namespace rml
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma warning(pop)
#endif