151c0b2f7Stbbdev /* 2*2110128eSsarathnandu Copyright (c) 2005-2023 Intel Corporation 351c0b2f7Stbbdev 451c0b2f7Stbbdev Licensed under the Apache License, Version 2.0 (the "License"); 551c0b2f7Stbbdev you may not use this file except in compliance with the License. 651c0b2f7Stbbdev You may obtain a copy of the License at 751c0b2f7Stbbdev 851c0b2f7Stbbdev http://www.apache.org/licenses/LICENSE-2.0 951c0b2f7Stbbdev 1051c0b2f7Stbbdev Unless required by applicable law or agreed to in writing, software 1151c0b2f7Stbbdev distributed under the License is distributed on an "AS IS" BASIS, 1251c0b2f7Stbbdev WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1351c0b2f7Stbbdev See the License for the specific language governing permissions and 1451c0b2f7Stbbdev limitations under the License. 1551c0b2f7Stbbdev */ 1651c0b2f7Stbbdev 1751c0b2f7Stbbdev #include "tbbmalloc_internal.h" 18478de5b1Stbbdev #include "../src/tbb/environment.h" 1951c0b2f7Stbbdev 2051c0b2f7Stbbdev #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) 2151c0b2f7Stbbdev // Suppress warning: unary minus operator applied to unsigned type, result still unsigned 2251c0b2f7Stbbdev // TBB_REVAMP_TODO: review this warning 2351c0b2f7Stbbdev #pragma warning(push) 2451c0b2f7Stbbdev #pragma warning(disable:4146) 2551c0b2f7Stbbdev #endif 2651c0b2f7Stbbdev 2751c0b2f7Stbbdev /******************************* Allocation of large objects *********************************************/ 2851c0b2f7Stbbdev 2951c0b2f7Stbbdev namespace rml { 3051c0b2f7Stbbdev namespace internal { 3151c0b2f7Stbbdev 3251c0b2f7Stbbdev /* ---------------------------- Large Object cache init section ---------------------------------------- */ 3351c0b2f7Stbbdev 3451c0b2f7Stbbdev void LargeObjectCache::init(ExtMemoryPool *memPool) 3551c0b2f7Stbbdev { 3651c0b2f7Stbbdev extMemPool = memPool; 3751c0b2f7Stbbdev // scalable_allocation_mode can be called before allocator initialization, respect this manual request 3851c0b2f7Stbbdev if (hugeSizeThreshold == 0) { 3951c0b2f7Stbbdev // Huge size threshold initialization if environment variable was set 4051c0b2f7Stbbdev long requestedThreshold = tbb::detail::r1::GetIntegralEnvironmentVariable("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD"); 4151c0b2f7Stbbdev // Read valid env or initialize by default with max possible values 4251c0b2f7Stbbdev if (requestedThreshold != -1) { 4351c0b2f7Stbbdev setHugeSizeThreshold(requestedThreshold); 4451c0b2f7Stbbdev } else { 4551c0b2f7Stbbdev setHugeSizeThreshold(maxHugeSize); 4651c0b2f7Stbbdev } 4751c0b2f7Stbbdev } 4851c0b2f7Stbbdev } 4951c0b2f7Stbbdev 5051c0b2f7Stbbdev /* ----------------------------- Huge size threshold settings ----------------------------------------- */ 5151c0b2f7Stbbdev 5251c0b2f7Stbbdev void LargeObjectCache::setHugeSizeThreshold(size_t value) 5351c0b2f7Stbbdev { 5451c0b2f7Stbbdev // Valid in the huge cache range: [MaxLargeSize, MaxHugeSize]. 5551c0b2f7Stbbdev if (value <= maxHugeSize) { 5651c0b2f7Stbbdev hugeSizeThreshold = value >= maxLargeSize ? alignToBin(value) : maxLargeSize; 5751c0b2f7Stbbdev 5851c0b2f7Stbbdev // Calculate local indexes for the global threshold size (for fast search inside a regular cleanup) 5951c0b2f7Stbbdev largeCache.hugeSizeThresholdIdx = LargeCacheType::numBins; 6051c0b2f7Stbbdev hugeCache.hugeSizeThresholdIdx = HugeCacheType::sizeToIdx(hugeSizeThreshold); 6151c0b2f7Stbbdev } 6251c0b2f7Stbbdev } 6351c0b2f7Stbbdev 6451c0b2f7Stbbdev bool LargeObjectCache::sizeInCacheRange(size_t size) 6551c0b2f7Stbbdev { 66451e76efSDmitri Mokhov return size < maxHugeSize && (size <= defaultMaxHugeSize || size >= hugeSizeThreshold); 6751c0b2f7Stbbdev } 6851c0b2f7Stbbdev 6951c0b2f7Stbbdev /* ----------------------------------------------------------------------------------------------------- */ 7051c0b2f7Stbbdev 7151c0b2f7Stbbdev /* The functor called by the aggregator for the operation list */ 7251c0b2f7Stbbdev template<typename Props> 7351c0b2f7Stbbdev class CacheBinFunctor { 7451c0b2f7Stbbdev typename LargeObjectCacheImpl<Props>::CacheBin *const bin; 7551c0b2f7Stbbdev ExtMemoryPool *const extMemPool; 7651c0b2f7Stbbdev typename LargeObjectCacheImpl<Props>::BinBitMask *const bitMask; 7751c0b2f7Stbbdev const int idx; 7851c0b2f7Stbbdev 7951c0b2f7Stbbdev LargeMemoryBlock *toRelease; 8051c0b2f7Stbbdev bool needCleanup; 8151c0b2f7Stbbdev uintptr_t currTime; 8251c0b2f7Stbbdev 8351c0b2f7Stbbdev /* Do preprocessing under the operation list. */ 8451c0b2f7Stbbdev /* All the OP_PUT_LIST operations are merged in the one operation. 8551c0b2f7Stbbdev All OP_GET operations are merged with the OP_PUT_LIST operations but 8651c0b2f7Stbbdev it demands the update of the moving average value in the bin. 8751c0b2f7Stbbdev Only the last OP_CLEAN_TO_THRESHOLD operation has sense. 8851c0b2f7Stbbdev The OP_CLEAN_ALL operation also should be performed only once. 8951c0b2f7Stbbdev Moreover it cancels the OP_CLEAN_TO_THRESHOLD operation. */ 9051c0b2f7Stbbdev class OperationPreprocessor { 9151c0b2f7Stbbdev // TODO: remove the dependency on CacheBin. 9251c0b2f7Stbbdev typename LargeObjectCacheImpl<Props>::CacheBin *const bin; 9351c0b2f7Stbbdev 9451c0b2f7Stbbdev /* Contains the relative time in the operation list. 9551c0b2f7Stbbdev It counts in the reverse order since the aggregator also 9651c0b2f7Stbbdev provides operations in the reverse order. */ 9751c0b2f7Stbbdev uintptr_t lclTime; 9851c0b2f7Stbbdev 9951c0b2f7Stbbdev /* opGet contains only OP_GET operations which cannot be merge with OP_PUT operations 10051c0b2f7Stbbdev opClean contains all OP_CLEAN_TO_THRESHOLD and OP_CLEAN_ALL operations. */ 10151c0b2f7Stbbdev CacheBinOperation *opGet, *opClean; 10251c0b2f7Stbbdev /* The time of the last OP_CLEAN_TO_THRESHOLD operations */ 10351c0b2f7Stbbdev uintptr_t cleanTime; 10451c0b2f7Stbbdev 10551c0b2f7Stbbdev /* lastGetOpTime - the time of the last OP_GET operation. 10651c0b2f7Stbbdev lastGet - the same meaning as CacheBin::lastGet */ 10751c0b2f7Stbbdev uintptr_t lastGetOpTime, lastGet; 10851c0b2f7Stbbdev 10951c0b2f7Stbbdev /* The total sum of all usedSize changes requested with CBOP_UPDATE_USED_SIZE operations. */ 11051c0b2f7Stbbdev size_t updateUsedSize; 11151c0b2f7Stbbdev 11251c0b2f7Stbbdev /* The list of blocks for the OP_PUT_LIST operation. */ 11351c0b2f7Stbbdev LargeMemoryBlock *head, *tail; 11451c0b2f7Stbbdev int putListNum; 11551c0b2f7Stbbdev 11651c0b2f7Stbbdev /* if the OP_CLEAN_ALL is requested. */ 11751c0b2f7Stbbdev bool isCleanAll; 11851c0b2f7Stbbdev 11951c0b2f7Stbbdev inline void commitOperation(CacheBinOperation *op) const; 12051c0b2f7Stbbdev inline void addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const; 12151c0b2f7Stbbdev bool getFromPutList(CacheBinOperation* opGet, uintptr_t currTime); 12251c0b2f7Stbbdev void addToPutList( LargeMemoryBlock *head, LargeMemoryBlock *tail, int num ); 12351c0b2f7Stbbdev 12451c0b2f7Stbbdev public: 12551c0b2f7Stbbdev OperationPreprocessor(typename LargeObjectCacheImpl<Props>::CacheBin *bin) : 12657f524caSIlya Isaev bin(bin), lclTime(0), opGet(nullptr), opClean(nullptr), cleanTime(0), 12757f524caSIlya Isaev lastGetOpTime(0), updateUsedSize(0), head(nullptr), isCleanAll(false) {} 12851c0b2f7Stbbdev void operator()(CacheBinOperation* opList); 12951c0b2f7Stbbdev uintptr_t getTimeRange() const { return -lclTime; } 13051c0b2f7Stbbdev 13151c0b2f7Stbbdev friend class CacheBinFunctor; 13251c0b2f7Stbbdev }; 13351c0b2f7Stbbdev 13451c0b2f7Stbbdev public: 13551c0b2f7Stbbdev CacheBinFunctor(typename LargeObjectCacheImpl<Props>::CacheBin *bin, ExtMemoryPool *extMemPool, 13651c0b2f7Stbbdev typename LargeObjectCacheImpl<Props>::BinBitMask *bitMask, int idx) : 13757f524caSIlya Isaev bin(bin), extMemPool(extMemPool), bitMask(bitMask), idx(idx), toRelease(nullptr), needCleanup(false) {} 13851c0b2f7Stbbdev void operator()(CacheBinOperation* opList); 13951c0b2f7Stbbdev 14051c0b2f7Stbbdev bool isCleanupNeeded() const { return needCleanup; } 14151c0b2f7Stbbdev LargeMemoryBlock *getToRelease() const { return toRelease; } 14251c0b2f7Stbbdev uintptr_t getCurrTime() const { return currTime; } 14351c0b2f7Stbbdev }; 14451c0b2f7Stbbdev 14551c0b2f7Stbbdev /* ---------------- Cache Bin Aggregator Operation Helpers ---------------- */ 14651c0b2f7Stbbdev 14751c0b2f7Stbbdev // The list of structures which describe the operation data 14851c0b2f7Stbbdev struct OpGet { 14951c0b2f7Stbbdev static const CacheBinOperationType type = CBOP_GET; 15051c0b2f7Stbbdev LargeMemoryBlock **res; 15151c0b2f7Stbbdev size_t size; 15251c0b2f7Stbbdev uintptr_t currTime; 15351c0b2f7Stbbdev }; 15451c0b2f7Stbbdev 15551c0b2f7Stbbdev struct OpPutList { 15651c0b2f7Stbbdev static const CacheBinOperationType type = CBOP_PUT_LIST; 15751c0b2f7Stbbdev LargeMemoryBlock *head; 15851c0b2f7Stbbdev }; 15951c0b2f7Stbbdev 16051c0b2f7Stbbdev struct OpCleanToThreshold { 16151c0b2f7Stbbdev static const CacheBinOperationType type = CBOP_CLEAN_TO_THRESHOLD; 16251c0b2f7Stbbdev LargeMemoryBlock **res; 16351c0b2f7Stbbdev uintptr_t currTime; 16451c0b2f7Stbbdev }; 16551c0b2f7Stbbdev 16651c0b2f7Stbbdev struct OpCleanAll { 16751c0b2f7Stbbdev static const CacheBinOperationType type = CBOP_CLEAN_ALL; 16851c0b2f7Stbbdev LargeMemoryBlock **res; 16951c0b2f7Stbbdev }; 17051c0b2f7Stbbdev 17151c0b2f7Stbbdev struct OpUpdateUsedSize { 17251c0b2f7Stbbdev static const CacheBinOperationType type = CBOP_UPDATE_USED_SIZE; 17351c0b2f7Stbbdev size_t size; 17451c0b2f7Stbbdev }; 17551c0b2f7Stbbdev 17651c0b2f7Stbbdev union CacheBinOperationData { 17751c0b2f7Stbbdev private: 17851c0b2f7Stbbdev OpGet opGet; 17951c0b2f7Stbbdev OpPutList opPutList; 18051c0b2f7Stbbdev OpCleanToThreshold opCleanToThreshold; 18151c0b2f7Stbbdev OpCleanAll opCleanAll; 18251c0b2f7Stbbdev OpUpdateUsedSize opUpdateUsedSize; 18351c0b2f7Stbbdev }; 18451c0b2f7Stbbdev 18551c0b2f7Stbbdev // Forward declarations 18651c0b2f7Stbbdev template <typename OpTypeData> OpTypeData& opCast(CacheBinOperation &op); 18751c0b2f7Stbbdev 18851c0b2f7Stbbdev // Describes the aggregator operation 18951c0b2f7Stbbdev struct CacheBinOperation : public MallocAggregatedOperation<CacheBinOperation>::type { 19051c0b2f7Stbbdev CacheBinOperationType type; 19151c0b2f7Stbbdev 19251c0b2f7Stbbdev template <typename OpTypeData> 19351c0b2f7Stbbdev CacheBinOperation(OpTypeData &d, CacheBinOperationStatus st = CBST_WAIT) { 19451c0b2f7Stbbdev opCast<OpTypeData>(*this) = d; 19551c0b2f7Stbbdev type = OpTypeData::type; 19651c0b2f7Stbbdev MallocAggregatedOperation<CacheBinOperation>::type::status = st; 19751c0b2f7Stbbdev } 19851c0b2f7Stbbdev private: 19951c0b2f7Stbbdev CacheBinOperationData data; 20051c0b2f7Stbbdev 20151c0b2f7Stbbdev template <typename OpTypeData> 20251c0b2f7Stbbdev friend OpTypeData& opCast(CacheBinOperation &op); 20351c0b2f7Stbbdev }; 20451c0b2f7Stbbdev 20551c0b2f7Stbbdev // The opCast function can be the member of CacheBinOperation but it will have 20651c0b2f7Stbbdev // small stylistic ambiguity: it will look like a getter (with a cast) for the 20751c0b2f7Stbbdev // CacheBinOperation::data data member but it should return a reference to 20851c0b2f7Stbbdev // simplify the code from a lot of getter/setter calls. So the global cast in 20951c0b2f7Stbbdev // the style of static_cast (or reinterpret_cast) seems to be more readable and 21051c0b2f7Stbbdev // have more explicit semantic. 21151c0b2f7Stbbdev template <typename OpTypeData> 21251c0b2f7Stbbdev OpTypeData& opCast(CacheBinOperation &op) { 21351c0b2f7Stbbdev return *reinterpret_cast<OpTypeData*>(&op.data); 21451c0b2f7Stbbdev } 21551c0b2f7Stbbdev 21651c0b2f7Stbbdev /* ------------------------------------------------------------------------ */ 21751c0b2f7Stbbdev 21851c0b2f7Stbbdev #if __TBB_MALLOC_LOCACHE_STAT 21951c0b2f7Stbbdev //intptr_t mallocCalls, cacheHits; 22051c0b2f7Stbbdev std::atomic<intptr_t> mallocCalls, cacheHits; 22151c0b2f7Stbbdev //intptr_t memAllocKB, memHitKB; 22251c0b2f7Stbbdev std::atomic<intptr_t> memAllocKB, memHitKB; 22351c0b2f7Stbbdev #endif 22451c0b2f7Stbbdev 225478de5b1Stbbdev #if MALLOC_DEBUG 22651c0b2f7Stbbdev inline bool lessThanWithOverflow(intptr_t a, intptr_t b) 22751c0b2f7Stbbdev { 228*2110128eSsarathnandu return (a < b && (b - a < static_cast<intptr_t>(UINTPTR_MAX/2))) || 229*2110128eSsarathnandu (a > b && (a - b > static_cast<intptr_t>(UINTPTR_MAX/2))); 23051c0b2f7Stbbdev } 231478de5b1Stbbdev #endif 23251c0b2f7Stbbdev 23351c0b2f7Stbbdev /* ----------------------------------- Operation processing methods ------------------------------------ */ 23451c0b2f7Stbbdev 23551c0b2f7Stbbdev template<typename Props> void CacheBinFunctor<Props>:: 23651c0b2f7Stbbdev OperationPreprocessor::commitOperation(CacheBinOperation *op) const 23751c0b2f7Stbbdev { 23851c0b2f7Stbbdev // FencedStore( (intptr_t&)(op->status), CBST_DONE ); 23951c0b2f7Stbbdev op->status.store(CBST_DONE, std::memory_order_release); 24051c0b2f7Stbbdev } 24151c0b2f7Stbbdev 24251c0b2f7Stbbdev template<typename Props> void CacheBinFunctor<Props>:: 24351c0b2f7Stbbdev OperationPreprocessor::addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const 24451c0b2f7Stbbdev { 24551c0b2f7Stbbdev op->next = *opList; 24651c0b2f7Stbbdev *opList = op; 24751c0b2f7Stbbdev } 24851c0b2f7Stbbdev 24951c0b2f7Stbbdev template<typename Props> bool CacheBinFunctor<Props>:: 25051c0b2f7Stbbdev OperationPreprocessor::getFromPutList(CacheBinOperation *opGet, uintptr_t currTime) 25151c0b2f7Stbbdev { 25251c0b2f7Stbbdev if ( head ) { 25351c0b2f7Stbbdev uintptr_t age = head->age; 25451c0b2f7Stbbdev LargeMemoryBlock *next = head->next; 25551c0b2f7Stbbdev *opCast<OpGet>(*opGet).res = head; 25651c0b2f7Stbbdev commitOperation( opGet ); 25751c0b2f7Stbbdev head = next; 25851c0b2f7Stbbdev putListNum--; 25951c0b2f7Stbbdev MALLOC_ASSERT( putListNum>=0, ASSERT_TEXT ); 26051c0b2f7Stbbdev 26151c0b2f7Stbbdev // use moving average with current hit interval 26251c0b2f7Stbbdev bin->updateMeanHitRange( currTime - age ); 26351c0b2f7Stbbdev return true; 26451c0b2f7Stbbdev } 26551c0b2f7Stbbdev return false; 26651c0b2f7Stbbdev } 26751c0b2f7Stbbdev 26851c0b2f7Stbbdev template<typename Props> void CacheBinFunctor<Props>:: 26951c0b2f7Stbbdev OperationPreprocessor::addToPutList(LargeMemoryBlock *h, LargeMemoryBlock *t, int num) 27051c0b2f7Stbbdev { 27151c0b2f7Stbbdev if ( head ) { 27251c0b2f7Stbbdev MALLOC_ASSERT( tail, ASSERT_TEXT ); 27351c0b2f7Stbbdev tail->next = h; 27451c0b2f7Stbbdev h->prev = tail; 27551c0b2f7Stbbdev tail = t; 27651c0b2f7Stbbdev putListNum += num; 27751c0b2f7Stbbdev } else { 27851c0b2f7Stbbdev head = h; 27951c0b2f7Stbbdev tail = t; 28051c0b2f7Stbbdev putListNum = num; 28151c0b2f7Stbbdev } 28251c0b2f7Stbbdev } 28351c0b2f7Stbbdev 28451c0b2f7Stbbdev template<typename Props> void CacheBinFunctor<Props>:: 28551c0b2f7Stbbdev OperationPreprocessor::operator()(CacheBinOperation* opList) 28651c0b2f7Stbbdev { 28751c0b2f7Stbbdev for ( CacheBinOperation *op = opList, *opNext; op; op = opNext ) { 28851c0b2f7Stbbdev opNext = op->next; 28951c0b2f7Stbbdev switch ( op->type ) { 29051c0b2f7Stbbdev case CBOP_GET: 29151c0b2f7Stbbdev { 29251c0b2f7Stbbdev lclTime--; 29351c0b2f7Stbbdev if ( !lastGetOpTime ) { 29451c0b2f7Stbbdev lastGetOpTime = lclTime; 29551c0b2f7Stbbdev lastGet = 0; 29651c0b2f7Stbbdev } else if ( !lastGet ) lastGet = lclTime; 29751c0b2f7Stbbdev 29851c0b2f7Stbbdev if ( !getFromPutList(op,lclTime) ) { 29951c0b2f7Stbbdev opCast<OpGet>(*op).currTime = lclTime; 30051c0b2f7Stbbdev addOpToOpList( op, &opGet ); 30151c0b2f7Stbbdev } 30251c0b2f7Stbbdev } 30351c0b2f7Stbbdev break; 30451c0b2f7Stbbdev 30551c0b2f7Stbbdev case CBOP_PUT_LIST: 30651c0b2f7Stbbdev { 30751c0b2f7Stbbdev LargeMemoryBlock *head = opCast<OpPutList>(*op).head; 30857f524caSIlya Isaev LargeMemoryBlock *curr = head, *prev = nullptr; 30951c0b2f7Stbbdev 31051c0b2f7Stbbdev int num = 0; 31151c0b2f7Stbbdev do { 31251c0b2f7Stbbdev // we do not kept prev pointers during assigning blocks to bins, set them now 31351c0b2f7Stbbdev curr->prev = prev; 31451c0b2f7Stbbdev 31551c0b2f7Stbbdev // Save the local times to the memory blocks. Local times are necessary 31651c0b2f7Stbbdev // for the getFromPutList function which updates the hit range value in 31751c0b2f7Stbbdev // CacheBin when OP_GET and OP_PUT_LIST operations are merged successfully. 31851c0b2f7Stbbdev // The age will be updated to the correct global time after preprocessing 31951c0b2f7Stbbdev // when global cache time is updated. 32051c0b2f7Stbbdev curr->age = --lclTime; 32151c0b2f7Stbbdev 32251c0b2f7Stbbdev prev = curr; 32351c0b2f7Stbbdev num += 1; 32451c0b2f7Stbbdev 32551c0b2f7Stbbdev STAT_increment(getThreadId(), ThreadCommonCounters, cacheLargeObj); 32657f524caSIlya Isaev } while ((curr = curr->next) != nullptr); 32751c0b2f7Stbbdev 32851c0b2f7Stbbdev LargeMemoryBlock *tail = prev; 32951c0b2f7Stbbdev addToPutList(head, tail, num); 33051c0b2f7Stbbdev 33151c0b2f7Stbbdev while ( opGet ) { 33251c0b2f7Stbbdev CacheBinOperation *next = opGet->next; 33351c0b2f7Stbbdev if ( !getFromPutList(opGet, opCast<OpGet>(*opGet).currTime) ) 33451c0b2f7Stbbdev break; 33551c0b2f7Stbbdev opGet = next; 33651c0b2f7Stbbdev } 33751c0b2f7Stbbdev } 33851c0b2f7Stbbdev break; 33951c0b2f7Stbbdev 34051c0b2f7Stbbdev case CBOP_UPDATE_USED_SIZE: 34151c0b2f7Stbbdev updateUsedSize += opCast<OpUpdateUsedSize>(*op).size; 34251c0b2f7Stbbdev commitOperation( op ); 34351c0b2f7Stbbdev break; 34451c0b2f7Stbbdev 34551c0b2f7Stbbdev case CBOP_CLEAN_ALL: 34651c0b2f7Stbbdev isCleanAll = true; 34751c0b2f7Stbbdev addOpToOpList( op, &opClean ); 34851c0b2f7Stbbdev break; 34951c0b2f7Stbbdev 35051c0b2f7Stbbdev case CBOP_CLEAN_TO_THRESHOLD: 35151c0b2f7Stbbdev { 35251c0b2f7Stbbdev uintptr_t currTime = opCast<OpCleanToThreshold>(*op).currTime; 35351c0b2f7Stbbdev // We don't worry about currTime overflow since it is a rare 35451c0b2f7Stbbdev // occurrence and doesn't affect correctness 35551c0b2f7Stbbdev cleanTime = cleanTime < currTime ? currTime : cleanTime; 35651c0b2f7Stbbdev addOpToOpList( op, &opClean ); 35751c0b2f7Stbbdev } 35851c0b2f7Stbbdev break; 35951c0b2f7Stbbdev 36051c0b2f7Stbbdev default: 36151c0b2f7Stbbdev MALLOC_ASSERT( false, "Unknown operation." ); 36251c0b2f7Stbbdev } 36351c0b2f7Stbbdev } 36451c0b2f7Stbbdev MALLOC_ASSERT( !( opGet && head ), "Not all put/get pairs are processed!" ); 36551c0b2f7Stbbdev } 36651c0b2f7Stbbdev 36751c0b2f7Stbbdev template<typename Props> void CacheBinFunctor<Props>::operator()(CacheBinOperation* opList) 36851c0b2f7Stbbdev { 36951c0b2f7Stbbdev MALLOC_ASSERT( opList, "Empty operation list is passed into operation handler." ); 37051c0b2f7Stbbdev 37151c0b2f7Stbbdev OperationPreprocessor prep(bin); 37251c0b2f7Stbbdev prep(opList); 37351c0b2f7Stbbdev 37451c0b2f7Stbbdev if ( uintptr_t timeRange = prep.getTimeRange() ) { 37551c0b2f7Stbbdev uintptr_t startTime = extMemPool->loc.getCurrTimeRange(timeRange); 37651c0b2f7Stbbdev // endTime is used as the current (base) time since the local time is negative. 37751c0b2f7Stbbdev uintptr_t endTime = startTime + timeRange; 37851c0b2f7Stbbdev 37951c0b2f7Stbbdev if ( prep.lastGetOpTime && prep.lastGet ) bin->setLastGet(prep.lastGet+endTime); 38051c0b2f7Stbbdev 38151c0b2f7Stbbdev if ( CacheBinOperation *opGet = prep.opGet ) { 38251c0b2f7Stbbdev bool isEmpty = false; 38351c0b2f7Stbbdev do { 38451c0b2f7Stbbdev #if __TBB_MALLOC_WHITEBOX_TEST 38551c0b2f7Stbbdev tbbmalloc_whitebox::locGetProcessed++; 38651c0b2f7Stbbdev #endif 38751c0b2f7Stbbdev const OpGet &opGetData = opCast<OpGet>(*opGet); 38851c0b2f7Stbbdev if ( !isEmpty ) { 38951c0b2f7Stbbdev if ( LargeMemoryBlock *res = bin->get() ) { 39051c0b2f7Stbbdev uintptr_t getTime = opGetData.currTime + endTime; 39151c0b2f7Stbbdev // use moving average with current hit interval 39251c0b2f7Stbbdev bin->updateMeanHitRange( getTime - res->age); 39351c0b2f7Stbbdev bin->updateCachedSize( -opGetData.size ); 39451c0b2f7Stbbdev *opGetData.res = res; 39551c0b2f7Stbbdev } else { 39651c0b2f7Stbbdev isEmpty = true; 39751c0b2f7Stbbdev uintptr_t lastGetOpTime = prep.lastGetOpTime+endTime; 39851c0b2f7Stbbdev bin->forgetOutdatedState(lastGetOpTime); 39951c0b2f7Stbbdev bin->updateAgeThreshold(lastGetOpTime); 40051c0b2f7Stbbdev } 40151c0b2f7Stbbdev } 40251c0b2f7Stbbdev 40351c0b2f7Stbbdev CacheBinOperation *opNext = opGet->next; 40451c0b2f7Stbbdev bin->updateUsedSize( opGetData.size, bitMask, idx ); 40551c0b2f7Stbbdev prep.commitOperation( opGet ); 40651c0b2f7Stbbdev opGet = opNext; 40751c0b2f7Stbbdev } while ( opGet ); 40851c0b2f7Stbbdev if ( prep.lastGetOpTime ) 40951c0b2f7Stbbdev bin->setLastGet( prep.lastGetOpTime + endTime ); 41051c0b2f7Stbbdev } else if ( LargeMemoryBlock *curr = prep.head ) { 41157f524caSIlya Isaev curr->prev = nullptr; 41251c0b2f7Stbbdev while ( curr ) { 41351c0b2f7Stbbdev // Update local times to global times 41451c0b2f7Stbbdev curr->age += endTime; 41551c0b2f7Stbbdev curr=curr->next; 41651c0b2f7Stbbdev } 41751c0b2f7Stbbdev #if __TBB_MALLOC_WHITEBOX_TEST 41851c0b2f7Stbbdev tbbmalloc_whitebox::locPutProcessed+=prep.putListNum; 41951c0b2f7Stbbdev #endif 42051c0b2f7Stbbdev toRelease = bin->putList(prep.head, prep.tail, bitMask, idx, prep.putListNum, extMemPool->loc.hugeSizeThreshold); 42151c0b2f7Stbbdev } 42251c0b2f7Stbbdev needCleanup = extMemPool->loc.isCleanupNeededOnRange(timeRange, startTime); 42351c0b2f7Stbbdev currTime = endTime - 1; 42451c0b2f7Stbbdev } 42551c0b2f7Stbbdev 42651c0b2f7Stbbdev if ( CacheBinOperation *opClean = prep.opClean ) { 42751c0b2f7Stbbdev if ( prep.isCleanAll ) 42851c0b2f7Stbbdev *opCast<OpCleanAll>(*opClean).res = bin->cleanAll(bitMask, idx); 42951c0b2f7Stbbdev else 43051c0b2f7Stbbdev *opCast<OpCleanToThreshold>(*opClean).res = bin->cleanToThreshold(prep.cleanTime, bitMask, idx); 43151c0b2f7Stbbdev 43251c0b2f7Stbbdev CacheBinOperation *opNext = opClean->next; 43351c0b2f7Stbbdev prep.commitOperation( opClean ); 43451c0b2f7Stbbdev 43557f524caSIlya Isaev while ((opClean = opNext) != nullptr) { 43651c0b2f7Stbbdev opNext = opClean->next; 43751c0b2f7Stbbdev prep.commitOperation(opClean); 43851c0b2f7Stbbdev } 43951c0b2f7Stbbdev } 44051c0b2f7Stbbdev 44151c0b2f7Stbbdev if ( size_t size = prep.updateUsedSize ) 44251c0b2f7Stbbdev bin->updateUsedSize(size, bitMask, idx); 44351c0b2f7Stbbdev } 44451c0b2f7Stbbdev /* ----------------------------------------------------------------------------------------------------- */ 44551c0b2f7Stbbdev /* --------------------------- Methods for creating and executing operations --------------------------- */ 44651c0b2f7Stbbdev template<typename Props> void LargeObjectCacheImpl<Props>:: 44751c0b2f7Stbbdev CacheBin::ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime) 44851c0b2f7Stbbdev { 44951c0b2f7Stbbdev CacheBinFunctor<Props> func( this, extMemPool, bitMask, idx ); 45051c0b2f7Stbbdev aggregator.execute( op, func, longLifeTime ); 45151c0b2f7Stbbdev 45251c0b2f7Stbbdev if ( LargeMemoryBlock *toRelease = func.getToRelease()) { 45351c0b2f7Stbbdev extMemPool->backend.returnLargeObject(toRelease); 45451c0b2f7Stbbdev } 45551c0b2f7Stbbdev 45651c0b2f7Stbbdev if ( func.isCleanupNeeded() ) { 45751c0b2f7Stbbdev extMemPool->loc.doCleanup( func.getCurrTime(), /*doThreshDecr=*/false); 45851c0b2f7Stbbdev } 45951c0b2f7Stbbdev } 46051c0b2f7Stbbdev 46151c0b2f7Stbbdev template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>:: 46251c0b2f7Stbbdev CacheBin::get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx) 46351c0b2f7Stbbdev { 46457f524caSIlya Isaev LargeMemoryBlock *lmb=nullptr; 465*2110128eSsarathnandu OpGet data = {&lmb, size, static_cast<uintptr_t>(0)}; 46651c0b2f7Stbbdev CacheBinOperation op(data); 46751c0b2f7Stbbdev ExecuteOperation( &op, extMemPool, bitMask, idx ); 46851c0b2f7Stbbdev return lmb; 46951c0b2f7Stbbdev } 47051c0b2f7Stbbdev 47151c0b2f7Stbbdev template<typename Props> void LargeObjectCacheImpl<Props>:: 47251c0b2f7Stbbdev CacheBin::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx) 47351c0b2f7Stbbdev { 47451c0b2f7Stbbdev MALLOC_ASSERT(sizeof(LargeMemoryBlock)+sizeof(CacheBinOperation)<=head->unalignedSize, "CacheBinOperation is too large to be placed in LargeMemoryBlock!"); 47551c0b2f7Stbbdev 47651c0b2f7Stbbdev OpPutList data = {head}; 47751c0b2f7Stbbdev CacheBinOperation *op = new (head+1) CacheBinOperation(data, CBST_NOWAIT); 47851c0b2f7Stbbdev ExecuteOperation( op, extMemPool, bitMask, idx, false ); 47951c0b2f7Stbbdev } 48051c0b2f7Stbbdev 48151c0b2f7Stbbdev template<typename Props> bool LargeObjectCacheImpl<Props>:: 48251c0b2f7Stbbdev CacheBin::cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx) 48351c0b2f7Stbbdev { 48457f524caSIlya Isaev LargeMemoryBlock *toRelease = nullptr; 48551c0b2f7Stbbdev 48651c0b2f7Stbbdev /* oldest may be more recent then age, that's why cast to signed type 48751c0b2f7Stbbdev was used. age overflow is also processed correctly. */ 488478de5b1Stbbdev if (last.load(std::memory_order_relaxed) && 489478de5b1Stbbdev (intptr_t)(currTime - oldest.load(std::memory_order_relaxed)) > ageThreshold.load(std::memory_order_relaxed)) { 49051c0b2f7Stbbdev OpCleanToThreshold data = {&toRelease, currTime}; 49151c0b2f7Stbbdev CacheBinOperation op(data); 49251c0b2f7Stbbdev ExecuteOperation( &op, extMemPool, bitMask, idx ); 49351c0b2f7Stbbdev } 49451c0b2f7Stbbdev bool released = toRelease; 49551c0b2f7Stbbdev 49651c0b2f7Stbbdev Backend *backend = &extMemPool->backend; 49751c0b2f7Stbbdev while ( toRelease ) { 49851c0b2f7Stbbdev LargeMemoryBlock *helper = toRelease->next; 49951c0b2f7Stbbdev backend->returnLargeObject(toRelease); 50051c0b2f7Stbbdev toRelease = helper; 50151c0b2f7Stbbdev } 50251c0b2f7Stbbdev return released; 50351c0b2f7Stbbdev } 50451c0b2f7Stbbdev 50551c0b2f7Stbbdev template<typename Props> bool LargeObjectCacheImpl<Props>:: 50651c0b2f7Stbbdev CacheBin::releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx) 50751c0b2f7Stbbdev { 50857f524caSIlya Isaev LargeMemoryBlock *toRelease = nullptr; 50951c0b2f7Stbbdev 510478de5b1Stbbdev if (last.load(std::memory_order_relaxed)) { 51151c0b2f7Stbbdev OpCleanAll data = {&toRelease}; 51251c0b2f7Stbbdev CacheBinOperation op(data); 51351c0b2f7Stbbdev ExecuteOperation(&op, extMemPool, bitMask, idx); 51451c0b2f7Stbbdev } 51551c0b2f7Stbbdev bool released = toRelease; 51651c0b2f7Stbbdev 51751c0b2f7Stbbdev Backend *backend = &extMemPool->backend; 51851c0b2f7Stbbdev while ( toRelease ) { 51951c0b2f7Stbbdev LargeMemoryBlock *helper = toRelease->next; 52051c0b2f7Stbbdev MALLOC_ASSERT(!helper || lessThanWithOverflow(helper->age, toRelease->age), 52151c0b2f7Stbbdev ASSERT_TEXT); 52251c0b2f7Stbbdev backend->returnLargeObject(toRelease); 52351c0b2f7Stbbdev toRelease = helper; 52451c0b2f7Stbbdev } 52551c0b2f7Stbbdev return released; 52651c0b2f7Stbbdev } 52751c0b2f7Stbbdev 52851c0b2f7Stbbdev template<typename Props> void LargeObjectCacheImpl<Props>:: 52951c0b2f7Stbbdev CacheBin::updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx) 53051c0b2f7Stbbdev { 53151c0b2f7Stbbdev OpUpdateUsedSize data = {size}; 53251c0b2f7Stbbdev CacheBinOperation op(data); 53351c0b2f7Stbbdev ExecuteOperation( &op, extMemPool, bitMask, idx ); 53451c0b2f7Stbbdev } 53551c0b2f7Stbbdev 53651c0b2f7Stbbdev /* ------------------------------ Unsafe methods used with the aggregator ------------------------------ */ 53751c0b2f7Stbbdev 53851c0b2f7Stbbdev template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>:: 53951c0b2f7Stbbdev CacheBin::putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask, int idx, int num, size_t hugeSizeThreshold) 54051c0b2f7Stbbdev { 54151c0b2f7Stbbdev size_t size = head->unalignedSize; 542478de5b1Stbbdev usedSize.store(usedSize.load(std::memory_order_relaxed) - num * size, std::memory_order_relaxed); 543478de5b1Stbbdev MALLOC_ASSERT( !last.load(std::memory_order_relaxed) || 544478de5b1Stbbdev (last.load(std::memory_order_relaxed)->age != 0 && last.load(std::memory_order_relaxed)->age != -1U), ASSERT_TEXT ); 54551c0b2f7Stbbdev MALLOC_ASSERT( (tail==head && num==1) || (tail!=head && num>1), ASSERT_TEXT ); 54657f524caSIlya Isaev LargeMemoryBlock *toRelease = nullptr; 54751c0b2f7Stbbdev if (size < hugeSizeThreshold && !lastCleanedAge) { 54851c0b2f7Stbbdev // 1st object of such size was released. 54951c0b2f7Stbbdev // Not cache it, and remember when this occurs 55051c0b2f7Stbbdev // to take into account during cache miss. 55151c0b2f7Stbbdev lastCleanedAge = tail->age; 55251c0b2f7Stbbdev toRelease = tail; 55351c0b2f7Stbbdev tail = tail->prev; 55451c0b2f7Stbbdev if (tail) 55557f524caSIlya Isaev tail->next = nullptr; 55651c0b2f7Stbbdev else 55757f524caSIlya Isaev head = nullptr; 55851c0b2f7Stbbdev num--; 55951c0b2f7Stbbdev } 56051c0b2f7Stbbdev if (num) { 56151c0b2f7Stbbdev // add [head;tail] list to cache 56251c0b2f7Stbbdev MALLOC_ASSERT( tail, ASSERT_TEXT ); 56351c0b2f7Stbbdev tail->next = first; 56451c0b2f7Stbbdev if (first) 56551c0b2f7Stbbdev first->prev = tail; 56651c0b2f7Stbbdev first = head; 567478de5b1Stbbdev if (!last.load(std::memory_order_relaxed)) { 568478de5b1Stbbdev MALLOC_ASSERT(0 == oldest.load(std::memory_order_relaxed), ASSERT_TEXT); 569478de5b1Stbbdev oldest.store(tail->age, std::memory_order_relaxed); 570478de5b1Stbbdev last.store(tail, std::memory_order_relaxed); 57151c0b2f7Stbbdev } 57251c0b2f7Stbbdev 573478de5b1Stbbdev cachedSize.store(cachedSize.load(std::memory_order_relaxed) + num * size, std::memory_order_relaxed); 57451c0b2f7Stbbdev } 57551c0b2f7Stbbdev 57651c0b2f7Stbbdev // No used object, and nothing in the bin, mark the bin as empty 577478de5b1Stbbdev if (!usedSize.load(std::memory_order_relaxed) && !first) 57851c0b2f7Stbbdev bitMask->set(idx, false); 57951c0b2f7Stbbdev 58051c0b2f7Stbbdev return toRelease; 58151c0b2f7Stbbdev } 58251c0b2f7Stbbdev 58351c0b2f7Stbbdev template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>:: 58451c0b2f7Stbbdev CacheBin::get() 58551c0b2f7Stbbdev { 58651c0b2f7Stbbdev LargeMemoryBlock *result=first; 58751c0b2f7Stbbdev if (result) { 58851c0b2f7Stbbdev first = result->next; 58951c0b2f7Stbbdev if (first) 59057f524caSIlya Isaev first->prev = nullptr; 59151c0b2f7Stbbdev else { 592478de5b1Stbbdev last.store(nullptr, std::memory_order_relaxed); 593478de5b1Stbbdev oldest.store(0, std::memory_order_relaxed); 59451c0b2f7Stbbdev } 59551c0b2f7Stbbdev } 59651c0b2f7Stbbdev 59751c0b2f7Stbbdev return result; 59851c0b2f7Stbbdev } 59951c0b2f7Stbbdev 60051c0b2f7Stbbdev template<typename Props> void LargeObjectCacheImpl<Props>:: 60151c0b2f7Stbbdev CacheBin::forgetOutdatedState(uintptr_t currTime) 60251c0b2f7Stbbdev { 60351c0b2f7Stbbdev // If the time since the last get is LongWaitFactor times more than ageThreshold 60451c0b2f7Stbbdev // for the bin, treat the bin as rarely-used and forget everything we know 60551c0b2f7Stbbdev // about it. 60651c0b2f7Stbbdev // If LongWaitFactor is too small, we forget too early and 60751c0b2f7Stbbdev // so prevents good caching, while if too high, caching blocks 60851c0b2f7Stbbdev // with unrelated usage pattern occurs. 60951c0b2f7Stbbdev const uintptr_t sinceLastGet = currTime - lastGet; 61051c0b2f7Stbbdev bool doCleanup = false; 61151c0b2f7Stbbdev 612478de5b1Stbbdev intptr_t threshold = ageThreshold.load(std::memory_order_relaxed); 613478de5b1Stbbdev if (threshold) 614*2110128eSsarathnandu doCleanup = sinceLastGet > static_cast<uintptr_t>(Props::LongWaitFactor * threshold); 61551c0b2f7Stbbdev else if (lastCleanedAge) 616*2110128eSsarathnandu doCleanup = sinceLastGet > static_cast<uintptr_t>(Props::LongWaitFactor * (lastCleanedAge - lastGet)); 61751c0b2f7Stbbdev 61851c0b2f7Stbbdev if (doCleanup) { 61951c0b2f7Stbbdev lastCleanedAge = 0; 620478de5b1Stbbdev ageThreshold.store(0, std::memory_order_relaxed); 62151c0b2f7Stbbdev } 62251c0b2f7Stbbdev 62351c0b2f7Stbbdev } 62451c0b2f7Stbbdev 62551c0b2f7Stbbdev template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>:: 62651c0b2f7Stbbdev CacheBin::cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx) 62751c0b2f7Stbbdev { 62851c0b2f7Stbbdev /* oldest may be more recent then age, that's why cast to signed type 62951c0b2f7Stbbdev was used. age overflow is also processed correctly. */ 630478de5b1Stbbdev if ( !last.load(std::memory_order_relaxed) || 631478de5b1Stbbdev (intptr_t)(currTime - last.load(std::memory_order_relaxed)->age) < ageThreshold.load(std::memory_order_relaxed) ) 632478de5b1Stbbdev return nullptr; 63351c0b2f7Stbbdev 63451c0b2f7Stbbdev #if MALLOC_DEBUG 63551c0b2f7Stbbdev uintptr_t nextAge = 0; 63651c0b2f7Stbbdev #endif 63751c0b2f7Stbbdev do { 63851c0b2f7Stbbdev #if MALLOC_DEBUG 63951c0b2f7Stbbdev // check that list ordered 640478de5b1Stbbdev MALLOC_ASSERT(!nextAge || lessThanWithOverflow(nextAge, last.load(std::memory_order_relaxed)->age), 64151c0b2f7Stbbdev ASSERT_TEXT); 642478de5b1Stbbdev nextAge = last.load(std::memory_order_relaxed)->age; 64351c0b2f7Stbbdev #endif 644478de5b1Stbbdev cachedSize.store(cachedSize.load(std::memory_order_relaxed) - last.load(std::memory_order_relaxed)->unalignedSize, std::memory_order_relaxed); 645478de5b1Stbbdev last.store(last.load(std::memory_order_relaxed)->prev, std::memory_order_relaxed); 646478de5b1Stbbdev } while (last.load(std::memory_order_relaxed) && 647478de5b1Stbbdev (intptr_t)(currTime - last.load(std::memory_order_relaxed)->age) > ageThreshold.load(std::memory_order_relaxed)); 64851c0b2f7Stbbdev 64957f524caSIlya Isaev LargeMemoryBlock *toRelease = nullptr; 650478de5b1Stbbdev if (last.load(std::memory_order_relaxed)) { 651478de5b1Stbbdev toRelease = last.load(std::memory_order_relaxed)->next; 652478de5b1Stbbdev oldest.store(last.load(std::memory_order_relaxed)->age, std::memory_order_relaxed); 65357f524caSIlya Isaev last.load(std::memory_order_relaxed)->next = nullptr; 65451c0b2f7Stbbdev } else { 65551c0b2f7Stbbdev toRelease = first; 65657f524caSIlya Isaev first = nullptr; 657478de5b1Stbbdev oldest.store(0, std::memory_order_relaxed); 658478de5b1Stbbdev if (!usedSize.load(std::memory_order_relaxed)) 65951c0b2f7Stbbdev bitMask->set(idx, false); 66051c0b2f7Stbbdev } 66151c0b2f7Stbbdev MALLOC_ASSERT( toRelease, ASSERT_TEXT ); 66251c0b2f7Stbbdev lastCleanedAge = toRelease->age; 66351c0b2f7Stbbdev 66451c0b2f7Stbbdev return toRelease; 66551c0b2f7Stbbdev } 66651c0b2f7Stbbdev 66751c0b2f7Stbbdev template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>:: 66851c0b2f7Stbbdev CacheBin::cleanAll(BinBitMask *bitMask, int idx) 66951c0b2f7Stbbdev { 67057f524caSIlya Isaev if (!last.load(std::memory_order_relaxed)) return nullptr; 67151c0b2f7Stbbdev 67251c0b2f7Stbbdev LargeMemoryBlock *toRelease = first; 67357f524caSIlya Isaev last.store(nullptr, std::memory_order_relaxed); 67457f524caSIlya Isaev first = nullptr; 675478de5b1Stbbdev oldest.store(0, std::memory_order_relaxed); 676478de5b1Stbbdev cachedSize.store(0, std::memory_order_relaxed); 677478de5b1Stbbdev if (!usedSize.load(std::memory_order_relaxed)) 67851c0b2f7Stbbdev bitMask->set(idx, false); 67951c0b2f7Stbbdev 68051c0b2f7Stbbdev return toRelease; 68151c0b2f7Stbbdev } 68251c0b2f7Stbbdev 68351c0b2f7Stbbdev /* ----------------------------------------------------------------------------------------------------- */ 68451c0b2f7Stbbdev 685478de5b1Stbbdev #if __TBB_MALLOC_BACKEND_STAT 68651c0b2f7Stbbdev template<typename Props> size_t LargeObjectCacheImpl<Props>:: 68751c0b2f7Stbbdev CacheBin::reportStat(int num, FILE *f) 68851c0b2f7Stbbdev { 68951c0b2f7Stbbdev #if __TBB_MALLOC_LOCACHE_STAT 69051c0b2f7Stbbdev if (first) 69151c0b2f7Stbbdev printf("%d(%lu): total %lu KB thr %ld lastCln %lu oldest %lu\n", 69251c0b2f7Stbbdev num, num*Props::CacheStep+Props::MinSize, 693478de5b1Stbbdev cachedSize.load(std::memory_order_relaxed)/1024, ageThresholdageThreshold.load(std::memory_order_relaxed), lastCleanedAge, oldest.load(std::memory_order_relaxed)); 69451c0b2f7Stbbdev #else 69551c0b2f7Stbbdev suppress_unused_warning(num); 69651c0b2f7Stbbdev suppress_unused_warning(f); 69751c0b2f7Stbbdev #endif 698478de5b1Stbbdev return cachedSize.load(std::memory_order_relaxed); 69951c0b2f7Stbbdev } 700478de5b1Stbbdev #endif 70151c0b2f7Stbbdev 70251c0b2f7Stbbdev // Release objects from cache blocks that are older than ageThreshold 70351c0b2f7Stbbdev template<typename Props> 70451c0b2f7Stbbdev bool LargeObjectCacheImpl<Props>::regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currTime, bool doThreshDecr) 70551c0b2f7Stbbdev { 70651c0b2f7Stbbdev bool released = false; 70751c0b2f7Stbbdev BinsSummary binsSummary; 70851c0b2f7Stbbdev 70951c0b2f7Stbbdev // Threshold settings is below this cache or starts from zero index 71051c0b2f7Stbbdev if (hugeSizeThresholdIdx == 0) return false; 71151c0b2f7Stbbdev 71251c0b2f7Stbbdev // Starting searching for bin that is less than huge size threshold (can be cleaned-up) 71351c0b2f7Stbbdev int startSearchIdx = hugeSizeThresholdIdx - 1; 71451c0b2f7Stbbdev 71551c0b2f7Stbbdev for (int i = bitMask.getMaxTrue(startSearchIdx); i >= 0; i = bitMask.getMaxTrue(i-1)) { 71651c0b2f7Stbbdev bin[i].updateBinsSummary(&binsSummary); 71751c0b2f7Stbbdev if (!doThreshDecr && tooLargeLOC.load(std::memory_order_relaxed) > 2 && binsSummary.isLOCTooLarge()) { 71851c0b2f7Stbbdev // if LOC is too large for quite long time, decrease the threshold 71951c0b2f7Stbbdev // based on bin hit statistics. 72051c0b2f7Stbbdev // For this, redo cleanup from the beginning. 72151c0b2f7Stbbdev // Note: on this iteration total usedSz can be not too large 72251c0b2f7Stbbdev // in comparison to total cachedSz, as we calculated it only 72351c0b2f7Stbbdev // partially. We are ok with it. 72451c0b2f7Stbbdev i = bitMask.getMaxTrue(startSearchIdx)+1; 72551c0b2f7Stbbdev doThreshDecr = true; 72651c0b2f7Stbbdev binsSummary.reset(); 72751c0b2f7Stbbdev continue; 72851c0b2f7Stbbdev } 72951c0b2f7Stbbdev if (doThreshDecr) 73051c0b2f7Stbbdev bin[i].decreaseThreshold(); 73151c0b2f7Stbbdev 73251c0b2f7Stbbdev if (bin[i].cleanToThreshold(extMemPool, &bitMask, currTime, i)) { 73351c0b2f7Stbbdev released = true; 73451c0b2f7Stbbdev } 73551c0b2f7Stbbdev } 73651c0b2f7Stbbdev // We want to find if LOC was too large for some time continuously, 73751c0b2f7Stbbdev // so OK with races between incrementing and zeroing, but incrementing 73851c0b2f7Stbbdev // must be atomic. 73951c0b2f7Stbbdev if (binsSummary.isLOCTooLarge()) { 74051c0b2f7Stbbdev tooLargeLOC++; 74151c0b2f7Stbbdev } else { 74251c0b2f7Stbbdev tooLargeLOC.store(0, std::memory_order_relaxed); 74351c0b2f7Stbbdev } 74451c0b2f7Stbbdev return released; 74551c0b2f7Stbbdev } 74651c0b2f7Stbbdev 74751c0b2f7Stbbdev template<typename Props> 74851c0b2f7Stbbdev bool LargeObjectCacheImpl<Props>::cleanAll(ExtMemoryPool *extMemPool) 74951c0b2f7Stbbdev { 75051c0b2f7Stbbdev bool released = false; 75151c0b2f7Stbbdev for (int i = numBins-1; i >= 0; i--) { 75251c0b2f7Stbbdev released |= bin[i].releaseAllToBackend(extMemPool, &bitMask, i); 75351c0b2f7Stbbdev } 75451c0b2f7Stbbdev return released; 75551c0b2f7Stbbdev } 75651c0b2f7Stbbdev 75751c0b2f7Stbbdev template<typename Props> 75851c0b2f7Stbbdev void LargeObjectCacheImpl<Props>::reset() { 75951c0b2f7Stbbdev tooLargeLOC.store(0, std::memory_order_relaxed); 76051c0b2f7Stbbdev for (int i = numBins-1; i >= 0; i--) 76151c0b2f7Stbbdev bin[i].init(); 76251c0b2f7Stbbdev bitMask.reset(); 76351c0b2f7Stbbdev } 76451c0b2f7Stbbdev 76551c0b2f7Stbbdev #if __TBB_MALLOC_WHITEBOX_TEST 76651c0b2f7Stbbdev template<typename Props> 76751c0b2f7Stbbdev size_t LargeObjectCacheImpl<Props>::getLOCSize() const 76851c0b2f7Stbbdev { 76951c0b2f7Stbbdev size_t size = 0; 77051c0b2f7Stbbdev for (int i = numBins-1; i >= 0; i--) 77151c0b2f7Stbbdev size += bin[i].getSize(); 77251c0b2f7Stbbdev return size; 77351c0b2f7Stbbdev } 77451c0b2f7Stbbdev 77551c0b2f7Stbbdev size_t LargeObjectCache::getLOCSize() const 77651c0b2f7Stbbdev { 77751c0b2f7Stbbdev return largeCache.getLOCSize() + hugeCache.getLOCSize(); 77851c0b2f7Stbbdev } 77951c0b2f7Stbbdev 78051c0b2f7Stbbdev template<typename Props> 78151c0b2f7Stbbdev size_t LargeObjectCacheImpl<Props>::getUsedSize() const 78251c0b2f7Stbbdev { 78351c0b2f7Stbbdev size_t size = 0; 78451c0b2f7Stbbdev for (int i = numBins-1; i >= 0; i--) 78551c0b2f7Stbbdev size += bin[i].getUsedSize(); 78651c0b2f7Stbbdev return size; 78751c0b2f7Stbbdev } 78851c0b2f7Stbbdev 78951c0b2f7Stbbdev size_t LargeObjectCache::getUsedSize() const 79051c0b2f7Stbbdev { 79151c0b2f7Stbbdev return largeCache.getUsedSize() + hugeCache.getUsedSize(); 79251c0b2f7Stbbdev } 79351c0b2f7Stbbdev #endif // __TBB_MALLOC_WHITEBOX_TEST 79451c0b2f7Stbbdev 79551c0b2f7Stbbdev inline bool LargeObjectCache::isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime) 79651c0b2f7Stbbdev { 79751c0b2f7Stbbdev return range >= cacheCleanupFreq 79851c0b2f7Stbbdev || currTime+range < currTime-1 // overflow, 0 is power of 2, do cleanup 79951c0b2f7Stbbdev // (prev;prev+range] contains n*cacheCleanupFreq 80051c0b2f7Stbbdev || alignUp(currTime, cacheCleanupFreq)<currTime+range; 80151c0b2f7Stbbdev } 80251c0b2f7Stbbdev 80351c0b2f7Stbbdev bool LargeObjectCache::doCleanup(uintptr_t currTime, bool doThreshDecr) 80451c0b2f7Stbbdev { 80551c0b2f7Stbbdev if (!doThreshDecr) 80651c0b2f7Stbbdev extMemPool->allLocalCaches.markUnused(); 80751c0b2f7Stbbdev return largeCache.regularCleanup(extMemPool, currTime, doThreshDecr) 80851c0b2f7Stbbdev | hugeCache.regularCleanup(extMemPool, currTime, doThreshDecr); 80951c0b2f7Stbbdev } 81051c0b2f7Stbbdev 81151c0b2f7Stbbdev bool LargeObjectCache::decreasingCleanup() 81251c0b2f7Stbbdev { 81351c0b2f7Stbbdev return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/true); 81451c0b2f7Stbbdev } 81551c0b2f7Stbbdev 81651c0b2f7Stbbdev bool LargeObjectCache::regularCleanup() 81751c0b2f7Stbbdev { 81851c0b2f7Stbbdev return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/false); 81951c0b2f7Stbbdev } 82051c0b2f7Stbbdev 82151c0b2f7Stbbdev bool LargeObjectCache::cleanAll() 82251c0b2f7Stbbdev { 82351c0b2f7Stbbdev return largeCache.cleanAll(extMemPool) | hugeCache.cleanAll(extMemPool); 82451c0b2f7Stbbdev } 82551c0b2f7Stbbdev 82651c0b2f7Stbbdev void LargeObjectCache::reset() 82751c0b2f7Stbbdev { 82851c0b2f7Stbbdev largeCache.reset(); 82951c0b2f7Stbbdev hugeCache.reset(); 83051c0b2f7Stbbdev } 83151c0b2f7Stbbdev 83251c0b2f7Stbbdev template<typename Props> 83351c0b2f7Stbbdev LargeMemoryBlock *LargeObjectCacheImpl<Props>::get(ExtMemoryPool *extMemoryPool, size_t size) 83451c0b2f7Stbbdev { 83551c0b2f7Stbbdev int idx = Props::sizeToIdx(size); 83651c0b2f7Stbbdev 83751c0b2f7Stbbdev LargeMemoryBlock *lmb = bin[idx].get(extMemoryPool, size, &bitMask, idx); 83851c0b2f7Stbbdev 83951c0b2f7Stbbdev if (lmb) { 84051c0b2f7Stbbdev MALLOC_ITT_SYNC_ACQUIRED(bin+idx); 84151c0b2f7Stbbdev STAT_increment(getThreadId(), ThreadCommonCounters, allocCachedLargeObj); 84251c0b2f7Stbbdev } 84351c0b2f7Stbbdev return lmb; 84451c0b2f7Stbbdev } 84551c0b2f7Stbbdev 84651c0b2f7Stbbdev template<typename Props> 84751c0b2f7Stbbdev void LargeObjectCacheImpl<Props>::updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size) 84851c0b2f7Stbbdev { 84951c0b2f7Stbbdev int idx = Props::sizeToIdx(size); 850*2110128eSsarathnandu MALLOC_ASSERT(idx < static_cast<int>(numBins), ASSERT_TEXT); 85151c0b2f7Stbbdev bin[idx].updateUsedSize(extMemPool, op==decrease? -size : size, &bitMask, idx); 85251c0b2f7Stbbdev } 85351c0b2f7Stbbdev 85451c0b2f7Stbbdev #if __TBB_MALLOC_LOCACHE_STAT 85551c0b2f7Stbbdev template<typename Props> 85651c0b2f7Stbbdev void LargeObjectCacheImpl<Props>::reportStat(FILE *f) 85751c0b2f7Stbbdev { 85851c0b2f7Stbbdev size_t cachedSize = 0; 85951c0b2f7Stbbdev for (int i=0; i<numBins; i++) 86051c0b2f7Stbbdev cachedSize += bin[i].reportStat(i, f); 86151c0b2f7Stbbdev fprintf(f, "total LOC size %lu MB\n", cachedSize/1024/1024); 86251c0b2f7Stbbdev } 86351c0b2f7Stbbdev 86451c0b2f7Stbbdev void LargeObjectCache::reportStat(FILE *f) 86551c0b2f7Stbbdev { 86651c0b2f7Stbbdev largeCache.reportStat(f); 86751c0b2f7Stbbdev hugeCache.reportStat(f); 86851c0b2f7Stbbdev fprintf(f, "cache time %lu\n", cacheCurrTime.load(std::memory_order_relaxed)); 86951c0b2f7Stbbdev } 87051c0b2f7Stbbdev #endif 87151c0b2f7Stbbdev 87251c0b2f7Stbbdev template<typename Props> 87351c0b2f7Stbbdev void LargeObjectCacheImpl<Props>::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *toCache) 87451c0b2f7Stbbdev { 87551c0b2f7Stbbdev int toBinIdx = Props::sizeToIdx(toCache->unalignedSize); 87651c0b2f7Stbbdev 87751c0b2f7Stbbdev MALLOC_ITT_SYNC_RELEASING(bin+toBinIdx); 87851c0b2f7Stbbdev bin[toBinIdx].putList(extMemPool, toCache, &bitMask, toBinIdx); 87951c0b2f7Stbbdev } 88051c0b2f7Stbbdev 88151c0b2f7Stbbdev void LargeObjectCache::updateCacheState(DecreaseOrIncrease op, size_t size) 88251c0b2f7Stbbdev { 88351c0b2f7Stbbdev if (size < maxLargeSize) 88451c0b2f7Stbbdev largeCache.updateCacheState(extMemPool, op, size); 88551c0b2f7Stbbdev else if (size < maxHugeSize) 88651c0b2f7Stbbdev hugeCache.updateCacheState(extMemPool, op, size); 88751c0b2f7Stbbdev } 88851c0b2f7Stbbdev 88951c0b2f7Stbbdev 89051c0b2f7Stbbdev uintptr_t LargeObjectCache::getCurrTimeRange(uintptr_t range) 89151c0b2f7Stbbdev { 89251c0b2f7Stbbdev return (cacheCurrTime.fetch_add(range) + 1); 89351c0b2f7Stbbdev } 89451c0b2f7Stbbdev 89551c0b2f7Stbbdev void LargeObjectCache::registerRealloc(size_t oldSize, size_t newSize) 89651c0b2f7Stbbdev { 89751c0b2f7Stbbdev updateCacheState(decrease, oldSize); 89851c0b2f7Stbbdev updateCacheState(increase, alignToBin(newSize)); 89951c0b2f7Stbbdev } 90051c0b2f7Stbbdev 90151c0b2f7Stbbdev size_t LargeObjectCache::alignToBin(size_t size) { 90251c0b2f7Stbbdev return size < maxLargeSize ? LargeCacheType::alignToBin(size) : HugeCacheType::alignToBin(size); 90351c0b2f7Stbbdev } 90451c0b2f7Stbbdev 90551c0b2f7Stbbdev // Used for internal purpose 90651c0b2f7Stbbdev int LargeObjectCache::sizeToIdx(size_t size) 90751c0b2f7Stbbdev { 90851c0b2f7Stbbdev MALLOC_ASSERT(size <= maxHugeSize, ASSERT_TEXT); 90951c0b2f7Stbbdev return size < maxLargeSize ? 91051c0b2f7Stbbdev LargeCacheType::sizeToIdx(size) : 91151c0b2f7Stbbdev LargeCacheType::numBins + HugeCacheType::sizeToIdx(size); 91251c0b2f7Stbbdev } 91351c0b2f7Stbbdev 91451c0b2f7Stbbdev void LargeObjectCache::putList(LargeMemoryBlock *list) 91551c0b2f7Stbbdev { 91651c0b2f7Stbbdev LargeMemoryBlock *toProcess, *n; 91751c0b2f7Stbbdev 91851c0b2f7Stbbdev for (LargeMemoryBlock *curr = list; curr; curr = toProcess) { 91951c0b2f7Stbbdev LargeMemoryBlock *tail = curr; 92051c0b2f7Stbbdev toProcess = curr->next; 92151c0b2f7Stbbdev if (!sizeInCacheRange(curr->unalignedSize)) { 92251c0b2f7Stbbdev extMemPool->backend.returnLargeObject(curr); 92351c0b2f7Stbbdev continue; 92451c0b2f7Stbbdev } 92551c0b2f7Stbbdev int currIdx = sizeToIdx(curr->unalignedSize); 92651c0b2f7Stbbdev 92751c0b2f7Stbbdev // Find all blocks fitting to same bin. Not use more efficient sorting 92851c0b2f7Stbbdev // algorithm because list is short (commonly, 92951c0b2f7Stbbdev // LocalLOC's HIGH_MARK-LOW_MARK, i.e. 24 items). 93051c0b2f7Stbbdev for (LargeMemoryBlock *b = toProcess; b; b = n) { 93151c0b2f7Stbbdev n = b->next; 93251c0b2f7Stbbdev if (sizeToIdx(b->unalignedSize) == currIdx) { 93351c0b2f7Stbbdev tail->next = b; 93451c0b2f7Stbbdev tail = b; 93551c0b2f7Stbbdev if (toProcess == b) 93651c0b2f7Stbbdev toProcess = toProcess->next; 93751c0b2f7Stbbdev else { 93851c0b2f7Stbbdev b->prev->next = b->next; 93951c0b2f7Stbbdev if (b->next) 94051c0b2f7Stbbdev b->next->prev = b->prev; 94151c0b2f7Stbbdev } 94251c0b2f7Stbbdev } 94351c0b2f7Stbbdev } 94457f524caSIlya Isaev tail->next = nullptr; 94551c0b2f7Stbbdev if (curr->unalignedSize < maxLargeSize) 94651c0b2f7Stbbdev largeCache.putList(extMemPool, curr); 94751c0b2f7Stbbdev else 94851c0b2f7Stbbdev hugeCache.putList(extMemPool, curr); 94951c0b2f7Stbbdev } 95051c0b2f7Stbbdev } 95151c0b2f7Stbbdev 95251c0b2f7Stbbdev void LargeObjectCache::put(LargeMemoryBlock *largeBlock) 95351c0b2f7Stbbdev { 95451c0b2f7Stbbdev size_t blockSize = largeBlock->unalignedSize; 95551c0b2f7Stbbdev if (sizeInCacheRange(blockSize)) { 95657f524caSIlya Isaev largeBlock->next = nullptr; 95751c0b2f7Stbbdev if (blockSize < maxLargeSize) 95851c0b2f7Stbbdev largeCache.putList(extMemPool, largeBlock); 95951c0b2f7Stbbdev else 96051c0b2f7Stbbdev hugeCache.putList(extMemPool, largeBlock); 96151c0b2f7Stbbdev } else { 96251c0b2f7Stbbdev extMemPool->backend.returnLargeObject(largeBlock); 96351c0b2f7Stbbdev } 96451c0b2f7Stbbdev } 96551c0b2f7Stbbdev 96651c0b2f7Stbbdev LargeMemoryBlock *LargeObjectCache::get(size_t size) 96751c0b2f7Stbbdev { 96851c0b2f7Stbbdev MALLOC_ASSERT( size >= minLargeSize, ASSERT_TEXT ); 96951c0b2f7Stbbdev if (sizeInCacheRange(size)) { 97051c0b2f7Stbbdev return size < maxLargeSize ? 97151c0b2f7Stbbdev largeCache.get(extMemPool, size) : hugeCache.get(extMemPool, size); 97251c0b2f7Stbbdev } 97357f524caSIlya Isaev return nullptr; 97451c0b2f7Stbbdev } 97551c0b2f7Stbbdev 97651c0b2f7Stbbdev LargeMemoryBlock *ExtMemoryPool::mallocLargeObject(MemoryPool *pool, size_t allocationSize) 97751c0b2f7Stbbdev { 97851c0b2f7Stbbdev #if __TBB_MALLOC_LOCACHE_STAT 97951c0b2f7Stbbdev mallocCalls++; 98051c0b2f7Stbbdev memAllocKB.fetch_add(allocationSize/1024); 98151c0b2f7Stbbdev #endif 98251c0b2f7Stbbdev LargeMemoryBlock* lmb = loc.get(allocationSize); 98351c0b2f7Stbbdev if (!lmb) { 98451c0b2f7Stbbdev BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true); 98551c0b2f7Stbbdev if (backRefIdx.isInvalid()) 98657f524caSIlya Isaev return nullptr; 98751c0b2f7Stbbdev 98851c0b2f7Stbbdev // unalignedSize is set in getLargeBlock 98951c0b2f7Stbbdev lmb = backend.getLargeBlock(allocationSize); 99051c0b2f7Stbbdev if (!lmb) { 99151c0b2f7Stbbdev removeBackRef(backRefIdx); 99251c0b2f7Stbbdev loc.updateCacheState(decrease, allocationSize); 99357f524caSIlya Isaev return nullptr; 99451c0b2f7Stbbdev } 99551c0b2f7Stbbdev lmb->backRefIdx = backRefIdx; 99651c0b2f7Stbbdev lmb->pool = pool; 99751c0b2f7Stbbdev STAT_increment(getThreadId(), ThreadCommonCounters, allocNewLargeObj); 99851c0b2f7Stbbdev } else { 99951c0b2f7Stbbdev #if __TBB_MALLOC_LOCACHE_STAT 100051c0b2f7Stbbdev cacheHits++; 100151c0b2f7Stbbdev memHitKB.fetch_add(allocationSize/1024); 100251c0b2f7Stbbdev #endif 100351c0b2f7Stbbdev } 100451c0b2f7Stbbdev return lmb; 100551c0b2f7Stbbdev } 100651c0b2f7Stbbdev 100751c0b2f7Stbbdev void ExtMemoryPool::freeLargeObject(LargeMemoryBlock *mBlock) 100851c0b2f7Stbbdev { 100951c0b2f7Stbbdev loc.put(mBlock); 101051c0b2f7Stbbdev } 101151c0b2f7Stbbdev 101251c0b2f7Stbbdev void ExtMemoryPool::freeLargeObjectList(LargeMemoryBlock *head) 101351c0b2f7Stbbdev { 101451c0b2f7Stbbdev loc.putList(head); 101551c0b2f7Stbbdev } 101651c0b2f7Stbbdev 101751c0b2f7Stbbdev bool ExtMemoryPool::softCachesCleanup() 101851c0b2f7Stbbdev { 101951c0b2f7Stbbdev return loc.regularCleanup(); 102051c0b2f7Stbbdev } 102151c0b2f7Stbbdev 102251c0b2f7Stbbdev bool ExtMemoryPool::hardCachesCleanup() 102351c0b2f7Stbbdev { 102451c0b2f7Stbbdev // thread-local caches must be cleaned before LOC, 102551c0b2f7Stbbdev // because object from thread-local cache can be released to LOC 102651c0b2f7Stbbdev bool ret = releaseAllLocalCaches(); 102751c0b2f7Stbbdev ret |= orphanedBlocks.cleanup(&backend); 102851c0b2f7Stbbdev ret |= loc.cleanAll(); 102951c0b2f7Stbbdev ret |= backend.clean(); 103051c0b2f7Stbbdev return ret; 103151c0b2f7Stbbdev } 103251c0b2f7Stbbdev 103351c0b2f7Stbbdev #if BACKEND_HAS_MREMAP 103451c0b2f7Stbbdev void *ExtMemoryPool::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment) 103551c0b2f7Stbbdev { 103651c0b2f7Stbbdev const size_t oldUnalignedSize = ((LargeObjectHdr*)ptr - 1)->memoryBlock->unalignedSize; 103751c0b2f7Stbbdev void *o = backend.remap(ptr, oldSize, newSize, alignment); 103851c0b2f7Stbbdev if (o) { 103951c0b2f7Stbbdev LargeMemoryBlock *lmb = ((LargeObjectHdr*)o - 1)->memoryBlock; 104051c0b2f7Stbbdev loc.registerRealloc(oldUnalignedSize, lmb->unalignedSize); 104151c0b2f7Stbbdev } 104251c0b2f7Stbbdev return o; 104351c0b2f7Stbbdev } 104451c0b2f7Stbbdev #endif /* BACKEND_HAS_MREMAP */ 104551c0b2f7Stbbdev 104651c0b2f7Stbbdev /*********** End allocation of large objects **********/ 104751c0b2f7Stbbdev 104851c0b2f7Stbbdev } // namespace internal 104951c0b2f7Stbbdev } // namespace rml 105051c0b2f7Stbbdev 105151c0b2f7Stbbdev #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) 105251c0b2f7Stbbdev #pragma warning(pop) 105351c0b2f7Stbbdev #endif 105451c0b2f7Stbbdev 1055