/*
    Copyright (c) 2005-2022 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include "../src/tbb/environment.h"

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
// Suppress warning: unary minus operator applied to unsigned type, result still unsigned
// TBB_REVAMP_TODO: review this warning
#pragma warning(push)
#pragma warning(disable:4146)
#endif

/******************************* Allocation of large objects *********************************************/

namespace rml {
namespace internal {

/* ---------------------------- Large Object cache init section ---------------------------------------- */

void LargeObjectCache::init(ExtMemoryPool *memPool)
{
    extMemPool = memPool;
    // scalable_allocation_mode can be called before allocator initialization, respect this manual request
    if (hugeSizeThreshold == 0) {
        // Initialize the huge size threshold if the environment variable was set
        long requestedThreshold = tbb::detail::r1::GetIntegralEnvironmentVariable("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD");
        // Use a valid environment value, or fall back to the maximum possible value
        if (requestedThreshold != -1) {
            setHugeSizeThreshold(requestedThreshold);
        } else {
            setHugeSizeThreshold(maxHugeSize);
        }
    }
}

/* ----------------------------- Huge size threshold settings ----------------------------------------- */

void LargeObjectCache::setHugeSizeThreshold(size_t value)
{
    // Valid in the huge cache range: [MaxLargeSize, MaxHugeSize].
    if (value <= maxHugeSize) {
        hugeSizeThreshold = value >= maxLargeSize ? alignToBin(value) : maxLargeSize;

        // Calculate local indexes for the global threshold size (for fast search inside a regular cleanup)
        largeCache.hugeSizeThresholdIdx = LargeCacheType::numBins;
        hugeCache.hugeSizeThresholdIdx = HugeCacheType::sizeToIdx(hugeSizeThreshold);
    }
}

bool LargeObjectCache::sizeInCacheRange(size_t size)
{
    return size < maxHugeSize && (size <= defaultMaxHugeSize || size >= hugeSizeThreshold);
}

/* ----------------------------------------------------------------------------------------------------- */

/* The functor called by the aggregator for the operation list */
template<typename Props>
class CacheBinFunctor {
    typename LargeObjectCacheImpl<Props>::CacheBin *const bin;
    ExtMemoryPool *const extMemPool;
    typename LargeObjectCacheImpl<Props>::BinBitMask *const bitMask;
    const int idx;

    LargeMemoryBlock *toRelease;
    bool needCleanup;
    uintptr_t currTime;

    /* Does preprocessing of the operation list. */
    /* All OP_PUT_LIST operations are merged into one operation.
       All OP_GET operations are merged with the OP_PUT_LIST operations, but
       this requires updating the moving average value in the bin.
       Only the last OP_CLEAN_TO_THRESHOLD operation takes effect.
       The OP_CLEAN_ALL operation should also be performed only once.
       Moreover, it cancels the OP_CLEAN_TO_THRESHOLD operation. */
    class OperationPreprocessor {
        // TODO: remove the dependency on CacheBin.
        typename LargeObjectCacheImpl<Props>::CacheBin *const bin;

        /* Contains the relative time in the operation list.
           It counts in the reverse order since the aggregator also
           provides operations in the reverse order. */
        uintptr_t lclTime;

        /* opGet contains only OP_GET operations which cannot be merged with OP_PUT operations.
           opClean contains all OP_CLEAN_TO_THRESHOLD and OP_CLEAN_ALL operations. */
        CacheBinOperation *opGet, *opClean;
        /* The time of the last OP_CLEAN_TO_THRESHOLD operation. */
        uintptr_t cleanTime;

        /* lastGetOpTime - the time of the last OP_GET operation.
           lastGet - the same meaning as CacheBin::lastGet */
        uintptr_t lastGetOpTime, lastGet;

        /* The total sum of all usedSize changes requested with CBOP_UPDATE_USED_SIZE operations. */
        size_t updateUsedSize;

        /* The list of blocks for the OP_PUT_LIST operation. */
        LargeMemoryBlock *head, *tail;
        int putListNum;

        /* Whether OP_CLEAN_ALL is requested. */
        bool isCleanAll;

        inline void commitOperation(CacheBinOperation *op) const;
        inline void addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const;
        bool getFromPutList(CacheBinOperation* opGet, uintptr_t currTime);
        void addToPutList( LargeMemoryBlock *head, LargeMemoryBlock *tail, int num );

    public:
        OperationPreprocessor(typename LargeObjectCacheImpl<Props>::CacheBin *bin) :
            bin(bin), lclTime(0), opGet(nullptr), opClean(nullptr), cleanTime(0),
            lastGetOpTime(0), updateUsedSize(0), head(nullptr), isCleanAll(false) {}
        void operator()(CacheBinOperation* opList);
        uintptr_t getTimeRange() const { return -lclTime; }

        friend class CacheBinFunctor;
    };

public:
    CacheBinFunctor(typename LargeObjectCacheImpl<Props>::CacheBin *bin, ExtMemoryPool *extMemPool,
                    typename LargeObjectCacheImpl<Props>::BinBitMask *bitMask, int idx) :
        bin(bin), extMemPool(extMemPool), bitMask(bitMask), idx(idx), toRelease(nullptr), needCleanup(false) {}
    void operator()(CacheBinOperation* opList);

    bool isCleanupNeeded() const { return needCleanup; }
    LargeMemoryBlock *getToRelease() const { return toRelease; }
    uintptr_t getCurrTime() const { return currTime; }
};

/* ---------------- Cache Bin Aggregator Operation Helpers ---------------- */

// The list of structures which describe the operation data
struct OpGet {
    static const CacheBinOperationType type = CBOP_GET;
    LargeMemoryBlock **res;
    size_t size;
    uintptr_t currTime;
};

struct OpPutList {
    static const CacheBinOperationType type = CBOP_PUT_LIST;
    LargeMemoryBlock *head;
};

struct OpCleanToThreshold {
    static const CacheBinOperationType type = CBOP_CLEAN_TO_THRESHOLD;
    LargeMemoryBlock **res;
    uintptr_t currTime;
};

struct OpCleanAll {
    static const CacheBinOperationType type = CBOP_CLEAN_ALL;
    LargeMemoryBlock **res;
};

struct OpUpdateUsedSize {
    static const CacheBinOperationType type = CBOP_UPDATE_USED_SIZE;
    size_t size;
};

union CacheBinOperationData {
private:
    OpGet opGet;
    OpPutList opPutList;
    OpCleanToThreshold opCleanToThreshold;
    OpCleanAll opCleanAll;
    OpUpdateUsedSize opUpdateUsedSize;
};

// Forward declarations
template <typename OpTypeData> OpTypeData& opCast(CacheBinOperation &op);
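
// Typical usage (as in CacheBin::get below): build an operation from its typed
// payload, run it through the aggregator, then read the result back:
//     LargeMemoryBlock *lmb = nullptr;
//     OpGet data = {&lmb, size};
//     CacheBinOperation op(data);
//     ExecuteOperation(&op, extMemPool, bitMask, idx);
// Inside the handler the payload is recovered with opCast<OpGet>(op).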

// Describes the aggregator operation
struct CacheBinOperation : public MallocAggregatedOperation<CacheBinOperation>::type {
    CacheBinOperationType type;

    template <typename OpTypeData>
    CacheBinOperation(OpTypeData &d, CacheBinOperationStatus st = CBST_WAIT) {
        opCast<OpTypeData>(*this) = d;
        type = OpTypeData::type;
        MallocAggregatedOperation<CacheBinOperation>::type::status = st;
    }
private:
    CacheBinOperationData data;

    template <typename OpTypeData>
    friend OpTypeData& opCast(CacheBinOperation &op);
};

// The opCast function could be a member of CacheBinOperation, but that would
// introduce a small stylistic ambiguity: it would look like a getter (with a
// cast) for the CacheBinOperation::data member, whereas it must return a
// reference to avoid numerous getter/setter calls. So a global cast in the
// style of static_cast (or reinterpret_cast) is more readable and has more
// explicit semantics.
template <typename OpTypeData>
OpTypeData& opCast(CacheBinOperation &op) {
    return *reinterpret_cast<OpTypeData*>(&op.data);
}

/* ------------------------------------------------------------------------ */

#if __TBB_MALLOC_LOCACHE_STAT
//intptr_t mallocCalls, cacheHits;
std::atomic<intptr_t> mallocCalls, cacheHits;
//intptr_t memAllocKB, memHitKB;
std::atomic<intptr_t> memAllocKB, memHitKB;
#endif

#if MALLOC_DEBUG
inline bool lessThanWithOverflow(intptr_t a, intptr_t b)
{
    return (a < b && (b - a < UINTPTR_MAX/2)) ||
           (a > b && (a - b > UINTPTR_MAX/2));
}
#endif

/* ----------------------------------- Operation processing methods ------------------------------------ */

template<typename Props> void CacheBinFunctor<Props>::
OperationPreprocessor::commitOperation(CacheBinOperation *op) const
{
    // FencedStore( (intptr_t&)(op->status), CBST_DONE );
    op->status.store(CBST_DONE, std::memory_order_release);
}

template<typename Props> void CacheBinFunctor<Props>::
OperationPreprocessor::addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const
{
    op->next = *opList;
    *opList = op;
}

template<typename Props> bool CacheBinFunctor<Props>::
OperationPreprocessor::getFromPutList(CacheBinOperation *opGet, uintptr_t currTime)
{
    if ( head ) {
        uintptr_t age = head->age;
        LargeMemoryBlock *next = head->next;
        *opCast<OpGet>(*opGet).res = head;
        commitOperation( opGet );
        head = next;
        putListNum--;
        MALLOC_ASSERT( putListNum>=0, ASSERT_TEXT );

        // use moving average with current hit interval
        bin->updateMeanHitRange( currTime - age );
        return true;
    }
    return false;
}

template<typename Props> void CacheBinFunctor<Props>::
OperationPreprocessor::addToPutList(LargeMemoryBlock *h, LargeMemoryBlock *t, int num)
{
    if ( head ) {
        MALLOC_ASSERT( tail, ASSERT_TEXT );
        tail->next = h;
        h->prev = tail;
        tail = t;
        putListNum += num;
    } else {
        head = h;
        tail = t;
        putListNum = num;
    }
}

template<typename Props> void CacheBinFunctor<Props>::
OperationPreprocessor::operator()(CacheBinOperation* opList)
{
    for ( CacheBinOperation *op = opList, *opNext; op; op = opNext ) {
        opNext = op->next;
        switch ( op->type ) {
        case CBOP_GET:
            {
                lclTime--;
                if ( !lastGetOpTime ) {
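                    // Operations are supplied newest-first (lclTime counts down),
                    // so this is the most recent OP_GET in the batch.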
                    lastGetOpTime = lclTime;
                    lastGet = 0;
                } else if ( !lastGet ) lastGet = lclTime;

                if ( !getFromPutList(op,lclTime) ) {
                    opCast<OpGet>(*op).currTime = lclTime;
                    addOpToOpList( op, &opGet );
                }
            }
            break;

        case CBOP_PUT_LIST:
            {
                LargeMemoryBlock *head = opCast<OpPutList>(*op).head;
                LargeMemoryBlock *curr = head, *prev = nullptr;

                int num = 0;
                do {
                    // prev pointers were not kept while blocks were assigned to bins, set them now
                    curr->prev = prev;

                    // Save the local times to the memory blocks. Local times are necessary
                    // for the getFromPutList function which updates the hit range value in
                    // CacheBin when OP_GET and OP_PUT_LIST operations are merged successfully.
                    // The age will be updated to the correct global time after preprocessing
                    // when global cache time is updated.
                    curr->age = --lclTime;

                    prev = curr;
                    num += 1;

                    STAT_increment(getThreadId(), ThreadCommonCounters, cacheLargeObj);
                } while ((curr = curr->next) != nullptr);

                LargeMemoryBlock *tail = prev;
                addToPutList(head, tail, num);

                while ( opGet ) {
                    CacheBinOperation *next = opGet->next;
                    if ( !getFromPutList(opGet, opCast<OpGet>(*opGet).currTime) )
                        break;
                    opGet = next;
                }
            }
            break;

        case CBOP_UPDATE_USED_SIZE:
            updateUsedSize += opCast<OpUpdateUsedSize>(*op).size;
            commitOperation( op );
            break;

        case CBOP_CLEAN_ALL:
            isCleanAll = true;
            addOpToOpList( op, &opClean );
            break;

        case CBOP_CLEAN_TO_THRESHOLD:
            {
                uintptr_t currTime = opCast<OpCleanToThreshold>(*op).currTime;
                // We don't worry about currTime overflow since it is a rare
                // occurrence and doesn't affect correctness
                cleanTime = cleanTime < currTime ? currTime : cleanTime;
                addOpToOpList( op, &opClean );
            }
            break;

        default:
            MALLOC_ASSERT( false, "Unknown operation." );
        }
    }
    MALLOC_ASSERT( !( opGet && head ), "Not all put/get pairs are processed!" );
}

template<typename Props> void CacheBinFunctor<Props>::operator()(CacheBinOperation* opList)
{
    MALLOC_ASSERT( opList, "Empty operation list is passed into operation handler." );

    OperationPreprocessor prep(bin);
    prep(opList);

    if ( uintptr_t timeRange = prep.getTimeRange() ) {
        uintptr_t startTime = extMemPool->loc.getCurrTimeRange(timeRange);
        // endTime is used as the current (base) time since the local time is negative.
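        // Adding endTime to an operation's (negative) local time converts it
        // into the global cache time obtained from getCurrTimeRange().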
        uintptr_t endTime = startTime + timeRange;

        if ( prep.lastGetOpTime && prep.lastGet ) bin->setLastGet(prep.lastGet+endTime);

        if ( CacheBinOperation *opGet = prep.opGet ) {
            bool isEmpty = false;
            do {
#if __TBB_MALLOC_WHITEBOX_TEST
                tbbmalloc_whitebox::locGetProcessed++;
#endif
                const OpGet &opGetData = opCast<OpGet>(*opGet);
                if ( !isEmpty ) {
                    if ( LargeMemoryBlock *res = bin->get() ) {
                        uintptr_t getTime = opGetData.currTime + endTime;
                        // use moving average with current hit interval
                        bin->updateMeanHitRange( getTime - res->age);
                        bin->updateCachedSize( -opGetData.size );
                        *opGetData.res = res;
                    } else {
                        isEmpty = true;
                        uintptr_t lastGetOpTime = prep.lastGetOpTime+endTime;
                        bin->forgetOutdatedState(lastGetOpTime);
                        bin->updateAgeThreshold(lastGetOpTime);
                    }
                }

                CacheBinOperation *opNext = opGet->next;
                bin->updateUsedSize( opGetData.size, bitMask, idx );
                prep.commitOperation( opGet );
                opGet = opNext;
            } while ( opGet );
            if ( prep.lastGetOpTime )
                bin->setLastGet( prep.lastGetOpTime + endTime );
        } else if ( LargeMemoryBlock *curr = prep.head ) {
            curr->prev = nullptr;
            while ( curr ) {
                // Update local times to global times
                curr->age += endTime;
                curr=curr->next;
            }
#if __TBB_MALLOC_WHITEBOX_TEST
            tbbmalloc_whitebox::locPutProcessed+=prep.putListNum;
#endif
            toRelease = bin->putList(prep.head, prep.tail, bitMask, idx, prep.putListNum, extMemPool->loc.hugeSizeThreshold);
        }
        needCleanup = extMemPool->loc.isCleanupNeededOnRange(timeRange, startTime);
        currTime = endTime - 1;
    }

    if ( CacheBinOperation *opClean = prep.opClean ) {
        if ( prep.isCleanAll )
            *opCast<OpCleanAll>(*opClean).res = bin->cleanAll(bitMask, idx);
        else
            *opCast<OpCleanToThreshold>(*opClean).res = bin->cleanToThreshold(prep.cleanTime, bitMask, idx);

        CacheBinOperation *opNext = opClean->next;
        prep.commitOperation( opClean );

        while ((opClean = opNext) != nullptr) {
            opNext = opClean->next;
            prep.commitOperation(opClean);
        }
    }

    if ( size_t size = prep.updateUsedSize )
        bin->updateUsedSize(size, bitMask, idx);
}
/* ----------------------------------------------------------------------------------------------------- */
/* --------------------------- Methods for creating and executing operations --------------------------- */
template<typename Props> void LargeObjectCacheImpl<Props>::
CacheBin::ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime)
{
    CacheBinFunctor<Props> func( this, extMemPool, bitMask, idx );
    aggregator.execute( op, func, longLifeTime );

    if ( LargeMemoryBlock *toRelease = func.getToRelease()) {
        extMemPool->backend.returnLargeObject(toRelease);
    }

    if ( func.isCleanupNeeded() ) {
        extMemPool->loc.doCleanup( func.getCurrTime(), /*doThreshDecr=*/false);
    }
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
CacheBin::get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *lmb=nullptr;
    OpGet data = {&lmb, size};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
    return lmb;
}

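// The PUT operation is constructed in the cached block's own memory (right
// after the LargeMemoryBlock header) with CBST_NOWAIT status, so the caller
// does not wait for the aggregator to process it.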
template<typename Props> void LargeObjectCacheImpl<Props>::
CacheBin::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx)
{
    MALLOC_ASSERT(sizeof(LargeMemoryBlock)+sizeof(CacheBinOperation)<=head->unalignedSize, "CacheBinOperation is too large to be placed in LargeMemoryBlock!");

    OpPutList data = {head};
    CacheBinOperation *op = new (head+1) CacheBinOperation(data, CBST_NOWAIT);
    ExecuteOperation( op, extMemPool, bitMask, idx, false );
}

template<typename Props> bool LargeObjectCacheImpl<Props>::
CacheBin::cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx)
{
    LargeMemoryBlock *toRelease = nullptr;

    /* oldest may be more recent than age; that's why a cast to a signed type
       is used. age overflow is also handled correctly. */
    if (last.load(std::memory_order_relaxed) &&
        (intptr_t)(currTime - oldest.load(std::memory_order_relaxed)) > ageThreshold.load(std::memory_order_relaxed)) {
        OpCleanToThreshold data = {&toRelease, currTime};
        CacheBinOperation op(data);
        ExecuteOperation( &op, extMemPool, bitMask, idx );
    }
    bool released = toRelease;

    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}

template<typename Props> bool LargeObjectCacheImpl<Props>::
CacheBin::releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *toRelease = nullptr;

    if (last.load(std::memory_order_relaxed)) {
        OpCleanAll data = {&toRelease};
        CacheBinOperation op(data);
        ExecuteOperation(&op, extMemPool, bitMask, idx);
    }
    bool released = toRelease;

    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        MALLOC_ASSERT(!helper || lessThanWithOverflow(helper->age, toRelease->age),
                      ASSERT_TEXT);
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
CacheBin::updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    OpUpdateUsedSize data = {size};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
}

/* ------------------------------ Unsafe methods used with the aggregator ------------------------------ */
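// These methods are called only from CacheBinFunctor while it runs under the
// aggregator, so they can read and modify the bin state without extra locking.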

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
CacheBin::putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask, int idx, int num, size_t hugeSizeThreshold)
{
    size_t size = head->unalignedSize;
    usedSize.store(usedSize.load(std::memory_order_relaxed) - num * size, std::memory_order_relaxed);
    MALLOC_ASSERT( !last.load(std::memory_order_relaxed) ||
        (last.load(std::memory_order_relaxed)->age != 0 && last.load(std::memory_order_relaxed)->age != -1U), ASSERT_TEXT );
    MALLOC_ASSERT( (tail==head && num==1) || (tail!=head && num>1), ASSERT_TEXT );
    LargeMemoryBlock *toRelease = nullptr;
    if (size < hugeSizeThreshold && !lastCleanedAge) {
        // The first object of this size was released.
        // Do not cache it, and remember when this occurred
        // to take it into account on a cache miss.
        lastCleanedAge = tail->age;
        toRelease = tail;
        tail = tail->prev;
        if (tail)
            tail->next = nullptr;
        else
            head = nullptr;
        num--;
    }
    if (num) {
        // add [head;tail] list to cache
        MALLOC_ASSERT( tail, ASSERT_TEXT );
        tail->next = first;
        if (first)
            first->prev = tail;
        first = head;
        if (!last.load(std::memory_order_relaxed)) {
            MALLOC_ASSERT(0 == oldest.load(std::memory_order_relaxed), ASSERT_TEXT);
            oldest.store(tail->age, std::memory_order_relaxed);
            last.store(tail, std::memory_order_relaxed);
        }

        cachedSize.store(cachedSize.load(std::memory_order_relaxed) + num * size, std::memory_order_relaxed);
    }

    // No used objects and nothing in the bin: mark the bin as empty
    if (!usedSize.load(std::memory_order_relaxed) && !first)
        bitMask->set(idx, false);

    return toRelease;
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
CacheBin::get()
{
    LargeMemoryBlock *result=first;
    if (result) {
        first = result->next;
        if (first)
            first->prev = nullptr;
        else {
            last.store(nullptr, std::memory_order_relaxed);
            oldest.store(0, std::memory_order_relaxed);
        }
    }

    return result;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
CacheBin::forgetOutdatedState(uintptr_t currTime)
{
    // If the time since the last get is LongWaitFactor times more than ageThreshold
    // for the bin, treat the bin as rarely used and forget everything we know
    // about it.
    // If LongWaitFactor is too small, we forget too early and so prevent good
    // caching; if it is too high, we end up caching blocks with unrelated usage
    // patterns.
    const uintptr_t sinceLastGet = currTime - lastGet;
    bool doCleanup = false;

    intptr_t threshold = ageThreshold.load(std::memory_order_relaxed);
    if (threshold)
        doCleanup = sinceLastGet > Props::LongWaitFactor * threshold;
    else if (lastCleanedAge)
        doCleanup = sinceLastGet > Props::LongWaitFactor * (lastCleanedAge - lastGet);

    if (doCleanup) {
        lastCleanedAge = 0;
        ageThreshold.store(0, std::memory_order_relaxed);
    }
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
CacheBin::cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx)
{
    /* oldest may be more recent than age; that's why a cast to a signed type
       is used. age overflow is also handled correctly.
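       The debug-only assertion in the loop below checks the same
       overflow-tolerant ordering via lessThanWithOverflow.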
    */
    if ( !last.load(std::memory_order_relaxed) ||
         (intptr_t)(currTime - last.load(std::memory_order_relaxed)->age) < ageThreshold.load(std::memory_order_relaxed) )
        return nullptr;

#if MALLOC_DEBUG
    uintptr_t nextAge = 0;
#endif
    do {
#if MALLOC_DEBUG
        // check that the list is ordered
        MALLOC_ASSERT(!nextAge || lessThanWithOverflow(nextAge, last.load(std::memory_order_relaxed)->age),
                      ASSERT_TEXT);
        nextAge = last.load(std::memory_order_relaxed)->age;
#endif
        cachedSize.store(cachedSize.load(std::memory_order_relaxed) - last.load(std::memory_order_relaxed)->unalignedSize, std::memory_order_relaxed);
        last.store(last.load(std::memory_order_relaxed)->prev, std::memory_order_relaxed);
    } while (last.load(std::memory_order_relaxed) &&
             (intptr_t)(currTime - last.load(std::memory_order_relaxed)->age) > ageThreshold.load(std::memory_order_relaxed));

    LargeMemoryBlock *toRelease = nullptr;
    if (last.load(std::memory_order_relaxed)) {
        toRelease = last.load(std::memory_order_relaxed)->next;
        oldest.store(last.load(std::memory_order_relaxed)->age, std::memory_order_relaxed);
        last.load(std::memory_order_relaxed)->next = nullptr;
    } else {
        toRelease = first;
        first = nullptr;
        oldest.store(0, std::memory_order_relaxed);
        if (!usedSize.load(std::memory_order_relaxed))
            bitMask->set(idx, false);
    }
    MALLOC_ASSERT( toRelease, ASSERT_TEXT );
    lastCleanedAge = toRelease->age;

    return toRelease;
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
CacheBin::cleanAll(BinBitMask *bitMask, int idx)
{
    if (!last.load(std::memory_order_relaxed)) return nullptr;

    LargeMemoryBlock *toRelease = first;
    last.store(nullptr, std::memory_order_relaxed);
    first = nullptr;
    oldest.store(0, std::memory_order_relaxed);
    cachedSize.store(0, std::memory_order_relaxed);
    if (!usedSize.load(std::memory_order_relaxed))
        bitMask->set(idx, false);

    return toRelease;
}

/* ----------------------------------------------------------------------------------------------------- */

#if __TBB_MALLOC_BACKEND_STAT
template<typename Props> size_t LargeObjectCacheImpl<Props>::
CacheBin::reportStat(int num, FILE *f)
{
#if __TBB_MALLOC_LOCACHE_STAT
    if (first)
        printf("%d(%lu): total %lu KB thr %ld lastCln %lu oldest %lu\n",
               num, num*Props::CacheStep+Props::MinSize,
               cachedSize.load(std::memory_order_relaxed)/1024, ageThreshold.load(std::memory_order_relaxed), lastCleanedAge, oldest.load(std::memory_order_relaxed));
#else
    suppress_unused_warning(num);
    suppress_unused_warning(f);
#endif
    return cachedSize.load(std::memory_order_relaxed);
}
#endif

// Release objects from cache blocks that are older than ageThreshold
template<typename Props>
bool LargeObjectCacheImpl<Props>::regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currTime, bool doThreshDecr)
{
    bool released = false;
    BinsSummary binsSummary;

    // The threshold is below this cache's range or starts at the zero index, so there is nothing to clean up
    if (hugeSizeThresholdIdx == 0) return false;

    // Start searching from the bins below the huge size threshold (they can be cleaned up)
    int startSearchIdx = hugeSizeThresholdIdx - 1;

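    // Walk only the occupied bins, from the highest bin below the huge size
    // threshold down to bin 0 (bitMask tracks bins with cached or used memory).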
    for (int i = bitMask.getMaxTrue(startSearchIdx); i >= 0; i = bitMask.getMaxTrue(i-1)) {
        bin[i].updateBinsSummary(&binsSummary);
        if (!doThreshDecr && tooLargeLOC.load(std::memory_order_relaxed) > 2 && binsSummary.isLOCTooLarge()) {
            // If the LOC has been too large for quite a long time, decrease the
            // threshold based on bin hit statistics.
            // For this, redo the cleanup from the beginning.
            // Note: on this iteration the total usedSz may not look too large
            // in comparison to the total cachedSz, as we calculated it only
            // partially. We are OK with that.
            i = bitMask.getMaxTrue(startSearchIdx)+1;
            doThreshDecr = true;
            binsSummary.reset();
            continue;
        }
        if (doThreshDecr)
            bin[i].decreaseThreshold();

        if (bin[i].cleanToThreshold(extMemPool, &bitMask, currTime, i)) {
            released = true;
        }
    }
    // We want to detect whether the LOC stayed too large for some continuous time,
    // so races between incrementing and zeroing are acceptable, but the increment
    // itself must be atomic.
    if (binsSummary.isLOCTooLarge()) {
        tooLargeLOC++;
    } else {
        tooLargeLOC.store(0, std::memory_order_relaxed);
    }
    return released;
}

template<typename Props>
bool LargeObjectCacheImpl<Props>::cleanAll(ExtMemoryPool *extMemPool)
{
    bool released = false;
    for (int i = numBins-1; i >= 0; i--) {
        released |= bin[i].releaseAllToBackend(extMemPool, &bitMask, i);
    }
    return released;
}

template<typename Props>
void LargeObjectCacheImpl<Props>::reset() {
    tooLargeLOC.store(0, std::memory_order_relaxed);
    for (int i = numBins-1; i >= 0; i--)
        bin[i].init();
    bitMask.reset();
}

#if __TBB_MALLOC_WHITEBOX_TEST
template<typename Props>
size_t LargeObjectCacheImpl<Props>::getLOCSize() const
{
    size_t size = 0;
    for (int i = numBins-1; i >= 0; i--)
        size += bin[i].getSize();
    return size;
}

size_t LargeObjectCache::getLOCSize() const
{
    return largeCache.getLOCSize() + hugeCache.getLOCSize();
}

template<typename Props>
size_t LargeObjectCacheImpl<Props>::getUsedSize() const
{
    size_t size = 0;
    for (int i = numBins-1; i >= 0; i--)
        size += bin[i].getUsedSize();
    return size;
}

size_t LargeObjectCache::getUsedSize() const
{
    return largeCache.getUsedSize() + hugeCache.getUsedSize();
}
#endif // __TBB_MALLOC_WHITEBOX_TEST

inline bool LargeObjectCache::isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime)
{
    return range >= cacheCleanupFreq
        || currTime+range < currTime-1 // overflow, 0 is power of 2, do cleanup
        // (prev;prev+range] contains n*cacheCleanupFreq
        || alignUp(currTime, cacheCleanupFreq)<currTime+range;
}

bool LargeObjectCache::doCleanup(uintptr_t currTime, bool doThreshDecr)
{
    if (!doThreshDecr)
        extMemPool->allLocalCaches.markUnused();
    return largeCache.regularCleanup(extMemPool, currTime, doThreshDecr)
        | hugeCache.regularCleanup(extMemPool, currTime, doThreshDecr);
}

bool LargeObjectCache::decreasingCleanup()
{
    return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/true);
}

bool LargeObjectCache::regularCleanup()
{
    return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/false);
}

bool LargeObjectCache::cleanAll()
{
    return largeCache.cleanAll(extMemPool) | hugeCache.cleanAll(extMemPool);
}

void LargeObjectCache::reset()
{
    largeCache.reset();
    hugeCache.reset();
}

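// Map the request to its bin and run a GET operation through the aggregator; a
// nullptr result is a cache miss, and the caller then allocates the block from
// the backend (see ExtMemoryPool::mallocLargeObject below).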
template<typename Props>
LargeMemoryBlock *LargeObjectCacheImpl<Props>::get(ExtMemoryPool *extMemoryPool, size_t size)
{
    int idx = Props::sizeToIdx(size);

    LargeMemoryBlock *lmb = bin[idx].get(extMemoryPool, size, &bitMask, idx);

    if (lmb) {
        MALLOC_ITT_SYNC_ACQUIRED(bin+idx);
        STAT_increment(getThreadId(), ThreadCommonCounters, allocCachedLargeObj);
    }
    return lmb;
}

template<typename Props>
void LargeObjectCacheImpl<Props>::updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size)
{
    int idx = Props::sizeToIdx(size);
    MALLOC_ASSERT(idx<numBins, ASSERT_TEXT);
    bin[idx].updateUsedSize(extMemPool, op==decrease? -size : size, &bitMask, idx);
}

#if __TBB_MALLOC_LOCACHE_STAT
template<typename Props>
void LargeObjectCacheImpl<Props>::reportStat(FILE *f)
{
    size_t cachedSize = 0;
    for (int i=0; i<numBins; i++)
        cachedSize += bin[i].reportStat(i, f);
    fprintf(f, "total LOC size %lu MB\n", cachedSize/1024/1024);
}

void LargeObjectCache::reportStat(FILE *f)
{
    largeCache.reportStat(f);
    hugeCache.reportStat(f);
    fprintf(f, "cache time %lu\n", cacheCurrTime.load(std::memory_order_relaxed));
}
#endif

template<typename Props>
void LargeObjectCacheImpl<Props>::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *toCache)
{
    int toBinIdx = Props::sizeToIdx(toCache->unalignedSize);

    MALLOC_ITT_SYNC_RELEASING(bin+toBinIdx);
    bin[toBinIdx].putList(extMemPool, toCache, &bitMask, toBinIdx);
}

void LargeObjectCache::updateCacheState(DecreaseOrIncrease op, size_t size)
{
    if (size < maxLargeSize)
        largeCache.updateCacheState(extMemPool, op, size);
    else if (size < maxHugeSize)
        hugeCache.updateCacheState(extMemPool, op, size);
}

uintptr_t LargeObjectCache::getCurrTimeRange(uintptr_t range)
{
    return (cacheCurrTime.fetch_add(range) + 1);
}

void LargeObjectCache::registerRealloc(size_t oldSize, size_t newSize)
{
    updateCacheState(decrease, oldSize);
    updateCacheState(increase, alignToBin(newSize));
}

size_t LargeObjectCache::alignToBin(size_t size) {
    return size < maxLargeSize ? LargeCacheType::alignToBin(size) : HugeCacheType::alignToBin(size);
}

// Used for internal purposes
int LargeObjectCache::sizeToIdx(size_t size)
{
    MALLOC_ASSERT(size <= maxHugeSize, ASSERT_TEXT);
    return size < maxLargeSize ?
        LargeCacheType::sizeToIdx(size) :
        LargeCacheType::numBins + HugeCacheType::sizeToIdx(size);
}

void LargeObjectCache::putList(LargeMemoryBlock *list)
{
    LargeMemoryBlock *toProcess, *n;

    for (LargeMemoryBlock *curr = list; curr; curr = toProcess) {
        LargeMemoryBlock *tail = curr;
        toProcess = curr->next;
        if (!sizeInCacheRange(curr->unalignedSize)) {
            extMemPool->backend.returnLargeObject(curr);
            continue;
        }
        int currIdx = sizeToIdx(curr->unalignedSize);

        // Find all blocks that fit the same bin. A more efficient sorting
        // algorithm is not used because the list is short (commonly,
        // LocalLOC's HIGH_MARK-LOW_MARK, i.e. 24 items).
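        // Blocks that map to the same bin are spliced out of 'toProcess' and
        // appended after 'tail', so each outer iteration passes a complete
        // per-bin sub-list to a single putList call.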
        for (LargeMemoryBlock *b = toProcess; b; b = n) {
            n = b->next;
            if (sizeToIdx(b->unalignedSize) == currIdx) {
                tail->next = b;
                tail = b;
                if (toProcess == b)
                    toProcess = toProcess->next;
                else {
                    b->prev->next = b->next;
                    if (b->next)
                        b->next->prev = b->prev;
                }
            }
        }
        tail->next = nullptr;
        if (curr->unalignedSize < maxLargeSize)
            largeCache.putList(extMemPool, curr);
        else
            hugeCache.putList(extMemPool, curr);
    }
}

void LargeObjectCache::put(LargeMemoryBlock *largeBlock)
{
    size_t blockSize = largeBlock->unalignedSize;
    if (sizeInCacheRange(blockSize)) {
        largeBlock->next = nullptr;
        if (blockSize < maxLargeSize)
            largeCache.putList(extMemPool, largeBlock);
        else
            hugeCache.putList(extMemPool, largeBlock);
    } else {
        extMemPool->backend.returnLargeObject(largeBlock);
    }
}

LargeMemoryBlock *LargeObjectCache::get(size_t size)
{
    MALLOC_ASSERT( size >= minLargeSize, ASSERT_TEXT );
    if (sizeInCacheRange(size)) {
        return size < maxLargeSize ?
            largeCache.get(extMemPool, size) : hugeCache.get(extMemPool, size);
    }
    return nullptr;
}

LargeMemoryBlock *ExtMemoryPool::mallocLargeObject(MemoryPool *pool, size_t allocationSize)
{
#if __TBB_MALLOC_LOCACHE_STAT
    mallocCalls++;
    memAllocKB.fetch_add(allocationSize/1024);
#endif
    LargeMemoryBlock* lmb = loc.get(allocationSize);
    if (!lmb) {
        BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true);
        if (backRefIdx.isInvalid())
            return nullptr;

        // unalignedSize is set in getLargeBlock
        lmb = backend.getLargeBlock(allocationSize);
        if (!lmb) {
            removeBackRef(backRefIdx);
            loc.updateCacheState(decrease, allocationSize);
            return nullptr;
        }
        lmb->backRefIdx = backRefIdx;
        lmb->pool = pool;
        STAT_increment(getThreadId(), ThreadCommonCounters, allocNewLargeObj);
    } else {
#if __TBB_MALLOC_LOCACHE_STAT
        cacheHits++;
        memHitKB.fetch_add(allocationSize/1024);
#endif
    }
    return lmb;
}

void ExtMemoryPool::freeLargeObject(LargeMemoryBlock *mBlock)
{
    loc.put(mBlock);
}

void ExtMemoryPool::freeLargeObjectList(LargeMemoryBlock *head)
{
    loc.putList(head);
}

bool ExtMemoryPool::softCachesCleanup()
{
    return loc.regularCleanup();
}

bool ExtMemoryPool::hardCachesCleanup()
{
    // Thread-local caches must be cleaned before the LOC because objects
    // from thread-local caches can be released to the LOC.
    bool ret = releaseAllLocalCaches();
    ret |= orphanedBlocks.cleanup(&backend);
    ret |= loc.cleanAll();
    ret |= backend.clean();
    return ret;
}

#if BACKEND_HAS_MREMAP
void *ExtMemoryPool::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
{
    const size_t oldUnalignedSize = ((LargeObjectHdr*)ptr - 1)->memoryBlock->unalignedSize;
    void *o = backend.remap(ptr, oldSize, newSize, alignment);
    if (o) {
        LargeMemoryBlock *lmb = ((LargeObjectHdr*)o - 1)->memoryBlock;
        loc.registerRealloc(oldUnalignedSize, lmb->unalignedSize);
    }
    return o;
}
#endif /* BACKEND_HAS_MREMAP */

/*********** End allocation of large objects **********/

} // namespace internal
} // namespace rml

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma warning(pop)
#endif