/*
    Copyright (c) 2005-2020 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include "../tbb/environment.h"

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
// Suppress warning: unary minus operator applied to unsigned type, result still unsigned
// TBB_REVAMP_TODO: review this warning
#pragma warning(push)
#pragma warning(disable:4146)
#endif

/******************************* Allocation of large objects *********************************************/

namespace rml {
namespace internal {

/* ---------------------------- Large Object cache init section ---------------------------------------- */

void LargeObjectCache::init(ExtMemoryPool *memPool)
{
    extMemPool = memPool;
    // scalable_allocation_mode can be called before allocator initialization; respect this manual request
    if (hugeSizeThreshold == 0) {
        // Initialize the huge size threshold if the environment variable was set
        long requestedThreshold = tbb::detail::r1::GetIntegralEnvironmentVariable("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD");
        // Use a valid environment setting, or fall back to the maximum possible value
        if (requestedThreshold != -1) {
            setHugeSizeThreshold(requestedThreshold);
        } else {
            setHugeSizeThreshold(maxHugeSize);
        }
    }
}

/* ----------------------------- Huge size threshold settings ----------------------------------------- */

void LargeObjectCache::setHugeSizeThreshold(size_t value)
{
    // Valid in the huge cache range: [maxLargeSize, maxHugeSize].
    if (value <= maxHugeSize) {
        hugeSizeThreshold = value >= maxLargeSize ? alignToBin(value) : maxLargeSize;

        // Calculate local indexes for the global threshold size (for fast search inside a regular cleanup)
        largeCache.hugeSizeThresholdIdx = LargeCacheType::numBins;
        hugeCache.hugeSizeThresholdIdx = HugeCacheType::sizeToIdx(hugeSizeThreshold);
    }
}

bool LargeObjectCache::sizeInCacheRange(size_t size)
{
    return size <= maxHugeSize && (size <= defaultMaxHugeSize || size >= hugeSizeThreshold);
}

/* ----------------------------------------------------------------------------------------------------- */
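/* Illustrative note (not part of the original sources): per sizeInCacheRange,
   sizes strictly between defaultMaxHugeSize and hugeSizeThreshold bypass the
   cache and go straight to the backend; lowering the threshold enables caching
   for huge objects at or above it. A sketch of how the threshold is typically
   set, assuming the public TBBMALLOC_SET_HUGE_SIZE_THRESHOLD mode:

       // cache huge objects of 1 GB and above:
       //   scalable_allocation_mode(TBBMALLOC_SET_HUGE_SIZE_THRESHOLD, 1024*1024*1024);
       // or via the environment variable read by init() above:
       //   export TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD=1073741824
*/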
/* The functor called by the aggregator for the operation list */
template<typename Props>
class CacheBinFunctor {
    typename LargeObjectCacheImpl<Props>::CacheBin *const bin;
    ExtMemoryPool *const extMemPool;
    typename LargeObjectCacheImpl<Props>::BinBitMask *const bitMask;
    const int idx;

    LargeMemoryBlock *toRelease;
    bool needCleanup;
    uintptr_t currTime;

    /* Does preprocessing of the operation list.
       All OP_PUT_LIST operations are merged into a single operation.
       OP_GET operations are merged with the OP_PUT_LIST operations, but
       this requires updating the moving average value in the bin.
       Only the last OP_CLEAN_TO_THRESHOLD operation matters.
       The OP_CLEAN_ALL operation should also be performed only once.
       Moreover, it cancels the OP_CLEAN_TO_THRESHOLD operation. */
    class OperationPreprocessor {
        // TODO: remove the dependency on CacheBin.
        typename LargeObjectCacheImpl<Props>::CacheBin *const bin;

        /* Contains the relative time in the operation list.
           It counts in reverse order, since the aggregator also
           provides operations in reverse order. */
        uintptr_t lclTime;

        /* opGet contains only those OP_GET operations which cannot be merged with OP_PUT operations;
           opClean contains all OP_CLEAN_TO_THRESHOLD and OP_CLEAN_ALL operations. */
        CacheBinOperation *opGet, *opClean;
        /* The time of the last OP_CLEAN_TO_THRESHOLD operation */
        uintptr_t cleanTime;

        /* lastGetOpTime - the time of the last OP_GET operation.
           lastGet - the same meaning as CacheBin::lastGet */
        uintptr_t lastGetOpTime, lastGet;

        /* The total sum of all usedSize changes requested with CBOP_UPDATE_USED_SIZE operations. */
        size_t updateUsedSize;

        /* The list of blocks for the OP_PUT_LIST operation. */
        LargeMemoryBlock *head, *tail;
        int putListNum;

        /* Whether OP_CLEAN_ALL is requested. */
        bool isCleanAll;

        inline void commitOperation(CacheBinOperation *op) const;
        inline void addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const;
        bool getFromPutList(CacheBinOperation* opGet, uintptr_t currTime);
        void addToPutList( LargeMemoryBlock *head, LargeMemoryBlock *tail, int num );

    public:
        OperationPreprocessor(typename LargeObjectCacheImpl<Props>::CacheBin *bin) :
            bin(bin), lclTime(0), opGet(NULL), opClean(NULL), cleanTime(0),
            lastGetOpTime(0), updateUsedSize(0), head(NULL), isCleanAll(false) {}
        void operator()(CacheBinOperation* opList);
        uintptr_t getTimeRange() const { return -lclTime; }

        friend class CacheBinFunctor;
    };

public:
    CacheBinFunctor(typename LargeObjectCacheImpl<Props>::CacheBin *bin, ExtMemoryPool *extMemPool,
                    typename LargeObjectCacheImpl<Props>::BinBitMask *bitMask, int idx) :
        bin(bin), extMemPool(extMemPool), bitMask(bitMask), idx(idx), toRelease(NULL), needCleanup(false) {}
    void operator()(CacheBinOperation* opList);

    bool isCleanupNeeded() const { return needCleanup; }
    LargeMemoryBlock *getToRelease() const { return toRelease; }
    uintptr_t getCurrTime() const { return currTime; }
};

/* ---------------- Cache Bin Aggregator Operation Helpers ---------------- */

// The list of structures that describe the operation data
struct OpGet {
    static const CacheBinOperationType type = CBOP_GET;
    LargeMemoryBlock **res;
    size_t size;
    uintptr_t currTime;
};

struct OpPutList {
    static const CacheBinOperationType type = CBOP_PUT_LIST;
    LargeMemoryBlock *head;
};

struct OpCleanToThreshold {
    static const CacheBinOperationType type = CBOP_CLEAN_TO_THRESHOLD;
    LargeMemoryBlock **res;
    uintptr_t currTime;
};

struct OpCleanAll {
    static const CacheBinOperationType type = CBOP_CLEAN_ALL;
    LargeMemoryBlock **res;
};

struct OpUpdateUsedSize {
    static const CacheBinOperationType type = CBOP_UPDATE_USED_SIZE;
    size_t size;
};

union CacheBinOperationData {
private:
    OpGet opGet;
    OpPutList opPutList;
    OpCleanToThreshold opCleanToThreshold;
    OpCleanAll opCleanAll;
    OpUpdateUsedSize opUpdateUsedSize;
};
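/* Illustrative sketch (not part of the original sources): an operation is
   built by filling one of the typed payload structs above, wrapping it in a
   CacheBinOperation (which tags it with OpTypeData::type and copies it into
   the union), and decoding it later with opCast on the aggregating thread:

       LargeMemoryBlock *lmb = NULL;
       OpGet data = {&lmb, size};       // typed payload
       CacheBinOperation op(data);      // tag + copy into the union
       // ... inside the functor:
       size_t sz = opCast<OpGet>(op).size;

   The union keeps sizeof(CacheBinOperation) bounded by the largest payload. */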
// Forward declarations
template <typename OpTypeData> OpTypeData& opCast(CacheBinOperation &op);

// Describes the aggregator operation
struct CacheBinOperation : public MallocAggregatedOperation<CacheBinOperation>::type {
    CacheBinOperationType type;

    template <typename OpTypeData>
    CacheBinOperation(OpTypeData &d, CacheBinOperationStatus st = CBST_WAIT) {
        opCast<OpTypeData>(*this) = d;
        type = OpTypeData::type;
        MallocAggregatedOperation<CacheBinOperation>::type::status = st;
    }
private:
    CacheBinOperationData data;

    template <typename OpTypeData>
    friend OpTypeData& opCast(CacheBinOperation &op);
};

// The opCast function could be a member of CacheBinOperation, but that would
// introduce a slight stylistic ambiguity: it would look like a getter (with a
// cast) for the CacheBinOperation::data member, yet it must return a reference
// to spare the code a lot of getter/setter calls. So a global cast in the
// style of static_cast (or reinterpret_cast) seems more readable and has more
// explicit semantics.
template <typename OpTypeData>
OpTypeData& opCast(CacheBinOperation &op) {
    return *reinterpret_cast<OpTypeData*>(&op.data);
}

/* ------------------------------------------------------------------------ */

#if __TBB_MALLOC_LOCACHE_STAT
std::atomic<intptr_t> mallocCalls, cacheHits;
std::atomic<intptr_t> memAllocKB, memHitKB;
#endif

// Overflow-aware comparison of cache timestamps: treats the values as points
// on a circle, so it stays correct when the time counter wraps around.
inline bool lessThanWithOverflow(intptr_t a, intptr_t b)
{
    return (a < b && (b - a < UINTPTR_MAX/2)) ||
           (a > b && (a - b > UINTPTR_MAX/2));
}

/* ----------------------------------- Operation processing methods ------------------------------------ */

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::commitOperation(CacheBinOperation *op) const
{
    // Make the result visible to the waiting thread before marking the operation done
    op->status.store(CBST_DONE, std::memory_order_release);
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const
{
    op->next = *opList;
    *opList = op;
}

template<typename Props> bool CacheBinFunctor<Props>::
    OperationPreprocessor::getFromPutList(CacheBinOperation *opGet, uintptr_t currTime)
{
    if ( head ) {
        uintptr_t age = head->age;
        LargeMemoryBlock *next = head->next;
        *opCast<OpGet>(*opGet).res = head;
        commitOperation( opGet );
        head = next;
        putListNum--;
        MALLOC_ASSERT( putListNum>=0, ASSERT_TEXT );

        // use moving average with current hit interval
        bin->updateMeanHitRange( currTime - age );
        return true;
    }
    return false;
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::addToPutList(LargeMemoryBlock *h, LargeMemoryBlock *t, int num)
{
    if ( head ) {
        MALLOC_ASSERT( tail, ASSERT_TEXT );
        tail->next = h;
        h->prev = tail;
        tail = t;
        putListNum += num;
    } else {
        head = h;
        tail = t;
        putListNum = num;
    }
}
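/* Illustrative note (not part of the original sources): the preprocessor runs
   before any global time is reserved, so it stamps operations with negative
   local times. For example, a batch of five time-consuming operations ends
   with lclTime == -5, so getTimeRange() returns 5; the functor then reserves
   [startTime+1, startTime+5] from the global clock via
   extMemPool->loc.getCurrTimeRange(5) and converts each local stamp to a
   global one by adding endTime = startTime + 5. */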
template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::operator()(CacheBinOperation* opList)
{
    for ( CacheBinOperation *op = opList, *opNext; op; op = opNext ) {
        opNext = op->next;
        switch ( op->type ) {
        case CBOP_GET:
            {
                lclTime--;
                if ( !lastGetOpTime ) {
                    lastGetOpTime = lclTime;
                    lastGet = 0;
                } else if ( !lastGet ) lastGet = lclTime;

                if ( !getFromPutList(op,lclTime) ) {
                    opCast<OpGet>(*op).currTime = lclTime;
                    addOpToOpList( op, &opGet );
                }
            }
            break;

        case CBOP_PUT_LIST:
            {
                LargeMemoryBlock *head = opCast<OpPutList>(*op).head;
                LargeMemoryBlock *curr = head, *prev = NULL;

                int num = 0;
                do {
                    // prev pointers are not kept while blocks are assigned to bins; set them now
                    curr->prev = prev;

                    // Save the local times to the memory blocks. Local times are necessary
                    // for the getFromPutList function, which updates the hit range value in
                    // CacheBin when OP_GET and OP_PUT_LIST operations are merged successfully.
                    // The age will be updated to the correct global time after preprocessing,
                    // when the global cache time is updated.
                    curr->age = --lclTime;

                    prev = curr;
                    num += 1;

                    STAT_increment(getThreadId(), ThreadCommonCounters, cacheLargeObj);
                } while ((curr = curr->next) != NULL);

                LargeMemoryBlock *tail = prev;
                addToPutList(head, tail, num);

                while ( opGet ) {
                    CacheBinOperation *next = opGet->next;
                    if ( !getFromPutList(opGet, opCast<OpGet>(*opGet).currTime) )
                        break;
                    opGet = next;
                }
            }
            break;

        case CBOP_UPDATE_USED_SIZE:
            updateUsedSize += opCast<OpUpdateUsedSize>(*op).size;
            commitOperation( op );
            break;

        case CBOP_CLEAN_ALL:
            isCleanAll = true;
            addOpToOpList( op, &opClean );
            break;

        case CBOP_CLEAN_TO_THRESHOLD:
            {
                uintptr_t currTime = opCast<OpCleanToThreshold>(*op).currTime;
                // We don't worry about currTime overflow since it is a rare
                // occurrence and doesn't affect correctness
                cleanTime = cleanTime < currTime ? currTime : cleanTime;
                addOpToOpList( op, &opClean );
            }
            break;

        default:
            MALLOC_ASSERT( false, "Unknown operation." );
        }
    }
    MALLOC_ASSERT( !( opGet && head ), "Not all put/get pairs are processed!" );
}
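/* Illustrative note (not part of the original sources): when a GET is
   satisfied from a pending PUT list inside the same batch, the hit interval
   fed to the moving average is the distance between the two local stamps.
   E.g. a block put at local time -5 and taken by a GET at local time -2
   contributes a hit interval of 3, exactly as if both stamps had already
   been converted to global time. */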
template<typename Props> void CacheBinFunctor<Props>::operator()(CacheBinOperation* opList)
{
    MALLOC_ASSERT( opList, "Empty operation list is passed into operation handler." );

    OperationPreprocessor prep(bin);
    prep(opList);

    if ( uintptr_t timeRange = prep.getTimeRange() ) {
        uintptr_t startTime = extMemPool->loc.getCurrTimeRange(timeRange);
        // endTime is used as the current (base) time since the local time is negative.
        uintptr_t endTime = startTime + timeRange;

        if ( prep.lastGetOpTime && prep.lastGet ) bin->setLastGet(prep.lastGet+endTime);

        if ( CacheBinOperation *opGet = prep.opGet ) {
            bool isEmpty = false;
            do {
#if __TBB_MALLOC_WHITEBOX_TEST
                tbbmalloc_whitebox::locGetProcessed++;
#endif
                const OpGet &opGetData = opCast<OpGet>(*opGet);
                if ( !isEmpty ) {
                    if ( LargeMemoryBlock *res = bin->get() ) {
                        uintptr_t getTime = opGetData.currTime + endTime;
                        // use moving average with current hit interval
                        bin->updateMeanHitRange( getTime - res->age);
                        bin->updateCachedSize( -opGetData.size );
                        *opGetData.res = res;
                    } else {
                        isEmpty = true;
                        uintptr_t lastGetOpTime = prep.lastGetOpTime+endTime;
                        bin->forgetOutdatedState(lastGetOpTime);
                        bin->updateAgeThreshold(lastGetOpTime);
                    }
                }

                CacheBinOperation *opNext = opGet->next;
                bin->updateUsedSize( opGetData.size, bitMask, idx );
                prep.commitOperation( opGet );
                opGet = opNext;
            } while ( opGet );
            if ( prep.lastGetOpTime )
                bin->setLastGet( prep.lastGetOpTime + endTime );
        } else if ( LargeMemoryBlock *curr = prep.head ) {
            curr->prev = NULL;
            while ( curr ) {
                // Update local times to global times
                curr->age += endTime;
                curr = curr->next;
            }
#if __TBB_MALLOC_WHITEBOX_TEST
            tbbmalloc_whitebox::locPutProcessed += prep.putListNum;
#endif
            toRelease = bin->putList(prep.head, prep.tail, bitMask, idx, prep.putListNum, extMemPool->loc.hugeSizeThreshold);
        }
        needCleanup = extMemPool->loc.isCleanupNeededOnRange(timeRange, startTime);
        currTime = endTime - 1;
    }

    if ( CacheBinOperation *opClean = prep.opClean ) {
        if ( prep.isCleanAll )
            *opCast<OpCleanAll>(*opClean).res = bin->cleanAll(bitMask, idx);
        else
            *opCast<OpCleanToThreshold>(*opClean).res = bin->cleanToThreshold(prep.cleanTime, bitMask, idx);

        CacheBinOperation *opNext = opClean->next;
        prep.commitOperation( opClean );

        while ((opClean = opNext) != NULL) {
            opNext = opClean->next;
            prep.commitOperation(opClean);
        }
    }

    if ( size_t size = prep.updateUsedSize )
        bin->updateUsedSize(size, bitMask, idx);
}

/* ----------------------------------------------------------------------------------------------------- */
/* --------------------------- Methods for creating and executing operations --------------------------- */

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime)
{
    CacheBinFunctor<Props> func( this, extMemPool, bitMask, idx );
    aggregator.execute( op, func, longLifeTime );

    if ( LargeMemoryBlock *toRelease = func.getToRelease()) {
        extMemPool->backend.returnLargeObject(toRelease);
    }

    if ( func.isCleanupNeeded() ) {
        extMemPool->loc.doCleanup( func.getCurrTime(), /*doThreshDecr=*/false);
    }
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *lmb = NULL;
    OpGet data = {&lmb, size};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
    return lmb;
}
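/* Illustrative note (not part of the original sources): ExecuteOperation is
   the synchronization point of the bin. A sketch of the round trip for a
   blocking operation such as get():

       CacheBinOperation op(data);          // status == CBST_WAIT, lives on the stack
       aggregator.execute(&op, func, ...);  // either this thread becomes the
                                            // aggregator and runs func over the
                                            // whole batch, or it waits until
                                            // commitOperation() publishes
                                            // CBST_DONE with release semantics

   Returning evicted blocks to the backend and triggering the periodic cleanup
   happen after execute() returns, outside the aggregated critical path. */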
template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx)
{
    MALLOC_ASSERT(sizeof(LargeMemoryBlock)+sizeof(CacheBinOperation)<=head->unalignedSize,
                  "CacheBinOperation is too large to be placed in LargeMemoryBlock!");

    OpPutList data = {head};
    CacheBinOperation *op = new (head+1) CacheBinOperation(data, CBST_NOWAIT);
    ExecuteOperation( op, extMemPool, bitMask, idx, false );
}

template<typename Props> bool LargeObjectCacheImpl<Props>::
    CacheBin::cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx)
{
    LargeMemoryBlock *toRelease = NULL;

    /* oldest may be more recent than currTime, which is why the cast to a
       signed type is used; age overflow is also handled correctly. */
    if (last && (intptr_t)(currTime - oldest) > ageThreshold) {
        OpCleanToThreshold data = {&toRelease, currTime};
        CacheBinOperation op(data);
        ExecuteOperation( &op, extMemPool, bitMask, idx );
    }
    bool released = toRelease;

    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}

template<typename Props> bool LargeObjectCacheImpl<Props>::
    CacheBin::releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *toRelease = NULL;

    if (last) {
        OpCleanAll data = {&toRelease};
        CacheBinOperation op(data);
        ExecuteOperation(&op, extMemPool, bitMask, idx);
    }
    bool released = toRelease;

    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        MALLOC_ASSERT(!helper || lessThanWithOverflow(helper->age, toRelease->age),
                      ASSERT_TEXT);
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    OpUpdateUsedSize data = {size};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
}
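/* Illustrative note (not part of the original sources): putList() above is
   fire-and-forget. The CacheBinOperation is placement-new'd into the user
   part of the first returned block rather than onto the caller's stack:

       |  LargeMemoryBlock header  |  CacheBinOperation  |  unused ...  |
       ^ head                      ^ head+1

   With CBST_NOWAIT the caller does not spin on the status; whichever thread
   becomes the aggregator consumes the operation, and the asserted size check
   guarantees the operation fits inside head->unalignedSize. */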
/* ------------------------------ Unsafe methods used with the aggregator ------------------------------ */

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask, int idx, int num, size_t hugeSizeThreshold)
{
    size_t size = head->unalignedSize;
    usedSize -= num*size;
    MALLOC_ASSERT( !last || (last->age != 0 && last->age != -1U), ASSERT_TEXT );
    MALLOC_ASSERT( (tail==head && num==1) || (tail!=head && num>1), ASSERT_TEXT );
    LargeMemoryBlock *toRelease = NULL;
    if (size < hugeSizeThreshold && !lastCleanedAge) {
        // The first object of this size was released.
        // Do not cache it, and remember when this occurred
        // to take it into account during a cache miss.
        lastCleanedAge = tail->age;
        toRelease = tail;
        tail = tail->prev;
        if (tail)
            tail->next = NULL;
        else
            head = NULL;
        num--;
    }
    if (num) {
        // add the [head;tail] list to the cache
        MALLOC_ASSERT( tail, ASSERT_TEXT );
        tail->next = first;
        if (first)
            first->prev = tail;
        first = head;
        if (!last) {
            MALLOC_ASSERT(0 == oldest, ASSERT_TEXT);
            oldest = tail->age;
            last = tail;
        }

        cachedSize += num*size;
    }

    // No objects are in use and nothing is cached; mark the bin as empty
    if (!usedSize && !first)
        bitMask->set(idx, false);

    return toRelease;
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::get()
{
    LargeMemoryBlock *result = first;
    if (result) {
        first = result->next;
        if (first)
            first->prev = NULL;
        else {
            last = NULL;
            oldest = 0;
        }
    }

    return result;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::forgetOutdatedState(uintptr_t currTime)
{
    // If the time since the last get is LongWaitFactor times more than ageThreshold
    // for the bin, treat the bin as rarely used and forget everything we know
    // about it.
    // If LongWaitFactor is too small, we forget too early, which
    // prevents good caching; if it is too high, blocks with unrelated
    // usage patterns get cached.
    const uintptr_t sinceLastGet = currTime - lastGet;
    bool doCleanup = false;

    if (ageThreshold)
        doCleanup = sinceLastGet > Props::LongWaitFactor * ageThreshold;
    else if (lastCleanedAge)
        doCleanup = sinceLastGet > Props::LongWaitFactor * (lastCleanedAge - lastGet);

    if (doCleanup) {
        lastCleanedAge = 0;
        ageThreshold = 0;
    }
}
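/* Illustrative note (not part of the original sources): with, say,
   Props::LongWaitFactor == 2 and ageThreshold == 1000 time units, a bin
   whose last get() happened more than 2000 units ago drops both
   ageThreshold and lastCleanedAge, so its adaptive state is rebuilt from
   scratch the next time the bin is used. */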
template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx)
{
    /* oldest may be more recent than currTime, which is why the cast to a
       signed type is used; age overflow is also handled correctly. */
    if ( !last || (intptr_t)(currTime - last->age) < ageThreshold ) return NULL;

#if MALLOC_DEBUG
    uintptr_t nextAge = 0;
#endif
    do {
#if MALLOC_DEBUG
        // check that the list is ordered
        MALLOC_ASSERT(!nextAge || lessThanWithOverflow(nextAge, last->age),
                      ASSERT_TEXT);
        nextAge = last->age;
#endif
        cachedSize -= last->unalignedSize;
        last = last->prev;
    } while (last && (intptr_t)(currTime - last->age) > ageThreshold);

    LargeMemoryBlock *toRelease = NULL;
    if (last) {
        toRelease = last->next;
        oldest = last->age;
        last->next = NULL;
    } else {
        toRelease = first;
        first = NULL;
        oldest = 0;
        if (!usedSize)
            bitMask->set(idx, false);
    }
    MALLOC_ASSERT( toRelease, ASSERT_TEXT );
    lastCleanedAge = toRelease->age;

    return toRelease;
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::cleanAll(BinBitMask *bitMask, int idx)
{
    if (!last) return NULL;

    LargeMemoryBlock *toRelease = first;
    last = NULL;
    first = NULL;
    oldest = 0;
    cachedSize = 0;
    if (!usedSize)
        bitMask->set(idx, false);

    return toRelease;
}

/* ----------------------------------------------------------------------------------------------------- */

template<typename Props> size_t LargeObjectCacheImpl<Props>::
    CacheBin::reportStat(int num, FILE *f)
{
#if __TBB_MALLOC_LOCACHE_STAT
    if (first)
        fprintf(f, "%d(%lu): total %lu KB thr %ld lastCln %lu oldest %lu\n",
                num, num*Props::CacheStep+Props::MinSize,
                cachedSize/1024, ageThreshold, lastCleanedAge, oldest);
#else
    suppress_unused_warning(num);
    suppress_unused_warning(f);
#endif
    return cachedSize;
}
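/* Illustrative note (not part of the original sources): the cleanup pass
   below never touches empty bins. bitMask keeps one bit per bin, and
   getMaxTrue(startIdx) returns the highest set bit at or below startIdx,
   so the iteration visits occupied bins only, highest first:

       for (int i = bitMask.getMaxTrue(start); i >= 0; i = bitMask.getMaxTrue(i-1))
           // ... visit bin[i] ...
*/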
// Release objects from cache blocks that are older than ageThreshold
template<typename Props>
bool LargeObjectCacheImpl<Props>::regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currTime, bool doThreshDecr)
{
    bool released = false;
    BinsSummary binsSummary;

    // The threshold is set below this cache's range, i.e. it starts at the zero index; nothing to clean
    if (hugeSizeThresholdIdx == 0) return false;

    // Start searching from the bin just below the huge size threshold (such bins can be cleaned up)
    int startSearchIdx = hugeSizeThresholdIdx - 1;

    for (int i = bitMask.getMaxTrue(startSearchIdx); i >= 0; i = bitMask.getMaxTrue(i-1)) {
        bin[i].updateBinsSummary(&binsSummary);
        if (!doThreshDecr && tooLargeLOC.load(std::memory_order_relaxed) > 2 && binsSummary.isLOCTooLarge()) {
            // If the LOC has been too large for quite a long time, decrease the
            // threshold based on bin hit statistics.
            // For this, redo the cleanup from the beginning.
            // Note: on this iteration the total usedSz may not be too large
            // in comparison to the total cachedSz, as we have calculated it
            // only partially. We are OK with that.
            i = bitMask.getMaxTrue(startSearchIdx)+1;
            doThreshDecr = true;
            binsSummary.reset();
            continue;
        }
        if (doThreshDecr)
            bin[i].decreaseThreshold();

        if (bin[i].cleanToThreshold(extMemPool, &bitMask, currTime, i)) {
            released = true;
        }
    }

    // We want to find out whether the LOC has been too large for some time
    // continuously, so races between incrementing and zeroing are acceptable,
    // but the increment must be atomic.
    if (binsSummary.isLOCTooLarge()) {
        tooLargeLOC++;
    } else {
        tooLargeLOC.store(0, std::memory_order_relaxed);
    }
    return released;
}

template<typename Props>
bool LargeObjectCacheImpl<Props>::cleanAll(ExtMemoryPool *extMemPool)
{
    bool released = false;
    for (int i = numBins-1; i >= 0; i--) {
        released |= bin[i].releaseAllToBackend(extMemPool, &bitMask, i);
    }
    return released;
}

template<typename Props>
void LargeObjectCacheImpl<Props>::reset() {
    tooLargeLOC.store(0, std::memory_order_relaxed);
    for (int i = numBins-1; i >= 0; i--)
        bin[i].init();
    bitMask.reset();
}

#if __TBB_MALLOC_WHITEBOX_TEST
template<typename Props>
size_t LargeObjectCacheImpl<Props>::getLOCSize() const
{
    size_t size = 0;
    for (int i = numBins-1; i >= 0; i--)
        size += bin[i].getSize();
    return size;
}

size_t LargeObjectCache::getLOCSize() const
{
    return largeCache.getLOCSize() + hugeCache.getLOCSize();
}

template<typename Props>
size_t LargeObjectCacheImpl<Props>::getUsedSize() const
{
    size_t size = 0;
    for (int i = numBins-1; i >= 0; i--)
        size += bin[i].getUsedSize();
    return size;
}

size_t LargeObjectCache::getUsedSize() const
{
    return largeCache.getUsedSize() + hugeCache.getUsedSize();
}
#endif // __TBB_MALLOC_WHITEBOX_TEST

inline bool LargeObjectCache::isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime)
{
    return range >= cacheCleanupFreq
        || currTime+range < currTime-1 // overflow; 0 is a power of 2, do cleanup
        // (prev; prev+range] contains n*cacheCleanupFreq
        || alignUp(currTime, cacheCleanupFreq) < currTime+range;
}
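/* Illustrative note (not part of the original sources): with, e.g.,
   cacheCleanupFreq == 256, a step from currTime == 250 over range == 10
   crosses the multiple 256 (alignUp(250, 256) == 256 < 260), so cleanup is
   requested; a step from 250 over range == 4 is not enough (256 < 254 is
   false). The first clause fires whenever the range itself spans at least
   one full period, and the second one handles counter wrap-around. */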
bool LargeObjectCache::doCleanup(uintptr_t currTime, bool doThreshDecr)
{
    if (!doThreshDecr)
        extMemPool->allLocalCaches.markUnused();
    return largeCache.regularCleanup(extMemPool, currTime, doThreshDecr)
         | hugeCache.regularCleanup(extMemPool, currTime, doThreshDecr);
}

bool LargeObjectCache::decreasingCleanup()
{
    return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/true);
}

bool LargeObjectCache::regularCleanup()
{
    return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/false);
}

bool LargeObjectCache::cleanAll()
{
    return largeCache.cleanAll(extMemPool) | hugeCache.cleanAll(extMemPool);
}

void LargeObjectCache::reset()
{
    largeCache.reset();
    hugeCache.reset();
}

template<typename Props>
LargeMemoryBlock *LargeObjectCacheImpl<Props>::get(ExtMemoryPool *extMemoryPool, size_t size)
{
    int idx = Props::sizeToIdx(size);

    LargeMemoryBlock *lmb = bin[idx].get(extMemoryPool, size, &bitMask, idx);

    if (lmb) {
        MALLOC_ITT_SYNC_ACQUIRED(bin+idx);
        STAT_increment(getThreadId(), ThreadCommonCounters, allocCachedLargeObj);
    }
    return lmb;
}

template<typename Props>
void LargeObjectCacheImpl<Props>::updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size)
{
    int idx = Props::sizeToIdx(size);
    MALLOC_ASSERT(idx<numBins, ASSERT_TEXT);
    bin[idx].updateUsedSize(extMemPool, op==decrease? -size : size, &bitMask, idx);
}

#if __TBB_MALLOC_LOCACHE_STAT
template<typename Props>
void LargeObjectCacheImpl<Props>::reportStat(FILE *f)
{
    size_t cachedSize = 0;
    for (int i=0; i<numBins; i++)
        cachedSize += bin[i].reportStat(i, f);
    fprintf(f, "total LOC size %lu MB\n", cachedSize/1024/1024);
}

void LargeObjectCache::reportStat(FILE *f)
{
    largeCache.reportStat(f);
    hugeCache.reportStat(f);
    fprintf(f, "cache time %lu\n", cacheCurrTime.load(std::memory_order_relaxed));
}
#endif

template<typename Props>
void LargeObjectCacheImpl<Props>::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *toCache)
{
    int toBinIdx = Props::sizeToIdx(toCache->unalignedSize);

    MALLOC_ITT_SYNC_RELEASING(bin+toBinIdx);
    bin[toBinIdx].putList(extMemPool, toCache, &bitMask, toBinIdx);
}

void LargeObjectCache::updateCacheState(DecreaseOrIncrease op, size_t size)
{
    if (size < maxLargeSize)
        largeCache.updateCacheState(extMemPool, op, size);
    else if (size < maxHugeSize)
        hugeCache.updateCacheState(extMemPool, op, size);
}

uintptr_t LargeObjectCache::getCurrTime()
{
    return ++cacheCurrTime;
}

uintptr_t LargeObjectCache::getCurrTimeRange(uintptr_t range)
{
    return cacheCurrTime.fetch_add(range) + 1;
}

void LargeObjectCache::registerRealloc(size_t oldSize, size_t newSize)
{
    updateCacheState(decrease, oldSize);
    updateCacheState(increase, alignToBin(newSize));
}

size_t LargeObjectCache::alignToBin(size_t size) {
    return size < maxLargeSize ? LargeCacheType::alignToBin(size) : HugeCacheType::alignToBin(size);
}

// Used for internal purposes
int LargeObjectCache::sizeToIdx(size_t size)
{
    MALLOC_ASSERT(size <= maxHugeSize, ASSERT_TEXT);
    return size < maxLargeSize ?
        LargeCacheType::sizeToIdx(size) :
        LargeCacheType::numBins + HugeCacheType::sizeToIdx(size);
}
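/* Illustrative note (not part of the original sources): sizeToIdx produces a
   single flat index space over both caches. Large sizes map to
   [0, LargeCacheType::numBins), and huge sizes continue right after them at
   LargeCacheType::numBins + HugeCacheType::sizeToIdx(size). For example, the
   smallest huge size lands at flat index LargeCacheType::numBins, i.e. the
   first huge bin. */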
void LargeObjectCache::putList(LargeMemoryBlock *list)
{
    LargeMemoryBlock *toProcess, *n;

    for (LargeMemoryBlock *curr = list; curr; curr = toProcess) {
        LargeMemoryBlock *tail = curr;
        toProcess = curr->next;
        if (!sizeInCacheRange(curr->unalignedSize)) {
            extMemPool->backend.returnLargeObject(curr);
            continue;
        }
        int currIdx = sizeToIdx(curr->unalignedSize);

        // Find all blocks that fit into the same bin. A more efficient sorting
        // algorithm is not used because the list is short (commonly,
        // LocalLOC's HIGH_MARK-LOW_MARK, i.e. 24 items).
        for (LargeMemoryBlock *b = toProcess; b; b = n) {
            n = b->next;
            if (sizeToIdx(b->unalignedSize) == currIdx) {
                tail->next = b;
                tail = b;
                if (toProcess == b)
                    toProcess = toProcess->next;
                else {
                    b->prev->next = b->next;
                    if (b->next)
                        b->next->prev = b->prev;
                }
            }
        }
        tail->next = NULL;
        if (curr->unalignedSize < maxLargeSize)
            largeCache.putList(extMemPool, curr);
        else
            hugeCache.putList(extMemPool, curr);
    }
}

void LargeObjectCache::put(LargeMemoryBlock *largeBlock)
{
    size_t blockSize = largeBlock->unalignedSize;
    if (sizeInCacheRange(blockSize)) {
        largeBlock->next = NULL;
        if (blockSize < maxLargeSize)
            largeCache.putList(extMemPool, largeBlock);
        else
            hugeCache.putList(extMemPool, largeBlock);
    } else {
        extMemPool->backend.returnLargeObject(largeBlock);
    }
}

LargeMemoryBlock *LargeObjectCache::get(size_t size)
{
    MALLOC_ASSERT( size >= minLargeSize, ASSERT_TEXT );
    if (sizeInCacheRange(size)) {
        return size < maxLargeSize ?
            largeCache.get(extMemPool, size) : hugeCache.get(extMemPool, size);
    }
    return NULL;
}

LargeMemoryBlock *ExtMemoryPool::mallocLargeObject(MemoryPool *pool, size_t allocationSize)
{
#if __TBB_MALLOC_LOCACHE_STAT
    mallocCalls++;
    memAllocKB.fetch_add(allocationSize/1024);
#endif
    LargeMemoryBlock* lmb = loc.get(allocationSize);
    if (!lmb) {
        BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true);
        if (backRefIdx.isInvalid())
            return NULL;

        // unalignedSize is set in getLargeBlock
        lmb = backend.getLargeBlock(allocationSize);
        if (!lmb) {
            removeBackRef(backRefIdx);
            loc.updateCacheState(decrease, allocationSize);
            return NULL;
        }
        lmb->backRefIdx = backRefIdx;
        lmb->pool = pool;
        STAT_increment(getThreadId(), ThreadCommonCounters, allocNewLargeObj);
    } else {
#if __TBB_MALLOC_LOCACHE_STAT
        cacheHits++;
        memHitKB.fetch_add(allocationSize/1024);
#endif
    }
    return lmb;
}

void ExtMemoryPool::freeLargeObject(LargeMemoryBlock *mBlock)
{
    loc.put(mBlock);
}

void ExtMemoryPool::freeLargeObjectList(LargeMemoryBlock *head)
{
    loc.putList(head);
}

bool ExtMemoryPool::softCachesCleanup()
{
    return loc.regularCleanup();
}

bool ExtMemoryPool::hardCachesCleanup()
{
    // Thread-local caches must be cleaned before the LOC,
    // because objects from a thread-local cache can be released to the LOC.
    bool ret = releaseAllLocalCaches();
    ret |= orphanedBlocks.cleanup(&backend);
    ret |= loc.cleanAll();
    ret |= backend.clean();
    return ret;
}

#if BACKEND_HAS_MREMAP
void *ExtMemoryPool::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
{
    const size_t oldUnalignedSize = ((LargeObjectHdr*)ptr - 1)->memoryBlock->unalignedSize;
    void *o = backend.remap(ptr, oldSize, newSize, alignment);
    if (o) {
        LargeMemoryBlock *lmb = ((LargeObjectHdr*)o - 1)->memoryBlock;
        loc.registerRealloc(oldUnalignedSize, lmb->unalignedSize);
    }
    return o;
}
#endif /* BACKEND_HAS_MREMAP */

/*********** End allocation of large objects **********/

} // namespace internal
} // namespace rml

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma warning(pop)
#endif