/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include "../src/tbb/environment.h"

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
// Suppress warning: unary minus operator applied to unsigned type, result still unsigned
// TBB_REVAMP_TODO: review this warning
#pragma warning(push)
#pragma warning(disable:4146)
#endif

/******************************* Allocation of large objects *********************************************/

namespace rml {
namespace internal {

/* ---------------------------- Large Object cache init section ---------------------------------------- */

void LargeObjectCache::init(ExtMemoryPool *memPool)
{
    extMemPool = memPool;
    // scalable_allocation_mode can be called before allocator initialization; respect this manual request
    if (hugeSizeThreshold == 0) {
        // Initialize the huge size threshold if the environment variable was set
        long requestedThreshold = tbb::detail::r1::GetIntegralEnvironmentVariable("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD");
        // Use a valid value from the environment, or default to the maximum possible value
        if (requestedThreshold != -1) {
            setHugeSizeThreshold(requestedThreshold);
        } else {
            setHugeSizeThreshold(maxHugeSize);
        }
    }
}

/* ----------------------------- Huge size threshold settings ----------------------------------------- */

void LargeObjectCache::setHugeSizeThreshold(size_t value)
{
    // Valid in the huge cache range: [MaxLargeSize, MaxHugeSize].
    if (value <= maxHugeSize) {
        hugeSizeThreshold = value >= maxLargeSize ? alignToBin(value) : maxLargeSize;

        // Calculate local indices for the global threshold size (for fast search inside a regular cleanup)
        largeCache.hugeSizeThresholdIdx = LargeCacheType::numBins;
        hugeCache.hugeSizeThresholdIdx = HugeCacheType::sizeToIdx(hugeSizeThreshold);
    }
}

bool LargeObjectCache::sizeInCacheRange(size_t size)
{
    return size < maxHugeSize && (size <= defaultMaxHugeSize || size >= hugeSizeThreshold);
}

/* ----------------------------------------------------------------------------------------------------- */

/* The functor called by the aggregator for the operation list */
template<typename Props>
class CacheBinFunctor {
    typename LargeObjectCacheImpl<Props>::CacheBin *const bin;
    ExtMemoryPool *const extMemPool;
    typename LargeObjectCacheImpl<Props>::BinBitMask *const bitMask;
    const int idx;

    LargeMemoryBlock *toRelease;
    bool needCleanup;
    uintptr_t currTime;

    /* Preprocessing of the operation list. */
    /* All the OP_PUT_LIST operations are merged into one operation.
       All OP_GET operations are merged with the OP_PUT_LIST operations, but
       this requires updating the moving average value in the bin.
       Only the last OP_CLEAN_TO_THRESHOLD operation is meaningful.
       The OP_CLEAN_ALL operation should also be performed only once.
       Moreover, it cancels the OP_CLEAN_TO_THRESHOLD operation. */
    class OperationPreprocessor {
        // TODO: remove the dependency on CacheBin.
        typename LargeObjectCacheImpl<Props>::CacheBin *const bin;

        /* Contains the relative time in the operation list.
           It counts in reverse order since the aggregator also
           provides operations in reverse order. */
        uintptr_t lclTime;

        /* opGet contains only OP_GET operations that cannot be merged with OP_PUT operations;
           opClean contains all OP_CLEAN_TO_THRESHOLD and OP_CLEAN_ALL operations. */
        CacheBinOperation *opGet, *opClean;
        /* The time of the last OP_CLEAN_TO_THRESHOLD operation */
        uintptr_t cleanTime;

        /* lastGetOpTime - the time of the last OP_GET operation.
           lastGet - the same meaning as CacheBin::lastGet */
        uintptr_t lastGetOpTime, lastGet;

        /* The total sum of all usedSize changes requested with CBOP_UPDATE_USED_SIZE operations. */
        size_t updateUsedSize;

        /* The list of blocks for the OP_PUT_LIST operation. */
        LargeMemoryBlock *head, *tail;
        int putListNum;

        /* True if OP_CLEAN_ALL is requested. */
        bool isCleanAll;

        inline void commitOperation(CacheBinOperation *op) const;
        inline void addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const;
        bool getFromPutList(CacheBinOperation* opGet, uintptr_t currTime);
        void addToPutList( LargeMemoryBlock *head, LargeMemoryBlock *tail, int num );

    public:
        OperationPreprocessor(typename LargeObjectCacheImpl<Props>::CacheBin *bin) :
            bin(bin), lclTime(0), opGet(nullptr), opClean(nullptr), cleanTime(0),
            lastGetOpTime(0), lastGet(0), updateUsedSize(0), head(nullptr), tail(nullptr), putListNum(0), isCleanAll(false) {}
        void operator()(CacheBinOperation* opList);
        uintptr_t getTimeRange() const { return -lclTime; }

        friend class CacheBinFunctor;
    };

public:
    CacheBinFunctor(typename LargeObjectCacheImpl<Props>::CacheBin *bin, ExtMemoryPool *extMemPool,
                    typename LargeObjectCacheImpl<Props>::BinBitMask *bitMask, int idx) :
        bin(bin), extMemPool(extMemPool), bitMask(bitMask), idx(idx), toRelease(nullptr), needCleanup(false) {}
    void operator()(CacheBinOperation* opList);

    bool isCleanupNeeded() const { return needCleanup; }
    LargeMemoryBlock *getToRelease() const { return toRelease; }
    uintptr_t getCurrTime() const { return currTime; }
};

/* ---------------- Cache Bin Aggregator Operation Helpers ---------------- */

// The list of structures that describe the operation data
struct OpGet {
    static const CacheBinOperationType type = CBOP_GET;
    LargeMemoryBlock **res;
    size_t size;
    uintptr_t currTime;
};

struct OpPutList {
    static const CacheBinOperationType type = CBOP_PUT_LIST;
    LargeMemoryBlock *head;
};

struct OpCleanToThreshold {
    static const CacheBinOperationType type = CBOP_CLEAN_TO_THRESHOLD;
    LargeMemoryBlock **res;
    uintptr_t currTime;
};

struct OpCleanAll {
    static const CacheBinOperationType type = CBOP_CLEAN_ALL;
    LargeMemoryBlock **res;
};

struct OpUpdateUsedSize {
    static const CacheBinOperationType type = CBOP_UPDATE_USED_SIZE;
    size_t size;
};

union CacheBinOperationData {
private:
    OpGet opGet;
    OpPutList opPutList;
    OpCleanToThreshold opCleanToThreshold;
    OpCleanAll opCleanAll;
    OpUpdateUsedSize opUpdateUsedSize;
};

// Forward declarations
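// opCast is declared before CacheBinOperation because the operation's constructor
// uses it (and befriends it); its definition follows the struct since it needs
// access to the private data member.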
template <typename OpTypeData> OpTypeData& opCast(CacheBinOperation &op);

// Describes the aggregator operation
struct CacheBinOperation : public MallocAggregatedOperation<CacheBinOperation>::type {
    CacheBinOperationType type;

    template <typename OpTypeData>
    CacheBinOperation(OpTypeData &d, CacheBinOperationStatus st = CBST_WAIT) {
        opCast<OpTypeData>(*this) = d;
        type = OpTypeData::type;
        MallocAggregatedOperation<CacheBinOperation>::type::status = st;
    }
private:
    CacheBinOperationData data;

    template <typename OpTypeData>
    friend OpTypeData& opCast(CacheBinOperation &op);
};

// The opCast function could be a member of CacheBinOperation, but that would
// introduce a slight stylistic ambiguity: it would look like a getter (with a
// cast) for the CacheBinOperation::data member, yet it has to return a
// reference to keep the code free of numerous getter/setter calls. So a global
// cast in the style of static_cast (or reinterpret_cast) seems more readable
// and has more explicit semantics.
template <typename OpTypeData>
OpTypeData& opCast(CacheBinOperation &op) {
    return *reinterpret_cast<OpTypeData*>(&op.data);
}

/* ------------------------------------------------------------------------ */

#if __TBB_MALLOC_LOCACHE_STAT
//intptr_t mallocCalls, cacheHits;
std::atomic<intptr_t> mallocCalls, cacheHits;
//intptr_t memAllocKB, memHitKB;
std::atomic<intptr_t> memAllocKB, memHitKB;
#endif

#if MALLOC_DEBUG
inline bool lessThanWithOverflow(intptr_t a, intptr_t b)
{
    return (a < b && (b - a < static_cast<intptr_t>(UINTPTR_MAX/2))) ||
           (a > b && (a - b > static_cast<intptr_t>(UINTPTR_MAX/2)));
}
#endif

/* ----------------------------------- Operation processing methods ------------------------------------ */

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::commitOperation(CacheBinOperation *op) const
{
    // FencedStore( (intptr_t&)(op->status), CBST_DONE );
    op->status.store(CBST_DONE, std::memory_order_release);
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const
{
    op->next = *opList;
    *opList = op;
}

template<typename Props> bool CacheBinFunctor<Props>::
    OperationPreprocessor::getFromPutList(CacheBinOperation *opGet, uintptr_t currTime)
{
    if ( head ) {
        uintptr_t age = head->age;
        LargeMemoryBlock *next = head->next;
        *opCast<OpGet>(*opGet).res = head;
        commitOperation( opGet );
        head = next;
        putListNum--;
        MALLOC_ASSERT( putListNum>=0, ASSERT_TEXT );

        // use moving average with current hit interval
        bin->updateMeanHitRange( currTime - age );
        return true;
    }
    return false;
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::addToPutList(LargeMemoryBlock *h, LargeMemoryBlock *t, int num)
{
    if ( head ) {
        MALLOC_ASSERT( tail, ASSERT_TEXT );
        tail->next = h;
        h->prev = tail;
        tail = t;
        putListNum += num;
    } else {
        head = h;
        tail = t;
        putListNum = num;
    }
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::operator()(CacheBinOperation* opList)
{
    for ( CacheBinOperation *op = opList, *opNext; op; op = opNext ) {
        opNext = op->next;
        switch ( op->type ) {
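        // Fold each queued operation into the preprocessor state. lclTime decreases
        // by one step per OP_GET and per put block, so getTimeRange() later reports
        // how many time stamps the whole batch consumed.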
        case CBOP_GET:
            {
                lclTime--;
                if ( !lastGetOpTime ) {
                    lastGetOpTime = lclTime;
                    lastGet = 0;
                } else if ( !lastGet ) lastGet = lclTime;

                if ( !getFromPutList(op,lclTime) ) {
                    opCast<OpGet>(*op).currTime = lclTime;
                    addOpToOpList( op, &opGet );
                }
            }
            break;

        case CBOP_PUT_LIST:
            {
                LargeMemoryBlock *head = opCast<OpPutList>(*op).head;
                LargeMemoryBlock *curr = head, *prev = nullptr;

                int num = 0;
                do {
                    // prev pointers are not kept while assigning blocks to bins, so set them now
                    curr->prev = prev;

                    // Save the local times to the memory blocks. Local times are necessary
                    // for the getFromPutList function which updates the hit range value in
                    // CacheBin when OP_GET and OP_PUT_LIST operations are merged successfully.
                    // The age will be updated to the correct global time after preprocessing,
                    // when the global cache time is updated.
                    curr->age = --lclTime;

                    prev = curr;
                    num += 1;

                    STAT_increment(getThreadId(), ThreadCommonCounters, cacheLargeObj);
                } while ((curr = curr->next) != nullptr);

                LargeMemoryBlock *tail = prev;
                addToPutList(head, tail, num);

                while ( opGet ) {
                    CacheBinOperation *next = opGet->next;
                    if ( !getFromPutList(opGet, opCast<OpGet>(*opGet).currTime) )
                        break;
                    opGet = next;
                }
            }
            break;

        case CBOP_UPDATE_USED_SIZE:
            updateUsedSize += opCast<OpUpdateUsedSize>(*op).size;
            commitOperation( op );
            break;

        case CBOP_CLEAN_ALL:
            isCleanAll = true;
            addOpToOpList( op, &opClean );
            break;

        case CBOP_CLEAN_TO_THRESHOLD:
            {
                uintptr_t currTime = opCast<OpCleanToThreshold>(*op).currTime;
                // We don't worry about currTime overflow since it is a rare
                // occurrence and doesn't affect correctness
                cleanTime = cleanTime < currTime ? currTime : cleanTime;
                addOpToOpList( op, &opClean );
            }
            break;

        default:
            MALLOC_ASSERT( false, "Unknown operation." );
        }
    }
    MALLOC_ASSERT( !( opGet && head ), "Not all put/get pairs are processed!" );
}

template<typename Props> void CacheBinFunctor<Props>::operator()(CacheBinOperation* opList)
{
    MALLOC_ASSERT( opList, "Empty operation list is passed into operation handler." );

    OperationPreprocessor prep(bin);
    prep(opList);

    if ( uintptr_t timeRange = prep.getTimeRange() ) {
        uintptr_t startTime = extMemPool->loc.getCurrTimeRange(timeRange);
        // endTime is used as the current (base) time since the local times are negative.
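        // Local times recorded during preprocessing are negative offsets from endTime,
        // so adding endTime below converts them to the global cache time.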
        uintptr_t endTime = startTime + timeRange;

        if ( prep.lastGetOpTime && prep.lastGet ) bin->setLastGet(prep.lastGet+endTime);

        if ( CacheBinOperation *opGet = prep.opGet ) {
            bool isEmpty = false;
            do {
#if __TBB_MALLOC_WHITEBOX_TEST
                tbbmalloc_whitebox::locGetProcessed++;
#endif
                const OpGet &opGetData = opCast<OpGet>(*opGet);
                if ( !isEmpty ) {
                    if ( LargeMemoryBlock *res = bin->get() ) {
                        uintptr_t getTime = opGetData.currTime + endTime;
                        // use moving average with current hit interval
                        bin->updateMeanHitRange( getTime - res->age );
                        bin->updateCachedSize( -opGetData.size );
                        *opGetData.res = res;
                    } else {
                        isEmpty = true;
                        uintptr_t lastGetOpTime = prep.lastGetOpTime+endTime;
                        bin->forgetOutdatedState(lastGetOpTime);
                        bin->updateAgeThreshold(lastGetOpTime);
                    }
                }

                CacheBinOperation *opNext = opGet->next;
                bin->updateUsedSize( opGetData.size, bitMask, idx );
                prep.commitOperation( opGet );
                opGet = opNext;
            } while ( opGet );
            if ( prep.lastGetOpTime )
                bin->setLastGet( prep.lastGetOpTime + endTime );
        } else if ( LargeMemoryBlock *curr = prep.head ) {
            curr->prev = nullptr;
            while ( curr ) {
                // Update local times to global times
                curr->age += endTime;
                curr = curr->next;
            }
#if __TBB_MALLOC_WHITEBOX_TEST
            tbbmalloc_whitebox::locPutProcessed += prep.putListNum;
#endif
            toRelease = bin->putList(prep.head, prep.tail, bitMask, idx, prep.putListNum, extMemPool->loc.hugeSizeThreshold);
        }
        needCleanup = extMemPool->loc.isCleanupNeededOnRange(timeRange, startTime);
        currTime = endTime - 1;
    }

    if ( CacheBinOperation *opClean = prep.opClean ) {
        if ( prep.isCleanAll )
            *opCast<OpCleanAll>(*opClean).res = bin->cleanAll(bitMask, idx);
        else
            *opCast<OpCleanToThreshold>(*opClean).res = bin->cleanToThreshold(prep.cleanTime, bitMask, idx);

        CacheBinOperation *opNext = opClean->next;
        prep.commitOperation( opClean );

        while ((opClean = opNext) != nullptr) {
            opNext = opClean->next;
            prep.commitOperation(opClean);
        }
    }

    if ( size_t size = prep.updateUsedSize )
        bin->updateUsedSize(size, bitMask, idx);
}

/* ----------------------------------------------------------------------------------------------------- */
/* --------------------------- Methods for creating and executing operations --------------------------- */

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime)
{
    CacheBinFunctor<Props> func( this, extMemPool, bitMask, idx );
    aggregator.execute( op, func, longLifeTime );

    if ( LargeMemoryBlock *toRelease = func.getToRelease()) {
        extMemPool->backend.returnLargeObject(toRelease);
    }

    if ( func.isCleanupNeeded() ) {
        extMemPool->loc.doCleanup( func.getCurrTime(), /*doThreshDecr=*/false);
    }
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *lmb = nullptr;
    OpGet data = {&lmb, size, static_cast<uintptr_t>(0)};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
    return lmb;
}

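// The put operation is constructed in place right after the header of the first
// cached block (head+1), so a fire-and-forget put needs no extra allocation; the
// assertion below checks that the block is large enough for this.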
template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx)
{
    MALLOC_ASSERT(sizeof(LargeMemoryBlock)+sizeof(CacheBinOperation)<=head->unalignedSize, "CacheBinOperation is too large to be placed in LargeMemoryBlock!");

    OpPutList data = {head};
    CacheBinOperation *op = new (head+1) CacheBinOperation(data, CBST_NOWAIT);
    ExecuteOperation( op, extMemPool, bitMask, idx, false );
}

template<typename Props> bool LargeObjectCacheImpl<Props>::
    CacheBin::cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx)
{
    LargeMemoryBlock *toRelease = nullptr;

    /* oldest may be more recent than age; that's why a cast to a signed type
       is used. age overflow is also processed correctly. */
    if (last.load(std::memory_order_relaxed) &&
        (intptr_t)(currTime - oldest.load(std::memory_order_relaxed)) > ageThreshold.load(std::memory_order_relaxed)) {
        OpCleanToThreshold data = {&toRelease, currTime};
        CacheBinOperation op(data);
        ExecuteOperation( &op, extMemPool, bitMask, idx );
    }
    bool released = toRelease;

    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}

template<typename Props> bool LargeObjectCacheImpl<Props>::
    CacheBin::releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *toRelease = nullptr;

    if (last.load(std::memory_order_relaxed)) {
        OpCleanAll data = {&toRelease};
        CacheBinOperation op(data);
        ExecuteOperation(&op, extMemPool, bitMask, idx);
    }
    bool released = toRelease;

    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        MALLOC_ASSERT(!helper || lessThanWithOverflow(helper->age, toRelease->age),
                      ASSERT_TEXT);
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    OpUpdateUsedSize data = {size};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
}

/* ------------------------------ Unsafe methods used with the aggregator ------------------------------ */

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask, int idx, int num, size_t hugeSizeThreshold)
{
    size_t size = head->unalignedSize;
    usedSize.store(usedSize.load(std::memory_order_relaxed) - num * size, std::memory_order_relaxed);
    MALLOC_ASSERT( !last.load(std::memory_order_relaxed) ||
        (last.load(std::memory_order_relaxed)->age != 0 && last.load(std::memory_order_relaxed)->age != -1U), ASSERT_TEXT );
    MALLOC_ASSERT( (tail==head && num==1) || (tail!=head && num>1), ASSERT_TEXT );
    MALLOC_ASSERT( tail, ASSERT_TEXT );
    LargeMemoryBlock *toRelease = nullptr;
    if (size < hugeSizeThreshold && !lastCleanedAge) {
        // The first object of this size is released.
        // Do not cache it, and remember when this occurred
        // to take it into account on a cache miss.
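        // The list tail carries the oldest local time assigned during preprocessing,
        // so it is the block selected for release here.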
        lastCleanedAge = tail->age;
        toRelease = tail;
        tail = tail->prev;
        if (tail)
            tail->next = nullptr;
        else
            head = nullptr;
        num--;
    }
    if (num) {
        // add the [head;tail] list to the cache
        tail->next = first;
        if (first)
            first->prev = tail;
        first = head;
        if (!last.load(std::memory_order_relaxed)) {
            MALLOC_ASSERT(0 == oldest.load(std::memory_order_relaxed), ASSERT_TEXT);
            oldest.store(tail->age, std::memory_order_relaxed);
            last.store(tail, std::memory_order_relaxed);
        }

        cachedSize.store(cachedSize.load(std::memory_order_relaxed) + num * size, std::memory_order_relaxed);
    }

    // No used objects and nothing in the bin; mark the bin as empty
    if (!usedSize.load(std::memory_order_relaxed) && !first)
        bitMask->set(idx, false);

    return toRelease;
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::get()
{
    LargeMemoryBlock *result = first;
    if (result) {
        first = result->next;
        if (first)
            first->prev = nullptr;
        else {
            last.store(nullptr, std::memory_order_relaxed);
            oldest.store(0, std::memory_order_relaxed);
        }
    }

    return result;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::forgetOutdatedState(uintptr_t currTime)
{
    // If the time since the last get is LongWaitFactor times more than the bin's
    // ageThreshold, treat the bin as rarely used and forget everything we know
    // about it.
    // If LongWaitFactor is too small, we forget too early and thus prevent good
    // caching; if it is too high, we keep caching blocks with unrelated usage
    // patterns.
    const uintptr_t sinceLastGet = currTime - lastGet;
    bool doCleanup = false;

    intptr_t threshold = ageThreshold.load(std::memory_order_relaxed);
    if (threshold)
        doCleanup = sinceLastGet > static_cast<uintptr_t>(Props::LongWaitFactor * threshold);
    else if (lastCleanedAge)
        doCleanup = sinceLastGet > static_cast<uintptr_t>(Props::LongWaitFactor * (lastCleanedAge - lastGet));

    if (doCleanup) {
        lastCleanedAge = 0;
        ageThreshold.store(0, std::memory_order_relaxed);
    }
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx)
{
    /* oldest may be more recent than age; that's why a cast to a signed type
       is used. age overflow is also processed correctly.
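       The subtraction is performed on unsigned values and the result is cast to
       intptr_t, so the comparison stays correct even after the time counter wraps.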
    */
    if ( !last.load(std::memory_order_relaxed) ||
         (intptr_t)(currTime - last.load(std::memory_order_relaxed)->age) < ageThreshold.load(std::memory_order_relaxed) )
        return nullptr;

#if MALLOC_DEBUG
    uintptr_t nextAge = 0;
#endif
    do {
#if MALLOC_DEBUG
        // check that the list is ordered
        MALLOC_ASSERT(!nextAge || lessThanWithOverflow(nextAge, last.load(std::memory_order_relaxed)->age),
                      ASSERT_TEXT);
        nextAge = last.load(std::memory_order_relaxed)->age;
#endif
        cachedSize.store(cachedSize.load(std::memory_order_relaxed) - last.load(std::memory_order_relaxed)->unalignedSize, std::memory_order_relaxed);
        last.store(last.load(std::memory_order_relaxed)->prev, std::memory_order_relaxed);
    } while (last.load(std::memory_order_relaxed) &&
             (intptr_t)(currTime - last.load(std::memory_order_relaxed)->age) > ageThreshold.load(std::memory_order_relaxed));

    LargeMemoryBlock *toRelease = nullptr;
    if (last.load(std::memory_order_relaxed)) {
        toRelease = last.load(std::memory_order_relaxed)->next;
        oldest.store(last.load(std::memory_order_relaxed)->age, std::memory_order_relaxed);
        last.load(std::memory_order_relaxed)->next = nullptr;
    } else {
        toRelease = first;
        first = nullptr;
        oldest.store(0, std::memory_order_relaxed);
        if (!usedSize.load(std::memory_order_relaxed))
            bitMask->set(idx, false);
    }
    MALLOC_ASSERT( toRelease, ASSERT_TEXT );
    lastCleanedAge = toRelease->age;

    return toRelease;
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::cleanAll(BinBitMask *bitMask, int idx)
{
    if (!last.load(std::memory_order_relaxed)) return nullptr;

    LargeMemoryBlock *toRelease = first;
    last.store(nullptr, std::memory_order_relaxed);
    first = nullptr;
    oldest.store(0, std::memory_order_relaxed);
    cachedSize.store(0, std::memory_order_relaxed);
    if (!usedSize.load(std::memory_order_relaxed))
        bitMask->set(idx, false);

    return toRelease;
}

/* ----------------------------------------------------------------------------------------------------- */

#if __TBB_MALLOC_BACKEND_STAT
template<typename Props> size_t LargeObjectCacheImpl<Props>::
    CacheBin::reportStat(int num, FILE *f)
{
#if __TBB_MALLOC_LOCACHE_STAT
    if (first)
        printf("%d(%lu): total %lu KB thr %ld lastCln %lu oldest %lu\n",
               num, num*Props::CacheStep+Props::MinSize,
               cachedSize.load(std::memory_order_relaxed)/1024, ageThreshold.load(std::memory_order_relaxed), lastCleanedAge, oldest.load(std::memory_order_relaxed));
#else
    suppress_unused_warning(num);
    suppress_unused_warning(f);
#endif
    return cachedSize.load(std::memory_order_relaxed);
}
#endif

// Release cached blocks that are older than ageThreshold
template<typename Props>
bool LargeObjectCacheImpl<Props>::regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currTime, bool doThreshDecr)
{
    bool released = false;
    BinsSummary binsSummary;

    // The threshold setting is below this cache's range or starts from index zero
    if (hugeSizeThresholdIdx == 0) return false;

    // Start searching from the bin just below the huge size threshold (those bins can be cleaned up)
    int startSearchIdx = hugeSizeThresholdIdx - 1;

    for (int i = bitMask.getMaxTrue(startSearchIdx); i >= 0; i = bitMask.getMaxTrue(i-1)) {
        bin[i].updateBinsSummary(&binsSummary);
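        // tooLargeLOC counts consecutive cleanups that found the cache oversized;
        // only after several such passes is a threshold decrease triggered.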
        if (!doThreshDecr && tooLargeLOC.load(std::memory_order_relaxed) > 2 && binsSummary.isLOCTooLarge()) {
            // If the LOC has been too large for quite a long time, decrease the
            // threshold based on bin hit statistics.
            // For this, redo the cleanup from the beginning.
            // Note: on this iteration the total usedSz may not be large in
            // comparison to the total cachedSz, as it has been calculated only
            // partially. We are OK with that.
            i = bitMask.getMaxTrue(startSearchIdx)+1;
            doThreshDecr = true;
            binsSummary.reset();
            continue;
        }
        if (doThreshDecr)
            bin[i].decreaseThreshold();

        if (bin[i].cleanToThreshold(extMemPool, &bitMask, currTime, i)) {
            released = true;
        }
    }
    // We want to detect whether the LOC was continuously too large for some time,
    // so races between incrementing and zeroing are acceptable, but the
    // increment itself must be atomic.
    if (binsSummary.isLOCTooLarge()) {
        tooLargeLOC++;
    } else {
        tooLargeLOC.store(0, std::memory_order_relaxed);
    }
    return released;
}

template<typename Props>
bool LargeObjectCacheImpl<Props>::cleanAll(ExtMemoryPool *extMemPool)
{
    bool released = false;
    for (int i = numBins-1; i >= 0; i--) {
        released |= bin[i].releaseAllToBackend(extMemPool, &bitMask, i);
    }
    return released;
}

template<typename Props>
void LargeObjectCacheImpl<Props>::reset() {
    tooLargeLOC.store(0, std::memory_order_relaxed);
    for (int i = numBins-1; i >= 0; i--)
        bin[i].init();
    bitMask.reset();
}

#if __TBB_MALLOC_WHITEBOX_TEST
template<typename Props>
size_t LargeObjectCacheImpl<Props>::getLOCSize() const
{
    size_t size = 0;
    for (int i = numBins-1; i >= 0; i--)
        size += bin[i].getSize();
    return size;
}

size_t LargeObjectCache::getLOCSize() const
{
    return largeCache.getLOCSize() + hugeCache.getLOCSize();
}

template<typename Props>
size_t LargeObjectCacheImpl<Props>::getUsedSize() const
{
    size_t size = 0;
    for (int i = numBins-1; i >= 0; i--)
        size += bin[i].getUsedSize();
    return size;
}

size_t LargeObjectCache::getUsedSize() const
{
    return largeCache.getUsedSize() + hugeCache.getUsedSize();
}
#endif // __TBB_MALLOC_WHITEBOX_TEST

inline bool LargeObjectCache::isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime)
{
    return range >= cacheCleanupFreq
        || currTime+range < currTime-1 // overflow, 0 is power of 2, do cleanup
        // (prev;prev+range] contains n*cacheCleanupFreq
        || alignUp(currTime, cacheCleanupFreq) < currTime+range;
}

bool LargeObjectCache::doCleanup(uintptr_t currTime, bool doThreshDecr)
{
    if (!doThreshDecr)
        extMemPool->allLocalCaches.markUnused();

    bool large_cache_cleaned = largeCache.regularCleanup(extMemPool, currTime, doThreshDecr);
    bool huge_cache_cleaned = hugeCache.regularCleanup(extMemPool, currTime, doThreshDecr);
    return large_cache_cleaned || huge_cache_cleaned;
}

bool LargeObjectCache::decreasingCleanup()
{
    return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/true);
}

bool LargeObjectCache::regularCleanup()
{
    return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/false);
}

bool LargeObjectCache::cleanAll()
{
    bool large_cache_cleaned = largeCache.cleanAll(extMemPool);
    bool huge_cache_cleaned = hugeCache.cleanAll(extMemPool);
    return large_cache_cleaned || huge_cache_cleaned;
}

void LargeObjectCache::reset()
{
    largeCache.reset();
    hugeCache.reset();
}

template<typename Props>
LargeMemoryBlock *LargeObjectCacheImpl<Props>::get(ExtMemoryPool *extMemoryPool, size_t size)
{
    int idx = Props::sizeToIdx(size);

    LargeMemoryBlock *lmb = bin[idx].get(extMemoryPool, size, &bitMask, idx);

    if (lmb) {
        MALLOC_ITT_SYNC_ACQUIRED(bin+idx);
        STAT_increment(getThreadId(), ThreadCommonCounters, allocCachedLargeObj);
    }
    return lmb;
}

template<typename Props>
void LargeObjectCacheImpl<Props>::updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size)
{
    int idx = Props::sizeToIdx(size);
    MALLOC_ASSERT(idx < static_cast<int>(numBins), ASSERT_TEXT);
    bin[idx].updateUsedSize(extMemPool, op==decrease? -size : size, &bitMask, idx);
}

#if __TBB_MALLOC_LOCACHE_STAT
template<typename Props>
void LargeObjectCacheImpl<Props>::reportStat(FILE *f)
{
    size_t cachedSize = 0;
    for (int i=0; i<numBins; i++)
        cachedSize += bin[i].reportStat(i, f);
    fprintf(f, "total LOC size %lu MB\n", cachedSize/1024/1024);
}

void LargeObjectCache::reportStat(FILE *f)
{
    largeCache.reportStat(f);
    hugeCache.reportStat(f);
    fprintf(f, "cache time %lu\n", cacheCurrTime.load(std::memory_order_relaxed));
}
#endif

template<typename Props>
void LargeObjectCacheImpl<Props>::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *toCache)
{
    int toBinIdx = Props::sizeToIdx(toCache->unalignedSize);

    MALLOC_ITT_SYNC_RELEASING(bin+toBinIdx);
    bin[toBinIdx].putList(extMemPool, toCache, &bitMask, toBinIdx);
}

void LargeObjectCache::updateCacheState(DecreaseOrIncrease op, size_t size)
{
    if (size < maxLargeSize)
        largeCache.updateCacheState(extMemPool, op, size);
    else if (size < maxHugeSize)
        hugeCache.updateCacheState(extMemPool, op, size);
}

uintptr_t LargeObjectCache::getCurrTimeRange(uintptr_t range)
{
    return (cacheCurrTime.fetch_add(range) + 1);
}

void LargeObjectCache::registerRealloc(size_t oldSize, size_t newSize)
{
    updateCacheState(decrease, oldSize);
    updateCacheState(increase, alignToBin(newSize));
}

size_t LargeObjectCache::alignToBin(size_t size) {
    return size < maxLargeSize ? LargeCacheType::alignToBin(size) : HugeCacheType::alignToBin(size);
}

// Used for internal purposes
int LargeObjectCache::sizeToIdx(size_t size)
{
    MALLOC_ASSERT(size <= maxHugeSize, ASSERT_TEXT);
    return size < maxLargeSize ?
        LargeCacheType::sizeToIdx(size) :
        LargeCacheType::numBins + HugeCacheType::sizeToIdx(size);
}

void LargeObjectCache::putList(LargeMemoryBlock *list)
{
    LargeMemoryBlock *toProcess, *n;

    for (LargeMemoryBlock *curr = list; curr; curr = toProcess) {
        LargeMemoryBlock *tail = curr;
        toProcess = curr->next;
        if (!sizeInCacheRange(curr->unalignedSize)) {
            extMemPool->backend.returnLargeObject(curr);
            continue;
        }
        int currIdx = sizeToIdx(curr->unalignedSize);

        // Find all blocks that fit the same bin. A more efficient sorting
        // algorithm is not used because the list is short (commonly,
        // LocalLOC's HIGH_MARK-LOW_MARK, i.e. 24 items).
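        // Splice every block with the same bin index out of toProcess and append it
        // to the curr..tail chain, then hand the whole chain to the bin in one put.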
        for (LargeMemoryBlock *b = toProcess; b; b = n) {
            n = b->next;
            if (sizeToIdx(b->unalignedSize) == currIdx) {
                tail->next = b;
                tail = b;
                if (toProcess == b)
                    toProcess = toProcess->next;
                else {
                    b->prev->next = b->next;
                    if (b->next)
                        b->next->prev = b->prev;
                }
            }
        }
        tail->next = nullptr;
        if (curr->unalignedSize < maxLargeSize)
            largeCache.putList(extMemPool, curr);
        else
            hugeCache.putList(extMemPool, curr);
    }
}

void LargeObjectCache::put(LargeMemoryBlock *largeBlock)
{
    size_t blockSize = largeBlock->unalignedSize;
    if (sizeInCacheRange(blockSize)) {
        largeBlock->next = nullptr;
        if (blockSize < maxLargeSize)
            largeCache.putList(extMemPool, largeBlock);
        else
            hugeCache.putList(extMemPool, largeBlock);
    } else {
        extMemPool->backend.returnLargeObject(largeBlock);
    }
}

LargeMemoryBlock *LargeObjectCache::get(size_t size)
{
    MALLOC_ASSERT( size >= minLargeSize, ASSERT_TEXT );
    if (sizeInCacheRange(size)) {
        return size < maxLargeSize ?
            largeCache.get(extMemPool, size) : hugeCache.get(extMemPool, size);
    }
    return nullptr;
}

LargeMemoryBlock *ExtMemoryPool::mallocLargeObject(MemoryPool *pool, size_t allocationSize)
{
#if __TBB_MALLOC_LOCACHE_STAT
    mallocCalls++;
    memAllocKB.fetch_add(allocationSize/1024);
#endif
    LargeMemoryBlock* lmb = loc.get(allocationSize);
    if (!lmb) {
        BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true);
        if (backRefIdx.isInvalid())
            return nullptr;

        // unalignedSize is set in getLargeBlock
        lmb = backend.getLargeBlock(allocationSize);
        if (!lmb) {
            removeBackRef(backRefIdx);
            loc.updateCacheState(decrease, allocationSize);
            return nullptr;
        }
        lmb->backRefIdx = backRefIdx;
        lmb->pool = pool;
        STAT_increment(getThreadId(), ThreadCommonCounters, allocNewLargeObj);
    } else {
#if __TBB_MALLOC_LOCACHE_STAT
        cacheHits++;
        memHitKB.fetch_add(allocationSize/1024);
#endif
    }
    return lmb;
}

void ExtMemoryPool::freeLargeObject(LargeMemoryBlock *mBlock)
{
    loc.put(mBlock);
}

void ExtMemoryPool::freeLargeObjectList(LargeMemoryBlock *head)
{
    loc.putList(head);
}

bool ExtMemoryPool::softCachesCleanup()
{
    bool ret = false;
    if (!softCachesCleanupInProgress.exchange(1, std::memory_order_acq_rel)) {
        ret = loc.regularCleanup();
        softCachesCleanupInProgress.store(0, std::memory_order_release);
    }
    return ret;
}

bool ExtMemoryPool::hardCachesCleanup(bool wait)
{
    if (hardCachesCleanupInProgress.exchange(1, std::memory_order_acq_rel)) {
        if (!wait)
            return false;

        AtomicBackoff backoff;
        while (hardCachesCleanupInProgress.exchange(1, std::memory_order_acq_rel))
            backoff.pause();
    }

    // Thread-local caches must be cleaned before the LOC, because objects
    // from thread-local caches can be released to the LOC.
    bool ret = releaseAllLocalCaches();
    ret |= orphanedBlocks.cleanup(&backend);
    ret |= loc.cleanAll();
    ret |= backend.clean();

    hardCachesCleanupInProgress.store(0, std::memory_order_release);
    return ret;
}

#if BACKEND_HAS_MREMAP
void *ExtMemoryPool::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
{
    const size_t oldUnalignedSize = ((LargeObjectHdr*)ptr - 1)->memoryBlock->unalignedSize;
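    // On success, registerRealloc below corrects the cache's used-size accounting
    // for the transition from the old block size to the new one.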
    void *o = backend.remap(ptr, oldSize, newSize, alignment);
    if (o) {
        LargeMemoryBlock *lmb = ((LargeObjectHdr*)o - 1)->memoryBlock;
        loc.registerRealloc(oldUnalignedSize, lmb->unalignedSize);
    }
    return o;
}
#endif /* BACKEND_HAS_MREMAP */

/*********** End allocation of large objects **********/

} // namespace internal
} // namespace rml

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma warning(pop)
#endif