/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include "../src/tbb/environment.h"

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
// Suppress warning: unary minus operator applied to unsigned type, result still unsigned
// TBB_REVAMP_TODO: review this warning
#pragma warning(push)
#pragma warning(disable:4146)
#endif

/******************************* Allocation of large objects *********************************************/

namespace rml {
namespace internal {

/* ---------------------------- Large Object cache init section ---------------------------------------- */

void LargeObjectCache::init(ExtMemoryPool *memPool)
{
    extMemPool = memPool;
    // scalable_allocation_mode can be called before allocator initialization; respect such a manual request
    if (hugeSizeThreshold == 0) {
        // Huge size threshold initialization if environment variable was set
        long requestedThreshold = tbb::detail::r1::GetIntegralEnvironmentVariable("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD");
        // Use a valid value from the environment, or initialize to the maximum possible value by default
        if (requestedThreshold != -1) {
            setHugeSizeThreshold(requestedThreshold);
        } else {
            setHugeSizeThreshold(maxHugeSize);
        }
    }
}
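
// Illustrative note: the huge size threshold can be requested either through the
// TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD environment variable (read above) or
// programmatically before the first allocation, e.g. (sketch, not part of this file):
//
//     // cache blocks of up to ~32 MB instead of the default huge-object limit
//     scalable_allocation_mode(TBBMALLOC_SET_HUGE_SIZE_THRESHOLD, 32 * 1024 * 1024);
//
// Both paths end up in setHugeSizeThreshold() below; if neither is used, the
// threshold defaults to maxHugeSize.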

/* ----------------------------- Huge size threshold settings ----------------------------------------- */

void LargeObjectCache::setHugeSizeThreshold(size_t value)
{
    // Valid in the huge cache range: [MaxLargeSize, MaxHugeSize].
    if (value <= maxHugeSize) {
        hugeSizeThreshold = value >= maxLargeSize ? alignToBin(value) : maxLargeSize;

        // Calculate local indexes for the global threshold size (for fast search inside a regular cleanup)
        largeCache.hugeSizeThresholdIdx = LargeCacheType::numBins;
        hugeCache.hugeSizeThresholdIdx = HugeCacheType::sizeToIdx(hugeSizeThreshold);
    }
}

bool LargeObjectCache::sizeInCacheRange(size_t size)
{
    return size < maxHugeSize && (size <= defaultMaxHugeSize || size >= hugeSizeThreshold);
}
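
// Reading of the predicate above: sizes up to defaultMaxHugeSize are always cacheable;
// sizes in (defaultMaxHugeSize, maxHugeSize) are cacheable only if the huge size
// threshold has been lowered to cover them (size >= hugeSizeThreshold); sizes of
// maxHugeSize and above always bypass the cache and go straight to the backend.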

/* ----------------------------------------------------------------------------------------------------- */

/* The functor called by the aggregator for the operation list */
template<typename Props>
class CacheBinFunctor {
    typename LargeObjectCacheImpl<Props>::CacheBin *const bin;
    ExtMemoryPool *const extMemPool;
    typename LargeObjectCacheImpl<Props>::BinBitMask *const bitMask;
    const int idx;

    LargeMemoryBlock *toRelease;
    bool needCleanup;
    uintptr_t currTime;

    /* Do preprocessing over the operation list. */
    /* All OP_PUT_LIST operations are merged into one operation.
       OP_GET operations are merged with the OP_PUT_LIST operations where possible,
       but that requires updating the moving average value in the bin.
       Only the last OP_CLEAN_TO_THRESHOLD operation makes sense.
       The OP_CLEAN_ALL operation should also be performed only once;
       moreover, it cancels the OP_CLEAN_TO_THRESHOLD operation. */
    class OperationPreprocessor {
        // TODO: remove the dependency on CacheBin.
        typename LargeObjectCacheImpl<Props>::CacheBin *const bin;

        /* Contains the relative time in the operation list.
           It counts in the reverse order since the aggregator also
           provides operations in the reverse order. */
        uintptr_t lclTime;

        /* opGet contains only OP_GET operations which cannot be merged with OP_PUT operations.
           opClean contains all OP_CLEAN_TO_THRESHOLD and OP_CLEAN_ALL operations. */
        CacheBinOperation *opGet, *opClean;
        /* The time of the last OP_CLEAN_TO_THRESHOLD operation. */
        uintptr_t cleanTime;

        /* lastGetOpTime - the time of the last OP_GET operation.
           lastGet - the same meaning as CacheBin::lastGet */
        uintptr_t lastGetOpTime, lastGet;

        /* The total sum of all usedSize changes requested with CBOP_UPDATE_USED_SIZE operations. */
        size_t updateUsedSize;

        /* The list of blocks for the OP_PUT_LIST operation. */
        LargeMemoryBlock *head, *tail;
        int putListNum;

        /* Whether OP_CLEAN_ALL is requested. */
        bool isCleanAll;

        inline void commitOperation(CacheBinOperation *op) const;
        inline void addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const;
        bool getFromPutList(CacheBinOperation* opGet, uintptr_t currTime);
        void addToPutList( LargeMemoryBlock *head, LargeMemoryBlock *tail, int num );

    public:
        OperationPreprocessor(typename LargeObjectCacheImpl<Props>::CacheBin *bin) :
            bin(bin), lclTime(0), opGet(nullptr), opClean(nullptr), cleanTime(0),
            lastGetOpTime(0), lastGet(0), updateUsedSize(0), head(nullptr), tail(nullptr), putListNum(0), isCleanAll(false) {}
        void operator()(CacheBinOperation* opList);
        uintptr_t getTimeRange() const { return -lclTime; }

        friend class CacheBinFunctor;
    };

public:
    CacheBinFunctor(typename LargeObjectCacheImpl<Props>::CacheBin *bin, ExtMemoryPool *extMemPool,
                    typename LargeObjectCacheImpl<Props>::BinBitMask *bitMask, int idx) :
        bin(bin), extMemPool(extMemPool), bitMask(bitMask), idx(idx), toRelease(nullptr), needCleanup(false), currTime(0) {}
    void operator()(CacheBinOperation* opList);

    bool isCleanupNeeded() const { return needCleanup; }
    LargeMemoryBlock *getToRelease() const { return toRelease; }
    uintptr_t getCurrTime() const { return currTime; }
};
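
// How the functor is used (informal sketch): each thread wraps its request in a
// CacheBinOperation and hands it to the bin's aggregator; whichever thread currently
// owns the aggregator drains the accumulated operation list by invoking
// CacheBinFunctor::operator() on it. For example, if three threads concurrently post
// GET, PUT_LIST and UPDATE_USED_SIZE operations, a single owner thread processes all
// three in one pass, while the waiting threads only poll their operation's status
// until it becomes CBST_DONE (see commitOperation below).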

/* ---------------- Cache Bin Aggregator Operation Helpers ---------------- */

// The list of structures which describe the operation data
struct OpGet {
    static const CacheBinOperationType type = CBOP_GET;
    LargeMemoryBlock **res;
    size_t size;
    uintptr_t currTime;
};

struct OpPutList {
    static const CacheBinOperationType type = CBOP_PUT_LIST;
    LargeMemoryBlock *head;
};

struct OpCleanToThreshold {
    static const CacheBinOperationType type = CBOP_CLEAN_TO_THRESHOLD;
    LargeMemoryBlock **res;
    uintptr_t currTime;
};

struct OpCleanAll {
    static const CacheBinOperationType type = CBOP_CLEAN_ALL;
    LargeMemoryBlock **res;
};

struct OpUpdateUsedSize {
    static const CacheBinOperationType type = CBOP_UPDATE_USED_SIZE;
    size_t size;
};

union CacheBinOperationData {
private:
    OpGet opGet;
    OpPutList opPutList;
    OpCleanToThreshold opCleanToThreshold;
    OpCleanAll opCleanAll;
    OpUpdateUsedSize opUpdateUsedSize;
};

// Forward declarations
template <typename OpTypeData> OpTypeData& opCast(CacheBinOperation &op);

// Describes the aggregator operation
struct CacheBinOperation : public MallocAggregatedOperation<CacheBinOperation>::type {
    CacheBinOperationType type;

    template <typename OpTypeData>
    CacheBinOperation(OpTypeData &d, CacheBinOperationStatus st = CBST_WAIT) {
        opCast<OpTypeData>(*this) = d;
        type = OpTypeData::type;
        MallocAggregatedOperation<CacheBinOperation>::type::status = st;
    }
private:
    CacheBinOperationData data;

    template <typename OpTypeData>
    friend OpTypeData& opCast(CacheBinOperation &op);
};

// The opCast function could be a member of CacheBinOperation, but that would create a
// small stylistic ambiguity: it would look like a getter (with a cast) for the
// CacheBinOperation::data member, yet it must return a reference to keep the code free
// of numerous getter/setter calls. So a global cast in the style of static_cast
// (or reinterpret_cast) seems more readable and has more explicit semantics.
template <typename OpTypeData>
OpTypeData& opCast(CacheBinOperation &op) {
    return *reinterpret_cast<OpTypeData*>(&op.data);
}
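
// Usage pattern (mirroring the methods below, with illustrative variable names):
// to read or fill the payload of an operation, cast it to its concrete descriptor, e.g.
//     size_t requested = opCast<OpGet>(*op).size;   // payload of a CBOP_GET
//     *opCast<OpCleanAll>(*op).res = cleanedList;   // result slot of a CBOP_CLEAN_ALL
// The 'type' field tells which descriptor is currently stored in the union.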

/* ------------------------------------------------------------------------ */

#if __TBB_MALLOC_LOCACHE_STAT
//intptr_t mallocCalls, cacheHits;
std::atomic<intptr_t> mallocCalls, cacheHits;
//intptr_t memAllocKB, memHitKB;
std::atomic<intptr_t> memAllocKB, memHitKB;
#endif

#if MALLOC_DEBUG
inline bool lessThanWithOverflow(intptr_t a, intptr_t b)
{
    return (a < b && (b - a < static_cast<intptr_t>(UINTPTR_MAX/2))) ||
           (a > b && (a - b > static_cast<intptr_t>(UINTPTR_MAX/2)));
}
#endif
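
// Note on the debug helper above: block ages are timestamps that may wrap around, so
// "a is older than b" is decided by whether the forward distance from a to b is shorter
// than half of the value range rather than by a plain comparison. This way an age
// recorded just before the counter wraps still compares as older than one recorded
// right after the wrap.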

/* ----------------------------------- Operation processing methods ------------------------------------ */

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::commitOperation(CacheBinOperation *op) const
{
    // FencedStore( (intptr_t&)(op->status), CBST_DONE );
    op->status.store(CBST_DONE, std::memory_order_release);
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const
{
    op->next = *opList;
    *opList = op;
}

template<typename Props> bool CacheBinFunctor<Props>::
    OperationPreprocessor::getFromPutList(CacheBinOperation *opGet, uintptr_t currTime)
{
    if ( head ) {
        uintptr_t age = head->age;
        LargeMemoryBlock *next = head->next;
        *opCast<OpGet>(*opGet).res = head;
        commitOperation( opGet );
        head = next;
        putListNum--;
        MALLOC_ASSERT( putListNum>=0, ASSERT_TEXT );

        // use moving average with current hit interval
        bin->updateMeanHitRange( currTime - age );
        return true;
    }
    return false;
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::addToPutList(LargeMemoryBlock *h, LargeMemoryBlock *t, int num)
{
    if ( head ) {
        MALLOC_ASSERT( tail, ASSERT_TEXT );
        tail->next = h;
        h->prev = tail;
        tail = t;
        putListNum += num;
    } else {
        head = h;
        tail = t;
        putListNum = num;
    }
}

template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::operator()(CacheBinOperation* opList)
{
    for ( CacheBinOperation *op = opList, *opNext; op; op = opNext ) {
        opNext = op->next;
        switch ( op->type ) {
        case CBOP_GET:
            {
                lclTime--;
                if ( !lastGetOpTime ) {
                    lastGetOpTime = lclTime;
                    lastGet = 0;
                } else if ( !lastGet ) lastGet = lclTime;

                if ( !getFromPutList(op,lclTime) ) {
                    opCast<OpGet>(*op).currTime = lclTime;
                    addOpToOpList( op, &opGet );
                }
            }
            break;

        case CBOP_PUT_LIST:
            {
                LargeMemoryBlock *head = opCast<OpPutList>(*op).head;
                LargeMemoryBlock *curr = head, *prev = nullptr;

                int num = 0;
                do {
                    // we did not keep prev pointers while assigning blocks to bins, so set them now
                    curr->prev = prev;

                    // Save the local times to the memory blocks. Local times are necessary
                    // for the getFromPutList function which updates the hit range value in
                    // CacheBin when OP_GET and OP_PUT_LIST operations are merged successfully.
                    // The age will be updated to the correct global time after preprocessing
                    // when global cache time is updated.
                    curr->age = --lclTime;

                    prev = curr;
                    num += 1;

                    STAT_increment(getThreadId(), ThreadCommonCounters, cacheLargeObj);
                } while ((curr = curr->next) != nullptr);

                LargeMemoryBlock *tail = prev;
                addToPutList(head, tail, num);

                while ( opGet ) {
                    CacheBinOperation *next = opGet->next;
                    if ( !getFromPutList(opGet, opCast<OpGet>(*opGet).currTime) )
                        break;
                    opGet = next;
                }
            }
            break;

        case CBOP_UPDATE_USED_SIZE:
            updateUsedSize += opCast<OpUpdateUsedSize>(*op).size;
            commitOperation( op );
            break;

        case CBOP_CLEAN_ALL:
            isCleanAll = true;
            addOpToOpList( op, &opClean );
            break;

        case CBOP_CLEAN_TO_THRESHOLD:
            {
                uintptr_t currTime = opCast<OpCleanToThreshold>(*op).currTime;
                // We don't worry about currTime overflow since it is a rare
                // occurrence and doesn't affect correctness
                cleanTime = cleanTime < currTime ? currTime : cleanTime;
                addOpToOpList( op, &opClean );
            }
            break;

        default:
            MALLOC_ASSERT( false, "Unknown operation." );
        }
    }
    MALLOC_ASSERT( !( opGet && head ), "Not all put/get pairs are processed!" );
}
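
// Worked example of the preprocessing above: suppose the aggregated list contains
// (in the reverse order the aggregator provides them) GET, GET, PUT_LIST{A,B}.
// The two GET operations are recorded first; when the PUT_LIST is seen, its blocks A
// and B are stamped with local (negative) times and immediately handed back to the
// pending GETs by getFromPutList(), so both GETs are committed during preprocessing
// and neither the bin's block list nor the backend is touched for them.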

template<typename Props> void CacheBinFunctor<Props>::operator()(CacheBinOperation* opList)
{
    MALLOC_ASSERT( opList, "Empty operation list is passed into operation handler." );

    OperationPreprocessor prep(bin);
    prep(opList);

    if ( uintptr_t timeRange = prep.getTimeRange() ) {
        uintptr_t startTime = extMemPool->loc.getCurrTimeRange(timeRange);
        // endTime is used as the current (base) time since the local time is negative.
        uintptr_t endTime = startTime + timeRange;

        if ( prep.lastGetOpTime && prep.lastGet ) bin->setLastGet(prep.lastGet+endTime);

        if ( CacheBinOperation *opGet = prep.opGet ) {
            bool isEmpty = false;
            do {
#if __TBB_MALLOC_WHITEBOX_TEST
                tbbmalloc_whitebox::locGetProcessed++;
#endif
                const OpGet &opGetData = opCast<OpGet>(*opGet);
                if ( !isEmpty ) {
                    if ( LargeMemoryBlock *res = bin->get() ) {
                        uintptr_t getTime = opGetData.currTime + endTime;
                        // use moving average with current hit interval
                        bin->updateMeanHitRange( getTime - res->age);
                        bin->updateCachedSize( -opGetData.size );
                        *opGetData.res = res;
                    } else {
                        isEmpty = true;
                        uintptr_t lastGetOpTime = prep.lastGetOpTime+endTime;
                        bin->forgetOutdatedState(lastGetOpTime);
                        bin->updateAgeThreshold(lastGetOpTime);
                    }
                }

                CacheBinOperation *opNext = opGet->next;
                bin->updateUsedSize( opGetData.size, bitMask, idx );
                prep.commitOperation( opGet );
                opGet = opNext;
            } while ( opGet );
            if ( prep.lastGetOpTime )
                bin->setLastGet( prep.lastGetOpTime + endTime );
        } else if ( LargeMemoryBlock *curr = prep.head ) {
            curr->prev = nullptr;
            while ( curr ) {
                // Update local times to global times
                curr->age += endTime;
                curr=curr->next;
            }
#if __TBB_MALLOC_WHITEBOX_TEST
            tbbmalloc_whitebox::locPutProcessed+=prep.putListNum;
#endif
            toRelease = bin->putList(prep.head, prep.tail, bitMask, idx, prep.putListNum, extMemPool->loc.hugeSizeThreshold);
        }
        needCleanup = extMemPool->loc.isCleanupNeededOnRange(timeRange, startTime);
        currTime = endTime - 1;
    }

    if ( CacheBinOperation *opClean = prep.opClean ) {
        if ( prep.isCleanAll )
            *opCast<OpCleanAll>(*opClean).res = bin->cleanAll(bitMask, idx);
        else
            *opCast<OpCleanToThreshold>(*opClean).res = bin->cleanToThreshold(prep.cleanTime, bitMask, idx);

        CacheBinOperation *opNext = opClean->next;
        prep.commitOperation( opClean );

        while ((opClean = opNext) != nullptr) {
            opNext = opClean->next;
            prep.commitOperation(opClean);
        }
    }

    if ( size_t size = prep.updateUsedSize )
        bin->updateUsedSize(size, bitMask, idx);
}
/* ----------------------------------------------------------------------------------------------------- */
/* --------------------------- Methods for creating and executing operations --------------------------- */
template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime)
{
    CacheBinFunctor<Props> func( this, extMemPool, bitMask, idx );
    aggregator.execute( op, func, longLifeTime );

    if ( LargeMemoryBlock *toRelease = func.getToRelease()) {
        extMemPool->backend.returnLargeObject(toRelease);
    }

    if ( func.isCleanupNeeded() ) {
        extMemPool->loc.doCleanup( func.getCurrTime(), /*doThreshDecr=*/false);
    }
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *lmb=nullptr;
    OpGet data = {&lmb, size, static_cast<uintptr_t>(0)};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
    return lmb;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx)
{
    MALLOC_ASSERT(sizeof(LargeMemoryBlock)+sizeof(CacheBinOperation)<=head->unalignedSize, "CacheBinOperation is too large to be placed in LargeMemoryBlock!");

    OpPutList data = {head};
    CacheBinOperation *op = new (head+1) CacheBinOperation(data, CBST_NOWAIT);
    ExecuteOperation( op, extMemPool, bitMask, idx, false );
}
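
// Note on the placement new above: the CacheBinOperation for a put is constructed
// directly inside the first block being cached (right after its LargeMemoryBlock
// header, at head+1), so returning memory to the cache never needs an extra
// allocation; the assert checks that the block is large enough for this. The
// operation is created with CBST_NOWAIT: the caller does not wait for it to be
// processed since a put returns no result.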

template<typename Props> bool LargeObjectCacheImpl<Props>::
    CacheBin::cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx)
{
    LargeMemoryBlock *toRelease = nullptr;

    /* oldest may be more recent than age; that is why the cast to a signed type
       is used. Age overflow is also handled correctly. */
    if (last.load(std::memory_order_relaxed) &&
        (intptr_t)(currTime - oldest.load(std::memory_order_relaxed)) > ageThreshold.load(std::memory_order_relaxed)) {
        OpCleanToThreshold data = {&toRelease, currTime};
        CacheBinOperation op(data);
        ExecuteOperation( &op, extMemPool, bitMask, idx );
    }
    bool released = toRelease;

    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}

template<typename Props> bool LargeObjectCacheImpl<Props>::
    CacheBin::releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *toRelease = nullptr;

    if (last.load(std::memory_order_relaxed)) {
        OpCleanAll data = {&toRelease};
        CacheBinOperation op(data);
        ExecuteOperation(&op, extMemPool, bitMask, idx);
    }
    bool released = toRelease;

    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        MALLOC_ASSERT(!helper || lessThanWithOverflow(helper->age, toRelease->age),
                      ASSERT_TEXT);
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    OpUpdateUsedSize data = {size};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
}

/* ------------------------------ Unsafe methods used with the aggregator ------------------------------ */

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask, int idx, int num, size_t hugeSizeThreshold)
{
    size_t size = head->unalignedSize;
    usedSize.store(usedSize.load(std::memory_order_relaxed) - num * size, std::memory_order_relaxed);
    MALLOC_ASSERT( !last.load(std::memory_order_relaxed) ||
                   (last.load(std::memory_order_relaxed)->age != 0 && last.load(std::memory_order_relaxed)->age != -1U), ASSERT_TEXT );
    MALLOC_ASSERT( (tail==head && num==1) || (tail!=head && num>1), ASSERT_TEXT );
    MALLOC_ASSERT( tail, ASSERT_TEXT );
    LargeMemoryBlock *toRelease = nullptr;
    if (size < hugeSizeThreshold && !lastCleanedAge) {
        // The first object of this size was released.
        // Do not cache it, and remember when this occurred
        // so it can be taken into account on a cache miss.
        lastCleanedAge = tail->age;
        toRelease = tail;
        tail = tail->prev;
        if (tail)
            tail->next = nullptr;
        else
            head = nullptr;
        num--;
    }
    if (num) {
        // add [head;tail] list to cache
        tail->next = first;
        if (first)
            first->prev = tail;
        first = head;
        if (!last.load(std::memory_order_relaxed)) {
            MALLOC_ASSERT(0 == oldest.load(std::memory_order_relaxed), ASSERT_TEXT);
            oldest.store(tail->age, std::memory_order_relaxed);
            last.store(tail, std::memory_order_relaxed);
        }

        cachedSize.store(cachedSize.load(std::memory_order_relaxed) + num * size, std::memory_order_relaxed);
    }

    // No used objects and nothing in the bin: mark the bin as empty
    if (!usedSize.load(std::memory_order_relaxed) && !first)
        bitMask->set(idx, false);

    return toRelease;
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::get()
{
    LargeMemoryBlock *result=first;
    if (result) {
        first = result->next;
        if (first)
            first->prev = nullptr;
        else {
            last.store(nullptr, std::memory_order_relaxed);
            oldest.store(0, std::memory_order_relaxed);
        }
    }

    return result;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::forgetOutdatedState(uintptr_t currTime)
{
    // If the time since the last get is LongWaitFactor times more than ageThreshold
    // for the bin, treat the bin as rarely used and forget everything we know
    // about it.
    // If LongWaitFactor is too small, we forget too early, which prevents good
    // caching; if it is too high, we may cache blocks with an unrelated usage pattern.
    const uintptr_t sinceLastGet = currTime - lastGet;
    bool doCleanup = false;

    intptr_t threshold = ageThreshold.load(std::memory_order_relaxed);
    if (threshold)
        doCleanup = sinceLastGet > static_cast<uintptr_t>(Props::LongWaitFactor * threshold);
    else if (lastCleanedAge)
        doCleanup = sinceLastGet > static_cast<uintptr_t>(Props::LongWaitFactor * (lastCleanedAge - lastGet));

    if (doCleanup) {
        lastCleanedAge = 0;
        ageThreshold.store(0, std::memory_order_relaxed);
    }
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx)
{
    /* oldest may be more recent than age; that is why the cast to a signed type
       is used. Age overflow is also handled correctly. */
    if ( !last.load(std::memory_order_relaxed) ||
         (intptr_t)(currTime - last.load(std::memory_order_relaxed)->age) < ageThreshold.load(std::memory_order_relaxed) )
        return nullptr;

#if MALLOC_DEBUG
    uintptr_t nextAge = 0;
#endif
    do {
#if MALLOC_DEBUG
        // check that the list is ordered
        MALLOC_ASSERT(!nextAge || lessThanWithOverflow(nextAge, last.load(std::memory_order_relaxed)->age),
                      ASSERT_TEXT);
        nextAge = last.load(std::memory_order_relaxed)->age;
#endif
        cachedSize.store(cachedSize.load(std::memory_order_relaxed) - last.load(std::memory_order_relaxed)->unalignedSize, std::memory_order_relaxed);
        last.store(last.load(std::memory_order_relaxed)->prev, std::memory_order_relaxed);
    } while (last.load(std::memory_order_relaxed) &&
             (intptr_t)(currTime - last.load(std::memory_order_relaxed)->age) > ageThreshold.load(std::memory_order_relaxed));

    LargeMemoryBlock *toRelease = nullptr;
    if (last.load(std::memory_order_relaxed)) {
        toRelease = last.load(std::memory_order_relaxed)->next;
        oldest.store(last.load(std::memory_order_relaxed)->age, std::memory_order_relaxed);
        last.load(std::memory_order_relaxed)->next = nullptr;
    } else {
        toRelease = first;
        first = nullptr;
        oldest.store(0, std::memory_order_relaxed);
        if (!usedSize.load(std::memory_order_relaxed))
            bitMask->set(idx, false);
    }
    MALLOC_ASSERT( toRelease, ASSERT_TEXT );
    lastCleanedAge = toRelease->age;

    return toRelease;
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::cleanAll(BinBitMask *bitMask, int idx)
{
    if (!last.load(std::memory_order_relaxed)) return nullptr;

    LargeMemoryBlock *toRelease = first;
    last.store(nullptr, std::memory_order_relaxed);
    first = nullptr;
    oldest.store(0, std::memory_order_relaxed);
    cachedSize.store(0, std::memory_order_relaxed);
    if (!usedSize.load(std::memory_order_relaxed))
        bitMask->set(idx, false);

    return toRelease;
}

/* ----------------------------------------------------------------------------------------------------- */

#if __TBB_MALLOC_BACKEND_STAT
template<typename Props> size_t LargeObjectCacheImpl<Props>::
    CacheBin::reportStat(int num, FILE *f)
{
#if __TBB_MALLOC_LOCACHE_STAT
    if (first)
        printf("%d(%lu): total %lu KB thr %ld lastCln %lu oldest %lu\n",
               num, num*Props::CacheStep+Props::MinSize,
               cachedSize.load(std::memory_order_relaxed)/1024, ageThreshold.load(std::memory_order_relaxed), lastCleanedAge, oldest.load(std::memory_order_relaxed));
#else
    suppress_unused_warning(num);
    suppress_unused_warning(f);
#endif
    return cachedSize.load(std::memory_order_relaxed);
}
#endif

// Release objects from cache blocks that are older than ageThreshold
template<typename Props>
bool LargeObjectCacheImpl<Props>::regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currTime, bool doThreshDecr)
{
    bool released = false;
    BinsSummary binsSummary;

    // The threshold setting is below this cache, or it starts from the zero index
    if (hugeSizeThresholdIdx == 0) return false;

    // Start searching from the last bin below the huge size threshold (such bins can be cleaned up)
    int startSearchIdx = hugeSizeThresholdIdx - 1;

    for (int i = bitMask.getMaxTrue(startSearchIdx); i >= 0; i = bitMask.getMaxTrue(i-1)) {
        bin[i].updateBinsSummary(&binsSummary);
        if (!doThreshDecr && tooLargeLOC.load(std::memory_order_relaxed) > 2 && binsSummary.isLOCTooLarge()) {
            // If the LOC has been too large for quite a long time, decrease the threshold
            // based on bin hit statistics.
            // For this, redo the cleanup from the beginning.
            // Note: on this iteration the total usedSz may not be too large
            // in comparison to the total cachedSz, as we calculated it only
            // partially. We are OK with that.
            i = bitMask.getMaxTrue(startSearchIdx)+1;
            doThreshDecr = true;
            binsSummary.reset();
            continue;
        }
        if (doThreshDecr)
            bin[i].decreaseThreshold();

        if (bin[i].cleanToThreshold(extMemPool, &bitMask, currTime, i)) {
            released = true;
        }
    }
    // We want to detect whether the LOC was too large continuously for some time,
    // so races between incrementing and zeroing are acceptable, but the increment
    // must be atomic.
    if (binsSummary.isLOCTooLarge()) {
        tooLargeLOC++;
    } else {
        tooLargeLOC.store(0, std::memory_order_relaxed);
    }
    return released;
}

template<typename Props>
bool LargeObjectCacheImpl<Props>::cleanAll(ExtMemoryPool *extMemPool)
{
    bool released = false;
    for (int i = numBins-1; i >= 0; i--) {
        released |= bin[i].releaseAllToBackend(extMemPool, &bitMask, i);
    }
    return released;
}

template<typename Props>
void LargeObjectCacheImpl<Props>::reset() {
    tooLargeLOC.store(0, std::memory_order_relaxed);
    for (int i = numBins-1; i >= 0; i--)
        bin[i].init();
    bitMask.reset();
}

#if __TBB_MALLOC_WHITEBOX_TEST
template<typename Props>
size_t LargeObjectCacheImpl<Props>::getLOCSize() const
{
    size_t size = 0;
    for (int i = numBins-1; i >= 0; i--)
        size += bin[i].getSize();
    return size;
}

size_t LargeObjectCache::getLOCSize() const
{
    return largeCache.getLOCSize() + hugeCache.getLOCSize();
}

template<typename Props>
size_t LargeObjectCacheImpl<Props>::getUsedSize() const
{
    size_t size = 0;
    for (int i = numBins-1; i >= 0; i--)
        size += bin[i].getUsedSize();
    return size;
}

size_t LargeObjectCache::getUsedSize() const
{
    return largeCache.getUsedSize() + hugeCache.getUsedSize();
}
#endif // __TBB_MALLOC_WHITEBOX_TEST

inline bool LargeObjectCache::isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime)
{
    return range >= cacheCleanupFreq
        || currTime+range < currTime-1 // overflow, 0 is power of 2, do cleanup
        // (prev;prev+range] contains n*cacheCleanupFreq
        || alignUp(currTime, cacheCleanupFreq)<currTime+range;
}
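
// Numeric sketch of the predicate above (assuming cacheCleanupFreq were, say, 8 for
// illustration): reserving a range of ticks triggers a cleanup if the range alone is
// at least one full period (range >= 8), if the time counter wraps around, or if the
// interval (currTime; currTime+range] crosses a multiple of the period. For example,
// currTime = 10, range = 3 gives alignUp(10, 8) = 16 > 13, so no cleanup is needed;
// currTime = 14, range = 3 gives alignUp(14, 8) = 16 < 17, so a period boundary was
// crossed and a cleanup is requested.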

bool LargeObjectCache::doCleanup(uintptr_t currTime, bool doThreshDecr)
{
    if (!doThreshDecr)
        extMemPool->allLocalCaches.markUnused();

    bool large_cache_cleaned = largeCache.regularCleanup(extMemPool, currTime, doThreshDecr);
    bool huge_cache_cleaned = hugeCache.regularCleanup(extMemPool, currTime, doThreshDecr);
    return large_cache_cleaned || huge_cache_cleaned;
}

bool LargeObjectCache::decreasingCleanup()
{
    return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/true);
}

bool LargeObjectCache::regularCleanup()
{
    return doCleanup(cacheCurrTime.load(std::memory_order_acquire), /*doThreshDecr=*/false);
}

bool LargeObjectCache::cleanAll()
{
    bool large_cache_cleaned = largeCache.cleanAll(extMemPool);
    bool huge_cache_cleaned = hugeCache.cleanAll(extMemPool);
    return large_cache_cleaned || huge_cache_cleaned;
}

void LargeObjectCache::reset()
{
    largeCache.reset();
    hugeCache.reset();
}

template<typename Props>
LargeMemoryBlock *LargeObjectCacheImpl<Props>::get(ExtMemoryPool *extMemoryPool, size_t size)
{
    int idx = Props::sizeToIdx(size);

    LargeMemoryBlock *lmb = bin[idx].get(extMemoryPool, size, &bitMask, idx);

    if (lmb) {
        MALLOC_ITT_SYNC_ACQUIRED(bin+idx);
        STAT_increment(getThreadId(), ThreadCommonCounters, allocCachedLargeObj);
    }
    return lmb;
}

template<typename Props>
void LargeObjectCacheImpl<Props>::updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size)
{
    int idx = Props::sizeToIdx(size);
    MALLOC_ASSERT(idx < static_cast<int>(numBins), ASSERT_TEXT);
    bin[idx].updateUsedSize(extMemPool, op==decrease? -size : size, &bitMask, idx);
}

#if __TBB_MALLOC_LOCACHE_STAT
template<typename Props>
void LargeObjectCacheImpl<Props>::reportStat(FILE *f)
{
    size_t cachedSize = 0;
    for (int i=0; i<numBins; i++)
        cachedSize += bin[i].reportStat(i, f);
    fprintf(f, "total LOC size %lu MB\n", cachedSize/1024/1024);
}

void LargeObjectCache::reportStat(FILE *f)
{
    largeCache.reportStat(f);
    hugeCache.reportStat(f);
    fprintf(f, "cache time %lu\n", cacheCurrTime.load(std::memory_order_relaxed));
}
#endif

template<typename Props>
void LargeObjectCacheImpl<Props>::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *toCache)
{
    int toBinIdx = Props::sizeToIdx(toCache->unalignedSize);

    MALLOC_ITT_SYNC_RELEASING(bin+toBinIdx);
    bin[toBinIdx].putList(extMemPool, toCache, &bitMask, toBinIdx);
}

void LargeObjectCache::updateCacheState(DecreaseOrIncrease op, size_t size)
{
    if (size < maxLargeSize)
        largeCache.updateCacheState(extMemPool, op, size);
    else if (size < maxHugeSize)
        hugeCache.updateCacheState(extMemPool, op, size);
}


uintptr_t LargeObjectCache::getCurrTimeRange(uintptr_t range)
{
    return (cacheCurrTime.fetch_add(range) + 1);
}
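
// Descriptive note: the cache "time" is a logical tick counter, not wall-clock time.
// Each aggregated batch of operations reserves `range` consecutive ticks with a single
// fetch_add, and the function returns the first tick of the reserved interval (+1
// because fetch_add yields the value before the addition). For example, if the counter
// stands at 100 and a batch needs 3 ticks, the batch owns ticks 101..103 and the next
// caller starts at 104.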

void LargeObjectCache::registerRealloc(size_t oldSize, size_t newSize)
{
    updateCacheState(decrease, oldSize);
    updateCacheState(increase, alignToBin(newSize));
}

size_t LargeObjectCache::alignToBin(size_t size) {
    return size < maxLargeSize ? LargeCacheType::alignToBin(size) : HugeCacheType::alignToBin(size);
}

// Used for internal purposes
int LargeObjectCache::sizeToIdx(size_t size)
{
    MALLOC_ASSERT(size <= maxHugeSize, ASSERT_TEXT);
    return size < maxLargeSize ?
        LargeCacheType::sizeToIdx(size) :
        LargeCacheType::numBins + HugeCacheType::sizeToIdx(size);
}
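
// Index mapping used above: sizes below maxLargeSize map to bins [0, LargeCacheType::numBins)
// of the large cache, while sizes from maxLargeSize up to maxHugeSize are shifted by
// LargeCacheType::numBins so that both caches share one flat index space; e.g. the very
// first huge bin gets index LargeCacheType::numBins.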

void LargeObjectCache::putList(LargeMemoryBlock *list)
{
    LargeMemoryBlock *toProcess, *n;

    for (LargeMemoryBlock *curr = list; curr; curr = toProcess) {
        LargeMemoryBlock *tail = curr;
        toProcess = curr->next;
        if (!sizeInCacheRange(curr->unalignedSize)) {
            extMemPool->backend.returnLargeObject(curr);
            continue;
        }
        int currIdx = sizeToIdx(curr->unalignedSize);

        // Find all blocks that fit into the same bin. A more efficient sorting
        // algorithm is not used because the list is short (commonly,
        // LocalLOC's HIGH_MARK-LOW_MARK, i.e. 24 items).
        for (LargeMemoryBlock *b = toProcess; b; b = n) {
            n = b->next;
            if (sizeToIdx(b->unalignedSize) == currIdx) {
                tail->next = b;
                tail = b;
                if (toProcess == b)
                    toProcess = toProcess->next;
                else {
                    b->prev->next = b->next;
                    if (b->next)
                        b->next->prev = b->prev;
                }
            }
        }
        tail->next = nullptr;
        if (curr->unalignedSize < maxLargeSize)
            largeCache.putList(extMemPool, curr);
        else
            hugeCache.putList(extMemPool, curr);
    }
}
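
// Worked example of the grouping above: if the incoming list maps to bins {5, 9, 5, 9},
// the first outer iteration collects both index-5 blocks into one sub-list and hands it
// to the large (or huge) cache in a single putList call; the remaining index-9 blocks
// are handled the same way on the next outer iteration. Blocks outside the cacheable
// range are returned to the backend immediately.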

void LargeObjectCache::put(LargeMemoryBlock *largeBlock)
{
    size_t blockSize = largeBlock->unalignedSize;
    if (sizeInCacheRange(blockSize)) {
        largeBlock->next = nullptr;
        if (blockSize < maxLargeSize)
            largeCache.putList(extMemPool, largeBlock);
        else
            hugeCache.putList(extMemPool, largeBlock);
    } else {
        extMemPool->backend.returnLargeObject(largeBlock);
    }
}

LargeMemoryBlock *LargeObjectCache::get(size_t size)
{
    MALLOC_ASSERT( size >= minLargeSize, ASSERT_TEXT );
    if (sizeInCacheRange(size)) {
        return size < maxLargeSize ?
            largeCache.get(extMemPool, size) : hugeCache.get(extMemPool, size);
    }
    return nullptr;
}

LargeMemoryBlock *ExtMemoryPool::mallocLargeObject(MemoryPool *pool, size_t allocationSize)
{
#if __TBB_MALLOC_LOCACHE_STAT
    mallocCalls++;
    memAllocKB.fetch_add(allocationSize/1024);
#endif
    LargeMemoryBlock* lmb = loc.get(allocationSize);
    if (!lmb) {
        BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true);
        if (backRefIdx.isInvalid())
            return nullptr;

        // unalignedSize is set in getLargeBlock
        lmb = backend.getLargeBlock(allocationSize);
        if (!lmb) {
            removeBackRef(backRefIdx);
            loc.updateCacheState(decrease, allocationSize);
            return nullptr;
        }
        lmb->backRefIdx = backRefIdx;
        lmb->pool = pool;
        STAT_increment(getThreadId(), ThreadCommonCounters, allocNewLargeObj);
    } else {
#if __TBB_MALLOC_LOCACHE_STAT
        cacheHits++;
        memHitKB.fetch_add(allocationSize/1024);
#endif
    }
    return lmb;
}
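
// Note on the accounting in mallocLargeObject(): loc.get() records the requested size
// as "used" in the corresponding bin even when the cache has nothing to hand out
// (see bin->updateUsedSize in the GET processing above). That is why, if the backend
// then fails to provide a block, updateCacheState(decrease, allocationSize) rolls the
// usage counter back before returning nullptr.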

void ExtMemoryPool::freeLargeObject(LargeMemoryBlock *mBlock)
{
    loc.put(mBlock);
}

void ExtMemoryPool::freeLargeObjectList(LargeMemoryBlock *head)
{
    loc.putList(head);
}

bool ExtMemoryPool::softCachesCleanup()
{
    bool ret = false;
    if (!softCachesCleanupInProgress.exchange(1, std::memory_order_acq_rel)) {
        ret = loc.regularCleanup();
        softCachesCleanupInProgress.store(0, std::memory_order_release);
    }
    return ret;
}

bool ExtMemoryPool::hardCachesCleanup(bool wait)
{
    if (hardCachesCleanupInProgress.exchange(1, std::memory_order_acq_rel)) {
        if (!wait)
            return false;

        AtomicBackoff backoff;
        while (hardCachesCleanupInProgress.exchange(1, std::memory_order_acq_rel))
            backoff.pause();
    }

    // Thread-local caches must be cleaned before the LOC,
    // because objects from thread-local caches can be released to the LOC.
    bool ret = releaseAllLocalCaches();
    ret |= orphanedBlocks.cleanup(&backend);
    ret |= loc.cleanAll();
    ret |= backend.clean();

    hardCachesCleanupInProgress.store(0, std::memory_order_release);
    return ret;
}

#if BACKEND_HAS_MREMAP
void *ExtMemoryPool::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
{
    const size_t oldUnalignedSize = ((LargeObjectHdr*)ptr - 1)->memoryBlock->unalignedSize;
    void *o = backend.remap(ptr, oldSize, newSize, alignment);
    if (o) {
        LargeMemoryBlock *lmb = ((LargeObjectHdr*)o - 1)->memoryBlock;
        loc.registerRealloc(oldUnalignedSize, lmb->unalignedSize);
    }
    return o;
}
#endif /* BACKEND_HAS_MREMAP */

/*********** End allocation of large objects **********/

} // namespace internal
} // namespace rml

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma warning(pop)
#endif