1 /*
2 Copyright (c) 2005-2023 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 */
16
17 //! \file test_malloc_whitebox.cpp
18 //! \brief Test for [memory_allocation] functionality
19
20 #if _WIN32 || _WIN64
21 #define _CRT_SECURE_NO_WARNINGS
22 #endif
23
// Prevent loading the dynamic TBBmalloc library at startup; it is not needed for the whitebox test
#define __TBB_SOURCE_DIRECTLY_INCLUDED 1
// Call the thread shutdown API when native threads are joined
#define HARNESS_TBBMALLOC_THREAD_SHUTDOWN 1
28
// According to the C99 standard, INTPTR_MIN is defined for C++ only if __STDC_LIMIT_MACROS is pre-defined
#define __STDC_LIMIT_MACROS 1
31
// Avoid depending on ITT support
33 #ifdef DO_ITT_NOTIFY
34 #undef DO_ITT_NOTIFY
35 #endif
36
37 #include "common/test.h"
38
39 #include "common/utils.h"
40 #include "common/utils_assert.h"
41 #include "common/utils_env.h"
42 #include "common/spin_barrier.h"
43
44 #include "oneapi/tbb/detail/_machine.h"
45
46 #define __TBB_MALLOC_WHITEBOX_TEST 1 // to get access to allocator internals
// help trigger a rare race condition
48 #define WhiteboxTestingYield() (tbb::detail::yield(), tbb::detail::yield(), tbb::detail::yield(), tbb::detail::yield())
49
50 #if __INTEL_COMPILER && __TBB_MIC_OFFLOAD
51 // 2571 is variable has not been declared with compatible "target" attribute
52 // 3218 is class/struct may fail when offloaded because this field is misaligned
53 // or contains data that is misaligned
54 #pragma warning(push)
55 #pragma warning(disable:2571 3218)
56 #endif
57 #define protected public
58 #define private public
59 #include "../../src/tbbmalloc/frontend.cpp"
60 #undef protected
61 #undef private
62 #if __INTEL_COMPILER && __TBB_MIC_OFFLOAD
63 #pragma warning(pop)
64 #endif
65 #include "../../src/tbbmalloc/backend.cpp"
66 #include "../../src/tbbmalloc/backref.cpp"
67
68 namespace tbbmalloc_whitebox {
69 std::atomic<size_t> locGetProcessed{};
70 std::atomic<size_t> locPutProcessed{};
71 }
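// Note: the counters above are checked by the LOC collapsing tests below
// (TestCollapsingMallocFree/TestCollapsingBootstrap); they are presumed to be incremented
// by the large object cache implementation included from large_objects.cpp whenever a
// cached get/put operation is processed under __TBB_MALLOC_WHITEBOX_TEST.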
72 #include "../../src/tbbmalloc/large_objects.cpp"
73 #include "../../src/tbbmalloc/tbbmalloc.cpp"
74
75 const int LARGE_MEM_SIZES_NUM = 10;
76 static const int MinThread = 1;
77 static const int MaxThread = 4;
78
79 class AllocInfo {
80 int *p;
81 int val;
82 int size;
83 public:
    AllocInfo() : p(nullptr), val(0), size(0) {}
    explicit AllocInfo(int sz) : p((int*)scalable_malloc(sz*sizeof(int))),
86 val(rand()), size(sz) {
87 REQUIRE(p);
88 for (int k=0; k<size; k++)
89 p[k] = val;
90 }
    void check() const {
        for (int k=0; k<size; k++)
            ASSERT(p[k] == val, nullptr);
    }
    void clear() {
96 scalable_free(p);
97 }
98 };
99
100 // Test struct to call ProcessShutdown after all tests
101 struct ShutdownTest {
    ~ShutdownTest() {
103 #if _WIN32 || _WIN64
104 __TBB_mallocProcessShutdownNotification(true);
105 #else
106 __TBB_mallocProcessShutdownNotification(false);
107 #endif
108 }
109 };
110
111 static ShutdownTest shutdownTest;
112
113 class SimpleBarrier: utils::NoAssign {
114 protected:
115 static utils::SpinBarrier barrier;
116 public:
    static void initBarrier(unsigned thrds) { barrier.initialize(thrds); }
118 };
119
120 utils::SpinBarrier SimpleBarrier::barrier;
121
122 class TestLargeObjCache: public SimpleBarrier {
123 public:
124 static int largeMemSizes[LARGE_MEM_SIZES_NUM];
125
    TestLargeObjCache() {}

    void operator()( int /*mynum*/ ) const {
129 AllocInfo allocs[LARGE_MEM_SIZES_NUM];
130
131 // push to maximal cache limit
132 for (int i=0; i<2; i++) {
133 const int sizes[] = { MByte/sizeof(int),
134 (MByte-2*LargeObjectCache::LargeBSProps::CacheStep)/sizeof(int) };
135 for (int q=0; q<2; q++) {
136 size_t curr = 0;
137 for (int j=0; j<LARGE_MEM_SIZES_NUM; j++, curr++)
138 new (allocs+curr) AllocInfo(sizes[q]);
139
140 for (size_t j=0; j<curr; j++) {
141 allocs[j].check();
142 allocs[j].clear();
143 }
144 }
145 }
146
147 barrier.wait();
148
149 // check caching correctness
150 for (int i=0; i<1000; i++) {
151 size_t curr = 0;
152 for (int j=0; j<LARGE_MEM_SIZES_NUM-1; j++, curr++)
153 new (allocs+curr) AllocInfo(largeMemSizes[j]);
154
155 new (allocs+curr)
156 AllocInfo((int)(4*minLargeObjectSize +
157 2*minLargeObjectSize*(1.*rand()/RAND_MAX)));
158 curr++;
159
160 for (size_t j=0; j<curr; j++) {
161 allocs[j].check();
162 allocs[j].clear();
163 }
164 }
165 }
166 };
167
168 int TestLargeObjCache::largeMemSizes[LARGE_MEM_SIZES_NUM];
169
void TestLargeObjectCache()
171 {
172 for (int i=0; i<LARGE_MEM_SIZES_NUM; i++)
173 TestLargeObjCache::largeMemSizes[i] =
174 (int)(minLargeObjectSize + 2*minLargeObjectSize*(1.*rand()/RAND_MAX));
175
176 for( int p=MaxThread; p>=MinThread; --p ) {
177 TestLargeObjCache::initBarrier( p );
178 utils::NativeParallelFor( p, TestLargeObjCache() );
179 }
180 }
181
182 #if MALLOC_CHECK_RECURSION
183
184 class TestStartupAlloc: public SimpleBarrier {
185 struct TestBlock {
186 void *ptr;
187 size_t sz;
188 };
189 static const int ITERS = 100;
190 public:
    TestStartupAlloc() {}
    void operator()(int) const {
193 TestBlock blocks1[ITERS], blocks2[ITERS];
194
195 barrier.wait();
196
197 for (int i=0; i<ITERS; i++) {
198 blocks1[i].sz = rand() % minLargeObjectSize;
199 blocks1[i].ptr = StartupBlock::allocate(blocks1[i].sz);
200 REQUIRE((blocks1[i].ptr && StartupBlock::msize(blocks1[i].ptr)>=blocks1[i].sz
201 && 0==(uintptr_t)blocks1[i].ptr % sizeof(void*)));
202 memset(blocks1[i].ptr, i, blocks1[i].sz);
203 }
204 for (int i=0; i<ITERS; i++) {
205 blocks2[i].sz = rand() % minLargeObjectSize;
206 blocks2[i].ptr = StartupBlock::allocate(blocks2[i].sz);
207 REQUIRE((blocks2[i].ptr && StartupBlock::msize(blocks2[i].ptr)>=blocks2[i].sz
208 && 0==(uintptr_t)blocks2[i].ptr % sizeof(void*)));
209 memset(blocks2[i].ptr, i, blocks2[i].sz);
210
211 for (size_t j=0; j<blocks1[i].sz; j++)
212 REQUIRE(*((char*)blocks1[i].ptr+j) == i);
213 Block *block = (Block *)alignDown(blocks1[i].ptr, slabSize);
214 ((StartupBlock *)block)->free(blocks1[i].ptr);
215 }
216 for (int i=ITERS-1; i>=0; i--) {
217 for (size_t j=0; j<blocks2[i].sz; j++)
218 REQUIRE(*((char*)blocks2[i].ptr+j) == i);
219 Block *block = (Block *)alignDown(blocks2[i].ptr, slabSize);
220 ((StartupBlock *)block)->free(blocks2[i].ptr);
221 }
222 }
223 };
224
225 #endif /* MALLOC_CHECK_RECURSION */
226
227 #include <deque>
228
229 template<int ITERS>
230 class BackRefWork: utils::NoAssign {
231 struct TestBlock {
232 BackRefIdx idx;
233 char data;
        TestBlock(BackRefIdx idx_) : idx(idx_) {}
235 };
236 public:
    BackRefWork() {}
    void operator()(int) const {
239 size_t cnt;
240 // it's important to not invalidate pointers to the contents of the container
241 std::deque<TestBlock> blocks;
242
243 // for ITERS==0 consume all available backrefs
244 for (cnt=0; !ITERS || cnt<ITERS; cnt++) {
245 BackRefIdx idx = BackRefIdx::newBackRef(/*largeObj=*/false);
246 if (idx.isInvalid())
247 break;
248 blocks.push_back(TestBlock(idx));
249 setBackRef(blocks.back().idx, &blocks.back().data);
250 }
251 for (size_t i=0; i<cnt; i++)
252 REQUIRE((Block*)&blocks[i].data == getBackRef(blocks[i].idx));
253 for (size_t i=cnt; i>0; i--)
254 removeBackRef(blocks[i-1].idx);
255 }
256 };
257
258 class LocalCachesHit: utils::NoAssign {
259 // set ITERS to trigger possible leak of backreferences
260 // during cleanup on cache overflow and on thread termination
261 static const int ITERS = 2*(FreeBlockPool::POOL_HIGH_MARK +
262 LocalLOC::LOC_HIGH_MARK);
263 public:
    LocalCachesHit() {}
    void operator()(int) const {
266 void *objsSmall[ITERS], *objsLarge[ITERS];
267
268 for (int i=0; i<ITERS; i++) {
269 objsSmall[i] = scalable_malloc(minLargeObjectSize-1);
270 objsLarge[i] = scalable_malloc(minLargeObjectSize);
271 }
272 for (int i=0; i<ITERS; i++) {
273 scalable_free(objsSmall[i]);
274 scalable_free(objsLarge[i]);
275 }
276 }
277 };
278
static size_t allocatedBackRefCount()
280 {
281 size_t cnt = 0;
282 for (int i=0; i<=backRefMain.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed); i++)
283 cnt += backRefMain.load(std::memory_order_relaxed)->backRefBl[i]->allocatedCount;
284 return cnt;
285 }
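// allocatedBackRefCount() walks the global backreference table (backRefMain) and sums the
// allocatedCount of every backreference block up to lastUsed; the tests below compare this
// value before and after a run to detect leaked backreferences.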
286
287 class TestInvalidBackrefs: public SimpleBarrier {
288 #if __ANDROID__
289 // Android requires lower iters due to lack of virtual memory.
290 static const int BACKREF_GROWTH_ITERS = 50*1024;
291 #else
292 static const int BACKREF_GROWTH_ITERS = 200*1024;
293 #endif
294
295 static std::atomic<bool> backrefGrowthDone;
296 static void *ptrs[BACKREF_GROWTH_ITERS];
297 public:
    TestInvalidBackrefs() {}
    void operator()(int id) const {
300
301 if (!id) {
302 backrefGrowthDone = false;
303 barrier.wait();
304
305 for (int i=0; i<BACKREF_GROWTH_ITERS; i++)
306 ptrs[i] = scalable_malloc(minLargeObjectSize);
307 backrefGrowthDone = true;
308 for (int i=0; i<BACKREF_GROWTH_ITERS; i++)
309 scalable_free(ptrs[i]);
310 } else {
311 void *p2 = scalable_malloc(minLargeObjectSize-1);
312 char *p1 = (char*)scalable_malloc(minLargeObjectSize-1);
313 LargeObjectHdr *hdr =
314 (LargeObjectHdr*)(p1+minLargeObjectSize-1 - sizeof(LargeObjectHdr));
315 hdr->backRefIdx.main = 7;
316 hdr->backRefIdx.largeObj = 1;
317 hdr->backRefIdx.offset = 2000;
318
319 barrier.wait();
320
321 int yield_count = 0;
322 while (!backrefGrowthDone) {
323 scalable_free(p2);
324 p2 = scalable_malloc(minLargeObjectSize-1);
325 if (yield_count++ == 100) {
326 yield_count = 0;
327 std::this_thread::yield();
328 }
329 }
330 scalable_free(p1);
331 scalable_free(p2);
332 }
333 }
334 };
335
336 std::atomic<bool> TestInvalidBackrefs::backrefGrowthDone;
337 void *TestInvalidBackrefs::ptrs[BACKREF_GROWTH_ITERS];
338
void TestBackRef() {
340 size_t beforeNumBackRef, afterNumBackRef;
341
342 beforeNumBackRef = allocatedBackRefCount();
343 for( int p=MaxThread; p>=MinThread; --p )
344 utils::NativeParallelFor( p, BackRefWork<2*BR_MAX_CNT+2>() );
345 afterNumBackRef = allocatedBackRefCount();
346 REQUIRE_MESSAGE(beforeNumBackRef==afterNumBackRef, "backreference leak detected");
347 // lastUsed marks peak resource consumption. As we allocate below the mark,
348 // it must not move up, otherwise there is a resource leak.
349 int sustLastUsed = backRefMain.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed);
350 utils::NativeParallelFor( 1, BackRefWork<2*BR_MAX_CNT+2>() );
351 REQUIRE_MESSAGE(sustLastUsed == backRefMain.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed), "backreference leak detected");
352 // check leak of back references while per-thread caches are in use
353 // warm up needed to cover bootStrapMalloc call
354 utils::NativeParallelFor( 1, LocalCachesHit() );
355 beforeNumBackRef = allocatedBackRefCount();
356 utils::NativeParallelFor( 2, LocalCachesHit() );
357 int res = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr);
358 REQUIRE(res == TBBMALLOC_OK);
359 afterNumBackRef = allocatedBackRefCount();
360 REQUIRE_MESSAGE(beforeNumBackRef>=afterNumBackRef, "backreference leak detected");
361
    // This is a regression test against a race condition between backreference
    // extension and the check for an invalid BackRefIdx.
    // When detecting whether an object is large or small, scalable_free first checks for
    // large objects, so there is a chance to prepend a small object with a seemingly
    // valid BackRefIdx for a large object, and thus trigger the bug.
367 TestInvalidBackrefs::initBarrier(MaxThread);
368 utils::NativeParallelFor( MaxThread, TestInvalidBackrefs() );
    // Consume all available backrefs and check that they work correctly.
    // For now, test 32-bit machines only, because on 64-bit the memory consumption is too high.
371 if (sizeof(uintptr_t) == 4)
372 utils::NativeParallelFor( MaxThread, BackRefWork<0>() );
373 }
374
void *getMem(intptr_t /*pool_id*/, size_t &bytes)
376 {
377 const size_t BUF_SIZE = 8*1024*1024;
378 static char space[BUF_SIZE];
379 static size_t pos;
380
381 if (pos + bytes > BUF_SIZE)
382 return nullptr;
383
384 void *ret = space + pos;
385 pos += bytes;
386
387 return ret;
388 }
389
int putMem(intptr_t /*pool_id*/, void* /*raw_ptr*/, size_t /*raw_bytes*/)
391 {
392 return 0;
393 }
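// getMem/putMem implement a fixed-pool policy: getMem hands out chunks from a static 8MB
// buffer and never reuses space, while putMem is a no-op. A minimal sketch of how TestPools
// below wires these callbacks into a pool (all names are taken from this file):
//
//     rml::MemPoolPolicy pol(getMem, putMem);
//     rml::MemoryPool *pool;
//     pool_create_v1(/*pool_id=*/0, &pol, &pool);
//     void *obj = pool_malloc(pool, 16);
//     pool_free(pool, obj);
//     pool_destroy(pool);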
394
395 struct MallocPoolHeader {
396 void *rawPtr;
397 size_t userSize;
398 };
399
void *getMallocMem(intptr_t /*pool_id*/, size_t &bytes)
401 {
402 void *rawPtr = malloc(bytes+sizeof(MallocPoolHeader));
403 void *ret = (void *)((uintptr_t)rawPtr+sizeof(MallocPoolHeader));
404
405 MallocPoolHeader *hdr = (MallocPoolHeader*)ret-1;
406 hdr->rawPtr = rawPtr;
407 hdr->userSize = bytes;
408
409 return ret;
410 }
411
int putMallocMem(intptr_t /*pool_id*/, void *ptr, size_t bytes)
413 {
414 MallocPoolHeader *hdr = (MallocPoolHeader*)ptr-1;
415 ASSERT(bytes == hdr->userSize, "Invalid size in pool callback.");
416 free(hdr->rawPtr);
417
418 return 0;
419 }
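// getMallocMem/putMallocMem back a pool with plain malloc/free. A MallocPoolHeader is prepended
// to every raw region so that putMallocMem can recover the original malloc'ed pointer and verify
// that the size passed back by the allocator matches the size that was originally requested.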
420
421 class StressLOCacheWork: utils::NoAssign {
422 rml::MemoryPool *my_mallocPool;
423 public:
    StressLOCacheWork(rml::MemoryPool *mallocPool) : my_mallocPool(mallocPool) {}
    void operator()(int) const {
426 for (size_t sz=minLargeObjectSize; sz<1*1024*1024;
427 sz+=LargeObjectCache::LargeBSProps::CacheStep) {
428 void *ptr = pool_malloc(my_mallocPool, sz);
429 REQUIRE_MESSAGE(ptr, "Memory was not allocated");
430 memset(ptr, sz, sz);
431 pool_free(my_mallocPool, ptr);
432 }
433 }
434 };
435
void TestPools() {
437 rml::MemPoolPolicy pol(getMem, putMem);
438 size_t beforeNumBackRef, afterNumBackRef;
439
440 rml::MemoryPool *pool1;
441 rml::MemoryPool *pool2;
442 pool_create_v1(0, &pol, &pool1);
443 pool_create_v1(0, &pol, &pool2);
444 pool_destroy(pool1);
445 pool_destroy(pool2);
446
447 scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr);
448 beforeNumBackRef = allocatedBackRefCount();
449 rml::MemoryPool *fixedPool;
450
451 pool_create_v1(0, &pol, &fixedPool);
452 pol.pAlloc = getMallocMem;
453 pol.pFree = putMallocMem;
454 pol.granularity = 8;
455 rml::MemoryPool *mallocPool;
456
457 pool_create_v1(0, &pol, &mallocPool);
    /* Check that the large object cache (LOC) returns the correct size for cached objects.
       Objects of passBackendSz bytes are cached in the LOC but bypass the backend, so
       their memory is requested directly from the allocation callback.
       Objects of anotherLOCBinSz bytes must fit into another LOC bin,
       so that allocating and releasing them leads to cache cleanup.
       All this is expected to release the passBackendSz-byte object
       from the LOC during LOC cleanup, and putMallocMem checks that the returned size
       is correct.
    */
467 const size_t passBackendSz = Backend::maxBinned_HugePage+1,
468 anotherLOCBinSz = minLargeObjectSize+1;
469 for (int i=0; i<10; i++) { // run long enough to be cached
470 void *p = pool_malloc(mallocPool, passBackendSz);
471 REQUIRE_MESSAGE(p, "Memory was not allocated");
472 pool_free(mallocPool, p);
473 }
    // run long enough so that the passBackendSz allocation is cleaned from the cache
    // and returned to putMallocMem for size checking
476 for (int i=0; i<1000; i++) {
477 void *p = pool_malloc(mallocPool, anotherLOCBinSz);
478 REQUIRE_MESSAGE(p, "Memory was not allocated");
479 pool_free(mallocPool, p);
480 }
481
482 void *smallObj = pool_malloc(fixedPool, 10);
483 REQUIRE_MESSAGE(smallObj, "Memory was not allocated");
484 memset(smallObj, 1, 10);
485 void *ptr = pool_malloc(fixedPool, 1024);
486 REQUIRE_MESSAGE(ptr, "Memory was not allocated");
487 memset(ptr, 1, 1024);
488 void *largeObj = pool_malloc(fixedPool, minLargeObjectSize);
489 REQUIRE_MESSAGE(largeObj, "Memory was not allocated");
490 memset(largeObj, 1, minLargeObjectSize);
491 ptr = pool_malloc(fixedPool, minLargeObjectSize);
492 REQUIRE_MESSAGE(ptr, "Memory was not allocated");
493 memset(ptr, minLargeObjectSize, minLargeObjectSize);
494 pool_malloc(fixedPool, 10*minLargeObjectSize); // no leak for unsuccessful allocations
495 pool_free(fixedPool, smallObj);
496 pool_free(fixedPool, largeObj);
497
    // provoke large object cache cleanup and hope no leaks occur
499 for( int p=MaxThread; p>=MinThread; --p )
500 utils::NativeParallelFor( p, StressLOCacheWork(mallocPool) );
501 pool_destroy(mallocPool);
502 pool_destroy(fixedPool);
503
504 scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr);
505 afterNumBackRef = allocatedBackRefCount();
506 REQUIRE_MESSAGE(beforeNumBackRef==afterNumBackRef, "backreference leak detected");
507
508 {
509 // test usedSize/cachedSize and LOC bitmask correctness
510 void *p[5];
511 pool_create_v1(0, &pol, &mallocPool);
512 const LargeObjectCache *loc = &((rml::internal::MemoryPool*)mallocPool)->extMemPool.loc;
513 const int LargeCacheStep = LargeObjectCache::LargeBSProps::CacheStep;
514 p[3] = pool_malloc(mallocPool, minLargeObjectSize+2*LargeCacheStep);
515 for (int i=0; i<10; i++) {
516 p[0] = pool_malloc(mallocPool, minLargeObjectSize);
517 p[1] = pool_malloc(mallocPool, minLargeObjectSize+LargeCacheStep);
518 pool_free(mallocPool, p[0]);
519 pool_free(mallocPool, p[1]);
520 }
521 REQUIRE(loc->getUsedSize());
522 pool_free(mallocPool, p[3]);
523 REQUIRE(loc->getLOCSize() < 3*(minLargeObjectSize+LargeCacheStep));
524 const size_t maxLocalLOCSize = LocalLOCImpl<3,30>::getMaxSize();
525 REQUIRE(loc->getUsedSize() <= maxLocalLOCSize);
526 for (int i=0; i<3; i++)
527 p[i] = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep);
528 size_t currUser = loc->getUsedSize();
529 REQUIRE((!loc->getLOCSize() && currUser >= 3*(minLargeObjectSize+LargeCacheStep)));
530 p[4] = pool_malloc(mallocPool, minLargeObjectSize+3*LargeCacheStep);
531 REQUIRE(loc->getUsedSize() - currUser >= minLargeObjectSize+3*LargeCacheStep);
532 pool_free(mallocPool, p[4]);
533 REQUIRE(loc->getUsedSize() <= currUser+maxLocalLOCSize);
534 pool_reset(mallocPool);
535 REQUIRE((!loc->getLOCSize() && !loc->getUsedSize()));
536 pool_destroy(mallocPool);
537 }
    // To test the LOC we need bigger lists than the production LocalLOC releases,
    // so create a special LocalLOC.
540 {
541 LocalLOCImpl<2, 20> lLOC;
542 pool_create_v1(0, &pol, &mallocPool);
543 rml::internal::ExtMemoryPool *mPool = &((rml::internal::MemoryPool*)mallocPool)->extMemPool;
544 const LargeObjectCache *loc = &((rml::internal::MemoryPool*)mallocPool)->extMemPool.loc;
545 const int LargeCacheStep = LargeObjectCache::LargeBSProps::CacheStep;
546 for (int i=0; i<22; i++) {
547 void *o = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep);
548 bool ret = lLOC.put(((LargeObjectHdr*)o - 1)->memoryBlock, mPool);
549 REQUIRE(ret);
550
551 o = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep);
552 ret = lLOC.put(((LargeObjectHdr*)o - 1)->memoryBlock, mPool);
553 REQUIRE(ret);
554 }
555 lLOC.externalCleanup(mPool);
556 REQUIRE(!loc->getUsedSize());
557
558 pool_destroy(mallocPool);
559 }
560 }
561
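// TestObjectRecognition forges headers that merely look like valid small- and large-object
// metadata (a fake Block::objectSize and a fake LargeObjectHdr with an out-of-range BackRefIdx)
// and checks that __TBB_malloc_safer_msize rejects such pointers while still accepting
// genuinely allocated ones.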
void TestObjectRecognition() {
563 size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr);
564 unsigned falseObjectSize = 113; // unsigned is the type expected by getObjectSize
565 size_t obtainedSize;
566
567 REQUIRE_MESSAGE(sizeof(BackRefIdx)==sizeof(uintptr_t), "Unexpected size of BackRefIdx");
568 REQUIRE_MESSAGE(getObjectSize(falseObjectSize)!=falseObjectSize, "Error in test: bad choice for false object size");
569
570 void* mem = scalable_malloc(2*slabSize);
571 REQUIRE_MESSAGE(mem, "Memory was not allocated");
572 Block* falseBlock = (Block*)alignUp((uintptr_t)mem, slabSize);
573 falseBlock->objectSize = falseObjectSize;
574 char* falseSO = (char*)falseBlock + falseObjectSize*7;
575 REQUIRE_MESSAGE(alignDown(falseSO, slabSize)==(void*)falseBlock, "Error in test: false object offset is too big");
576
577 void* bufferLOH = scalable_malloc(2*slabSize + headersSize);
578 REQUIRE_MESSAGE(bufferLOH, "Memory was not allocated");
579 LargeObjectHdr* falseLO =
580 (LargeObjectHdr*)alignUp((uintptr_t)bufferLOH + headersSize, slabSize);
581 LargeObjectHdr* headerLO = (LargeObjectHdr*)falseLO-1;
582 headerLO->memoryBlock = (LargeMemoryBlock*)bufferLOH;
583 headerLO->memoryBlock->unalignedSize = 2*slabSize + headersSize;
584 headerLO->memoryBlock->objectSize = slabSize + headersSize;
585 headerLO->backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true);
586 setBackRef(headerLO->backRefIdx, headerLO);
587 REQUIRE_MESSAGE(scalable_msize(falseLO) == slabSize + headersSize,
588 "Error in test: LOH falsification failed");
589 removeBackRef(headerLO->backRefIdx);
590
591 const int NUM_OF_IDX = BR_MAX_CNT+2;
592 BackRefIdx idxs[NUM_OF_IDX];
593 for (int cnt=0; cnt<2; cnt++) {
594 for (int main = -10; main<10; main++) {
595 falseBlock->backRefIdx.main = (uint16_t)main;
596 headerLO->backRefIdx.main = (uint16_t)main;
597
598 for (int bl = -10; bl<BR_MAX_CNT+10; bl++) {
599 falseBlock->backRefIdx.offset = (uint16_t)bl;
600 headerLO->backRefIdx.offset = (uint16_t)bl;
601
602 for (int largeObj = 0; largeObj<2; largeObj++) {
603 falseBlock->backRefIdx.largeObj = largeObj;
604 headerLO->backRefIdx.largeObj = largeObj;
605
606 obtainedSize = __TBB_malloc_safer_msize(falseSO, nullptr);
607 REQUIRE_MESSAGE(obtainedSize==0, "Incorrect pointer accepted");
608 obtainedSize = __TBB_malloc_safer_msize(falseLO, nullptr);
609 REQUIRE_MESSAGE(obtainedSize==0, "Incorrect pointer accepted");
610 }
611 }
612 }
613 if (cnt == 1) {
614 for (int i=0; i<NUM_OF_IDX; i++)
615 removeBackRef(idxs[i]);
616 break;
617 }
618 for (int i=0; i<NUM_OF_IDX; i++) {
619 idxs[i] = BackRefIdx::newBackRef(/*largeObj=*/false);
620 setBackRef(idxs[i], nullptr);
621 }
622 }
623 char *smallPtr = (char*)scalable_malloc(falseObjectSize);
624 obtainedSize = __TBB_malloc_safer_msize(smallPtr, nullptr);
625 REQUIRE_MESSAGE(obtainedSize==getObjectSize(falseObjectSize), "Correct pointer not accepted?");
626 scalable_free(smallPtr);
627
628 obtainedSize = __TBB_malloc_safer_msize(mem, nullptr);
629 REQUIRE_MESSAGE(obtainedSize>=2*slabSize, "Correct pointer not accepted?");
630 scalable_free(mem);
631 scalable_free(bufferLOH);
632 }
633
634 class TestBackendWork: public SimpleBarrier {
635 struct TestBlock {
636 intptr_t data;
637 BackRefIdx idx;
638 };
639 static const int ITERS = 20;
640
641 rml::internal::Backend *backend;
642 public:
    TestBackendWork(rml::internal::Backend *bknd) : backend(bknd) {}
    void operator()(int) const {
645 barrier.wait();
646
647 for (int i=0; i<ITERS; i++) {
648 BlockI *slabBlock = backend->getSlabBlock(1);
649 REQUIRE_MESSAGE(slabBlock, "Memory was not allocated");
650 uintptr_t prevBlock = (uintptr_t)slabBlock;
651 backend->putSlabBlock(slabBlock);
652
653 LargeMemoryBlock *largeBlock = backend->getLargeBlock(16*1024);
654 REQUIRE_MESSAGE(largeBlock, "Memory was not allocated");
655 REQUIRE_MESSAGE((uintptr_t)largeBlock != prevBlock,
656 "Large block cannot be reused from slab memory, only in fixed_pool case.");
657 backend->putLargeBlock(largeBlock);
658 }
659 }
660 };
661
void TestBackend()
663 {
664 rml::MemPoolPolicy pol(getMallocMem, putMallocMem);
665 rml::MemoryPool *mPool;
666 pool_create_v1(0, &pol, &mPool);
667 rml::internal::ExtMemoryPool *ePool = &((rml::internal::MemoryPool*)mPool)->extMemPool;
668 rml::internal::Backend *backend = &ePool->backend;
669
670 for( int p=MaxThread; p>=MinThread; --p ) {
        // regression test against a race condition in backend synchronization,
        // triggered only when the WhiteboxTestingYield() call yields
673 #if TBB_USE_DEBUG
674 int num_iters = 10;
675 #else
676 int num_iters = 100;
677 #endif
678 for (int i = 0; i < num_iters; i++) {
679 TestBackendWork::initBarrier(p);
680 utils::NativeParallelFor( p, TestBackendWork(backend) );
681 }
682 }
683
684 BlockI *block = backend->getSlabBlock(1);
685 REQUIRE_MESSAGE(block, "Memory was not allocated");
686 backend->putSlabBlock(block);
687
    // Check that the backend increases and decreases the amount of allocated memory when memory is allocated and freed.
689 const size_t memSize0 = backend->getTotalMemSize();
690 LargeMemoryBlock *lmb = backend->getLargeBlock(4*MByte);
691 REQUIRE( lmb );
692
693 const size_t memSize1 = backend->getTotalMemSize();
    REQUIRE_MESSAGE( (intptr_t)(memSize1-memSize0) >= 4*MByte, "The backend has not increased the amount of memory in use." );
695
696 backend->putLargeBlock(lmb);
697 const size_t memSize2 = backend->getTotalMemSize();
    REQUIRE_MESSAGE( memSize2 == memSize0, "The backend has not decreased the amount of memory in use." );
699
700 pool_destroy(mPool);
701 }
702
void TestBitMask()
704 {
705 BitMaskMin<256> mask;
706
707 mask.reset();
708 mask.set(10, 1);
709 mask.set(5, 1);
710 mask.set(1, 1);
711 REQUIRE(mask.getMinTrue(2) == 5);
712
713 mask.reset();
714 mask.set(0, 1);
715 mask.set(64, 1);
716 mask.set(63, 1);
717 mask.set(200, 1);
718 mask.set(255, 1);
719 REQUIRE(mask.getMinTrue(0) == 0);
720 REQUIRE(mask.getMinTrue(1) == 63);
721 REQUIRE(mask.getMinTrue(63) == 63);
722 REQUIRE(mask.getMinTrue(64) == 64);
723 REQUIRE(mask.getMinTrue(101) == 200);
724 REQUIRE(mask.getMinTrue(201) == 255);
725 mask.set(255, 0);
726 REQUIRE(mask.getMinTrue(201) == -1);
727 }
728
size_t getMemSize()
730 {
731 return defaultMemPool->extMemPool.backend.getTotalMemSize();
732 }
733
734 class CheckNotCached {
735 static size_t memSize;
736 public:
    void operator() () const {
738 int res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1);
739 REQUIRE(res == TBBMALLOC_OK);
740 if (memSize==(size_t)-1) {
741 memSize = getMemSize();
742 } else {
743 REQUIRE(getMemSize() == memSize);
744 memSize=(size_t)-1;
745 }
746 }
747 };
748
749 size_t CheckNotCached::memSize = (size_t)-1;
750
751 class RunTestHeapLimit: public SimpleBarrier {
752 public:
    void operator()( int /*mynum*/ ) const {
        // Provoke bootstrap heap initialization before recording the memory size.
        // NOTE: The initialization should be done only with a "large" object,
        // because a "small" object allocation pins a slab as the active block,
        // and it is impossible to release it from a foreign thread.
759 scalable_free(scalable_malloc(minLargeObjectSize));
760 barrier.wait(CheckNotCached());
761 for (size_t n = minLargeObjectSize; n < 5*1024*1024; n += 128*1024)
762 scalable_free(scalable_malloc(n));
763 barrier.wait(CheckNotCached());
764 }
765 };
766
void TestHeapLimit()
768 {
769 if(!isMallocInitialized()) doInitialization();
770 // tiny limit to stop caching
771 int res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1);
772 REQUIRE(res == TBBMALLOC_OK);
773 // Provoke bootstrap heap initialization before recording memory size.
774 scalable_free(scalable_malloc(8));
775 size_t n, sizeBefore = getMemSize();
776
    // Try to provoke a call to the OS for memory to check that
    // requests are not fulfilled from caches.
    // A single call is not enough here because of backend fragmentation.
780 for (n = minLargeObjectSize; n < 10*1024*1024; n += 16*1024) {
781 void *p = scalable_malloc(n);
782 bool leave = (sizeBefore != getMemSize());
783 scalable_free(p);
784 if (leave)
785 break;
786 REQUIRE_MESSAGE(sizeBefore == getMemSize(), "No caching expected");
787 }
788 REQUIRE_MESSAGE(n < 10*1024*1024, "scalable_malloc doesn't provoke OS request for memory, "
789 "is some internal cache still used?");
790
791 for( int p=MaxThread; p>=MinThread; --p ) {
792 RunTestHeapLimit::initBarrier( p );
793 utils::NativeParallelFor( p, RunTestHeapLimit() );
794 }
    // the call both sets the limit and tries to match it, so call it here
796 res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1);
797 REQUIRE(res == TBBMALLOC_OK);
798 size_t m = getMemSize();
799 REQUIRE(sizeBefore == m);
800 // restore default
801 res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 0);
802 REQUIRE(res == TBBMALLOC_OK);
803 }
804
void checkNoHugePages()
806 {
807 REQUIRE_MESSAGE(!hugePages.isEnabled, "scalable_allocation_mode "
808 "must have priority over environment variable");
809 }
810
811 /*---------------------------------------------------------------------------*/
812 // The regression test against bugs in TBBMALLOC_CLEAN_ALL_BUFFERS allocation command.
813 // The idea is to allocate and deallocate a set of objects randomly in parallel.
814 // For large sizes (16K), it forces conflicts in backend during coalescing.
815 // For small sizes (4K), it forces cross-thread deallocations and then orphaned slabs.
816 // Global cleanup should process orphaned slabs and the queue of postponed coalescing
817 // requests, otherwise it will not be able to unmap all unused memory.
818
819 const int num_allocs = 10*1024;
820 void *ptrs[num_allocs];
821 std::atomic<int> alloc_counter;
822 static thread_local bool free_was_called = false;
823
inline void multiThreadAlloc(size_t alloc_size) {
825 for( int i = alloc_counter++; i < num_allocs; i = alloc_counter++ ) {
826 ptrs[i] = scalable_malloc( alloc_size );
827 REQUIRE_MESSAGE( ptrs[i] != nullptr, "scalable_malloc returned zero." );
828 }
829 }
inline void crossThreadDealloc() {
831 free_was_called = false;
832 for( int i = --alloc_counter; i >= 0; i = --alloc_counter ) {
833 if (i < num_allocs) {
834 scalable_free(ptrs[i]);
835 free_was_called = true;
836 }
837 }
838 }
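// multiThreadAlloc and crossThreadDealloc coordinate through the global alloc_counter:
// each thread claims slots in ptrs[] by atomically incrementing the counter while allocating,
// then releases slots by atomically decrementing it, so an object is typically freed by a
// thread other than the one that allocated it, forcing cross-thread deallocation.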
839
840 template<int AllocSize>
841 struct TestCleanAllBuffersBody : public SimpleBarrier {
    void operator() ( int ) const {
843 barrier.wait();
844 multiThreadAlloc(AllocSize);
845 barrier.wait();
846 crossThreadDealloc();
847 }
848 };
849
850 template<int AllocSize>
void TestCleanAllBuffers() {
852 const int num_threads = 8;
853 // Clean up if something was allocated before the test
854 scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,nullptr);
855
856 size_t memory_in_use_before = getMemSize();
857 alloc_counter = 0;
858 TestCleanAllBuffersBody<AllocSize>::initBarrier(num_threads);
859
860 utils::NativeParallelFor(num_threads, TestCleanAllBuffersBody<AllocSize>());
861 // TODO: reproduce the bug conditions more reliably
862 if ( defaultMemPool->extMemPool.backend.coalescQ.blocksToFree.load(std::memory_order_relaxed) == nullptr ) {
863 INFO( "Warning: The queue of postponed coalescing requests is empty. ");
864 INFO( "Unable to create the condition for bug reproduction.\n" );
865 }
866 int result = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,nullptr);
867 REQUIRE_MESSAGE( result == TBBMALLOC_OK, "The cleanup request has not cleaned anything." );
868 size_t memory_in_use_after = getMemSize();
869
870 size_t memory_leak = memory_in_use_after - memory_in_use_before;
871 INFO( "memory_in_use_before = " << memory_in_use_before << ", memory_in_use_after = " << memory_in_use_after << "\n" );
872 REQUIRE_MESSAGE( memory_leak == 0, "Cleanup was unable to release all allocated memory." );
873 }
874
//! Force cross-thread deallocation of small objects to create a set of privatizable slab blocks.
//! The TBBMALLOC_CLEAN_THREAD_BUFFERS command has to privatize all such blocks.
877 struct TestCleanThreadBuffersBody : public SimpleBarrier {
    void operator() ( int ) const {
879 barrier.wait();
880 multiThreadAlloc(2*1024);
881 barrier.wait();
882 crossThreadDealloc();
883 barrier.wait();
884 int result = scalable_allocation_command(TBBMALLOC_CLEAN_THREAD_BUFFERS,nullptr);
885 if (result != TBBMALLOC_OK && free_was_called) {
886 REPORT("Warning: clean-up request for this particular thread has not cleaned anything.");
887 }
888
889 // Check that TLS was cleaned fully
890 TLSData *tlsCurr = defaultMemPool->getTLS(/*create=*/false);
891 if (tlsCurr) {
892 for (int i = 0; i < numBlockBinLimit; i++) {
893 REQUIRE_MESSAGE(!(tlsCurr->bin[i].activeBlk), "Some bin was not cleaned.");
894 }
895 REQUIRE_MESSAGE(!(tlsCurr->lloc.head.load(std::memory_order_relaxed)), "Local LOC was not cleaned.");
896 REQUIRE_MESSAGE(!(tlsCurr->freeSlabBlocks.head.load(std::memory_order_relaxed)), "Free Block pool was not cleaned.");
897 }
898 }
899 };
900
void TestCleanThreadBuffers() {
902 const int num_threads = 8;
903 // Clean up if something was allocated before the test
904 scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,nullptr);
905
906 alloc_counter = 0;
907 TestCleanThreadBuffersBody::initBarrier(num_threads);
908 utils::NativeParallelFor(num_threads, TestCleanThreadBuffersBody());
909 }
910
911 /*---------------------------------------------------------------------------*/
912 /*------------------------- Large Object Cache tests ------------------------*/
913 #if _MSC_VER==1600 || _MSC_VER==1500
914 // ignore C4275: non dll-interface class 'stdext::exception' used as
915 // base for dll-interface class 'std::bad_cast'
916 #pragma warning (disable: 4275)
917 #endif
918 #include <vector>
919 #include <list>
920
921 // default constructor of CacheBin
922 template<typename Props>
rml::internal::LargeObjectCacheImpl<Props>::CacheBin::CacheBin() {}
924
925 template<typename Props>
926 class CacheBinModel {
927
928 typedef typename rml::internal::LargeObjectCacheImpl<Props>::CacheBin CacheBinType;
929
930 // The emulated cache bin.
931 CacheBinType cacheBinModel;
932 // The reference to real cache bin inside the large object cache.
933 CacheBinType &cacheBin;
934
935 const size_t size;
936
    // only the caching time of each object is stored
938 std::list<uintptr_t> objects;
939
    void doCleanup() {
941 if ( cacheBinModel.cachedSize.load(std::memory_order_relaxed) >
942 Props::TooLargeFactor*cacheBinModel.usedSize.load(std::memory_order_relaxed)) tooLargeLOC++;
943 else tooLargeLOC = 0;
944
945 intptr_t threshold = cacheBinModel.ageThreshold.load(std::memory_order_relaxed);
946 if (tooLargeLOC > 3 && threshold) {
947 threshold = (threshold + cacheBinModel.meanHitRange.load(std::memory_order_relaxed)) / 2;
948 cacheBinModel.ageThreshold.store(threshold, std::memory_order_relaxed);
949 }
950
951 uintptr_t currTime = cacheCurrTime;
952 while (!objects.empty() && (intptr_t)(currTime - objects.front()) > threshold) {
953 cacheBinModel.cachedSize.store(cacheBinModel.cachedSize.load(std::memory_order_relaxed) - size, std::memory_order_relaxed);
954 cacheBinModel.lastCleanedAge = objects.front();
955 objects.pop_front();
956 }
957
958 cacheBinModel.oldest.store(objects.empty() ? 0 : objects.front(), std::memory_order_relaxed);
959 }
960
961 public:
    CacheBinModel(CacheBinType &_cacheBin, size_t allocSize) : cacheBin(_cacheBin), size(allocSize) {
963 cacheBinModel.oldest.store(cacheBin.oldest.load(std::memory_order_relaxed), std::memory_order_relaxed);
964 cacheBinModel.lastCleanedAge = cacheBin.lastCleanedAge;
965 cacheBinModel.ageThreshold.store(cacheBin.ageThreshold.load(std::memory_order_relaxed), std::memory_order_relaxed);
966 cacheBinModel.usedSize.store(cacheBin.usedSize.load(std::memory_order_relaxed), std::memory_order_relaxed);
967 cacheBinModel.cachedSize.store(cacheBin.cachedSize.load(std::memory_order_relaxed), std::memory_order_relaxed);
968 cacheBinModel.meanHitRange.store(cacheBin.meanHitRange.load(std::memory_order_relaxed), std::memory_order_relaxed);
969 cacheBinModel.lastGet = cacheBin.lastGet;
970 }
    void get() {
972 uintptr_t currTime = ++cacheCurrTime;
973
974 if ( objects.empty() ) {
975 const uintptr_t sinceLastGet = currTime - cacheBinModel.lastGet;
976 intptr_t threshold = cacheBinModel.ageThreshold.load(std::memory_order_relaxed);
977 if ((threshold && sinceLastGet > Props::LongWaitFactor * threshold) ||
978 (cacheBinModel.lastCleanedAge && sinceLastGet > Props::LongWaitFactor * (cacheBinModel.lastCleanedAge - cacheBinModel.lastGet))) {
979 cacheBinModel.lastCleanedAge = 0;
980 cacheBinModel.ageThreshold.store(0, std::memory_order_relaxed);
981 }
982
983 if (cacheBinModel.lastCleanedAge)
984 cacheBinModel.ageThreshold.store(Props::OnMissFactor * (currTime - cacheBinModel.lastCleanedAge), std::memory_order_relaxed);
985 } else {
986 uintptr_t obj_age = objects.back();
987 objects.pop_back();
988 if (objects.empty()) cacheBinModel.oldest.store(0, std::memory_order_relaxed);
989
990 intptr_t hitRange = currTime - obj_age;
991 intptr_t mean = cacheBinModel.meanHitRange.load(std::memory_order_relaxed);
992 mean = mean ? (mean + hitRange) / 2 : hitRange;
993 cacheBinModel.meanHitRange.store(mean, std::memory_order_relaxed);
994
995 cacheBinModel.cachedSize.store(cacheBinModel.cachedSize.load(std::memory_order_relaxed) - size, std::memory_order_relaxed);
996 }
997
998 cacheBinModel.usedSize.store(cacheBinModel.usedSize.load(std::memory_order_relaxed) + size, std::memory_order_relaxed);
999 cacheBinModel.lastGet = currTime;
1000
1001 if ( currTime % rml::internal::cacheCleanupFreq == 0 ) doCleanup();
1002 }
1003
    void putList( int num ) {
1005 uintptr_t currTime = cacheCurrTime;
1006 cacheCurrTime += num;
1007
1008 cacheBinModel.usedSize.store(cacheBinModel.usedSize.load(std::memory_order_relaxed) - num * size, std::memory_order_relaxed);
1009
1010 bool cleanUpNeeded = false;
1011 if ( !cacheBinModel.lastCleanedAge ) {
1012 cacheBinModel.lastCleanedAge = ++currTime;
1013 cleanUpNeeded |= currTime % rml::internal::cacheCleanupFreq == 0;
1014 num--;
1015 }
1016
1017 for ( int i=1; i<=num; ++i ) {
1018 currTime+=1;
1019 cleanUpNeeded |= currTime % rml::internal::cacheCleanupFreq == 0;
1020 if (objects.empty())
1021 cacheBinModel.oldest.store(currTime, std::memory_order_relaxed);
1022 objects.push_back(currTime);
1023 }
1024
1025 cacheBinModel.cachedSize.store(cacheBinModel.cachedSize.load(std::memory_order_relaxed) + num * size, std::memory_order_relaxed);
1026
1027 if ( cleanUpNeeded ) doCleanup();
1028 }
1029
    void check() {
1031 CHECK_FAST(cacheBinModel.oldest.load(std::memory_order_relaxed) == cacheBin.oldest.load(std::memory_order_relaxed));
1032 CHECK_FAST(cacheBinModel.lastCleanedAge == cacheBin.lastCleanedAge);
1033 CHECK_FAST(cacheBinModel.ageThreshold.load(std::memory_order_relaxed) == cacheBin.ageThreshold.load(std::memory_order_relaxed));
1034 CHECK_FAST(cacheBinModel.usedSize.load(std::memory_order_relaxed) == cacheBin.usedSize.load(std::memory_order_relaxed));
1035 CHECK_FAST(cacheBinModel.cachedSize.load(std::memory_order_relaxed) == cacheBin.cachedSize.load(std::memory_order_relaxed));
1036 CHECK_FAST(cacheBinModel.meanHitRange.load(std::memory_order_relaxed) == cacheBin.meanHitRange.load(std::memory_order_relaxed));
1037 CHECK_FAST(cacheBinModel.lastGet == cacheBin.lastGet);
1038 }
1039
1040 static uintptr_t cacheCurrTime;
1041 static intptr_t tooLargeLOC;
1042 };
1043
1044 template<typename Props> uintptr_t CacheBinModel<Props>::cacheCurrTime;
1045 template<typename Props> intptr_t CacheBinModel<Props>::tooLargeLOC;
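// CacheBinModel mirrors the aging and cleanup logic of a single LargeObjectCache bin:
// get() and putList() update the emulated bin the way the real cache is expected to behave,
// and check() compares every tracked field against the real bin after each scenario step.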
1046
1047 template <typename Scenario>
void LOCModelTester() {
1049 defaultMemPool->extMemPool.loc.cleanAll();
1050 defaultMemPool->extMemPool.loc.reset();
1051
1052 const size_t size = 16 * 1024;
1053 const size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr);
1054 const size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment);
1055 const int binIdx = defaultMemPool->extMemPool.loc.largeCache.sizeToIdx( allocationSize );
1056
1057 CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps>::cacheCurrTime = defaultMemPool->extMemPool.loc.cacheCurrTime;
1058 CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps>::tooLargeLOC = defaultMemPool->extMemPool.loc.largeCache.tooLargeLOC;
1059 CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps> cacheBinModel(defaultMemPool->extMemPool.loc.largeCache.bin[binIdx], allocationSize);
1060
1061 Scenario scen;
1062 for (rml::internal::LargeMemoryBlock *lmb = scen.next(); (intptr_t)lmb != (intptr_t)-1; lmb = scen.next()) {
1063 if ( lmb ) {
1064 int num=1;
1065 for (rml::internal::LargeMemoryBlock *curr = lmb; curr->next; curr=curr->next) num+=1;
1066 defaultMemPool->extMemPool.freeLargeObject(lmb);
1067 cacheBinModel.putList(num);
1068 } else {
1069 scen.saveLmb(defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize));
1070 cacheBinModel.get();
1071 }
1072
1073 cacheBinModel.check();
1074 }
1075 }
1076
1077 class TestBootstrap {
1078 bool allocating;
1079 std::vector<rml::internal::LargeMemoryBlock*> lmbArray;
1080 public:
    TestBootstrap() : allocating(true) {}
1082
    rml::internal::LargeMemoryBlock* next() {
1084 if ( allocating )
1085 return nullptr;
1086 if ( !lmbArray.empty() ) {
1087 rml::internal::LargeMemoryBlock *ret = lmbArray.back();
1088 lmbArray.pop_back();
1089 return ret;
1090 }
1091 return (rml::internal::LargeMemoryBlock*)-1;
1092 }
1093
    void saveLmb( rml::internal::LargeMemoryBlock *lmb ) {
1095 lmb->next = nullptr;
1096 lmbArray.push_back(lmb);
1097 if ( lmbArray.size() == 1000 ) allocating = false;
1098 }
1099 };
1100
1101 class TestRandom {
1102 std::vector<rml::internal::LargeMemoryBlock*> lmbArray;
1103 int numOps;
1104 public:
    TestRandom() : numOps(100000) {
1106 srand(1234);
1107 }
1108
    rml::internal::LargeMemoryBlock* next() {
1110 if ( numOps-- ) {
1111 if ( lmbArray.empty() || rand() / (RAND_MAX>>1) == 0 )
1112 return nullptr;
1113 size_t ind = rand()%lmbArray.size();
1114 if ( ind != lmbArray.size()-1 ) std::swap(lmbArray[ind],lmbArray[lmbArray.size()-1]);
1115 rml::internal::LargeMemoryBlock *lmb = lmbArray.back();
1116 lmbArray.pop_back();
1117 return lmb;
1118 }
1119 return (rml::internal::LargeMemoryBlock*)-1;
1120 }
1121
    void saveLmb( rml::internal::LargeMemoryBlock *lmb ) {
1123 lmb->next = nullptr;
1124 lmbArray.push_back(lmb);
1125 }
1126 };
1127
1128 class TestCollapsingMallocFree : public SimpleBarrier {
1129 public:
1130 static const int NUM_ALLOCS = 100000;
1131 const int num_threads;
1132
    TestCollapsingMallocFree( int _num_threads ) : num_threads(_num_threads) {
1134 initBarrier( num_threads );
1135 }
1136
    void operator() ( int ) const {
1138 const size_t size = 16 * 1024;
1139 const size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr);
1140 const size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment);
1141
1142 barrier.wait();
1143 for ( int i=0; i<NUM_ALLOCS; ++i ) {
1144 defaultMemPool->extMemPool.freeLargeObject(
1145 defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize) );
1146 }
1147 }
1148
    void check() {
1150 REQUIRE( tbbmalloc_whitebox::locGetProcessed == tbbmalloc_whitebox::locPutProcessed);
        REQUIRE_MESSAGE( tbbmalloc_whitebox::locGetProcessed < num_threads*NUM_ALLOCS, "Not a single Malloc/Free pair was collapsed." );
1152 }
1153 };
1154
1155 class TestCollapsingBootstrap : public SimpleBarrier {
1156 class CheckNumAllocs {
1157 const int num_threads;
1158 public:
        CheckNumAllocs( int _num_threads ) : num_threads(_num_threads) {}
        void operator()() const {
1161 REQUIRE( tbbmalloc_whitebox::locGetProcessed == num_threads*NUM_ALLOCS );
1162 REQUIRE( tbbmalloc_whitebox::locPutProcessed == 0 );
1163 }
1164 };
1165 public:
1166 static const int NUM_ALLOCS = 1000;
1167 const int num_threads;
1168
    TestCollapsingBootstrap( int _num_threads ) : num_threads(_num_threads) {
1170 initBarrier( num_threads );
1171 }
1172
    void operator() ( int ) const {
1174 const size_t size = 16 * 1024;
1175 size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr);
1176 size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment);
1177
1178 barrier.wait();
1179 rml::internal::LargeMemoryBlock *lmbArray[NUM_ALLOCS];
1180 for ( int i=0; i<NUM_ALLOCS; ++i )
1181 lmbArray[i] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize);
1182
1183 barrier.wait(CheckNumAllocs(num_threads));
1184 for ( int i=0; i<NUM_ALLOCS; ++i )
1185 defaultMemPool->extMemPool.freeLargeObject( lmbArray[i] );
1186 }
1187
    void check() {
1189 REQUIRE( tbbmalloc_whitebox::locGetProcessed == tbbmalloc_whitebox::locPutProcessed );
1190 REQUIRE( tbbmalloc_whitebox::locGetProcessed == num_threads*NUM_ALLOCS );
1191 }
1192 };
1193
1194 template <typename Scenario>
void LOCCollapsingTester( int num_threads ) {
1196 tbbmalloc_whitebox::locGetProcessed = 0;
1197 tbbmalloc_whitebox::locPutProcessed = 0;
1198 defaultMemPool->extMemPool.loc.cleanAll();
1199 defaultMemPool->extMemPool.loc.reset();
1200
1201 Scenario scen(num_threads);
1202 utils::NativeParallelFor(num_threads, scen);
1203
1204 scen.check();
1205 }
1206
void TestLOC() {
1208 LOCModelTester<TestBootstrap>();
1209 LOCModelTester<TestRandom>();
1210
1211 const int num_threads = 16;
1212 LOCCollapsingTester<TestCollapsingBootstrap>( num_threads );
1213 if ( num_threads > 1 ) {
1214 INFO( "num_threads = " << num_threads );
1215 LOCCollapsingTester<TestCollapsingMallocFree>( num_threads );
1216 } else {
1217 REPORT( "Warning: concurrency is too low for TestMallocFreeCollapsing ( num_threads = %d )\n", num_threads );
1218 }
1219 }
1220 /*---------------------------------------------------------------------------*/
1221
void *findCacheLine(void *p) {
1223 return (void*)alignDown((uintptr_t)p, estimatedCacheLineSize);
1224 }
1225
1226 // test that internals of Block are at expected cache lines
void TestSlabAlignment() {
1228 const size_t min_sz = 8;
1229 const int space = 2*16*1024; // fill at least 2 slabs
1230 void *pointers[space / min_sz]; // the worst case is min_sz byte object
1231
1232 for (size_t sz = min_sz; sz <= 64; sz *= 2) {
1233 for (size_t i = 0; i < space/sz; i++) {
1234 pointers[i] = scalable_malloc(sz);
1235 Block *block = (Block *)alignDown(pointers[i], slabSize);
1236 REQUIRE_MESSAGE(findCacheLine(&block->isFull) != findCacheLine(pointers[i]),
1237 "A user object must not share a cache line with slab control structures.");
1238 REQUIRE_MESSAGE(findCacheLine(&block->next) != findCacheLine(&block->nextPrivatizable),
1239 "GlobalBlockFields and LocalBlockFields must be on different cache lines.");
1240 }
1241 for (size_t i = 0; i < space/sz; i++)
1242 scalable_free(pointers[i]);
1243 }
1244 }
1245
1246 #include "common/memory_usage.h"
1247
1248 // TODO: Consider adding Huge Pages support on macOS (special mmap flag).
// Transparent Huge Pages support could be enabled through a different system parsing mechanism,
// because there is no /proc/meminfo on macOS.
1251 #if __unix__
void TestTHP() {
1253 // Get backend from default memory pool
1254 rml::internal::Backend *backend = &(defaultMemPool->extMemPool.backend);
1255
1256 // Configure malloc to use huge pages
1257 scalable_allocation_mode(USE_HUGE_PAGES, 1);
1258 REQUIRE_MESSAGE(hugePages.isEnabled, "Huge pages should be enabled via scalable_allocation_mode");
1259
1260 const int HUGE_PAGE_SIZE = 2 * 1024 * 1024;
1261
1262 // allocCount transparent huge pages should be allocated
1263 const int allocCount = 10;
1264
1265 // Allocate huge page aligned memory regions to track system
1266 // counters for transparent huge pages
1267 void* allocPtrs[allocCount];
1268
1269 // Wait for the system to update process memory info files after other tests
1270 utils::Sleep(4000);
1271
1272 // Parse system info regarding current THP status
1273 size_t currentSystemTHPCount = utils::getSystemTHPCount();
1274 size_t currentSystemTHPAllocatedSize = utils::getSystemTHPAllocatedSize();
1275
1276 for (int i = 0; i < allocCount; i++) {
        // The allocation size has to be aligned on the page size
1278 size_t allocSize = HUGE_PAGE_SIZE - (i * 1000);
1279
1280 // Map memory
1281 allocPtrs[i] = backend->allocRawMem(allocSize);
1282
1283 REQUIRE_MESSAGE(allocPtrs[i], "Allocation not succeeded.");
        REQUIRE_MESSAGE(allocSize == HUGE_PAGE_SIZE,
            "Allocation size has to be aligned on the Huge Page size internally.");
1286
1287 // First touch policy - no real pages allocated by OS without accessing the region
1288 memset(allocPtrs[i], 1, allocSize);
1289
1290 REQUIRE_MESSAGE(isAligned(allocPtrs[i], HUGE_PAGE_SIZE),
1291 "The pointer returned by scalable_malloc is not aligned on huge page size.");
1292 }
1293
1294 // Wait for the system to update process memory info files after allocations
1295 utils::Sleep(4000);
1296
    // Generally, the kernel tries to allocate transparent huge pages, but sometimes it cannot do this
    // (tested on SLES 11/12), so consider these system info checks a remark only.
    // Also, some systems can allocate more memory than needed in the background (tested on Ubuntu 14.04).
1300 size_t newSystemTHPCount = utils::getSystemTHPCount();
1301 size_t newSystemTHPAllocatedSize = utils::getSystemTHPAllocatedSize();
1302 if ((newSystemTHPCount - currentSystemTHPCount) < allocCount
1303 && (newSystemTHPAllocatedSize - currentSystemTHPAllocatedSize) / (2 * 1024) < allocCount) {
1304 REPORT( "Warning: the system didn't allocate needed amount of THPs.\n" );
1305 }
1306
1307 // Test memory unmap
1308 for (int i = 0; i < allocCount; i++) {
1309 REQUIRE_MESSAGE(backend->freeRawMem(allocPtrs[i], HUGE_PAGE_SIZE),
1310 "Something went wrong during raw memory free");
1311 }
1312 }
1313 #endif // __unix__
1314
inline size_t getStabilizedMemUsage() {
1316 for (int i = 0; i < 3; i++) utils::GetMemoryUsage();
1317 return utils::GetMemoryUsage();
1318 }
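// A few warm-up readings are taken before the final value is used, on the assumption that
// utils::GetMemoryUsage needs a couple of calls for the reported process memory usage to
// settle after recent allocations and deallocations.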
1319
inline void* reallocAndRetrieve(void* origPtr, size_t reallocSize, size_t& origBlockSize, size_t& reallocBlockSize) {
1321 rml::internal::LargeMemoryBlock* origLmb = ((rml::internal::LargeObjectHdr *)origPtr - 1)->memoryBlock;
1322 origBlockSize = origLmb->unalignedSize;
1323
1324 void* reallocPtr = rml::internal::reallocAligned(defaultMemPool, origPtr, reallocSize, 0);
1325
    // Retrieve the reallocated block information
1327 rml::internal::LargeMemoryBlock* reallocLmb = ((rml::internal::LargeObjectHdr *)reallocPtr - 1)->memoryBlock;
1328 reallocBlockSize = reallocLmb->unalignedSize;
1329
1330 return reallocPtr;
1331 }
1332
void TestReallocDecreasing() {
1334
1335 /* Testing that actual reallocation happens for large objects that do not fit the backend cache
1336 but decrease in size by a factor of >= 2. */
1337
1338 size_t startSize = 100 * 1024 * 1024;
1339 size_t maxBinnedSize = defaultMemPool->extMemPool.backend.getMaxBinnedSize();
1340 void* origPtr = scalable_malloc(startSize);
1341 void* reallocPtr = nullptr;
1342
1343 // Realloc on 1MB less size
1344 size_t origBlockSize = 42;
1345 size_t reallocBlockSize = 43;
1346 reallocPtr = reallocAndRetrieve(origPtr, startSize - 1 * 1024 * 1024, origBlockSize, reallocBlockSize);
1347 REQUIRE_MESSAGE(origBlockSize == reallocBlockSize, "Reallocated block size shouldn't change");
1348 REQUIRE_MESSAGE(reallocPtr == origPtr, "Original pointer shouldn't change");
1349
    // Repeat decreasing reallocation while the size is still above the maximum binned size
1351 size_t reallocSize = (startSize / 2) - 1000; // exact realloc
1352 while(reallocSize > maxBinnedSize) {
1353
1354 // Prevent huge/large objects caching
1355 defaultMemPool->extMemPool.loc.cleanAll();
1356 // Prevent local large object caching
1357 TLSData *tls = defaultMemPool->getTLS(/*create=*/false);
1358 tls->lloc.externalCleanup(&defaultMemPool->extMemPool);
1359
1360 size_t sysMemUsageBefore = getStabilizedMemUsage();
1361 size_t totalMemSizeBefore = defaultMemPool->extMemPool.backend.getTotalMemSize();
1362
1363 reallocPtr = reallocAndRetrieve(origPtr, reallocSize, origBlockSize, reallocBlockSize);
1364
1365 REQUIRE_MESSAGE(origBlockSize > reallocBlockSize, "Reallocated block size should decrease.");
1366
1367 size_t sysMemUsageAfter = getStabilizedMemUsage();
1368 size_t totalMemSizeAfter = defaultMemPool->extMemPool.backend.getTotalMemSize();
1369
        // Skip the check when backend caching occurred or the system memory usage info could not be read
        if (totalMemSizeBefore > totalMemSizeAfter && sysMemUsageAfter != 0 && sysMemUsageBefore != 0) {
            REQUIRE_MESSAGE(sysMemUsageBefore > sysMemUsageAfter, "Memory was not released");
1373 }
1374
1375 origPtr = reallocPtr;
1376 reallocSize = (reallocSize / 2) - 1000; // exact realloc
1377 }
1378 scalable_free(reallocPtr);
1379
1380 /* TODO: Decreasing reallocation of large objects that fit backend cache */
1381 /* TODO: Small objects decreasing reallocation test */
1382 }
1383 #if !__TBB_WIN8UI_SUPPORT && defined(_WIN32)
1384
1385 #include "../../src/tbbmalloc_proxy/function_replacement.cpp"
1386 #include <string>
1387 namespace FunctionReplacement {
1388 FunctionInfo funcInfo = { "funcname","dllname" };
1389 char **func_replacement_log;
1390 int status;
1391
    void LogCleanup() {
1393 // Free all allocated memory
1394 for (unsigned i = 0; i < Log::record_number; i++){
1395 HeapFree(GetProcessHeap(), 0, Log::records[i]);
1396 }
1397 for (unsigned i = 0; i < Log::RECORDS_COUNT + 1; i++){
1398 Log::records[i] = nullptr;
1399 }
1400 Log::replacement_status = true;
1401 Log::record_number = 0;
1402 }
1403
    void TestEmptyLog() {
1405 status = TBB_malloc_replacement_log(&func_replacement_log);
1406
1407 REQUIRE_MESSAGE(status == -1, "Status is true, but log is empty");
1408 REQUIRE_MESSAGE(*func_replacement_log == nullptr, "Log must be empty");
1409 }
1410
    void TestLogOverload() {
1412 for (int i = 0; i < 1000; i++)
1413 Log::record(funcInfo, "opcode string", true);
1414
1415 status = TBB_malloc_replacement_log(&func_replacement_log);
1416 // Find last record
1417 for (; *(func_replacement_log + 1) != 0; func_replacement_log++) {}
1418
1419 std::string last_line(*func_replacement_log);
1420 REQUIRE_MESSAGE(status == 0, "False status, but all functions found");
1421 REQUIRE_MESSAGE(last_line.compare("Log was truncated.") == 0, "Log overflow was not handled");
1422
1423 // Change status
1424 Log::record(funcInfo, "opcode string", false);
1425 status = TBB_malloc_replacement_log(nullptr);
1426 REQUIRE_MESSAGE(status == -1, "Status is true, but we have false search case");
1427
1428 LogCleanup();
1429 }
1430
    void TestFalseSearchCase() {
1432 Log::record(funcInfo, "opcode string", false);
1433 std::string expected_line = "Fail: "+ std::string(funcInfo.funcName) + " (" +
1434 std::string(funcInfo.dllName) + "), byte pattern: <opcode string>";
1435
1436 status = TBB_malloc_replacement_log(&func_replacement_log);
1437
        REQUIRE_MESSAGE(expected_line.compare(*func_replacement_log) == 0, "Wrong last string content");
1439 REQUIRE_MESSAGE(status == -1, "Status is true, but we have false search case");
1440 LogCleanup();
1441 }
1442
    void TestWrongFunctionInDll() {
1444 HMODULE ucrtbase_handle = GetModuleHandle("ucrtbase.dll");
1445 if (ucrtbase_handle) {
1446 IsPrologueKnown("ucrtbase.dll", "fake_function", nullptr, ucrtbase_handle);
1447 std::string expected_line = "Fail: fake_function (ucrtbase.dll), byte pattern: <unknown>";
1448
1449 status = TBB_malloc_replacement_log(&func_replacement_log);
1450
            REQUIRE_MESSAGE(expected_line.compare(*func_replacement_log) == 0, "Wrong last string content");
1452 REQUIRE_MESSAGE(status == -1, "Status is true, but we have false search case");
1453 LogCleanup();
1454 } else {
1455 INFO("Cannot found ucrtbase.dll on system, test skipped!\n");
1456 }
1457 }
1458 }

void TestFunctionReplacementLog() {
    using namespace FunctionReplacement;
    // Do not reorder the test cases
    TestEmptyLog();
    TestLogOverload();
    TestFalseSearchCase();
    TestWrongFunctionInDll();
}

#endif /*!__TBB_WIN8UI_SUPPORT && defined(_WIN32)*/

#include <cmath> // pow function

// Huge objects cache: the formula Size = MinSize * 2^(Index / StepFactor) gives the bin size,
// but it does not match our sizeToIdx approximation algorithm, where the steps between consecutive
// major (power-of-2) sizes are equal. Used internally by the test. Static cast to avoid warnings.
inline size_t hocIdxToSizeFormula(int idx) {
    return static_cast<size_t>(float(rml::internal::LargeObjectCache::maxLargeSize) *
        pow(2, float(idx) / float(rml::internal::LargeObjectCache::HugeBSProps::StepFactor)));
}
// Large objects cache: bin sizes form an arithmetic progression
inline size_t locIdxToSizeFormula(int idx) {
    return rml::internal::LargeObjectCache::LargeBSProps::MinSize +
           (idx * rml::internal::LargeObjectCache::LargeBSProps::CacheStep);
}
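
// Worked example of the two formulas above (the numbers are illustrative assumptions, not the real
// cache constants): with a huge-cache base size of 64 MB and StepFactor == 4, hocIdxToSizeFormula(4)
// yields 64 MB * 2^(4/4) = 128 MB; with MinSize == 8 KB and CacheStep == 8 KB, locIdxToSizeFormula(3)
// yields 8 KB + 3 * 8 KB = 32 KB.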

template <typename CacheType>
void TestLOCacheBinsConverterImpl(int idx, size_t checkingSize) {
    size_t alignedSize = CacheType::alignToBin(checkingSize);
    REQUIRE_MESSAGE(alignedSize >= checkingSize, "Size is not correctly aligned");
    int calcIdx = CacheType::sizeToIdx(alignedSize);
    REQUIRE_MESSAGE(calcIdx == idx, "Index calculated from the size is not correct");
}

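// For every bin of both the large and the huge object cache, check that the size produced by the
// corresponding formula maps back to the same bin index through alignToBin()/sizeToIdx().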
void TestLOCacheBinsConverter() {
    typedef rml::internal::LargeObjectCache::LargeCacheType LargeCacheType;
    typedef rml::internal::LargeObjectCache::HugeCacheType HugeCacheType;

    size_t checkingSize = 0;
    for (int idx = 0; idx < LargeCacheType::numBins; idx++) {
        checkingSize = locIdxToSizeFormula(idx);
        TestLOCacheBinsConverterImpl<LargeCacheType>(idx, checkingSize);
    }
    for (int idx = 0; idx < HugeCacheType::numBins; idx++) {
        checkingSize = hocIdxToSizeFormula(idx);
        TestLOCacheBinsConverterImpl<HugeCacheType>(idx, checkingSize);
    }
}

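// Helper for the huge size threshold tests: populates a range of huge object cache bins around the
// sieve threshold and then verifies which bins survive no cleanup, a regular cleanup, or a hard cleanup.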
struct HOThresholdTester {
    LargeObjectCache* loc;
    size_t hugeSize;

    static const size_t sieveSize = LargeObjectCache::defaultMaxHugeSize;
    // The sieve starts at 64 MB (the 24th cache bin), so checking a few bins on each side of it is
    // enough while keeping memory consumption decent (especially on 32-bit architectures)
    static const int MIN_BIN_IDX = 21;
    static const int MAX_BIN_IDX = 27;

    enum CleanupType {
        NO_CLEANUP,
        REGULAR_CLEANUP,
        HARD_CLEANUP
    };

    void populateCache() {
        LargeMemoryBlock* loArray[MAX_BIN_IDX - MIN_BIN_IDX];
        // To avoid the consequences of backend::softCacheCleanup (cleanup by isLOCToolarge),
        // first allocate all objects and then cache them at once.
        // Moreover, because the first cached item is still dropped from the cache due to the lack of
        // history, the allocation is done twice.
        for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) {
            size_t allocationSize = alignedSizeFromIdx(idx);
            int localIdx = idx - MIN_BIN_IDX;
            loArray[localIdx] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize);
            REQUIRE_MESSAGE(loArray[localIdx], "Large object was not allocated.");
            loc->put(loArray[localIdx]);
            loArray[localIdx] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize);
        }
        for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) {
            loc->put(loArray[idx - MIN_BIN_IDX]);
        }
    }
    void clean(bool all) {
        if (all) {
            // Ignore any threshold and clean all bins
            loc->cleanAll();
        } else {
            // Regular cleanup should do nothing for bins above the threshold. The decreasing option is
            // used in the test to make sure that all objects below defaultMaxHugeSize (sieveSize) were cleaned
            loc->regularCleanup();
            loc->decreasingCleanup();
        }
    }
    void check(CleanupType type) {
        for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) {
            size_t objectSize = alignedSizeFromIdx(idx);
            // Objects below the sieve threshold and above the huge object threshold should stay cached
            // (others should be sieved), unless the whole cache is dropped. Regular cleanup drops only
            // objects below the sieve size.
            if (type == NO_CLEANUP && sizeInCacheRange(objectSize)) {
                REQUIRE_MESSAGE(objectInCacheBin(idx, objectSize), "Object was released from the cache, but it shouldn't have been.");
            } else if (type == REGULAR_CLEANUP && (objectSize >= hugeSize)) {
                REQUIRE_MESSAGE(objectInCacheBin(idx, objectSize), "Object was released from the cache, but it shouldn't have been.");
            } else { // HARD_CLEANUP
                REQUIRE_MESSAGE(cacheBinEmpty(idx), "Object is still cached.");
            }
        }
    }

private:
    bool cacheBinEmpty(int idx) {
        return (loc->hugeCache.bin[idx].cachedSize.load(std::memory_order_relaxed) == 0 && loc->hugeCache.bin[idx].get() == nullptr);
    }
    bool objectInCacheBin(int idx, size_t size) {
        return (loc->hugeCache.bin[idx].cachedSize.load(std::memory_order_relaxed) != 0 &&
                loc->hugeCache.bin[idx].cachedSize.load(std::memory_order_relaxed) % size == 0);
    }
    bool sizeInCacheRange(size_t size) {
        return size <= sieveSize || size >= hugeSize;
    }
    size_t alignedSizeFromIdx(int idx) {
        return rml::internal::LargeObjectCache::alignToBin(hocIdxToSizeFormula(idx));
    }
};

// The TBBMALLOC_SET_HUGE_SIZE_THRESHOLD value should be set before the test,
// through the scalable API or the environment variable
void TestHugeSizeThresholdImpl(LargeObjectCache* loc, size_t hugeSize, bool fullTesting) {
    HOThresholdTester test = {loc, hugeSize};
    test.populateCache();
    // Check the default sieve value
    test.check(HOThresholdTester::NO_CLEANUP);

    if (fullTesting) {
        // Check that objects above the threshold stay in the cache after a regular cleanup
        test.clean(/*all*/false);
        test.check(HOThresholdTester::REGULAR_CLEANUP);
    }
    // Check that all objects are dropped from the cache after a hard cleanup (which ignores the huge objects threshold)
    test.clean(/*all*/true);
    test.check(HOThresholdTester::HARD_CLEANUP);
    // Restore previous settings
    loc->setHugeSizeThreshold(LargeObjectCache::maxHugeSize);
    loc->reset();
}

/*
 * Test for the default huge size and the behaviour when the huge object threshold is defined
 */
void TestHugeSizeThreshold() {
    // Clean up if something was allocated before the test and reset the cache state
    scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr);
    LargeObjectCache* loc = &defaultMemPool->extMemPool.loc;
    // Restore default settings just in case
    loc->setHugeSizeThreshold(LargeObjectCache::maxHugeSize);
    loc->reset();
    // First, check the default huge size value (with the max huge object threshold).
    // Everything larger than this value should be released to the OS without caching.
    TestHugeSizeThresholdImpl(loc, loc->hugeSizeThreshold, false);
    // Then set the huge object threshold.
    // All objects with sizes above the threshold will be released only after a hard cleanup.
#if !__TBB_WIN8UI_SUPPORT
    // Unit testing for the environment variable
    utils::SetEnv("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD", "67108864");
    // The large object cache reads the threshold environment variable during initialization.
    // Reset the value before the test.
    loc->hugeSizeThreshold = 0;
    // Reset logical time to prevent regular cleanup
    loc->cacheCurrTime = 0;
    loc->init(&defaultMemPool->extMemPool);
    TestHugeSizeThresholdImpl(loc, 64 * MByte, true);
#endif
    // Unit testing for scalable_allocation_mode
    scalable_allocation_mode(TBBMALLOC_SET_HUGE_SIZE_THRESHOLD, 56 * MByte);
    TestHugeSizeThresholdImpl(loc, 56 * MByte, true);
    // Verify that an object whose size equals the (bin-aligned) maxHugeSize upper bound is not cached.
    size_t sz = LargeObjectCache::maxHugeSize;
    size_t aligned_sz = LargeObjectCache::alignToBin(sz);
    REQUIRE_MESSAGE(sz == aligned_sz, "maxHugeSize should be aligned.");
    REQUIRE_MESSAGE(!loc->sizeInCacheRange(sz), "Upper bound sized object shouldn't be cached.");
    REQUIRE_MESSAGE(loc->get(sz) == nullptr, "Upper bound sized object shouldn't be cached.");
}
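
// Minimal usage sketch (illustrative only, not invoked by the tests): how an application would raise
// the huge object threshold to 64 MB through the public interfaces exercised above. The helper name
// below is hypothetical.
inline void exampleSetHugeSizeThreshold() {
    // Programmatically:
    scalable_allocation_mode(TBBMALLOC_SET_HUGE_SIZE_THRESHOLD, 64 * MByte);
    // Or via the environment, which is read when the allocator initializes:
    // TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD=67108864
}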

//! \brief \ref error_guessing
TEST_CASE("Main test case") {
    scalable_allocation_mode(USE_HUGE_PAGES, 0);
#if !__TBB_WIN8UI_SUPPORT
    utils::SetEnv("TBB_MALLOC_USE_HUGE_PAGES", "yes");
#endif
    checkNoHugePages();
    // Backreference requires that initialization has been done
    if (!isMallocInitialized()) doInitialization();
    checkNoHugePages();
    // To succeed, leak detection must be the first memory-intensive test
    TestBackRef();
    TestCleanAllBuffers<4*1024>();
    TestCleanAllBuffers<16*1024>();
    TestCleanThreadBuffers();
    TestPools();
    TestBackend();

#if MALLOC_CHECK_RECURSION
    for( int p=MaxThread; p>=MinThread; --p ) {
        TestStartupAlloc::initBarrier( p );
        utils::NativeParallelFor( p, TestStartupAlloc() );
        REQUIRE_MESSAGE(!firstStartupBlock, "Startup heap memory leak detected");
    }
#endif
    TestLargeObjectCache();
    TestObjectRecognition();
    TestBitMask();
    TestHeapLimit();
    TestLOC();
    TestSlabAlignment();
}

//! \brief \ref error_guessing
TEST_CASE("Decreasing reallocation") {
    if (!isMallocInitialized()) doInitialization();
    TestReallocDecreasing();
}

//! \brief \ref error_guessing
TEST_CASE("Large object cache bins converter") {
    if (!isMallocInitialized()) doInitialization();
    TestLOCacheBinsConverter();
}

//! \brief \ref error_guessing
TEST_CASE("Huge size threshold settings") {
    if (!isMallocInitialized()) doInitialization();
    TestHugeSizeThreshold();
}


#if __unix__
//! \brief \ref error_guessing
TEST_CASE("Transparent huge pages") {
    if (utils::isTHPEnabledOnMachine()) {
        if (!isMallocInitialized()) doInitialization();
        TestTHP();
    } else {
        INFO("Transparent Huge Pages are not supported on the system - test skipped\n");
    }
}
#endif

#if !__TBB_WIN8UI_SUPPORT && defined(_WIN32)
//! \brief \ref error_guessing
TEST_CASE("Function replacement log") {
    TestFunctionReplacementLog();
}
#endif
