1 /*
2     Copyright (c) 2005-2023 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 //! \file test_malloc_whitebox.cpp
18 //! \brief Test for [memory_allocation] functionality
19 
20 #if _WIN32 || _WIN64
21 #define _CRT_SECURE_NO_WARNINGS
22 #endif
23 
24 // Prevent loading the dynamic TBBmalloc at startup; it is not needed for the whitebox test
25 #define __TBB_SOURCE_DIRECTLY_INCLUDED 1
26 // Call the thread shutdown API when native threads join
27 #define HARNESS_TBBMALLOC_THREAD_SHUTDOWN 1
28 
29 // According to the C99 standard, INTPTR_MIN is defined for C++ only if __STDC_LIMIT_MACROS is pre-defined
30 #define __STDC_LIMIT_MACROS 1
31 
32 // Avoid depending on the ITT support machinery
33 #ifdef DO_ITT_NOTIFY
34 #undef DO_ITT_NOTIFY
35 #endif
36 
37 #include "common/test.h"
38 
39 #include "common/utils.h"
40 #include "common/utils_assert.h"
41 #include "common/utils_env.h"
42 #include "common/spin_barrier.h"
43 
44 #include "oneapi/tbb/detail/_machine.h"
45 
46 #define __TBB_MALLOC_WHITEBOX_TEST 1 // to get access to allocator internals
47 // helps trigger a rare race condition
48 #define WhiteboxTestingYield() (tbb::detail::yield(), tbb::detail::yield(), tbb::detail::yield(), tbb::detail::yield())
49 
50 #if __INTEL_COMPILER && __TBB_MIC_OFFLOAD
51 // 2571 is variable has not been declared with compatible "target" attribute
52 // 3218 is class/struct may fail when offloaded because this field is misaligned
53 //         or contains data that is misaligned
54     #pragma warning(push)
55     #pragma warning(disable:2571 3218)
56 #endif
57 #define protected public
58 #define private public
59 #include "../../src/tbbmalloc/frontend.cpp"
60 #undef protected
61 #undef private
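// Note: the allocator sources (frontend.cpp above; backend.cpp, backref.cpp,
// large_objects.cpp, and tbbmalloc.cpp below) are compiled directly into this
// test, and the protected/private redefinition above exposes their internal
// structures (Block, Backend, LargeObjectCache, ...) to the whitebox checks.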
62 #if __INTEL_COMPILER && __TBB_MIC_OFFLOAD
63     #pragma warning(pop)
64 #endif
65 #include "../../src/tbbmalloc/backend.cpp"
66 #include "../../src/tbbmalloc/backref.cpp"
67 
68 namespace tbbmalloc_whitebox {
69     std::atomic<size_t> locGetProcessed{};
70     std::atomic<size_t> locPutProcessed{};
71 }
72 #include "../../src/tbbmalloc/large_objects.cpp"
73 #include "../../src/tbbmalloc/tbbmalloc.cpp"
74 
75 const int LARGE_MEM_SIZES_NUM = 10;
76 static const int MinThread = 1;
77 static const int MaxThread = 4;
78 
79 class AllocInfo {
80     int *p;
81     int val;
82     int size;
83 public:
84     AllocInfo() : p(nullptr), val(0), size(0) {}
85     explicit AllocInfo(int sz) : p((int*)scalable_malloc(sz*sizeof(int))),
86                                    val(rand()), size(sz) {
87         REQUIRE(p);
88         for (int k=0; k<size; k++)
89             p[k] = val;
90     }
91     void check() const {
92         for (int k=0; k<size; k++)
93             ASSERT(p[k] == val, nullptr);
94     }
95     void clear() {
96         scalable_free(p);
97     }
98 };
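// A minimal usage sketch of AllocInfo, as exercised by TestLargeObjCache below:
//   AllocInfo a(1024); // allocates 1024 ints via scalable_malloc and fills them with a random value
//   a.check();         // verifies the fill pattern is still intact
//   a.clear();         // releases the memory with scalable_free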
99 
100 // Test struct to call ProcessShutdown after all tests
101 struct ShutdownTest {
102     ~ShutdownTest() {
103     #if _WIN32 || _WIN64
104         __TBB_mallocProcessShutdownNotification(true);
105     #else
106         __TBB_mallocProcessShutdownNotification(false);
107     #endif
108     }
109 };
110 
111 static ShutdownTest shutdownTest;
112 
113 class SimpleBarrier: utils::NoAssign {
114 protected:
115     static utils::SpinBarrier barrier;
116 public:
117     static void initBarrier(unsigned thrds) { barrier.initialize(thrds); }
118 };
119 
120 utils::SpinBarrier SimpleBarrier::barrier;
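// Typical SimpleBarrier usage in the tests below: the driver calls
// initBarrier(P) before utils::NativeParallelFor(P, Body()), and each Body
// instance calls barrier.wait() so that all P threads reach the same point
// before the timing-sensitive part of the test starts.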
121 
122 class TestLargeObjCache: public SimpleBarrier {
123 public:
124     static int largeMemSizes[LARGE_MEM_SIZES_NUM];
125 
126     TestLargeObjCache( ) {}
127 
128     void operator()( int /*mynum*/ ) const {
129         AllocInfo allocs[LARGE_MEM_SIZES_NUM];
130 
131         // push to maximal cache limit
132         for (int i=0; i<2; i++) {
133             const int sizes[] = { MByte/sizeof(int),
134                                   (MByte-2*LargeObjectCache::LargeBSProps::CacheStep)/sizeof(int) };
135             for (int q=0; q<2; q++) {
136                 size_t curr = 0;
137                 for (int j=0; j<LARGE_MEM_SIZES_NUM; j++, curr++)
138                     new (allocs+curr) AllocInfo(sizes[q]);
139 
140                 for (size_t j=0; j<curr; j++) {
141                     allocs[j].check();
142                     allocs[j].clear();
143                 }
144             }
145         }
146 
147         barrier.wait();
148 
149         // check caching correctness
150         for (int i=0; i<1000; i++) {
151             size_t curr = 0;
152             for (int j=0; j<LARGE_MEM_SIZES_NUM-1; j++, curr++)
153                 new (allocs+curr) AllocInfo(largeMemSizes[j]);
154 
155             new (allocs+curr)
156                 AllocInfo((int)(4*minLargeObjectSize +
157                                 2*minLargeObjectSize*(1.*rand()/RAND_MAX)));
158             curr++;
159 
160             for (size_t j=0; j<curr; j++) {
161                 allocs[j].check();
162                 allocs[j].clear();
163             }
164         }
165     }
166 };
167 
168 int TestLargeObjCache::largeMemSizes[LARGE_MEM_SIZES_NUM];
169 
170 void TestLargeObjectCache()
171 {
172     for (int i=0; i<LARGE_MEM_SIZES_NUM; i++)
173         TestLargeObjCache::largeMemSizes[i] =
174             (int)(minLargeObjectSize + 2*minLargeObjectSize*(1.*rand()/RAND_MAX));
175 
176     for( int p=MaxThread; p>=MinThread; --p ) {
177         TestLargeObjCache::initBarrier( p );
178         utils::NativeParallelFor( p, TestLargeObjCache() );
179     }
180 }
181 
182 #if MALLOC_CHECK_RECURSION
183 
184 class TestStartupAlloc: public SimpleBarrier {
185     struct TestBlock {
186         void *ptr;
187         size_t sz;
188     };
189     static const int ITERS = 100;
190 public:
191     TestStartupAlloc() {}
192     void operator()(int) const {
193         TestBlock blocks1[ITERS], blocks2[ITERS];
194 
195         barrier.wait();
196 
197         for (int i=0; i<ITERS; i++) {
198             blocks1[i].sz = rand() % minLargeObjectSize;
199             blocks1[i].ptr = StartupBlock::allocate(blocks1[i].sz);
200             REQUIRE((blocks1[i].ptr && StartupBlock::msize(blocks1[i].ptr)>=blocks1[i].sz
201                    && 0==(uintptr_t)blocks1[i].ptr % sizeof(void*)));
202             memset(blocks1[i].ptr, i, blocks1[i].sz);
203         }
204         for (int i=0; i<ITERS; i++) {
205             blocks2[i].sz = rand() % minLargeObjectSize;
206             blocks2[i].ptr = StartupBlock::allocate(blocks2[i].sz);
207             REQUIRE((blocks2[i].ptr && StartupBlock::msize(blocks2[i].ptr)>=blocks2[i].sz
208                    && 0==(uintptr_t)blocks2[i].ptr % sizeof(void*)));
209             memset(blocks2[i].ptr, i, blocks2[i].sz);
210 
211             for (size_t j=0; j<blocks1[i].sz; j++)
212                 REQUIRE(*((char*)blocks1[i].ptr+j) == i);
213             Block *block = (Block *)alignDown(blocks1[i].ptr, slabSize);
214             ((StartupBlock *)block)->free(blocks1[i].ptr);
215         }
216         for (int i=ITERS-1; i>=0; i--) {
217             for (size_t j=0; j<blocks2[i].sz; j++)
218                 REQUIRE(*((char*)blocks2[i].ptr+j) == i);
219             Block *block = (Block *)alignDown(blocks2[i].ptr, slabSize);
220             ((StartupBlock *)block)->free(blocks2[i].ptr);
221         }
222     }
223 };
224 
225 #endif /* MALLOC_CHECK_RECURSION */
226 
227 #include <deque>
228 
229 template<int ITERS>
230 class BackRefWork: utils::NoAssign {
231     struct TestBlock {
232         BackRefIdx idx;
233         char       data;
234         TestBlock(BackRefIdx idx_) : idx(idx_) {}
235     };
236 public:
237     BackRefWork() {}
238     void operator()(int) const {
239         size_t cnt;
240         // it's important to not invalidate pointers to the contents of the container
241         std::deque<TestBlock> blocks;
242 
243         // for ITERS==0 consume all available backrefs
244         for (cnt=0; !ITERS || cnt<ITERS; cnt++) {
245             BackRefIdx idx = BackRefIdx::newBackRef(/*largeObj=*/false);
246             if (idx.isInvalid())
247                 break;
248             blocks.push_back(TestBlock(idx));
249             setBackRef(blocks.back().idx, &blocks.back().data);
250         }
251         for (size_t i=0; i<cnt; i++)
252             REQUIRE((Block*)&blocks[i].data == getBackRef(blocks[i].idx));
253         for (size_t i=cnt; i>0; i--)
254             removeBackRef(blocks[i-1].idx);
255     }
256 };
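// The back-reference API exercised above, in the order the test uses it:
//   BackRefIdx idx = BackRefIdx::newBackRef(/*largeObj=*/false); // reserve a slot (may be invalid when exhausted)
//   setBackRef(idx, ptr);                                        // bind the slot to an object
//   getBackRef(idx);                                             // must return the same pointer
//   removeBackRef(idx);                                          // release the slot for reuse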
257 
258 class LocalCachesHit: utils::NoAssign {
259     // set ITERS to trigger possible leak of backreferences
260     // during cleanup on cache overflow and on thread termination
261     static const int ITERS = 2*(FreeBlockPool::POOL_HIGH_MARK +
262                                 LocalLOC::LOC_HIGH_MARK);
263 public:
264     LocalCachesHit() {}
265     void operator()(int) const {
266         void *objsSmall[ITERS], *objsLarge[ITERS];
267 
268         for (int i=0; i<ITERS; i++) {
269             objsSmall[i] = scalable_malloc(minLargeObjectSize-1);
270             objsLarge[i] = scalable_malloc(minLargeObjectSize);
271         }
272         for (int i=0; i<ITERS; i++) {
273             scalable_free(objsSmall[i]);
274             scalable_free(objsLarge[i]);
275         }
276     }
277 };
278 
279 static size_t allocatedBackRefCount()
280 {
281     size_t cnt = 0;
282     for (int i=0; i<=backRefMain.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed); i++)
283         cnt += backRefMain.load(std::memory_order_relaxed)->backRefBl[i]->allocatedCount;
284     return cnt;
285 }
286 
287 class TestInvalidBackrefs: public SimpleBarrier {
288 #if __ANDROID__
289     // Android requires fewer iterations due to lack of virtual memory.
290     static const int BACKREF_GROWTH_ITERS = 50*1024;
291 #else
292     static const int BACKREF_GROWTH_ITERS = 200*1024;
293 #endif
294 
295     static std::atomic<bool> backrefGrowthDone;
296     static void *ptrs[BACKREF_GROWTH_ITERS];
297 public:
298     TestInvalidBackrefs() {}
299     void operator()(int id) const {
300 
301         if (!id) {
302             backrefGrowthDone = false;
303             barrier.wait();
304 
305             for (int i=0; i<BACKREF_GROWTH_ITERS; i++)
306                 ptrs[i] = scalable_malloc(minLargeObjectSize);
307             backrefGrowthDone = true;
308             for (int i=0; i<BACKREF_GROWTH_ITERS; i++)
309                 scalable_free(ptrs[i]);
310         } else {
311             void *p2 = scalable_malloc(minLargeObjectSize-1);
312             char *p1 = (char*)scalable_malloc(minLargeObjectSize-1);
313             LargeObjectHdr *hdr =
314                 (LargeObjectHdr*)(p1+minLargeObjectSize-1 - sizeof(LargeObjectHdr));
315             hdr->backRefIdx.main = 7;
316             hdr->backRefIdx.largeObj = 1;
317             hdr->backRefIdx.offset = 2000;
318 
319             barrier.wait();
320 
321             int yield_count = 0;
322             while (!backrefGrowthDone) {
323                 scalable_free(p2);
324                 p2 = scalable_malloc(minLargeObjectSize-1);
325                 if (yield_count++ == 100) {
326                     yield_count = 0;
327                     std::this_thread::yield();
328                 }
329             }
330             scalable_free(p1);
331             scalable_free(p2);
332         }
333     }
334 };
335 
336 std::atomic<bool> TestInvalidBackrefs::backrefGrowthDone;
337 void *TestInvalidBackrefs::ptrs[BACKREF_GROWTH_ITERS];
338 
339 void TestBackRef() {
340     size_t beforeNumBackRef, afterNumBackRef;
341 
342     beforeNumBackRef = allocatedBackRefCount();
343     for( int p=MaxThread; p>=MinThread; --p )
344         utils::NativeParallelFor( p, BackRefWork<2*BR_MAX_CNT+2>() );
345     afterNumBackRef = allocatedBackRefCount();
346     REQUIRE_MESSAGE(beforeNumBackRef==afterNumBackRef, "backreference leak detected");
347     // lastUsed marks peak resource consumption. As we allocate below the mark,
348     // it must not move up, otherwise there is a resource leak.
349     int sustLastUsed = backRefMain.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed);
350     utils::NativeParallelFor( 1, BackRefWork<2*BR_MAX_CNT+2>() );
351     REQUIRE_MESSAGE(sustLastUsed == backRefMain.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed), "backreference leak detected");
352     // check for a leak of back references while per-thread caches are in use
353     // a warm-up is needed to cover the bootStrapMalloc call
354     utils::NativeParallelFor( 1, LocalCachesHit() );
355     beforeNumBackRef = allocatedBackRefCount();
356     utils::NativeParallelFor( 2, LocalCachesHit() );
357     int res = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr);
358     REQUIRE(res == TBBMALLOC_OK);
359     afterNumBackRef = allocatedBackRefCount();
360     REQUIRE_MESSAGE(beforeNumBackRef>=afterNumBackRef, "backreference leak detected");
361 
362     // This is a regression test against a race condition between backreference
363     // extension and checking an invalid BackRefIdx.
364     // While detecting whether an object is large or small, scalable_free first checks
365     // for large objects, so there is a chance to prepend a small object with a
366     // seemingly valid BackRefIdx for large objects and thus trigger the bug.
367     TestInvalidBackrefs::initBarrier(MaxThread);
368     utils::NativeParallelFor( MaxThread, TestInvalidBackrefs() );
369     // Consume all available backrefs and check they work correctly.
370     // For now test 32-bit machines only, because for 64-bit memory consumption is too high.
371     if (sizeof(uintptr_t) == 4)
372         utils::NativeParallelFor( MaxThread, BackRefWork<0>() );
373 }
374 
375 void *getMem(intptr_t /*pool_id*/, size_t &bytes)
376 {
377     const size_t BUF_SIZE = 8*1024*1024;
378     static char space[BUF_SIZE];
379     static size_t pos;
380 
381     if (pos + bytes > BUF_SIZE)
382         return nullptr;
383 
384     void *ret = space + pos;
385     pos += bytes;
386 
387     return ret;
388 }
389 
390 int putMem(intptr_t /*pool_id*/, void* /*raw_ptr*/, size_t /*raw_bytes*/)
391 {
392     return 0;
393 }
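// getMem/putMem form the rml::MemPoolPolicy callback pair for a fixed,
// non-growable pool backed by a static buffer. TestPools() below wires them
// up roughly like this (sketch only):
//   rml::MemPoolPolicy pol(getMem, putMem);
//   rml::MemoryPool *pool;
//   pool_create_v1(0, &pol, &pool);
//   void *p = pool_malloc(pool, 1024);
//   pool_free(pool, p);
//   pool_destroy(pool);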
394 
395 struct MallocPoolHeader {
396     void  *rawPtr;
397     size_t userSize;
398 };
399 
400 void *getMallocMem(intptr_t /*pool_id*/, size_t &bytes)
401 {
402     void *rawPtr = malloc(bytes+sizeof(MallocPoolHeader));
403     void *ret = (void *)((uintptr_t)rawPtr+sizeof(MallocPoolHeader));
404 
405     MallocPoolHeader *hdr = (MallocPoolHeader*)ret-1;
406     hdr->rawPtr = rawPtr;
407     hdr->userSize = bytes;
408 
409     return ret;
410 }
411 
412 int putMallocMem(intptr_t /*pool_id*/, void *ptr, size_t bytes)
413 {
414     MallocPoolHeader *hdr = (MallocPoolHeader*)ptr-1;
415     ASSERT(bytes == hdr->userSize, "Invalid size in pool callback.");
416     free(hdr->rawPtr);
417 
418     return 0;
419 }
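// Memory layout produced by getMallocMem: [MallocPoolHeader][user bytes ...],
// where the header keeps the raw malloc() pointer and the requested size so
// that putMallocMem can verify the size reported by the allocator and free
// the original allocation.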
420 
421 class StressLOCacheWork: utils::NoAssign {
422     rml::MemoryPool *my_mallocPool;
423 public:
424     StressLOCacheWork(rml::MemoryPool *mallocPool) : my_mallocPool(mallocPool) {}
425     void operator()(int) const {
426         for (size_t sz=minLargeObjectSize; sz<1*1024*1024;
427              sz+=LargeObjectCache::LargeBSProps::CacheStep) {
428             void *ptr = pool_malloc(my_mallocPool, sz);
429             REQUIRE_MESSAGE(ptr, "Memory was not allocated");
430             memset(ptr, sz, sz);
431             pool_free(my_mallocPool, ptr);
432         }
433     }
434 };
435 
436 void TestPools() {
437     rml::MemPoolPolicy pol(getMem, putMem);
438     size_t beforeNumBackRef, afterNumBackRef;
439 
440     rml::MemoryPool *pool1;
441     rml::MemoryPool *pool2;
442     pool_create_v1(0, &pol, &pool1);
443     pool_create_v1(0, &pol, &pool2);
444     pool_destroy(pool1);
445     pool_destroy(pool2);
446 
447     scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr);
448     beforeNumBackRef = allocatedBackRefCount();
449     rml::MemoryPool *fixedPool;
450 
451     pool_create_v1(0, &pol, &fixedPool);
452     pol.pAlloc = getMallocMem;
453     pol.pFree = putMallocMem;
454     pol.granularity = 8;
455     rml::MemoryPool *mallocPool;
456 
457     pool_create_v1(0, &pol, &mallocPool);
458 /* Check that the large object cache (LOC) returns the correct size for cached objects.
459    Objects of passBackendSz bytes are cached in the LOC but bypass the backend, so
460    their memory is requested directly from the allocation callback.
461    Objects of anotherLOCBinSz bytes must fit into another LOC bin,
462    so that allocating and releasing them leads to cache cleanup.
463    All this is expected to lead to the passBackendSz-byte object being released
464    from the LOC during LOC cleanup, and putMallocMem checks that the returned size
465    is correct.
466 */
467     const size_t passBackendSz = Backend::maxBinned_HugePage+1,
468         anotherLOCBinSz = minLargeObjectSize+1;
469     for (int i=0; i<10; i++) { // run long enough to be cached
470         void *p = pool_malloc(mallocPool, passBackendSz);
471         REQUIRE_MESSAGE(p, "Memory was not allocated");
472         pool_free(mallocPool, p);
473     }
474     // run long enough for the passBackendSz allocation to be cleaned from the cache
475     // and returned back to putMallocMem for size checking
476     for (int i=0; i<1000; i++) {
477         void *p = pool_malloc(mallocPool, anotherLOCBinSz);
478         REQUIRE_MESSAGE(p, "Memory was not allocated");
479         pool_free(mallocPool, p);
480     }
481 
482     void *smallObj =  pool_malloc(fixedPool, 10);
483     REQUIRE_MESSAGE(smallObj, "Memory was not allocated");
484     memset(smallObj, 1, 10);
485     void *ptr = pool_malloc(fixedPool, 1024);
486     REQUIRE_MESSAGE(ptr, "Memory was not allocated");
487     memset(ptr, 1, 1024);
488     void *largeObj = pool_malloc(fixedPool, minLargeObjectSize);
489     REQUIRE_MESSAGE(largeObj, "Memory was not allocated");
490     memset(largeObj, 1, minLargeObjectSize);
491     ptr = pool_malloc(fixedPool, minLargeObjectSize);
492     REQUIRE_MESSAGE(ptr, "Memory was not allocated");
493     memset(ptr, minLargeObjectSize, minLargeObjectSize);
494     pool_malloc(fixedPool, 10*minLargeObjectSize); // no leak for unsuccessful allocations
495     pool_free(fixedPool, smallObj);
496     pool_free(fixedPool, largeObj);
497 
498     // provoke large object cache cleanup and hope no leaks occur
499     for( int p=MaxThread; p>=MinThread; --p )
500         utils::NativeParallelFor( p, StressLOCacheWork(mallocPool) );
501     pool_destroy(mallocPool);
502     pool_destroy(fixedPool);
503 
504     scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr);
505     afterNumBackRef = allocatedBackRefCount();
506     REQUIRE_MESSAGE(beforeNumBackRef==afterNumBackRef, "backreference leak detected");
507 
508     {
509         // test usedSize/cachedSize and LOC bitmask correctness
510         void *p[5];
511         pool_create_v1(0, &pol, &mallocPool);
512         const LargeObjectCache *loc = &((rml::internal::MemoryPool*)mallocPool)->extMemPool.loc;
513         const int LargeCacheStep = LargeObjectCache::LargeBSProps::CacheStep;
514         p[3] = pool_malloc(mallocPool, minLargeObjectSize+2*LargeCacheStep);
515         for (int i=0; i<10; i++) {
516             p[0] = pool_malloc(mallocPool, minLargeObjectSize);
517             p[1] = pool_malloc(mallocPool, minLargeObjectSize+LargeCacheStep);
518             pool_free(mallocPool, p[0]);
519             pool_free(mallocPool, p[1]);
520         }
521         REQUIRE(loc->getUsedSize());
522         pool_free(mallocPool, p[3]);
523         REQUIRE(loc->getLOCSize() < 3*(minLargeObjectSize+LargeCacheStep));
524         const size_t maxLocalLOCSize = LocalLOCImpl<3,30>::getMaxSize();
525         REQUIRE(loc->getUsedSize() <= maxLocalLOCSize);
526         for (int i=0; i<3; i++)
527             p[i] = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep);
528         size_t currUser = loc->getUsedSize();
529         REQUIRE((!loc->getLOCSize() && currUser >= 3*(minLargeObjectSize+LargeCacheStep)));
530         p[4] = pool_malloc(mallocPool, minLargeObjectSize+3*LargeCacheStep);
531         REQUIRE(loc->getUsedSize() - currUser >= minLargeObjectSize+3*LargeCacheStep);
532         pool_free(mallocPool, p[4]);
533         REQUIRE(loc->getUsedSize() <= currUser+maxLocalLOCSize);
534         pool_reset(mallocPool);
535         REQUIRE((!loc->getLOCSize() && !loc->getUsedSize()));
536         pool_destroy(mallocPool);
537     }
538     // To test the LOC we need bigger lists than are released by the current LocalLOC
539     //   in production code, so create a special LocalLOC.
540     {
541         LocalLOCImpl<2, 20> lLOC;
542         pool_create_v1(0, &pol, &mallocPool);
543         rml::internal::ExtMemoryPool *mPool = &((rml::internal::MemoryPool*)mallocPool)->extMemPool;
544         const LargeObjectCache *loc = &((rml::internal::MemoryPool*)mallocPool)->extMemPool.loc;
545         const int LargeCacheStep = LargeObjectCache::LargeBSProps::CacheStep;
546         for (int i=0; i<22; i++) {
547             void *o = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep);
548             bool ret = lLOC.put(((LargeObjectHdr*)o - 1)->memoryBlock, mPool);
549             REQUIRE(ret);
550 
551             o = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep);
552             ret = lLOC.put(((LargeObjectHdr*)o - 1)->memoryBlock, mPool);
553             REQUIRE(ret);
554         }
555         lLOC.externalCleanup(mPool);
556         REQUIRE(!loc->getUsedSize());
557 
558         pool_destroy(mallocPool);
559     }
560 }
561 
562 void TestObjectRecognition() {
563     size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr);
564     unsigned falseObjectSize = 113; // unsigned is the type expected by getObjectSize
565     size_t obtainedSize;
566 
567     REQUIRE_MESSAGE(sizeof(BackRefIdx)==sizeof(uintptr_t), "Unexpected size of BackRefIdx");
568     REQUIRE_MESSAGE(getObjectSize(falseObjectSize)!=falseObjectSize, "Error in test: bad choice for false object size");
569 
570     void* mem = scalable_malloc(2*slabSize);
571     REQUIRE_MESSAGE(mem, "Memory was not allocated");
572     Block* falseBlock = (Block*)alignUp((uintptr_t)mem, slabSize);
573     falseBlock->objectSize = falseObjectSize;
574     char* falseSO = (char*)falseBlock + falseObjectSize*7;
575     REQUIRE_MESSAGE(alignDown(falseSO, slabSize)==(void*)falseBlock, "Error in test: false object offset is too big");
576 
577     void* bufferLOH = scalable_malloc(2*slabSize + headersSize);
578     REQUIRE_MESSAGE(bufferLOH, "Memory was not allocated");
579     LargeObjectHdr* falseLO =
580         (LargeObjectHdr*)alignUp((uintptr_t)bufferLOH + headersSize, slabSize);
581     LargeObjectHdr* headerLO = (LargeObjectHdr*)falseLO-1;
582     headerLO->memoryBlock = (LargeMemoryBlock*)bufferLOH;
583     headerLO->memoryBlock->unalignedSize = 2*slabSize + headersSize;
584     headerLO->memoryBlock->objectSize = slabSize + headersSize;
585     headerLO->backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true);
586     setBackRef(headerLO->backRefIdx, headerLO);
587     REQUIRE_MESSAGE(scalable_msize(falseLO) == slabSize + headersSize,
588            "Error in test: LOH falsification failed");
589     removeBackRef(headerLO->backRefIdx);
590 
591     const int NUM_OF_IDX = BR_MAX_CNT+2;
592     BackRefIdx idxs[NUM_OF_IDX];
593     for (int cnt=0; cnt<2; cnt++) {
594         for (int main = -10; main<10; main++) {
595             falseBlock->backRefIdx.main = (uint16_t)main;
596             headerLO->backRefIdx.main = (uint16_t)main;
597 
598             for (int bl = -10; bl<BR_MAX_CNT+10; bl++) {
599                 falseBlock->backRefIdx.offset = (uint16_t)bl;
600                 headerLO->backRefIdx.offset = (uint16_t)bl;
601 
602                 for (int largeObj = 0; largeObj<2; largeObj++) {
603                     falseBlock->backRefIdx.largeObj = largeObj;
604                     headerLO->backRefIdx.largeObj = largeObj;
605 
606                     obtainedSize = __TBB_malloc_safer_msize(falseSO, nullptr);
607                     REQUIRE_MESSAGE(obtainedSize==0, "Incorrect pointer accepted");
608                     obtainedSize = __TBB_malloc_safer_msize(falseLO, nullptr);
609                     REQUIRE_MESSAGE(obtainedSize==0, "Incorrect pointer accepted");
610                 }
611             }
612         }
613         if (cnt == 1) {
614             for (int i=0; i<NUM_OF_IDX; i++)
615                 removeBackRef(idxs[i]);
616             break;
617         }
618         for (int i=0; i<NUM_OF_IDX; i++) {
619             idxs[i] = BackRefIdx::newBackRef(/*largeObj=*/false);
620             setBackRef(idxs[i], nullptr);
621         }
622     }
623     char *smallPtr = (char*)scalable_malloc(falseObjectSize);
624     obtainedSize = __TBB_malloc_safer_msize(smallPtr, nullptr);
625     REQUIRE_MESSAGE(obtainedSize==getObjectSize(falseObjectSize), "Correct pointer not accepted?");
626     scalable_free(smallPtr);
627 
628     obtainedSize = __TBB_malloc_safer_msize(mem, nullptr);
629     REQUIRE_MESSAGE(obtainedSize>=2*slabSize, "Correct pointer not accepted?");
630     scalable_free(mem);
631     scalable_free(bufferLOH);
632 }
633 
634 class TestBackendWork: public SimpleBarrier {
635     struct TestBlock {
636         intptr_t   data;
637         BackRefIdx idx;
638     };
639     static const int ITERS = 20;
640 
641     rml::internal::Backend *backend;
642 public:
643     TestBackendWork(rml::internal::Backend *bknd) : backend(bknd) {}
644     void operator()(int) const {
645         barrier.wait();
646 
647         for (int i=0; i<ITERS; i++) {
648             BlockI *slabBlock = backend->getSlabBlock(1);
649             REQUIRE_MESSAGE(slabBlock, "Memory was not allocated");
650             uintptr_t prevBlock = (uintptr_t)slabBlock;
651             backend->putSlabBlock(slabBlock);
652 
653             LargeMemoryBlock *largeBlock = backend->getLargeBlock(16*1024);
654             REQUIRE_MESSAGE(largeBlock, "Memory was not allocated");
655             REQUIRE_MESSAGE((uintptr_t)largeBlock != prevBlock,
656                     "Large block cannot be reused from slab memory, only in fixed_pool case.");
657             backend->putLargeBlock(largeBlock);
658         }
659     }
660 };
661 
662 void TestBackend()
663 {
664     rml::MemPoolPolicy pol(getMallocMem, putMallocMem);
665     rml::MemoryPool *mPool;
666     pool_create_v1(0, &pol, &mPool);
667     rml::internal::ExtMemoryPool *ePool = &((rml::internal::MemoryPool*)mPool)->extMemPool;
668     rml::internal::Backend *backend = &ePool->backend;
669 
670     for( int p=MaxThread; p>=MinThread; --p ) {
671         // regression test against a race condition in backend synchronization,
672         // triggered only when the WhiteboxTestingYield() call yields
673 #if TBB_USE_DEBUG
674         int num_iters = 10;
675 #else
676         int num_iters = 100;
677 #endif
678         for (int i = 0; i < num_iters; i++) {
679             TestBackendWork::initBarrier(p);
680             utils::NativeParallelFor( p, TestBackendWork(backend) );
681         }
682     }
683 
684     BlockI *block = backend->getSlabBlock(1);
685     REQUIRE_MESSAGE(block, "Memory was not allocated");
686     backend->putSlabBlock(block);
687 
688     // Check that the backend increases and decreases the amount of allocated memory as memory is allocated and freed.
689     const size_t memSize0 = backend->getTotalMemSize();
690     LargeMemoryBlock *lmb = backend->getLargeBlock(4*MByte);
691     REQUIRE( lmb );
692 
693     const size_t memSize1 = backend->getTotalMemSize();
694     REQUIRE_MESSAGE( (intptr_t)(memSize1-memSize0) >= 4*MByte, "The backend has not increased the amount of memory in use." );
695 
696     backend->putLargeBlock(lmb);
697     const size_t memSize2 = backend->getTotalMemSize();
698     REQUIRE_MESSAGE( memSize2 == memSize0, "The backend has not decreased the amount of memory in use." );
699 
700     pool_destroy(mPool);
701 }
702 
703 void TestBitMask()
704 {
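    // BitMaskMin<N>::getMinTrue(pos) is expected to return the index of the
    // lowest bit set at or above pos, or -1 if no such bit exists; the checks
    // below cover the 64-bit word boundary (63/64) and the last bit (255).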
705     BitMaskMin<256> mask;
706 
707     mask.reset();
708     mask.set(10, 1);
709     mask.set(5, 1);
710     mask.set(1, 1);
711     REQUIRE(mask.getMinTrue(2) == 5);
712 
713     mask.reset();
714     mask.set(0, 1);
715     mask.set(64, 1);
716     mask.set(63, 1);
717     mask.set(200, 1);
718     mask.set(255, 1);
719     REQUIRE(mask.getMinTrue(0) == 0);
720     REQUIRE(mask.getMinTrue(1) == 63);
721     REQUIRE(mask.getMinTrue(63) == 63);
722     REQUIRE(mask.getMinTrue(64) == 64);
723     REQUIRE(mask.getMinTrue(101) == 200);
724     REQUIRE(mask.getMinTrue(201) == 255);
725     mask.set(255, 0);
726     REQUIRE(mask.getMinTrue(201) == -1);
727 }
728 
729 size_t getMemSize()
730 {
731     return defaultMemPool->extMemPool.backend.getTotalMemSize();
732 }
733 
734 class CheckNotCached {
735     static size_t memSize;
736 public:
737     void operator() () const {
738         int res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1);
739         REQUIRE(res == TBBMALLOC_OK);
740         if (memSize==(size_t)-1) {
741             memSize = getMemSize();
742         } else {
743             REQUIRE(getMemSize() == memSize);
744             memSize=(size_t)-1;
745         }
746     }
747 };
748 
749 size_t CheckNotCached::memSize = (size_t)-1;
750 
751 class RunTestHeapLimit: public SimpleBarrier {
752 public:
753     void operator()( int /*mynum*/ ) const {
754         // Provoke bootstrap heap initialization before recording memory size.
755         // NOTE: The initialization should be done only with a "large"
756         // object, since a "small" object allocation pins a slab as the
757         // active block and it is impossible to release it from a
758         // foreign thread.
759         scalable_free(scalable_malloc(minLargeObjectSize));
760         barrier.wait(CheckNotCached());
761         for (size_t n = minLargeObjectSize; n < 5*1024*1024; n += 128*1024)
762             scalable_free(scalable_malloc(n));
763         barrier.wait(CheckNotCached());
764     }
765 };
766 
767 void TestHeapLimit()
768 {
769     if(!isMallocInitialized()) doInitialization();
770     // tiny limit to stop caching
771     int res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1);
772     REQUIRE(res == TBBMALLOC_OK);
773      // Provoke bootstrap heap initialization before recording memory size.
774     scalable_free(scalable_malloc(8));
775     size_t n, sizeBefore = getMemSize();
776 
777     // Try to provoke a call to the OS for memory, to check that
778     // requests are not fulfilled from caches.
779     // A single call is not enough here because of backend fragmentation.
780     for (n = minLargeObjectSize; n < 10*1024*1024; n += 16*1024) {
781         void *p = scalable_malloc(n);
782         bool leave = (sizeBefore != getMemSize());
783         scalable_free(p);
784         if (leave)
785             break;
786         REQUIRE_MESSAGE(sizeBefore == getMemSize(), "No caching expected");
787     }
788     REQUIRE_MESSAGE(n < 10*1024*1024, "scalable_malloc doesn't provoke OS request for memory, "
789            "is some internal cache still used?");
790 
791     for( int p=MaxThread; p>=MinThread; --p ) {
792         RunTestHeapLimit::initBarrier( p );
793         utils::NativeParallelFor( p, RunTestHeapLimit() );
794     }
795     // setting the soft limit also tries to trim memory to match it, so call it here
796     res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1);
797     REQUIRE(res == TBBMALLOC_OK);
798     size_t m = getMemSize();
799     REQUIRE(sizeBefore == m);
800     // restore default
801     res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 0);
802     REQUIRE(res == TBBMALLOC_OK);
803 }
804 
805 void checkNoHugePages()
806 {
807     REQUIRE_MESSAGE(!hugePages.isEnabled, "scalable_allocation_mode "
808            "must have priority over environment variable");
809 }
810 
811 /*---------------------------------------------------------------------------*/
812 // The regression test against bugs in TBBMALLOC_CLEAN_ALL_BUFFERS allocation command.
813 // The idea is to allocate and deallocate a set of objects randomly in parallel.
814 // For large sizes (16K), it forces conflicts in backend during coalescing.
815 // For small sizes (4K), it forces cross-thread deallocations and then orphaned slabs.
816 // Global cleanup should process orphaned slabs and the queue of postponed coalescing
817 // requests, otherwise it will not be able to unmap all unused memory.
818 
819 const int num_allocs = 10*1024;
820 void *ptrs[num_allocs];
821 std::atomic<int> alloc_counter;
822 static thread_local bool free_was_called = false;
823 
824 inline void multiThreadAlloc(size_t alloc_size) {
825     for( int i = alloc_counter++; i < num_allocs; i = alloc_counter++ ) {
826        ptrs[i] = scalable_malloc( alloc_size );
827        REQUIRE_MESSAGE( ptrs[i] != nullptr, "scalable_malloc returned zero." );
828     }
829 }
830 inline void crossThreadDealloc() {
831     free_was_called = false;
832     for( int i = --alloc_counter; i >= 0; i = --alloc_counter ) {
833         if (i < num_allocs) {
834             scalable_free(ptrs[i]);
835             free_was_called = true;
836         }
837     }
838 }
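// alloc_counter coordinates the two phases above: multiThreadAlloc() claims
// slots in ptrs[] by atomically incrementing it, and crossThreadDealloc()
// drains the same array by decrementing it, so an object allocated by one
// thread is typically freed by a different one, which is what forces the
// orphaned slabs and postponed coalescing requests these tests target.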
839 
840 template<int AllocSize>
841 struct TestCleanAllBuffersBody : public SimpleBarrier {
842     void operator() ( int ) const {
843         barrier.wait();
844         multiThreadAlloc(AllocSize);
845         barrier.wait();
846         crossThreadDealloc();
847     }
848 };
849 
850 template<int AllocSize>
851 void TestCleanAllBuffers() {
852     const int num_threads = 8;
853     // Clean up if something was allocated before the test
854     scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,nullptr);
855 
856     size_t memory_in_use_before = getMemSize();
857     alloc_counter = 0;
858     TestCleanAllBuffersBody<AllocSize>::initBarrier(num_threads);
859 
860     utils::NativeParallelFor(num_threads, TestCleanAllBuffersBody<AllocSize>());
861     // TODO: reproduce the bug conditions more reliably
862     if ( defaultMemPool->extMemPool.backend.coalescQ.blocksToFree.load(std::memory_order_relaxed) == nullptr ) {
863         INFO( "Warning: The queue of postponed coalescing requests is empty. ");
864         INFO( "Unable to create the condition for bug reproduction.\n" );
865     }
866     int result = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,nullptr);
867     REQUIRE_MESSAGE( result == TBBMALLOC_OK, "The cleanup request has not cleaned anything." );
868     size_t memory_in_use_after = getMemSize();
869 
870     size_t memory_leak = memory_in_use_after - memory_in_use_before;
871     INFO( "memory_in_use_before = " <<  memory_in_use_before << ", memory_in_use_after = " << memory_in_use_after << "\n" );
872     REQUIRE_MESSAGE( memory_leak == 0, "Cleanup was unable to release all allocated memory." );
873 }
874 
875 //! Force cross-thread deallocation of small objects to create a set of privatizable slab blocks.
876 //! The TBBMALLOC_CLEAN_THREAD_BUFFERS command has to privatize all the blocks.
877 struct TestCleanThreadBuffersBody : public SimpleBarrier {
878     void operator() ( int ) const {
879         barrier.wait();
880         multiThreadAlloc(2*1024);
881         barrier.wait();
882         crossThreadDealloc();
883         barrier.wait();
884         int result = scalable_allocation_command(TBBMALLOC_CLEAN_THREAD_BUFFERS,nullptr);
885         if (result != TBBMALLOC_OK && free_was_called) {
886             REPORT("Warning: clean-up request for this particular thread has not cleaned anything.");
887         }
888 
889         // Check that TLS was cleaned fully
890         TLSData *tlsCurr = defaultMemPool->getTLS(/*create=*/false);
891         if (tlsCurr) {
892             for (int i = 0; i < numBlockBinLimit; i++) {
893                 REQUIRE_MESSAGE(!(tlsCurr->bin[i].activeBlk), "Some bin was not cleaned.");
894             }
895             REQUIRE_MESSAGE(!(tlsCurr->lloc.head.load(std::memory_order_relaxed)), "Local LOC was not cleaned.");
896             REQUIRE_MESSAGE(!(tlsCurr->freeSlabBlocks.head.load(std::memory_order_relaxed)), "Free Block pool was not cleaned.");
897         }
898     }
899 };
900 
901 void TestCleanThreadBuffers() {
902     const int num_threads = 8;
903     // Clean up if something was allocated before the test
904     scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,nullptr);
905 
906     alloc_counter = 0;
907     TestCleanThreadBuffersBody::initBarrier(num_threads);
908     utils::NativeParallelFor(num_threads, TestCleanThreadBuffersBody());
909 }
910 
911 /*---------------------------------------------------------------------------*/
912 /*------------------------- Large Object Cache tests ------------------------*/
913 #if _MSC_VER==1600 || _MSC_VER==1500
914     // ignore C4275: non dll-interface class 'stdext::exception' used as
915     // base for dll-interface class 'std::bad_cast'
916     #pragma warning (disable: 4275)
917 #endif
918 #include <vector>
919 #include <list>
920 
921 // default constructor of CacheBin
922 template<typename Props>
923 rml::internal::LargeObjectCacheImpl<Props>::CacheBin::CacheBin() {}
924 
925 template<typename Props>
926 class CacheBinModel {
927 
928     typedef typename rml::internal::LargeObjectCacheImpl<Props>::CacheBin CacheBinType;
929 
930     // The emulated cache bin.
931     CacheBinType cacheBinModel;
932     // The reference to real cache bin inside the large object cache.
933     CacheBinType &cacheBin;
934 
935     const size_t size;
936 
937     // save only current time
938     std::list<uintptr_t> objects;
939 
940     void doCleanup() {
941         if ( cacheBinModel.cachedSize.load(std::memory_order_relaxed) >
942             Props::TooLargeFactor*cacheBinModel.usedSize.load(std::memory_order_relaxed)) tooLargeLOC++;
943         else tooLargeLOC = 0;
944 
945         intptr_t threshold = cacheBinModel.ageThreshold.load(std::memory_order_relaxed);
946         if (tooLargeLOC > 3 && threshold) {
947             threshold = (threshold + cacheBinModel.meanHitRange.load(std::memory_order_relaxed)) / 2;
948             cacheBinModel.ageThreshold.store(threshold, std::memory_order_relaxed);
949         }
950 
951         uintptr_t currTime = cacheCurrTime;
952         while (!objects.empty() && (intptr_t)(currTime - objects.front()) > threshold) {
953             cacheBinModel.cachedSize.store(cacheBinModel.cachedSize.load(std::memory_order_relaxed) - size, std::memory_order_relaxed);
954             cacheBinModel.lastCleanedAge = objects.front();
955             objects.pop_front();
956         }
957 
958         cacheBinModel.oldest.store(objects.empty() ? 0 : objects.front(), std::memory_order_relaxed);
959     }
960 
961 public:
962     CacheBinModel(CacheBinType &_cacheBin, size_t allocSize) : cacheBin(_cacheBin), size(allocSize) {
963         cacheBinModel.oldest.store(cacheBin.oldest.load(std::memory_order_relaxed), std::memory_order_relaxed);
964         cacheBinModel.lastCleanedAge = cacheBin.lastCleanedAge;
965         cacheBinModel.ageThreshold.store(cacheBin.ageThreshold.load(std::memory_order_relaxed), std::memory_order_relaxed);
966         cacheBinModel.usedSize.store(cacheBin.usedSize.load(std::memory_order_relaxed), std::memory_order_relaxed);
967         cacheBinModel.cachedSize.store(cacheBin.cachedSize.load(std::memory_order_relaxed), std::memory_order_relaxed);
968         cacheBinModel.meanHitRange.store(cacheBin.meanHitRange.load(std::memory_order_relaxed), std::memory_order_relaxed);
969         cacheBinModel.lastGet = cacheBin.lastGet;
970     }
971     void get() {
972         uintptr_t currTime = ++cacheCurrTime;
973 
974         if ( objects.empty() ) {
975             const uintptr_t sinceLastGet = currTime - cacheBinModel.lastGet;
976             intptr_t threshold = cacheBinModel.ageThreshold.load(std::memory_order_relaxed);
977             if ((threshold && sinceLastGet > Props::LongWaitFactor * threshold) ||
978                 (cacheBinModel.lastCleanedAge && sinceLastGet > Props::LongWaitFactor * (cacheBinModel.lastCleanedAge - cacheBinModel.lastGet))) {
979                 cacheBinModel.lastCleanedAge = 0;
980                 cacheBinModel.ageThreshold.store(0, std::memory_order_relaxed);
981             }
982 
983             if (cacheBinModel.lastCleanedAge)
984                 cacheBinModel.ageThreshold.store(Props::OnMissFactor * (currTime - cacheBinModel.lastCleanedAge), std::memory_order_relaxed);
985         } else {
986             uintptr_t obj_age = objects.back();
987             objects.pop_back();
988             if (objects.empty()) cacheBinModel.oldest.store(0, std::memory_order_relaxed);
989 
990             intptr_t hitRange = currTime - obj_age;
991             intptr_t mean = cacheBinModel.meanHitRange.load(std::memory_order_relaxed);
992             mean = mean ? (mean + hitRange) / 2 : hitRange;
993             cacheBinModel.meanHitRange.store(mean, std::memory_order_relaxed);
994 
995             cacheBinModel.cachedSize.store(cacheBinModel.cachedSize.load(std::memory_order_relaxed) - size, std::memory_order_relaxed);
996         }
997 
998         cacheBinModel.usedSize.store(cacheBinModel.usedSize.load(std::memory_order_relaxed) + size, std::memory_order_relaxed);
999         cacheBinModel.lastGet = currTime;
1000 
1001         if ( currTime % rml::internal::cacheCleanupFreq == 0 ) doCleanup();
1002     }
1003 
1004     void putList( int num ) {
1005         uintptr_t currTime = cacheCurrTime;
1006         cacheCurrTime += num;
1007 
1008         cacheBinModel.usedSize.store(cacheBinModel.usedSize.load(std::memory_order_relaxed) - num * size, std::memory_order_relaxed);
1009 
1010         bool cleanUpNeeded = false;
1011         if ( !cacheBinModel.lastCleanedAge ) {
1012             cacheBinModel.lastCleanedAge = ++currTime;
1013             cleanUpNeeded |= currTime % rml::internal::cacheCleanupFreq == 0;
1014             num--;
1015         }
1016 
1017         for ( int i=1; i<=num; ++i ) {
1018             currTime+=1;
1019             cleanUpNeeded |= currTime % rml::internal::cacheCleanupFreq == 0;
1020             if (objects.empty())
1021                 cacheBinModel.oldest.store(currTime, std::memory_order_relaxed);
1022             objects.push_back(currTime);
1023         }
1024 
1025         cacheBinModel.cachedSize.store(cacheBinModel.cachedSize.load(std::memory_order_relaxed) + num * size, std::memory_order_relaxed);
1026 
1027         if ( cleanUpNeeded ) doCleanup();
1028     }
1029 
1030     void check() {
1031         CHECK_FAST(cacheBinModel.oldest.load(std::memory_order_relaxed) == cacheBin.oldest.load(std::memory_order_relaxed));
1032         CHECK_FAST(cacheBinModel.lastCleanedAge == cacheBin.lastCleanedAge);
1033         CHECK_FAST(cacheBinModel.ageThreshold.load(std::memory_order_relaxed) == cacheBin.ageThreshold.load(std::memory_order_relaxed));
1034         CHECK_FAST(cacheBinModel.usedSize.load(std::memory_order_relaxed) == cacheBin.usedSize.load(std::memory_order_relaxed));
1035         CHECK_FAST(cacheBinModel.cachedSize.load(std::memory_order_relaxed) == cacheBin.cachedSize.load(std::memory_order_relaxed));
1036         CHECK_FAST(cacheBinModel.meanHitRange.load(std::memory_order_relaxed) == cacheBin.meanHitRange.load(std::memory_order_relaxed));
1037         CHECK_FAST(cacheBinModel.lastGet == cacheBin.lastGet);
1038     }
1039 
1040     static uintptr_t cacheCurrTime;
1041     static intptr_t tooLargeLOC;
1042 };
1043 
1044 template<typename Props> uintptr_t CacheBinModel<Props>::cacheCurrTime;
1045 template<typename Props> intptr_t CacheBinModel<Props>::tooLargeLOC;
1046 
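// A Scenario for LOCModelTester supplies two operations:
//   next()    - returns nullptr to request an allocation, a LargeMemoryBlock
//               list to be freed, or (LargeMemoryBlock*)-1 to stop the test;
//   saveLmb() - records a block obtained from mallocLargeObject for later release.
// TestBootstrap and TestRandom below implement this protocol.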
1047 template <typename Scenario>
1048 void LOCModelTester() {
1049     defaultMemPool->extMemPool.loc.cleanAll();
1050     defaultMemPool->extMemPool.loc.reset();
1051 
1052     const size_t size = 16 * 1024;
1053     const size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr);
1054     const size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment);
1055     const int binIdx = defaultMemPool->extMemPool.loc.largeCache.sizeToIdx( allocationSize );
1056 
1057     CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps>::cacheCurrTime = defaultMemPool->extMemPool.loc.cacheCurrTime;
1058     CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps>::tooLargeLOC = defaultMemPool->extMemPool.loc.largeCache.tooLargeLOC;
1059     CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps> cacheBinModel(defaultMemPool->extMemPool.loc.largeCache.bin[binIdx], allocationSize);
1060 
1061     Scenario scen;
1062     for (rml::internal::LargeMemoryBlock *lmb = scen.next(); (intptr_t)lmb != (intptr_t)-1; lmb = scen.next()) {
1063         if ( lmb ) {
1064             int num=1;
1065             for (rml::internal::LargeMemoryBlock *curr = lmb; curr->next; curr=curr->next) num+=1;
1066             defaultMemPool->extMemPool.freeLargeObject(lmb);
1067             cacheBinModel.putList(num);
1068         } else {
1069             scen.saveLmb(defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize));
1070             cacheBinModel.get();
1071         }
1072 
1073         cacheBinModel.check();
1074     }
1075 }
1076 
1077 class TestBootstrap {
1078     bool allocating;
1079     std::vector<rml::internal::LargeMemoryBlock*> lmbArray;
1080 public:
1081     TestBootstrap() : allocating(true) {}
1082 
1083     rml::internal::LargeMemoryBlock* next() {
1084         if ( allocating )
1085             return nullptr;
1086         if ( !lmbArray.empty() ) {
1087             rml::internal::LargeMemoryBlock *ret = lmbArray.back();
1088             lmbArray.pop_back();
1089             return ret;
1090         }
1091         return (rml::internal::LargeMemoryBlock*)-1;
1092     }
1093 
1094     void saveLmb( rml::internal::LargeMemoryBlock *lmb ) {
1095         lmb->next = nullptr;
1096         lmbArray.push_back(lmb);
1097         if ( lmbArray.size() == 1000 ) allocating = false;
1098     }
1099 };
1100 
1101 class TestRandom {
1102     std::vector<rml::internal::LargeMemoryBlock*> lmbArray;
1103     int numOps;
1104 public:
1105     TestRandom() : numOps(100000) {
1106         srand(1234);
1107     }
1108 
1109     rml::internal::LargeMemoryBlock* next() {
1110         if ( numOps-- ) {
1111             if ( lmbArray.empty() || rand() / (RAND_MAX>>1) == 0 )
1112                 return nullptr;
1113             size_t ind = rand()%lmbArray.size();
1114             if ( ind != lmbArray.size()-1 ) std::swap(lmbArray[ind],lmbArray[lmbArray.size()-1]);
1115             rml::internal::LargeMemoryBlock *lmb = lmbArray.back();
1116             lmbArray.pop_back();
1117             return lmb;
1118         }
1119         return (rml::internal::LargeMemoryBlock*)-1;
1120     }
1121 
1122     void saveLmb( rml::internal::LargeMemoryBlock *lmb ) {
1123         lmb->next = nullptr;
1124         lmbArray.push_back(lmb);
1125     }
1126 };
1127 
1128 class TestCollapsingMallocFree : public SimpleBarrier {
1129 public:
1130     static const int NUM_ALLOCS = 100000;
1131     const int num_threads;
1132 
1133     TestCollapsingMallocFree( int _num_threads ) : num_threads(_num_threads) {
1134         initBarrier( num_threads );
1135     }
1136 
1137     void operator() ( int ) const {
1138         const size_t size = 16 * 1024;
1139         const size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr);
1140         const size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment);
1141 
1142         barrier.wait();
1143         for ( int i=0; i<NUM_ALLOCS; ++i ) {
1144             defaultMemPool->extMemPool.freeLargeObject(
1145                 defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize) );
1146         }
1147     }
1148 
1149     void check() {
1150         REQUIRE( tbbmalloc_whitebox::locGetProcessed == tbbmalloc_whitebox::locPutProcessed);
1151         REQUIRE_MESSAGE( tbbmalloc_whitebox::locGetProcessed < num_threads*NUM_ALLOCS, "Not a single Malloc/Free pair was collapsed." );
1152     }
1153 };
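// "Collapsed" here presumably means that a free and a subsequent malloc of the
// same bin were matched inside the cache bin's pending operation queue, so the
// pair never reached the locGetProcessed/locPutProcessed counters; the check
// therefore expects fewer counted operations than num_threads*NUM_ALLOCS.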
1154 
1155 class TestCollapsingBootstrap : public SimpleBarrier {
1156     class CheckNumAllocs {
1157         const int num_threads;
1158     public:
1159         CheckNumAllocs( int _num_threads ) : num_threads(_num_threads) {}
1160         void operator()() const {
1161             REQUIRE( tbbmalloc_whitebox::locGetProcessed == num_threads*NUM_ALLOCS );
1162             REQUIRE( tbbmalloc_whitebox::locPutProcessed == 0 );
1163         }
1164     };
1165 public:
1166     static const int NUM_ALLOCS = 1000;
1167     const int num_threads;
1168 
1169     TestCollapsingBootstrap( int _num_threads ) : num_threads(_num_threads) {
1170         initBarrier( num_threads );
1171     }
1172 
1173     void operator() ( int ) const {
1174         const size_t size = 16 * 1024;
1175         size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr);
1176         size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment);
1177 
1178         barrier.wait();
1179         rml::internal::LargeMemoryBlock *lmbArray[NUM_ALLOCS];
1180         for ( int i=0; i<NUM_ALLOCS; ++i )
1181             lmbArray[i] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize);
1182 
1183         barrier.wait(CheckNumAllocs(num_threads));
1184         for ( int i=0; i<NUM_ALLOCS; ++i )
1185             defaultMemPool->extMemPool.freeLargeObject( lmbArray[i] );
1186     }
1187 
1188     void check() {
1189         REQUIRE( tbbmalloc_whitebox::locGetProcessed == tbbmalloc_whitebox::locPutProcessed );
1190         REQUIRE( tbbmalloc_whitebox::locGetProcessed == num_threads*NUM_ALLOCS );
1191     }
1192 };
1193 
1194 template <typename Scenario>
1195 void LOCCollapsingTester( int num_threads ) {
1196     tbbmalloc_whitebox::locGetProcessed = 0;
1197     tbbmalloc_whitebox::locPutProcessed = 0;
1198     defaultMemPool->extMemPool.loc.cleanAll();
1199     defaultMemPool->extMemPool.loc.reset();
1200 
1201     Scenario scen(num_threads);
1202     utils::NativeParallelFor(num_threads, scen);
1203 
1204     scen.check();
1205 }
1206 
1207 void TestLOC() {
1208     LOCModelTester<TestBootstrap>();
1209     LOCModelTester<TestRandom>();
1210 
1211     const int num_threads = 16;
1212     LOCCollapsingTester<TestCollapsingBootstrap>( num_threads );
1213     if ( num_threads > 1 ) {
1214         INFO( "num_threads = " << num_threads );
1215         LOCCollapsingTester<TestCollapsingMallocFree>( num_threads );
1216     } else {
1217         REPORT( "Warning: concurrency is too low for TestMallocFreeCollapsing ( num_threads = %d )\n", num_threads );
1218     }
1219 }
1220 /*---------------------------------------------------------------------------*/
1221 
1222 void *findCacheLine(void *p) {
1223     return (void*)alignDown((uintptr_t)p, estimatedCacheLineSize);
1224 }
1225 
1226 // test that internals of Block are at expected cache lines
1227 void TestSlabAlignment() {
1228     const size_t min_sz = 8;
1229     const int space = 2*16*1024; // fill at least 2 slabs
1230     void *pointers[space / min_sz];  // the worst case is min_sz byte object
1231 
1232     for (size_t sz = min_sz; sz <= 64; sz *= 2) {
1233         for (size_t i = 0; i < space/sz; i++) {
1234             pointers[i] = scalable_malloc(sz);
1235             Block *block = (Block *)alignDown(pointers[i], slabSize);
1236             REQUIRE_MESSAGE(findCacheLine(&block->isFull) != findCacheLine(pointers[i]),
1237                           "A user object must not share a cache line with slab control structures.");
1238             REQUIRE_MESSAGE(findCacheLine(&block->next) != findCacheLine(&block->nextPrivatizable),
1239                           "GlobalBlockFields and LocalBlockFields must be on different cache lines.");
1240         }
1241         for (size_t i = 0; i < space/sz; i++)
1242             scalable_free(pointers[i]);
1243     }
1244 }
1245 
1246 #include "common/memory_usage.h"
1247 
1248 // TODO: Consider adding Huge Pages support on macOS (special mmap flag).
1249 // Transparent Huge Pages support would need a different system parsing mechanism,
1250 // because there is no /proc/meminfo on macOS.
1251 #if __unix__
1252 void TestTHP() {
1253     // Get backend from default memory pool
1254     rml::internal::Backend *backend = &(defaultMemPool->extMemPool.backend);
1255 
1256     // Configure malloc to use huge pages
1257     scalable_allocation_mode(USE_HUGE_PAGES, 1);
1258     REQUIRE_MESSAGE(hugePages.isEnabled, "Huge pages should be enabled via scalable_allocation_mode");
1259 
1260     const int HUGE_PAGE_SIZE = 2 * 1024 * 1024;
1261 
1262     // allocCount transparent huge pages should be allocated
1263     const int allocCount = 10;
1264 
1265     // Allocate huge page aligned memory regions to track system
1266     // counters for transparent huge pages
1267     void*  allocPtrs[allocCount];
1268 
1269     // Wait for the system to update process memory info files after other tests
1270     utils::Sleep(4000);
1271 
1272     // Parse system info regarding current THP status
1273     size_t currentSystemTHPCount = utils::getSystemTHPCount();
1274     size_t currentSystemTHPAllocatedSize = utils::getSystemTHPAllocatedSize();
1275 
1276     for (int i = 0; i < allocCount; i++) {
1277         // The allocation size has to be aligned on the page size
1278         size_t allocSize = HUGE_PAGE_SIZE - (i * 1000);
1279 
1280         // Map memory
1281         allocPtrs[i] = backend->allocRawMem(allocSize);
1282 
1283         REQUIRE_MESSAGE(allocPtrs[i], "Allocation not succeeded.");
1284         REQUIRE_MESSAGE(allocSize == HUGE_PAGE_SIZE,
1285             "Allocation size has to be aligned on the Huge Page size internally.");
1286 
1287         // First touch policy - no real pages allocated by OS without accessing the region
1288         memset(allocPtrs[i], 1, allocSize);
1289 
1290         REQUIRE_MESSAGE(isAligned(allocPtrs[i], HUGE_PAGE_SIZE),
1291             "The pointer returned by scalable_malloc is not aligned on huge page size.");
1292     }
1293 
1294     // Wait for the system to update process memory info files after allocations
1295     utils::Sleep(4000);
1296 
1297     // Generally, the kernel tries to allocate transparent huge pages, but sometimes it cannot do this
1298     // (tested on SLES 11/12), so consider these system info checks a remark only.
1299     // Also, some systems can allocate more memory than needed in the background (tested on Ubuntu 14.04)
1300     size_t newSystemTHPCount = utils::getSystemTHPCount();
1301     size_t newSystemTHPAllocatedSize = utils::getSystemTHPAllocatedSize();
1302     if ((newSystemTHPCount - currentSystemTHPCount) < allocCount
1303             && (newSystemTHPAllocatedSize - currentSystemTHPAllocatedSize) / (2 * 1024) < allocCount) {
1304         REPORT( "Warning: the system didn't allocate needed amount of THPs.\n" );
1305     }
1306 
1307     // Test memory unmap
1308     for (int i = 0; i < allocCount; i++) {
1309         REQUIRE_MESSAGE(backend->freeRawMem(allocPtrs[i], HUGE_PAGE_SIZE),
1310                 "Something went wrong during raw memory free");
1311     }
1312 }
1313 #endif // __unix__
1314 
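// Query the memory usage several times and return the last reading, presumably to let the
// reported value settle after recent allocations and frees.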
inline size_t getStabilizedMemUsage() {
    for (int i = 0; i < 3; i++) utils::GetMemoryUsage();
    return utils::GetMemoryUsage();
}

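// Reallocate origPtr to reallocSize and report, via the out-parameters, the unaligned size of the
// underlying LargeMemoryBlock before and after, so callers can see whether a real reallocation happened.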
inline void* reallocAndRetrieve(void* origPtr, size_t reallocSize, size_t& origBlockSize, size_t& reallocBlockSize) {
    rml::internal::LargeMemoryBlock* origLmb = ((rml::internal::LargeObjectHdr *)origPtr - 1)->memoryBlock;
    origBlockSize = origLmb->unalignedSize;

    void* reallocPtr = rml::internal::reallocAligned(defaultMemPool, origPtr, reallocSize, 0);

    // Retrieve the reallocated block information
    rml::internal::LargeMemoryBlock* reallocLmb = ((rml::internal::LargeObjectHdr *)reallocPtr - 1)->memoryBlock;
    reallocBlockSize = reallocLmb->unalignedSize;

    return reallocPtr;
}

void TestReallocDecreasing() {

    /* Test that an actual reallocation happens for large objects that do not fit the backend cache
       but decrease in size by a factor of >= 2. */

    size_t startSize = 100 * 1024 * 1024;
    size_t maxBinnedSize = defaultMemPool->extMemPool.backend.getMaxBinnedSize();
    void*  origPtr = scalable_malloc(startSize);
    void*  reallocPtr = nullptr;

    // Realloc to a size 1 MB smaller
    size_t origBlockSize = 42;
    size_t reallocBlockSize = 43;
    reallocPtr = reallocAndRetrieve(origPtr, startSize - 1 * 1024 * 1024, origBlockSize, reallocBlockSize);
    REQUIRE_MESSAGE(origBlockSize == reallocBlockSize, "Reallocated block size shouldn't change");
    REQUIRE_MESSAGE(reallocPtr == origPtr, "Original pointer shouldn't change");

    // Repeat decreasing reallocation while the size stays above the maximum binned size
    size_t reallocSize = (startSize / 2) - 1000; // exact realloc
    while(reallocSize > maxBinnedSize) {

        // Prevent huge/large objects caching
        defaultMemPool->extMemPool.loc.cleanAll();
        // Prevent local large object caching
        TLSData *tls = defaultMemPool->getTLS(/*create=*/false);
        tls->lloc.externalCleanup(&defaultMemPool->extMemPool);

        size_t sysMemUsageBefore = getStabilizedMemUsage();
        size_t totalMemSizeBefore = defaultMemPool->extMemPool.backend.getTotalMemSize();

        reallocPtr = reallocAndRetrieve(origPtr, reallocSize, origBlockSize, reallocBlockSize);

        REQUIRE_MESSAGE(origBlockSize > reallocBlockSize, "Reallocated block size should decrease.");

        size_t sysMemUsageAfter = getStabilizedMemUsage();
        size_t totalMemSizeAfter = defaultMemPool->extMemPool.backend.getTotalMemSize();

        // Skip the check when backend caching occurred or system memory usage info could not be read
        if (totalMemSizeBefore > totalMemSizeAfter && sysMemUsageAfter != 0 && sysMemUsageBefore != 0) {
            REQUIRE_MESSAGE(sysMemUsageBefore > sysMemUsageAfter, "Memory was not released");
        }

        origPtr = reallocPtr;
        reallocSize = (reallocSize / 2) - 1000; // exact realloc
    }
    scalable_free(reallocPtr);

    /* TODO: Decreasing reallocation of large objects that fit the backend cache */
    /* TODO: Decreasing reallocation test for small objects */
}
#if !__TBB_WIN8UI_SUPPORT && defined(_WIN32)

#include "../../src/tbbmalloc_proxy/function_replacement.cpp"
#include <string>
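// White-box checks for TBB_malloc_replacement_log(): the log should be empty (with status -1)
// before any replacement attempts, truncate gracefully on overflow, and report status -1
// whenever a replacement was not found.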
namespace FunctionReplacement {
    FunctionInfo funcInfo = { "funcname","dllname" };
    char **func_replacement_log;
    int status;

    void LogCleanup() {
        // Free all allocated memory
        for (unsigned i = 0; i < Log::record_number; i++){
            HeapFree(GetProcessHeap(), 0, Log::records[i]);
        }
        for (unsigned i = 0; i < Log::RECORDS_COUNT + 1; i++){
            Log::records[i] = nullptr;
        }
        Log::replacement_status = true;
        Log::record_number = 0;
    }

    void TestEmptyLog() {
        status = TBB_malloc_replacement_log(&func_replacement_log);

        REQUIRE_MESSAGE(status == -1, "Status is true, but log is empty");
        REQUIRE_MESSAGE(*func_replacement_log == nullptr, "Log must be empty");
    }

    void TestLogOverload() {
        for (int i = 0; i < 1000; i++)
            Log::record(funcInfo, "opcode string", true);

        status = TBB_malloc_replacement_log(&func_replacement_log);
        // Find last record
        for (; *(func_replacement_log + 1) != 0; func_replacement_log++) {}

        std::string last_line(*func_replacement_log);
        REQUIRE_MESSAGE(status == 0, "False status, but all functions found");
        REQUIRE_MESSAGE(last_line.compare("Log was truncated.") == 0, "Log overflow was not handled");

        // Change status
        Log::record(funcInfo, "opcode string", false);
        status = TBB_malloc_replacement_log(nullptr);
        REQUIRE_MESSAGE(status == -1, "Status is true, but we have false search case");

        LogCleanup();
    }

    void TestFalseSearchCase() {
        Log::record(funcInfo, "opcode string", false);
        std::string expected_line = "Fail: "+ std::string(funcInfo.funcName) + " (" +
                         std::string(funcInfo.dllName) + "), byte pattern: <opcode string>";

        status = TBB_malloc_replacement_log(&func_replacement_log);

        REQUIRE_MESSAGE(expected_line.compare(*func_replacement_log) == 0, "Wrong last string content");
        REQUIRE_MESSAGE(status == -1, "Status is true, but we have false search case");
        LogCleanup();
    }

    void TestWrongFunctionInDll() {
        HMODULE ucrtbase_handle = GetModuleHandle("ucrtbase.dll");
        if (ucrtbase_handle) {
            IsPrologueKnown("ucrtbase.dll", "fake_function", nullptr, ucrtbase_handle);
            std::string expected_line = "Fail: fake_function (ucrtbase.dll), byte pattern: <unknown>";

            status = TBB_malloc_replacement_log(&func_replacement_log);

            REQUIRE_MESSAGE(expected_line.compare(*func_replacement_log) == 0, "Wrong last string content");
            REQUIRE_MESSAGE(status == -1, "Status is true, but we have false search case");
            LogCleanup();
        } else {
            INFO("Cannot find ucrtbase.dll on the system, test skipped!\n");
        }
    }
}

void TesFunctionReplacementLog() {
    using namespace FunctionReplacement;
    // Do not reorder the test cases
    TestEmptyLog();
    TestLogOverload();
    TestFalseSearchCase();
    TestWrongFunctionInDll();
}

#endif /*!__TBB_WIN8UI_SUPPORT && defined(_WIN32)*/

#include <cmath> // pow function

// Huge objects cache: the formula Size = MinSize * 2^(Index / StepFactor) gives the bin size,
// but it does not match our sizeToIdx approximation algorithm, where the step sizes between major
// (power of 2) sizes are equal. Used internally by the test. The static cast avoids warnings.
inline size_t hocIdxToSizeFormula(int idx) {
    return static_cast<size_t>(float(rml::internal::LargeObjectCache::maxLargeSize) *
        pow(2, float(idx) / float(rml::internal::LargeObjectCache::HugeBSProps::StepFactor)));
}
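// In this formula, idx == StepFactor gives exactly 2 * maxLargeSize and idx == 2 * StepFactor gives
// 4 * maxLargeSize; indices in between follow a geometric progression, unlike the allocator's
// equal-step approximation mentioned above.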
// Large objects cache: bin sizes form an arithmetic progression
inline size_t locIdxToSizeFormula(int idx) {
    return rml::internal::LargeObjectCache::LargeBSProps::MinSize +
        (idx * rml::internal::LargeObjectCache::LargeBSProps::CacheStep);
}
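// For example, idx 0 maps to MinSize, idx 1 to MinSize + CacheStep, and so on.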

template <typename CacheType>
void TestLOCacheBinsConverterImpl(int idx, size_t checkingSize) {
    size_t alignedSize = CacheType::alignToBin(checkingSize);
    REQUIRE_MESSAGE(alignedSize >= checkingSize, "Size is not correctly aligned");
    int calcIdx = CacheType::sizeToIdx(alignedSize);
    REQUIRE_MESSAGE(calcIdx == idx, "Index was not calculated correctly from the size");
}

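// Round-trip check: for every bin, compute its canonical size from the index, align it, and make
// sure sizeToIdx maps it back to the same bin.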
void TestLOCacheBinsConverter() {
    typedef rml::internal::LargeObjectCache::LargeCacheType LargeCacheType;
    typedef rml::internal::LargeObjectCache::HugeCacheType HugeCacheType;

    size_t checkingSize = 0;
    for (int idx = 0; idx < LargeCacheType::numBins; idx++) {
        checkingSize = locIdxToSizeFormula(idx);
        TestLOCacheBinsConverterImpl<LargeCacheType>(idx, checkingSize);
    }
    for (int idx = 0; idx < HugeCacheType::numBins; idx++) {
        checkingSize = hocIdxToSizeFormula(idx);
        TestLOCacheBinsConverterImpl<HugeCacheType>(idx, checkingSize);
    }
}

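// Helper that fills the huge object cache bins around the sieve boundary and then checks which
// bins survive no cleanup, a regular cleanup, and a hard cleanup for a given huge-size threshold.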
struct HOThresholdTester {
    LargeObjectCache* loc;
    size_t hugeSize;

    static const size_t sieveSize = LargeObjectCache::defaultMaxHugeSize;
    // The sieve starts at 64MB (the 24th cache bin); checking a few bins on each side of it is
    // enough and keeps memory consumption decent (especially on 32-bit architectures)
    static const int MIN_BIN_IDX = 21;
    static const int MAX_BIN_IDX = 27;

    enum CleanupType {
        NO_CLEANUP,
        REGULAR_CLEANUP,
        HARD_CLEANUP
    };

    void populateCache() {
        LargeMemoryBlock* loArray[MAX_BIN_IDX - MIN_BIN_IDX];
        // To avoid the consequences of backend::softCacheCleanup (cleanup by isLOCToolarge),
        // first allocate all objects and only then cache them at once.
        // Moreover, because the first cached item is still dropped from the cache due to the lack
        // of history, do the allocation twice.
        for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) {
            size_t allocationSize = alignedSizeFromIdx(idx);
            int localIdx = idx - MIN_BIN_IDX;
            loArray[localIdx] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize);
            REQUIRE_MESSAGE(loArray[localIdx], "Large object was not allocated.");
            loc->put(loArray[localIdx]);
            loArray[localIdx] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize);
        }
        for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) {
            loc->put(loArray[idx - MIN_BIN_IDX]);
        }
    }
    void clean(bool all) {
        if (all) {
            // Ignore any threshold and clean all bins
            loc->cleanAll();
        } else {
            // Regular cleanup should do nothing for bins above the threshold. The decreasing cleanup
            // is also run to make sure that all objects below defaultMaxHugeSize (sieveSize) were cleaned
            loc->regularCleanup();
            loc->decreasingCleanup();
        }
    }
    void check(CleanupType type) {
        for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) {
            size_t objectSize = alignedSizeFromIdx(idx);
            // Objects below the sieve threshold or above the huge object threshold should stay cached
            // (the others should be sieved), unless the whole cache was dropped. A regular cleanup
            // drops only objects below the sieve size.
            if (type == NO_CLEANUP && sizeInCacheRange(objectSize)) {
                REQUIRE_MESSAGE(objectInCacheBin(idx, objectSize), "Object was released from the cache, but it shouldn't have been.");
            } else if (type == REGULAR_CLEANUP && (objectSize >= hugeSize)) {
                REQUIRE_MESSAGE(objectInCacheBin(idx, objectSize), "Object was released from the cache, but it shouldn't have been.");
            } else { // HARD_CLEANUP
                REQUIRE_MESSAGE(cacheBinEmpty(idx), "Object is still cached.");
            }
        }
    }

private:
    bool cacheBinEmpty(int idx) {
        return (loc->hugeCache.bin[idx].cachedSize.load(std::memory_order_relaxed) == 0 && loc->hugeCache.bin[idx].get() == nullptr);
    }
    bool objectInCacheBin(int idx, size_t size) {
        return (loc->hugeCache.bin[idx].cachedSize.load(std::memory_order_relaxed) != 0 &&
            loc->hugeCache.bin[idx].cachedSize.load(std::memory_order_relaxed) % size == 0);
    }
    bool sizeInCacheRange(size_t size) {
        return size <= sieveSize || size >= hugeSize;
    }
    size_t alignedSizeFromIdx(int idx) {
        return rml::internal::LargeObjectCache::alignToBin(hocIdxToSizeFormula(idx));
    }
};

// The TBBMALLOC_SET_HUGE_SIZE_THRESHOLD value should be set before the test,
// through the scalable API or the environment variable
void TestHugeSizeThresholdImpl(LargeObjectCache* loc, size_t hugeSize, bool fullTesting) {
    HOThresholdTester test = {loc, hugeSize};
    test.populateCache();
    // Check the default sieve value
    test.check(HOThresholdTester::NO_CLEANUP);

    if(fullTesting) {
        // Check that objects above the threshold stay in the cache after a regular cleanup
        test.clean(/*all*/false);
        test.check(HOThresholdTester::REGULAR_CLEANUP);
    }
    // Check that all objects are dropped from the cache after a hard cleanup (ignores the huge objects threshold)
    test.clean(/*all*/true);
    test.check(HOThresholdTester::HARD_CLEANUP);
    // Restore the previous settings
    loc->setHugeSizeThreshold(LargeObjectCache::maxHugeSize);
    loc->reset();
}

/*
 *  Test the default huge size and the behavior when huge object settings are defined
 */
void TestHugeSizeThreshold() {
    // Clean up anything that was allocated before the test and reset the cache state
    scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr);
    LargeObjectCache* loc = &defaultMemPool->extMemPool.loc;
    // Restore the default settings just in case
    loc->setHugeSizeThreshold(LargeObjectCache::maxHugeSize);
    loc->reset();
    // First, check the default huge size value (with the max huge object threshold).
    // Everything larger than this value should be released to the OS without caching.
    TestHugeSizeThresholdImpl(loc, loc->hugeSizeThreshold, false);
    // Then set the huge object threshold.
    // All objects with sizes above the threshold will be released only after a hard cleanup.
#if !__TBB_WIN8UI_SUPPORT
    // Unit testing for the environment variable
    utils::SetEnv("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD","67108864");
    // The large object cache reads the threshold environment variable during initialization.
    // Reset the value before the test.
    loc->hugeSizeThreshold = 0;
    // Reset the logical time to prevent regular cleanup
    loc->cacheCurrTime = 0;
    loc->init(&defaultMemPool->extMemPool);
    TestHugeSizeThresholdImpl(loc, 64 * MByte, true);
#endif
    // Unit testing for scalable_allocation_mode
    scalable_allocation_mode(TBBMALLOC_SET_HUGE_SIZE_THRESHOLD, 56 * MByte);
    TestHugeSizeThresholdImpl(loc, 56 * MByte, true);
    // Verify that objects whose sizes align to maxHugeSize are not cached.
    size_t sz = LargeObjectCache::maxHugeSize;
    size_t aligned_sz = LargeObjectCache::alignToBin(sz);
    REQUIRE_MESSAGE(sz == aligned_sz, "maxHugeSize should be aligned.");
    REQUIRE_MESSAGE(!loc->sizeInCacheRange(sz), "Upper bound sized object shouldn't be cached.");
    REQUIRE_MESSAGE(loc->get(sz) == nullptr, "Upper bound sized object shouldn't be cached.");
}

//! \brief \ref error_guessing
TEST_CASE("Main test case") {
    scalable_allocation_mode(USE_HUGE_PAGES, 0);
#if !__TBB_WIN8UI_SUPPORT
    utils::SetEnv("TBB_MALLOC_USE_HUGE_PAGES","yes");
#endif
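    // Huge pages were explicitly disabled via scalable_allocation_mode above, so they are expected
    // to stay off even though TBB_MALLOC_USE_HUGE_PAGES is set; checkNoHugePages() verifies this
    // both before and after initialization.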
    checkNoHugePages();
    // backreference requires that initialization was done
    if(!isMallocInitialized()) doInitialization();
    checkNoHugePages();
    // to succeed, leak detection must be the 1st memory-intensive test
    TestBackRef();
    TestCleanAllBuffers<4*1024>();
    TestCleanAllBuffers<16*1024>();
    TestCleanThreadBuffers();
    TestPools();
    TestBackend();

#if MALLOC_CHECK_RECURSION
    for( int p=MaxThread; p>=MinThread; --p ) {
        TestStartupAlloc::initBarrier( p );
        utils::NativeParallelFor( p, TestStartupAlloc() );
        REQUIRE_MESSAGE(!firstStartupBlock, "Startup heap memory leak detected");
    }
#endif
    TestLargeObjectCache();
    TestObjectRecognition();
    TestBitMask();
    TestHeapLimit();
    TestLOC();
    TestSlabAlignment();
}

//! \brief \ref error_guessing
TEST_CASE("Decreasing reallocation") {
    if (!isMallocInitialized()) doInitialization();
    TestReallocDecreasing();
}

//! \brief \ref error_guessing
TEST_CASE("Large object cache bins converter") {
    if (!isMallocInitialized()) doInitialization();
    TestLOCacheBinsConverter();
}

//! \brief \ref error_guessing
TEST_CASE("Huge size threshold settings") {
    if (!isMallocInitialized()) doInitialization();
    TestHugeSizeThreshold();
}

#if __unix__
//! \brief \ref error_guessing
TEST_CASE("Transparent huge pages") {
    if (utils::isTHPEnabledOnMachine()) {
        if (!isMallocInitialized()) doInitialization();
        TestTHP();
    } else {
        INFO("Transparent Huge Pages are not supported on the system - test skipped\n");
    }
}
#endif

#if !__TBB_WIN8UI_SUPPORT && defined(_WIN32)
//! \brief \ref error_guessing
TEST_CASE("Function replacement log") {
    TesFunctionReplacementLog();
}
#endif