/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

//! \file test_malloc_whitebox.cpp
//! \brief Test for [memory_allocation] functionality

#if _WIN32 || _WIN64
#define _CRT_SECURE_NO_WARNINGS
#endif

// Prevent loading the dynamic TBBmalloc at startup; it is not needed for the whitebox test
#define __TBB_SOURCE_DIRECTLY_INCLUDED 1
// Call the thread shutdown API when native threads join
#define HARNESS_TBBMALLOC_THREAD_SHUTDOWN 1

// According to the C99 standard, INTPTR_MIN is defined for C++ only if __STDC_LIMIT_MACROS is pre-defined
#define __STDC_LIMIT_MACROS 1

// Avoid depending on ITT support
#ifdef DO_ITT_NOTIFY
#undef DO_ITT_NOTIFY
#endif

#include "common/test.h"

#include "common/utils.h"
#include "common/utils_assert.h"
#include "common/utils_env.h"
#include "common/spin_barrier.h"

#include "oneapi/tbb/detail/_machine.h"

#define __TBB_MALLOC_WHITEBOX_TEST 1 // to get access to allocator internals
// help trigger rare race condition
#define WhiteboxTestingYield() (tbb::detail::yield(), tbb::detail::yield(), tbb::detail::yield(), tbb::detail::yield())

#if __INTEL_COMPILER && __TBB_MIC_OFFLOAD
// 2571 is variable has not been declared with compatible "target" attribute
// 3218 is class/struct may fail when offloaded because this field is misaligned
//         or contains data that is misaligned
    #pragma warning(push)
    #pragma warning(disable:2571 3218)
#endif
#define protected public
#define private public
#include "../../src/tbbmalloc/frontend.cpp"
#undef protected
#undef private
#if __INTEL_COMPILER && __TBB_MIC_OFFLOAD
    #pragma warning(pop)
#endif
#include "../../src/tbbmalloc/backend.cpp"
#include "../../src/tbbmalloc/backref.cpp"

namespace tbbmalloc_whitebox {
    std::atomic<size_t> locGetProcessed{};
    std::atomic<size_t> locPutProcessed{};
}
#include "../../src/tbbmalloc/large_objects.cpp"
#include "../../src/tbbmalloc/tbbmalloc.cpp"
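// The tbbmalloc sources above are compiled directly into this test (with
// protected/private made public), so the code below can reach allocator internals
// such as Backend, LargeObjectCache, StartupBlock, and the back-reference tables.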

const int LARGE_MEM_SIZES_NUM = 10;
static const int MinThread = 1;
static const int MaxThread = 4;

class AllocInfo {
    int *p;
    int val;
    int size;
public:
    AllocInfo() : p(nullptr), val(0), size(0) {}
    explicit AllocInfo(int sz) : p((int*)scalable_malloc(sz*sizeof(int))),
                                   val(rand()), size(sz) {
        REQUIRE(p);
        for (int k=0; k<size; k++)
            p[k] = val;
    }
    void check() const {
        for (int k=0; k<size; k++)
            ASSERT(p[k] == val, nullptr);
    }
    void clear() {
        scalable_free(p);
    }
};

// Test struct to call ProcessShutdown after all tests
struct ShutdownTest {
    ~ShutdownTest() {
    #if _WIN32 || _WIN64
        __TBB_mallocProcessShutdownNotification(true);
    #else
        __TBB_mallocProcessShutdownNotification(false);
    #endif
    }
};

static ShutdownTest shutdownTest;

class SimpleBarrier: utils::NoAssign {
protected:
    static utils::SpinBarrier barrier;
public:
    static void initBarrier(unsigned thrds) { barrier.initialize(thrds); }
};

utils::SpinBarrier SimpleBarrier::barrier;

class TestLargeObjCache: public SimpleBarrier {
public:
    static int largeMemSizes[LARGE_MEM_SIZES_NUM];

    TestLargeObjCache( ) {}

    void operator()( int /*mynum*/ ) const {
        AllocInfo allocs[LARGE_MEM_SIZES_NUM];

        // push to maximal cache limit
        for (int i=0; i<2; i++) {
            const int sizes[] = { MByte/sizeof(int),
                                  (MByte-2*LargeObjectCache::LargeBSProps::CacheStep)/sizeof(int) };
            for (int q=0; q<2; q++) {
                size_t curr = 0;
                for (int j=0; j<LARGE_MEM_SIZES_NUM; j++, curr++)
                    new (allocs+curr) AllocInfo(sizes[q]);

                for (size_t j=0; j<curr; j++) {
                    allocs[j].check();
                    allocs[j].clear();
                }
            }
        }

        barrier.wait();

        // check caching correctness
        for (int i=0; i<1000; i++) {
            size_t curr = 0;
            for (int j=0; j<LARGE_MEM_SIZES_NUM-1; j++, curr++)
                new (allocs+curr) AllocInfo(largeMemSizes[j]);

            new (allocs+curr)
                AllocInfo((int)(4*minLargeObjectSize +
                                2*minLargeObjectSize*(1.*rand()/RAND_MAX)));
            curr++;

            for (size_t j=0; j<curr; j++) {
                allocs[j].check();
                allocs[j].clear();
            }
        }
    }
};

int TestLargeObjCache::largeMemSizes[LARGE_MEM_SIZES_NUM];

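// Fill the large object cache from several threads with randomized large sizes and
// check that blocks returned by scalable_malloc still hold the expected contents
// (see AllocInfo::check()).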
void TestLargeObjectCache()
{
    for (int i=0; i<LARGE_MEM_SIZES_NUM; i++)
        TestLargeObjCache::largeMemSizes[i] =
            (int)(minLargeObjectSize + 2*minLargeObjectSize*(1.*rand()/RAND_MAX));

    for( int p=MaxThread; p>=MinThread; --p ) {
        TestLargeObjCache::initBarrier( p );
        utils::NativeParallelFor( p, TestLargeObjCache() );
    }
}

#if MALLOC_CHECK_RECURSION

class TestStartupAlloc: public SimpleBarrier {
    struct TestBlock {
        void *ptr;
        size_t sz;
    };
    static const int ITERS = 100;
public:
    TestStartupAlloc() {}
    void operator()(int) const {
        TestBlock blocks1[ITERS], blocks2[ITERS];

        barrier.wait();

        for (int i=0; i<ITERS; i++) {
            blocks1[i].sz = rand() % minLargeObjectSize;
            blocks1[i].ptr = StartupBlock::allocate(blocks1[i].sz);
            REQUIRE((blocks1[i].ptr && StartupBlock::msize(blocks1[i].ptr)>=blocks1[i].sz
                   && 0==(uintptr_t)blocks1[i].ptr % sizeof(void*)));
            memset(blocks1[i].ptr, i, blocks1[i].sz);
        }
        for (int i=0; i<ITERS; i++) {
            blocks2[i].sz = rand() % minLargeObjectSize;
            blocks2[i].ptr = StartupBlock::allocate(blocks2[i].sz);
            REQUIRE((blocks2[i].ptr && StartupBlock::msize(blocks2[i].ptr)>=blocks2[i].sz
                   && 0==(uintptr_t)blocks2[i].ptr % sizeof(void*)));
            memset(blocks2[i].ptr, i, blocks2[i].sz);

            for (size_t j=0; j<blocks1[i].sz; j++)
                REQUIRE(*((char*)blocks1[i].ptr+j) == i);
            Block *block = (Block *)alignDown(blocks1[i].ptr, slabSize);
            ((StartupBlock *)block)->free(blocks1[i].ptr);
        }
        for (int i=ITERS-1; i>=0; i--) {
            for (size_t j=0; j<blocks2[i].sz; j++)
                REQUIRE(*((char*)blocks2[i].ptr+j) == i);
            Block *block = (Block *)alignDown(blocks2[i].ptr, slabSize);
            ((StartupBlock *)block)->free(blocks2[i].ptr);
        }
    }
};

#endif /* MALLOC_CHECK_RECURSION */

#include <deque>

template<int ITERS>
class BackRefWork: utils::NoAssign {
    struct TestBlock {
        BackRefIdx idx;
        char       data;
        TestBlock(BackRefIdx idx_) : idx(idx_) {}
    };
public:
    BackRefWork() {}
    void operator()(int) const {
        size_t cnt;
        // it's important to not invalidate pointers to the contents of the container
        std::deque<TestBlock> blocks;

        // for ITERS==0 consume all available backrefs
        for (cnt=0; !ITERS || cnt<ITERS; cnt++) {
            BackRefIdx idx = BackRefIdx::newBackRef(/*largeObj=*/false);
            if (idx.isInvalid())
                break;
            blocks.push_back(TestBlock(idx));
            setBackRef(blocks.back().idx, &blocks.back().data);
        }
        for (size_t i=0; i<cnt; i++)
            REQUIRE((Block*)&blocks[i].data == getBackRef(blocks[i].idx));
        for (size_t i=cnt; i>0; i--)
            removeBackRef(blocks[i-1].idx);
    }
};

class LocalCachesHit: utils::NoAssign {
    // set ITERS to trigger possible leak of backreferences
    // during cleanup on cache overflow and on thread termination
    static const int ITERS = 2*(FreeBlockPool::POOL_HIGH_MARK +
                                LocalLOC::LOC_HIGH_MARK);
public:
    LocalCachesHit() {}
    void operator()(int) const {
        void *objsSmall[ITERS], *objsLarge[ITERS];

        for (int i=0; i<ITERS; i++) {
            objsSmall[i] = scalable_malloc(minLargeObjectSize-1);
            objsLarge[i] = scalable_malloc(minLargeObjectSize);
        }
        for (int i=0; i<ITERS; i++) {
            scalable_free(objsSmall[i]);
            scalable_free(objsLarge[i]);
        }
    }
};

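// Count the back references currently allocated by walking the main back-reference
// table; the tests below compare this count before and after a run to detect leaks.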
static size_t allocatedBackRefCount()
{
    size_t cnt = 0;
    for (int i=0; i<=backRefMain.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed); i++)
        cnt += backRefMain.load(std::memory_order_relaxed)->backRefBl[i]->allocatedCount;
    return cnt;
}

class TestInvalidBackrefs: public SimpleBarrier {
#if __ANDROID__
    // Android requires lower iters due to lack of virtual memory.
    static const int BACKREF_GROWTH_ITERS = 50*1024;
#else
    static const int BACKREF_GROWTH_ITERS = 200*1024;
#endif

    static std::atomic<bool> backrefGrowthDone;
    static void *ptrs[BACKREF_GROWTH_ITERS];
public:
    TestInvalidBackrefs() {}
    void operator()(int id) const {

        if (!id) {
            backrefGrowthDone = false;
            barrier.wait();

            for (int i=0; i<BACKREF_GROWTH_ITERS; i++)
                ptrs[i] = scalable_malloc(minLargeObjectSize);
            backrefGrowthDone = true;
            for (int i=0; i<BACKREF_GROWTH_ITERS; i++)
                scalable_free(ptrs[i]);
        } else {
            void *p2 = scalable_malloc(minLargeObjectSize-1);
            char *p1 = (char*)scalable_malloc(minLargeObjectSize-1);
            LargeObjectHdr *hdr =
                (LargeObjectHdr*)(p1+minLargeObjectSize-1 - sizeof(LargeObjectHdr));
            hdr->backRefIdx.main = 7;
            hdr->backRefIdx.largeObj = 1;
            hdr->backRefIdx.offset = 2000;

            barrier.wait();

            int yield_count = 0;
            while (!backrefGrowthDone) {
                scalable_free(p2);
                p2 = scalable_malloc(minLargeObjectSize-1);
                if (yield_count++ == 100) {
                    yield_count = 0;
                    std::this_thread::yield();
                }
            }
            scalable_free(p1);
            scalable_free(p2);
        }
    }
};

std::atomic<bool> TestInvalidBackrefs::backrefGrowthDone;
void *TestInvalidBackrefs::ptrs[BACKREF_GROWTH_ITERS];

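// Verify that back references are neither leaked nor left pointing at stale data by
// parallel allocation and deallocation, including the invalid-BackRefIdx regression
// scenario exercised by TestInvalidBackrefs.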
void TestBackRef() {
    size_t beforeNumBackRef, afterNumBackRef;

    beforeNumBackRef = allocatedBackRefCount();
    for( int p=MaxThread; p>=MinThread; --p )
        utils::NativeParallelFor( p, BackRefWork<2*BR_MAX_CNT+2>() );
    afterNumBackRef = allocatedBackRefCount();
    REQUIRE_MESSAGE(beforeNumBackRef==afterNumBackRef, "backreference leak detected");
    // lastUsed marks peak resource consumption. As we allocate below the mark,
    // it must not move up, otherwise there is a resource leak.
    int sustLastUsed = backRefMain.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed);
    utils::NativeParallelFor( 1, BackRefWork<2*BR_MAX_CNT+2>() );
    REQUIRE_MESSAGE(sustLastUsed == backRefMain.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed), "backreference leak detected");
    // check leak of back references while per-thread caches are in use
    // warm up needed to cover bootStrapMalloc call
    utils::NativeParallelFor( 1, LocalCachesHit() );
    beforeNumBackRef = allocatedBackRefCount();
    utils::NativeParallelFor( 2, LocalCachesHit() );
    int res = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr);
    REQUIRE(res == TBBMALLOC_OK);
    afterNumBackRef = allocatedBackRefCount();
    REQUIRE_MESSAGE(beforeNumBackRef>=afterNumBackRef, "backreference leak detected");

    // This is a regression test against a race condition between back-reference
    // extension and checking an invalid BackRefIdx.
    // When detecting whether an object is large or small, scalable_free first checks
    // for large objects, so there is a chance to prepend a small object with a
    // seemingly valid BackRefIdx for large objects and thus trigger the bug.
    TestInvalidBackrefs::initBarrier(MaxThread);
    utils::NativeParallelFor( MaxThread, TestInvalidBackrefs() );
    // Consume all available backrefs and check they work correctly.
    // For now test 32-bit machines only, because for 64-bit memory consumption is too high.
    if (sizeof(uintptr_t) == 4)
        utils::NativeParallelFor( MaxThread, BackRefWork<0>() );
}

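// Memory source callbacks for a fixed pool: getMem carves memory from a static 8 MB
// buffer and putMem never returns it, so the pool behaves like a fixed-size arena.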
void *getMem(intptr_t /*pool_id*/, size_t &bytes)
{
    const size_t BUF_SIZE = 8*1024*1024;
    static char space[BUF_SIZE];
    static size_t pos;

    if (pos + bytes > BUF_SIZE)
        return nullptr;

    void *ret = space + pos;
    pos += bytes;

    return ret;
}

int putMem(intptr_t /*pool_id*/, void* /*raw_ptr*/, size_t /*raw_bytes*/)
{
    return 0;
}

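// Memory source callbacks for a malloc-backed pool. Each region handed to the pool is
// preceded by a MallocPoolHeader recording the raw malloc pointer and the requested
// size, so putMallocMem can check the size reported back by the allocator and free
// the original pointer:
//
//   rawPtr -> [ MallocPoolHeader | memory returned to the pool ... ]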
struct MallocPoolHeader {
    void  *rawPtr;
    size_t userSize;
};

void *getMallocMem(intptr_t /*pool_id*/, size_t &bytes)
{
    void *rawPtr = malloc(bytes+sizeof(MallocPoolHeader));
    void *ret = (void *)((uintptr_t)rawPtr+sizeof(MallocPoolHeader));

    MallocPoolHeader *hdr = (MallocPoolHeader*)ret-1;
    hdr->rawPtr = rawPtr;
    hdr->userSize = bytes;

    return ret;
}

int putMallocMem(intptr_t /*pool_id*/, void *ptr, size_t bytes)
{
    MallocPoolHeader *hdr = (MallocPoolHeader*)ptr-1;
    ASSERT(bytes == hdr->userSize, "Invalid size in pool callback.");
    free(hdr->rawPtr);

    return 0;
}

class StressLOCacheWork: utils::NoAssign {
    rml::MemoryPool *my_mallocPool;
public:
    StressLOCacheWork(rml::MemoryPool *mallocPool) : my_mallocPool(mallocPool) {}
    void operator()(int) const {
        for (size_t sz=minLargeObjectSize; sz<1*1024*1024;
             sz+=LargeObjectCache::LargeBSProps::CacheStep) {
            void *ptr = pool_malloc(my_mallocPool, sz);
            REQUIRE_MESSAGE(ptr, "Memory was not allocated");
            memset(ptr, sz, sz);
            pool_free(my_mallocPool, ptr);
        }
    }
};

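// Exercise user-defined memory pools: a pool backed by the fixed buffer above and a
// malloc-backed pool whose callbacks double-check the sizes returned from the large
// object cache (LOC).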
void TestPools() {
    rml::MemPoolPolicy pol(getMem, putMem);
    size_t beforeNumBackRef, afterNumBackRef;

    rml::MemoryPool *pool1;
    rml::MemoryPool *pool2;
    pool_create_v1(0, &pol, &pool1);
    pool_create_v1(0, &pol, &pool2);
    pool_destroy(pool1);
    pool_destroy(pool2);

    scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr);
    beforeNumBackRef = allocatedBackRefCount();
    rml::MemoryPool *fixedPool;

    pool_create_v1(0, &pol, &fixedPool);
    pol.pAlloc = getMallocMem;
    pol.pFree = putMallocMem;
    pol.granularity = 8;
    rml::MemoryPool *mallocPool;

    pool_create_v1(0, &pol, &mallocPool);
/* Check that the large object cache (LOC) returns the correct size for cached objects.
   Objects of passBackendSz bytes are cached in the LOC but bypass the backend, so
   their memory is requested directly from the allocation callback.
   Objects of anotherLOCBinSz bytes must fit into another LOC bin,
   so that their allocation/releasing leads to cache cleanup.
   All this is expected to lead to the release of the passBackendSz-byte object
   from the LOC during LOC cleanup, and putMallocMem checks that the returned size
   is correct.
*/
    const size_t passBackendSz = Backend::maxBinned_HugePage+1,
        anotherLOCBinSz = minLargeObjectSize+1;
    for (int i=0; i<10; i++) { // run long enough to be cached
        void *p = pool_malloc(mallocPool, passBackendSz);
        REQUIRE_MESSAGE(p, "Memory was not allocated");
        pool_free(mallocPool, p);
    }
    // run long enough for the passBackendSz allocation to be cleaned from the cache
    // and returned back to putMallocMem for size checking
    for (int i=0; i<1000; i++) {
        void *p = pool_malloc(mallocPool, anotherLOCBinSz);
        REQUIRE_MESSAGE(p, "Memory was not allocated");
        pool_free(mallocPool, p);
    }

    void *smallObj =  pool_malloc(fixedPool, 10);
    REQUIRE_MESSAGE(smallObj, "Memory was not allocated");
    memset(smallObj, 1, 10);
    void *ptr = pool_malloc(fixedPool, 1024);
    REQUIRE_MESSAGE(ptr, "Memory was not allocated");
    memset(ptr, 1, 1024);
    void *largeObj = pool_malloc(fixedPool, minLargeObjectSize);
    REQUIRE_MESSAGE(largeObj, "Memory was not allocated");
    memset(largeObj, 1, minLargeObjectSize);
    ptr = pool_malloc(fixedPool, minLargeObjectSize);
    REQUIRE_MESSAGE(ptr, "Memory was not allocated");
    memset(ptr, minLargeObjectSize, minLargeObjectSize);
    pool_malloc(fixedPool, 10*minLargeObjectSize); // no leak for unsuccessful allocations
    pool_free(fixedPool, smallObj);
    pool_free(fixedPool, largeObj);

    // provoke large object cache cleanup and hope no leaks occur
    for( int p=MaxThread; p>=MinThread; --p )
        utils::NativeParallelFor( p, StressLOCacheWork(mallocPool) );
    pool_destroy(mallocPool);
    pool_destroy(fixedPool);

    scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr);
    afterNumBackRef = allocatedBackRefCount();
    REQUIRE_MESSAGE(beforeNumBackRef==afterNumBackRef, "backreference leak detected");

    {
        // test usedSize/cachedSize and LOC bitmask correctness
        void *p[5];
        pool_create_v1(0, &pol, &mallocPool);
        const LargeObjectCache *loc = &((rml::internal::MemoryPool*)mallocPool)->extMemPool.loc;
        const int LargeCacheStep = LargeObjectCache::LargeBSProps::CacheStep;
        p[3] = pool_malloc(mallocPool, minLargeObjectSize+2*LargeCacheStep);
        for (int i=0; i<10; i++) {
            p[0] = pool_malloc(mallocPool, minLargeObjectSize);
            p[1] = pool_malloc(mallocPool, minLargeObjectSize+LargeCacheStep);
            pool_free(mallocPool, p[0]);
            pool_free(mallocPool, p[1]);
        }
        REQUIRE(loc->getUsedSize());
        pool_free(mallocPool, p[3]);
        REQUIRE(loc->getLOCSize() < 3*(minLargeObjectSize+LargeCacheStep));
        const size_t maxLocalLOCSize = LocalLOCImpl<3,30>::getMaxSize();
        REQUIRE(loc->getUsedSize() <= maxLocalLOCSize);
        for (int i=0; i<3; i++)
            p[i] = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep);
        size_t currUser = loc->getUsedSize();
        REQUIRE((!loc->getLOCSize() && currUser >= 3*(minLargeObjectSize+LargeCacheStep)));
        p[4] = pool_malloc(mallocPool, minLargeObjectSize+3*LargeCacheStep);
        REQUIRE(loc->getUsedSize() - currUser >= minLargeObjectSize+3*LargeCacheStep);
        pool_free(mallocPool, p[4]);
        REQUIRE(loc->getUsedSize() <= currUser+maxLocalLOCSize);
        pool_reset(mallocPool);
        REQUIRE((!loc->getLOCSize() && !loc->getUsedSize()));
        pool_destroy(mallocPool);
    }
    // To test LOC we need bigger lists than released by current LocalLOC
    //   in production code. Create special LocalLOC.
    {
        LocalLOCImpl<2, 20> lLOC;
        pool_create_v1(0, &pol, &mallocPool);
        rml::internal::ExtMemoryPool *mPool = &((rml::internal::MemoryPool*)mallocPool)->extMemPool;
        const LargeObjectCache *loc = &((rml::internal::MemoryPool*)mallocPool)->extMemPool.loc;
        const int LargeCacheStep = LargeObjectCache::LargeBSProps::CacheStep;
        for (int i=0; i<22; i++) {
            void *o = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep);
            bool ret = lLOC.put(((LargeObjectHdr*)o - 1)->memoryBlock, mPool);
            REQUIRE(ret);

            o = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep);
            ret = lLOC.put(((LargeObjectHdr*)o - 1)->memoryBlock, mPool);
            REQUIRE(ret);
        }
        lLOC.externalCleanup(mPool);
        REQUIRE(!loc->getUsedSize());

        pool_destroy(mallocPool);
    }
}

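// Forge slab and large-object headers with bogus back references and check that
// __TBB_malloc_safer_msize rejects such pointers while still reporting correct sizes
// for genuine allocations.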
void TestObjectRecognition() {
    size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr);
    unsigned falseObjectSize = 113; // unsigned is the type expected by getObjectSize
    size_t obtainedSize;

    REQUIRE_MESSAGE(sizeof(BackRefIdx)==sizeof(uintptr_t), "Unexpected size of BackRefIdx");
    REQUIRE_MESSAGE(getObjectSize(falseObjectSize)!=falseObjectSize, "Error in test: bad choice for false object size");

    void* mem = scalable_malloc(2*slabSize);
    REQUIRE_MESSAGE(mem, "Memory was not allocated");
    Block* falseBlock = (Block*)alignUp((uintptr_t)mem, slabSize);
    falseBlock->objectSize = falseObjectSize;
    char* falseSO = (char*)falseBlock + falseObjectSize*7;
    REQUIRE_MESSAGE(alignDown(falseSO, slabSize)==(void*)falseBlock, "Error in test: false object offset is too big");

    void* bufferLOH = scalable_malloc(2*slabSize + headersSize);
    REQUIRE_MESSAGE(bufferLOH, "Memory was not allocated");
    LargeObjectHdr* falseLO =
        (LargeObjectHdr*)alignUp((uintptr_t)bufferLOH + headersSize, slabSize);
    LargeObjectHdr* headerLO = (LargeObjectHdr*)falseLO-1;
    headerLO->memoryBlock = (LargeMemoryBlock*)bufferLOH;
    headerLO->memoryBlock->unalignedSize = 2*slabSize + headersSize;
    headerLO->memoryBlock->objectSize = slabSize + headersSize;
    headerLO->backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true);
    setBackRef(headerLO->backRefIdx, headerLO);
    REQUIRE_MESSAGE(scalable_msize(falseLO) == slabSize + headersSize,
           "Error in test: LOH falsification failed");
    removeBackRef(headerLO->backRefIdx);

    const int NUM_OF_IDX = BR_MAX_CNT+2;
    BackRefIdx idxs[NUM_OF_IDX];
    for (int cnt=0; cnt<2; cnt++) {
        for (int main = -10; main<10; main++) {
            falseBlock->backRefIdx.main = (uint16_t)main;
            headerLO->backRefIdx.main = (uint16_t)main;

            for (int bl = -10; bl<BR_MAX_CNT+10; bl++) {
                falseBlock->backRefIdx.offset = (uint16_t)bl;
                headerLO->backRefIdx.offset = (uint16_t)bl;

                for (int largeObj = 0; largeObj<2; largeObj++) {
                    falseBlock->backRefIdx.largeObj = largeObj;
                    headerLO->backRefIdx.largeObj = largeObj;

                    obtainedSize = __TBB_malloc_safer_msize(falseSO, nullptr);
                    REQUIRE_MESSAGE(obtainedSize==0, "Incorrect pointer accepted");
                    obtainedSize = __TBB_malloc_safer_msize(falseLO, nullptr);
                    REQUIRE_MESSAGE(obtainedSize==0, "Incorrect pointer accepted");
                }
            }
        }
        if (cnt == 1) {
            for (int i=0; i<NUM_OF_IDX; i++)
                removeBackRef(idxs[i]);
            break;
        }
        for (int i=0; i<NUM_OF_IDX; i++) {
            idxs[i] = BackRefIdx::newBackRef(/*largeObj=*/false);
            setBackRef(idxs[i], nullptr);
        }
    }
    char *smallPtr = (char*)scalable_malloc(falseObjectSize);
    obtainedSize = __TBB_malloc_safer_msize(smallPtr, nullptr);
    REQUIRE_MESSAGE(obtainedSize==getObjectSize(falseObjectSize), "Correct pointer not accepted?");
    scalable_free(smallPtr);

    obtainedSize = __TBB_malloc_safer_msize(mem, nullptr);
    REQUIRE_MESSAGE(obtainedSize>=2*slabSize, "Correct pointer not accepted?");
    scalable_free(mem);
    scalable_free(bufferLOH);
}

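// Stress the backend directly from several threads: repeatedly get and put slab and
// large blocks, then check that getTotalMemSize grows and shrinks as memory is taken
// from and returned to the backend.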
class TestBackendWork: public SimpleBarrier {
    struct TestBlock {
        intptr_t   data;
        BackRefIdx idx;
    };
    static const int ITERS = 20;

    rml::internal::Backend *backend;
public:
    TestBackendWork(rml::internal::Backend *bknd) : backend(bknd) {}
    void operator()(int) const {
        barrier.wait();

        for (int i=0; i<ITERS; i++) {
            BlockI *slabBlock = backend->getSlabBlock(1);
            REQUIRE_MESSAGE(slabBlock, "Memory was not allocated");
            uintptr_t prevBlock = (uintptr_t)slabBlock;
            backend->putSlabBlock(slabBlock);

            LargeMemoryBlock *largeBlock = backend->getLargeBlock(16*1024);
            REQUIRE_MESSAGE(largeBlock, "Memory was not allocated");
            REQUIRE_MESSAGE((uintptr_t)largeBlock != prevBlock,
                    "Large block cannot be reused from slab memory, only in fixed_pool case.");
            backend->putLargeBlock(largeBlock);
        }
    }
};

void TestBackend()
{
    rml::MemPoolPolicy pol(getMallocMem, putMallocMem);
    rml::MemoryPool *mPool;
    pool_create_v1(0, &pol, &mPool);
    rml::internal::ExtMemoryPool *ePool = &((rml::internal::MemoryPool*)mPool)->extMemPool;
    rml::internal::Backend *backend = &ePool->backend;

    for( int p=MaxThread; p>=MinThread; --p ) {
        // regression test against a race condition in backend synchronization,
        // triggered only when the WhiteboxTestingYield() call yields
#if TBB_USE_DEBUG
        int num_iters = 10;
#else
        int num_iters = 100;
#endif
        for (int i = 0; i < num_iters; i++) {
            TestBackendWork::initBarrier(p);
            utils::NativeParallelFor( p, TestBackendWork(backend) );
        }
    }

    BlockI *block = backend->getSlabBlock(1);
    REQUIRE_MESSAGE(block, "Memory was not allocated");
    backend->putSlabBlock(block);

    // Check that the backend increases and decreases the amount of allocated memory when memory is allocated and released.
    const size_t memSize0 = backend->getTotalMemSize();
    LargeMemoryBlock *lmb = backend->getLargeBlock(4*MByte);
    REQUIRE( lmb );

    const size_t memSize1 = backend->getTotalMemSize();
    REQUIRE_MESSAGE( (intptr_t)(memSize1-memSize0) >= 4*MByte, "The backend has not increased the amount of used memory." );

    backend->putLargeBlock(lmb);
    const size_t memSize2 = backend->getTotalMemSize();
    REQUIRE_MESSAGE( memSize2 == memSize0, "The backend has not decreased the amount of used memory." );

    pool_destroy(mPool);
}

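// Sanity checks for BitMaskMin: getMinTrue(pos) must return the index of the first
// set bit at or after pos, or -1 if there is none.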
void TestBitMask()
{
    BitMaskMin<256> mask;

    mask.reset();
    mask.set(10, 1);
    mask.set(5, 1);
    mask.set(1, 1);
    REQUIRE(mask.getMinTrue(2) == 5);

    mask.reset();
    mask.set(0, 1);
    mask.set(64, 1);
    mask.set(63, 1);
    mask.set(200, 1);
    mask.set(255, 1);
    REQUIRE(mask.getMinTrue(0) == 0);
    REQUIRE(mask.getMinTrue(1) == 63);
    REQUIRE(mask.getMinTrue(63) == 63);
    REQUIRE(mask.getMinTrue(64) == 64);
    REQUIRE(mask.getMinTrue(101) == 200);
    REQUIRE(mask.getMinTrue(201) == 255);
    mask.set(255, 0);
    REQUIRE(mask.getMinTrue(201) == -1);
}

size_t getMemSize()
{
    return defaultMemPool->extMemPool.backend.getTotalMemSize();
}

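// Soft heap limit tests: with TBBMALLOC_SET_SOFT_HEAP_LIMIT set to a tiny value,
// freed memory must go back to the OS instead of being cached, so getMemSize()
// is expected to stay at the recorded baseline.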
class CheckNotCached {
    static size_t memSize;
public:
    void operator() () const {
        int res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1);
        REQUIRE(res == TBBMALLOC_OK);
        if (memSize==(size_t)-1) {
            memSize = getMemSize();
        } else {
            REQUIRE(getMemSize() == memSize);
            memSize=(size_t)-1;
        }
    }
};

size_t CheckNotCached::memSize = (size_t)-1;

class RunTestHeapLimit: public SimpleBarrier {
public:
    void operator()( int /*mynum*/ ) const {
        // Provoke bootstrap heap initialization before recording the memory size.
        // NOTE: the initialization must be done with a "large" object only,
        // because a "small" object allocation pins a slab as the active block,
        // and a foreign thread cannot release it.
        scalable_free(scalable_malloc(minLargeObjectSize));
        barrier.wait(CheckNotCached());
        for (size_t n = minLargeObjectSize; n < 5*1024*1024; n += 128*1024)
            scalable_free(scalable_malloc(n));
        barrier.wait(CheckNotCached());
    }
};

void TestHeapLimit()
{
    if(!isMallocInitialized()) doInitialization();
    // tiny limit to stop caching
    int res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1);
    REQUIRE(res == TBBMALLOC_OK);
    // Provoke bootstrap heap initialization before recording memory size.
    scalable_free(scalable_malloc(8));
    size_t n, sizeBefore = getMemSize();

    // Try to provoke call to OS for memory to check that
    // requests are not fulfilled from caches.
    // A single call is not enough here because of backend fragmentation.
    for (n = minLargeObjectSize; n < 10*1024*1024; n += 16*1024) {
        void *p = scalable_malloc(n);
        bool leave = (sizeBefore != getMemSize());
        scalable_free(p);
        if (leave)
            break;
        REQUIRE_MESSAGE(sizeBefore == getMemSize(), "No caching expected");
    }
    REQUIRE_MESSAGE(n < 10*1024*1024, "scalable_malloc doesn't provoke OS request for memory, "
           "is some internal cache still used?");

    for( int p=MaxThread; p>=MinThread; --p ) {
        RunTestHeapLimit::initBarrier( p );
        utils::NativeParallelFor( p, RunTestHeapLimit() );
    }
    // setting the limit also tries to shrink memory usage to match it, so call it here
    res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1);
    REQUIRE(res == TBBMALLOC_OK);
    size_t m = getMemSize();
    REQUIRE(sizeBefore == m);
    // restore default
    res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 0);
    REQUIRE(res == TBBMALLOC_OK);
}

void checkNoHugePages()
{
    REQUIRE_MESSAGE(!hugePages.isEnabled, "scalable_allocation_mode "
           "must have priority over environment variable");
}

/*---------------------------------------------------------------------------*/
// The regression test against bugs in TBBMALLOC_CLEAN_ALL_BUFFERS allocation command.
// The idea is to allocate and deallocate a set of objects randomly in parallel.
// For large sizes (16K), it forces conflicts in backend during coalescing.
// For small sizes (4K), it forces cross-thread deallocations and then orphaned slabs.
// Global cleanup should process orphaned slabs and the queue of postponed coalescing
// requests, otherwise it will not be able to unmap all unused memory.

const int num_allocs = 10*1024;
void *ptrs[num_allocs];
std::atomic<int> alloc_counter;
static thread_local bool free_was_called = false;

inline void multiThreadAlloc(size_t alloc_size) {
    for( int i = alloc_counter++; i < num_allocs; i = alloc_counter++ ) {
       ptrs[i] = scalable_malloc( alloc_size );
       REQUIRE_MESSAGE( ptrs[i] != nullptr, "scalable_malloc returned zero." );
    }
}
inline void crossThreadDealloc() {
    free_was_called = false;
    for( int i = --alloc_counter; i >= 0; i = --alloc_counter ) {
        if (i < num_allocs) {
            scalable_free(ptrs[i]);
            free_was_called = true;
        }
    }
}

template<int AllocSize>
struct TestCleanAllBuffersBody : public SimpleBarrier {
    void operator() ( int ) const {
        barrier.wait();
        multiThreadAlloc(AllocSize);
        barrier.wait();
        crossThreadDealloc();
    }
};

template<int AllocSize>
void TestCleanAllBuffers() {
    const int num_threads = 8;
    // Clean up if something was allocated before the test
    scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,nullptr);

    size_t memory_in_use_before = getMemSize();
    alloc_counter = 0;
    TestCleanAllBuffersBody<AllocSize>::initBarrier(num_threads);

    utils::NativeParallelFor(num_threads, TestCleanAllBuffersBody<AllocSize>());
    // TODO: reproduce the bug conditions more reliably
    if ( defaultMemPool->extMemPool.backend.coalescQ.blocksToFree.load(std::memory_order_relaxed) == nullptr ) {
        INFO( "Warning: The queue of postponed coalescing requests is empty. ");
        INFO( "Unable to create the condition for bug reproduction.\n" );
    }
    int result = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,nullptr);
    REQUIRE_MESSAGE( result == TBBMALLOC_OK, "The cleanup request has not cleaned anything." );
    size_t memory_in_use_after = getMemSize();

    size_t memory_leak = memory_in_use_after - memory_in_use_before;
    INFO( "memory_in_use_before = " <<  memory_in_use_before << ", memory_in_use_after = " << memory_in_use_after << "\n" );
    REQUIRE_MESSAGE( memory_leak == 0, "Cleanup was unable to release all allocated memory." );
}

//! Force cross thread deallocation of small objects to create a set of privatizable slab blocks.
//! The TBBMALLOC_CLEAN_THREAD_BUFFERS command has to privatize all the blocks.
struct TestCleanThreadBuffersBody : public SimpleBarrier {
    void operator() ( int ) const {
        barrier.wait();
        multiThreadAlloc(2*1024);
        barrier.wait();
        crossThreadDealloc();
        barrier.wait();
        int result = scalable_allocation_command(TBBMALLOC_CLEAN_THREAD_BUFFERS,nullptr);
        if (result != TBBMALLOC_OK && free_was_called) {
            REPORT("Warning: clean-up request for this particular thread has not cleaned anything.");
        }

        // Check that TLS was cleaned fully
        TLSData *tlsCurr = defaultMemPool->getTLS(/*create=*/false);
        if (tlsCurr) {
            for (int i = 0; i < numBlockBinLimit; i++) {
                REQUIRE_MESSAGE(!(tlsCurr->bin[i].activeBlk), "Some bin was not cleaned.");
            }
            REQUIRE_MESSAGE(!(tlsCurr->lloc.head.load(std::memory_order_relaxed)), "Local LOC was not cleaned.");
            REQUIRE_MESSAGE(!(tlsCurr->freeSlabBlocks.head.load(std::memory_order_relaxed)), "Free Block pool was not cleaned.");
        }
    }
};

void TestCleanThreadBuffers() {
    const int num_threads = 8;
    // Clean up if something was allocated before the test
    scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,nullptr);

    alloc_counter = 0;
    TestCleanThreadBuffersBody::initBarrier(num_threads);
    utils::NativeParallelFor(num_threads, TestCleanThreadBuffersBody());
}

/*---------------------------------------------------------------------------*/
/*------------------------- Large Object Cache tests ------------------------*/
#if _MSC_VER==1600 || _MSC_VER==1500
    // ignore C4275: non dll-interface class 'stdext::exception' used as
    // base for dll-interface class 'std::bad_cast'
    #pragma warning (disable: 4275)
#endif
#include <vector>
#include <list>

// default constructor of CacheBin
template<typename Props>
rml::internal::LargeObjectCacheImpl<Props>::CacheBin::CacheBin() {}

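// CacheBinModel mirrors the aging and cleanup logic of a single LargeObjectCache bin.
// get() and putList() replay cache operations on the model, and check() compares the
// model's counters (oldest, ageThreshold, usedSize, cachedSize, meanHitRange) with the
// real bin to detect divergence.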
template<typename Props>
class CacheBinModel {

    typedef typename rml::internal::LargeObjectCacheImpl<Props>::CacheBin CacheBinType;

    // The emulated cache bin.
    CacheBinType cacheBinModel;
    // The reference to the real cache bin inside the large object cache.
93351c0b2f7Stbbdev     CacheBinType &cacheBin;
93451c0b2f7Stbbdev 
93551c0b2f7Stbbdev     const size_t size;
93651c0b2f7Stbbdev 
93751c0b2f7Stbbdev     // save only current time
93851c0b2f7Stbbdev     std::list<uintptr_t> objects;
93951c0b2f7Stbbdev 
doCleanup()94051c0b2f7Stbbdev     void doCleanup() {
941478de5b1Stbbdev         if ( cacheBinModel.cachedSize.load(std::memory_order_relaxed) >
942478de5b1Stbbdev             Props::TooLargeFactor*cacheBinModel.usedSize.load(std::memory_order_relaxed)) tooLargeLOC++;
94351c0b2f7Stbbdev         else tooLargeLOC = 0;
94451c0b2f7Stbbdev 
945478de5b1Stbbdev         intptr_t threshold = cacheBinModel.ageThreshold.load(std::memory_order_relaxed);
946478de5b1Stbbdev         if (tooLargeLOC > 3 && threshold) {
947478de5b1Stbbdev             threshold = (threshold + cacheBinModel.meanHitRange.load(std::memory_order_relaxed)) / 2;
948478de5b1Stbbdev             cacheBinModel.ageThreshold.store(threshold, std::memory_order_relaxed);
949478de5b1Stbbdev         }
95051c0b2f7Stbbdev 
95151c0b2f7Stbbdev         uintptr_t currTime = cacheCurrTime;
952478de5b1Stbbdev         while (!objects.empty() && (intptr_t)(currTime - objects.front()) > threshold) {
953478de5b1Stbbdev             cacheBinModel.cachedSize.store(cacheBinModel.cachedSize.load(std::memory_order_relaxed) - size, std::memory_order_relaxed);
95451c0b2f7Stbbdev             cacheBinModel.lastCleanedAge = objects.front();
95551c0b2f7Stbbdev             objects.pop_front();
95651c0b2f7Stbbdev         }
95751c0b2f7Stbbdev 
958478de5b1Stbbdev         cacheBinModel.oldest.store(objects.empty() ? 0 : objects.front(), std::memory_order_relaxed);
95951c0b2f7Stbbdev     }

public:
    CacheBinModel(CacheBinType &_cacheBin, size_t allocSize) : cacheBin(_cacheBin), size(allocSize) {
        cacheBinModel.oldest.store(cacheBin.oldest.load(std::memory_order_relaxed), std::memory_order_relaxed);
        cacheBinModel.lastCleanedAge = cacheBin.lastCleanedAge;
        cacheBinModel.ageThreshold.store(cacheBin.ageThreshold.load(std::memory_order_relaxed), std::memory_order_relaxed);
        cacheBinModel.usedSize.store(cacheBin.usedSize.load(std::memory_order_relaxed), std::memory_order_relaxed);
        cacheBinModel.cachedSize.store(cacheBin.cachedSize.load(std::memory_order_relaxed), std::memory_order_relaxed);
        cacheBinModel.meanHitRange.store(cacheBin.meanHitRange.load(std::memory_order_relaxed), std::memory_order_relaxed);
        cacheBinModel.lastGet = cacheBin.lastGet;
    }
    void get() {
        uintptr_t currTime = ++cacheCurrTime;

        if ( objects.empty() ) {
            const uintptr_t sinceLastGet = currTime - cacheBinModel.lastGet;
            intptr_t threshold = cacheBinModel.ageThreshold.load(std::memory_order_relaxed);
            if ((threshold && sinceLastGet > Props::LongWaitFactor * threshold) ||
                (cacheBinModel.lastCleanedAge && sinceLastGet > Props::LongWaitFactor * (cacheBinModel.lastCleanedAge - cacheBinModel.lastGet))) {
                cacheBinModel.lastCleanedAge = 0;
                cacheBinModel.ageThreshold.store(0, std::memory_order_relaxed);
            }

            if (cacheBinModel.lastCleanedAge)
                cacheBinModel.ageThreshold.store(Props::OnMissFactor * (currTime - cacheBinModel.lastCleanedAge), std::memory_order_relaxed);
        } else {
            uintptr_t obj_age = objects.back();
            objects.pop_back();
            if (objects.empty()) cacheBinModel.oldest.store(0, std::memory_order_relaxed);

            intptr_t hitRange = currTime - obj_age;
            intptr_t mean = cacheBinModel.meanHitRange.load(std::memory_order_relaxed);
            mean = mean ? (mean + hitRange) / 2 : hitRange;
            cacheBinModel.meanHitRange.store(mean, std::memory_order_relaxed);

            cacheBinModel.cachedSize.store(cacheBinModel.cachedSize.load(std::memory_order_relaxed) - size, std::memory_order_relaxed);
        }

        cacheBinModel.usedSize.store(cacheBinModel.usedSize.load(std::memory_order_relaxed) + size, std::memory_order_relaxed);
        cacheBinModel.lastGet = currTime;

        if ( currTime % rml::internal::cacheCleanupFreq == 0 ) doCleanup();
    }

    void putList( int num ) {
        uintptr_t currTime = cacheCurrTime;
        cacheCurrTime += num;

        cacheBinModel.usedSize.store(cacheBinModel.usedSize.load(std::memory_order_relaxed) - num * size, std::memory_order_relaxed);

        bool cleanUpNeeded = false;
        if ( !cacheBinModel.lastCleanedAge ) {
            cacheBinModel.lastCleanedAge = ++currTime;
            cleanUpNeeded |= currTime % rml::internal::cacheCleanupFreq == 0;
            num--;
        }

        for ( int i=1; i<=num; ++i ) {
            currTime+=1;
            cleanUpNeeded |= currTime % rml::internal::cacheCleanupFreq == 0;
            if (objects.empty())
                cacheBinModel.oldest.store(currTime, std::memory_order_relaxed);
            objects.push_back(currTime);
        }

        cacheBinModel.cachedSize.store(cacheBinModel.cachedSize.load(std::memory_order_relaxed) + num * size, std::memory_order_relaxed);

        if ( cleanUpNeeded ) doCleanup();
    }

    void check() {
        CHECK_FAST(cacheBinModel.oldest.load(std::memory_order_relaxed) == cacheBin.oldest.load(std::memory_order_relaxed));
        CHECK_FAST(cacheBinModel.lastCleanedAge == cacheBin.lastCleanedAge);
        CHECK_FAST(cacheBinModel.ageThreshold.load(std::memory_order_relaxed) == cacheBin.ageThreshold.load(std::memory_order_relaxed));
        CHECK_FAST(cacheBinModel.usedSize.load(std::memory_order_relaxed) == cacheBin.usedSize.load(std::memory_order_relaxed));
        CHECK_FAST(cacheBinModel.cachedSize.load(std::memory_order_relaxed) == cacheBin.cachedSize.load(std::memory_order_relaxed));
        CHECK_FAST(cacheBinModel.meanHitRange.load(std::memory_order_relaxed) == cacheBin.meanHitRange.load(std::memory_order_relaxed));
        CHECK_FAST(cacheBinModel.lastGet == cacheBin.lastGet);
    }

    static uintptr_t cacheCurrTime;
    static intptr_t tooLargeLOC;
};

template<typename Props> uintptr_t CacheBinModel<Props>::cacheCurrTime;
template<typename Props> intptr_t CacheBinModel<Props>::tooLargeLOC;

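// Drives the CacheBinModel against the real large object cache: replays a scenario of
// mallocLargeObject/freeLargeObject calls on a single cache bin, mirrors every operation
// in the model, and cross-checks the bin fields after each step.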
template <typename Scenario>
void LOCModelTester() {
    defaultMemPool->extMemPool.loc.cleanAll();
    defaultMemPool->extMemPool.loc.reset();

    const size_t size = 16 * 1024;
    const size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr);
    const size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment);
    const int binIdx = defaultMemPool->extMemPool.loc.largeCache.sizeToIdx( allocationSize );

    CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps>::cacheCurrTime = defaultMemPool->extMemPool.loc.cacheCurrTime;
    CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps>::tooLargeLOC = defaultMemPool->extMemPool.loc.largeCache.tooLargeLOC;
    CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps> cacheBinModel(defaultMemPool->extMemPool.loc.largeCache.bin[binIdx], allocationSize);

    Scenario scen;
    for (rml::internal::LargeMemoryBlock *lmb = scen.next(); (intptr_t)lmb != (intptr_t)-1; lmb = scen.next()) {
        if ( lmb ) {
            int num=1;
            for (rml::internal::LargeMemoryBlock *curr = lmb; curr->next; curr=curr->next) num+=1;
            defaultMemPool->extMemPool.freeLargeObject(lmb);
            cacheBinModel.putList(num);
        } else {
            scen.saveLmb(defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize));
            cacheBinModel.get();
        }

        cacheBinModel.check();
    }
}

class TestBootstrap {
    bool allocating;
    std::vector<rml::internal::LargeMemoryBlock*> lmbArray;
public:
    TestBootstrap() : allocating(true) {}

    rml::internal::LargeMemoryBlock* next() {
        if ( allocating )
            return nullptr;
        if ( !lmbArray.empty() ) {
            rml::internal::LargeMemoryBlock *ret = lmbArray.back();
            lmbArray.pop_back();
            return ret;
        }
        return (rml::internal::LargeMemoryBlock*)-1;
    }

    void saveLmb( rml::internal::LargeMemoryBlock *lmb ) {
        lmb->next = nullptr;
        lmbArray.push_back(lmb);
        if ( lmbArray.size() == 1000 ) allocating = false;
    }
};

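// Scenario: a long pseudo-random mix of allocations and frees (fixed seed for reproducibility);
// next() returns nullptr to request an allocation and a previously saved block to request a free.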
class TestRandom {
    std::vector<rml::internal::LargeMemoryBlock*> lmbArray;
    int numOps;
public:
    TestRandom() : numOps(100000) {
        srand(1234);
    }

    rml::internal::LargeMemoryBlock* next() {
        if ( numOps-- ) {
            if ( lmbArray.empty() || rand() / (RAND_MAX>>1) == 0 )
                return nullptr;
            size_t ind = rand()%lmbArray.size();
            if ( ind != lmbArray.size()-1 ) std::swap(lmbArray[ind],lmbArray[lmbArray.size()-1]);
            rml::internal::LargeMemoryBlock *lmb = lmbArray.back();
            lmbArray.pop_back();
            return lmb;
        }
        return (rml::internal::LargeMemoryBlock*)-1;
    }

    void saveLmb( rml::internal::LargeMemoryBlock *lmb ) {
        lmb->next = nullptr;
        lmbArray.push_back(lmb);
    }
};

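// The scenarios below stress operation collapsing in the large object cache: concurrent put/get
// requests on the same bin may be merged, which is observed through the
// tbbmalloc_whitebox::locGetProcessed/locPutProcessed counters.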
class TestCollapsingMallocFree : public SimpleBarrier {
public:
    static const int NUM_ALLOCS = 100000;
    const int num_threads;

    TestCollapsingMallocFree( int _num_threads ) : num_threads(_num_threads) {
        initBarrier( num_threads );
    }

    void operator() ( int ) const {
        const size_t size = 16 * 1024;
        const size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr);
        const size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment);

        barrier.wait();
        for ( int i=0; i<NUM_ALLOCS; ++i ) {
            defaultMemPool->extMemPool.freeLargeObject(
                defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize) );
        }
    }

    void check() {
        REQUIRE( tbbmalloc_whitebox::locGetProcessed == tbbmalloc_whitebox::locPutProcessed);
        REQUIRE_MESSAGE( tbbmalloc_whitebox::locGetProcessed < num_threads*NUM_ALLOCS, "Not a single malloc/free pair was collapsed." );
    }
};

class TestCollapsingBootstrap : public SimpleBarrier {
    class CheckNumAllocs {
        const int num_threads;
    public:
        CheckNumAllocs( int _num_threads ) : num_threads(_num_threads) {}
        void operator()() const {
            REQUIRE( tbbmalloc_whitebox::locGetProcessed == num_threads*NUM_ALLOCS );
            REQUIRE( tbbmalloc_whitebox::locPutProcessed == 0 );
        }
    };
public:
    static const int NUM_ALLOCS = 1000;
    const int num_threads;

    TestCollapsingBootstrap( int _num_threads ) : num_threads(_num_threads) {
        initBarrier( num_threads );
    }

    void operator() ( int ) const {
        const size_t size = 16 * 1024;
        size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr);
        size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment);

        barrier.wait();
        rml::internal::LargeMemoryBlock *lmbArray[NUM_ALLOCS];
        for ( int i=0; i<NUM_ALLOCS; ++i )
            lmbArray[i] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize);

        barrier.wait(CheckNumAllocs(num_threads));
        for ( int i=0; i<NUM_ALLOCS; ++i )
            defaultMemPool->extMemPool.freeLargeObject( lmbArray[i] );
    }

    void check() {
        REQUIRE( tbbmalloc_whitebox::locGetProcessed == tbbmalloc_whitebox::locPutProcessed );
        REQUIRE( tbbmalloc_whitebox::locGetProcessed == num_threads*NUM_ALLOCS );
    }
};

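// Resets the whitebox counters and the large object cache, runs the given scenario on
// num_threads native threads, and validates the counters via the scenario's check().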
template <typename Scenario>
void LOCCollapsingTester( int num_threads ) {
    tbbmalloc_whitebox::locGetProcessed = 0;
    tbbmalloc_whitebox::locPutProcessed = 0;
    defaultMemPool->extMemPool.loc.cleanAll();
    defaultMemPool->extMemPool.loc.reset();

    Scenario scen(num_threads);
    utils::NativeParallelFor(num_threads, scen);

    scen.check();
}

void TestLOC() {
    LOCModelTester<TestBootstrap>();
    LOCModelTester<TestRandom>();

    const int num_threads = 16;
    LOCCollapsingTester<TestCollapsingBootstrap>( num_threads );
    if ( num_threads > 1 ) {
        INFO( "num_threads = " << num_threads );
        LOCCollapsingTester<TestCollapsingMallocFree>( num_threads );
    } else {
        REPORT( "Warning: concurrency is too low for TestMallocFreeCollapsing ( num_threads = %d )\n", num_threads );
    }
}
/*---------------------------------------------------------------------------*/

void *findCacheLine(void *p) {
    return (void*)alignDown((uintptr_t)p, estimatedCacheLineSize);
}

// test that internals of Block are at expected cache lines
void TestSlabAlignment() {
    const size_t min_sz = 8;
    const int space = 2*16*1024; // fill at least 2 slabs
    void *pointers[space / min_sz];  // the worst case is min_sz byte object

    for (size_t sz = min_sz; sz <= 64; sz *= 2) {
        for (size_t i = 0; i < space/sz; i++) {
            pointers[i] = scalable_malloc(sz);
            Block *block = (Block *)alignDown(pointers[i], slabSize);
            REQUIRE_MESSAGE(findCacheLine(&block->isFull) != findCacheLine(pointers[i]),
                          "A user object must not share a cache line with slab control structures.");
            REQUIRE_MESSAGE(findCacheLine(&block->next) != findCacheLine(&block->nextPrivatizable),
                          "GlobalBlockFields and LocalBlockFields must be on different cache lines.");
        }
        for (size_t i = 0; i < space/sz; i++)
            scalable_free(pointers[i]);
    }
}

#include "common/memory_usage.h"

// TODO: Consider adding Huge Pages support on macOS (special mmap flag).
// Transparent Huge Pages support would require a different system-info parsing mechanism,
// because there is no /proc/meminfo on macOS
#if __unix__
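// TestTHP: enable huge pages, allocate several huge-page-aligned raw regions through the backend,
// touch them so the OS can back them with transparent huge pages, and compare the system THP
// counters taken before and after (advisory only, since the kernel may decline to promote them).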
void TestTHP() {
    // Get backend from default memory pool
    rml::internal::Backend *backend = &(defaultMemPool->extMemPool.backend);

    // Configure malloc to use huge pages
    scalable_allocation_mode(USE_HUGE_PAGES, 1);
    REQUIRE_MESSAGE(hugePages.isEnabled, "Huge pages should be enabled via scalable_allocation_mode");

    const int HUGE_PAGE_SIZE = 2 * 1024 * 1024;

    // allocCount transparent huge pages should be allocated
    const int allocCount = 10;

    // Allocate huge page aligned memory regions to track system
    // counters for transparent huge pages
    void*  allocPtrs[allocCount];

    // Wait for the system to update process memory info files after other tests
    utils::Sleep(4000);

    // Parse system info regarding current THP status
    size_t currentSystemTHPCount = utils::getSystemTHPCount();
    size_t currentSystemTHPAllocatedSize = utils::getSystemTHPAllocatedSize();

    for (int i = 0; i < allocCount; i++) {
        // Allocation size has to be aligned to the huge page size
        size_t allocSize = HUGE_PAGE_SIZE - (i * 1000);

        // Map memory
        allocPtrs[i] = backend->allocRawMem(allocSize);

        REQUIRE_MESSAGE(allocPtrs[i], "Allocation did not succeed.");
        REQUIRE_MESSAGE(allocSize == HUGE_PAGE_SIZE,
            "Allocation size has to be aligned to the huge page size internally.");

        // First-touch policy - the OS allocates no real pages until the region is accessed
        memset(allocPtrs[i], 1, allocSize);

        REQUIRE_MESSAGE(isAligned(allocPtrs[i], HUGE_PAGE_SIZE),
            "The pointer returned by allocRawMem is not aligned on huge page size.");
    }

    // Wait for the system to update process memory info files after allocations
    utils::Sleep(4000);

    // Generally, the kernel tries to allocate transparent huge pages, but sometimes it cannot do this
    // (tested on SLES 11/12), so consider these system info checks a remark only.
    // Also, some systems can allocate more memory than needed in the background (tested on Ubuntu 14.04)
    size_t newSystemTHPCount = utils::getSystemTHPCount();
    size_t newSystemTHPAllocatedSize = utils::getSystemTHPAllocatedSize();
    if ((newSystemTHPCount - currentSystemTHPCount) < allocCount
            && (newSystemTHPAllocatedSize - currentSystemTHPAllocatedSize) / (2 * 1024) < allocCount) {
        REPORT( "Warning: the system didn't allocate the needed amount of THPs.\n" );
    }

    // Test memory unmap
    for (int i = 0; i < allocCount; i++) {
        REQUIRE_MESSAGE(backend->freeRawMem(allocPtrs[i], HUGE_PAGE_SIZE),
                "Something went wrong during raw memory free");
    }
}
#endif // __unix__

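// Presumably a warm-up: query memory usage a few times and use the last reading so that
// transient fluctuations settle before the value is compared.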
inline size_t getStabilizedMemUsage() {
    for (int i = 0; i < 3; i++) utils::GetMemoryUsage();
    return utils::GetMemoryUsage();
}

inline void* reallocAndRetrieve(void* origPtr, size_t reallocSize, size_t& origBlockSize, size_t& reallocBlockSize) {
    rml::internal::LargeMemoryBlock* origLmb = ((rml::internal::LargeObjectHdr *)origPtr - 1)->memoryBlock;
    origBlockSize = origLmb->unalignedSize;

    void* reallocPtr = rml::internal::reallocAligned(defaultMemPool, origPtr, reallocSize, 0);

    // Retrieve the reallocated block information
    rml::internal::LargeMemoryBlock* reallocLmb = ((rml::internal::LargeObjectHdr *)reallocPtr - 1)->memoryBlock;
    reallocBlockSize = reallocLmb->unalignedSize;

    return reallocPtr;
}

void TestReallocDecreasing() {

    /* Testing that actual reallocation happens for large objects that do not fit the backend cache
       but decrease in size by a factor of >= 2. */

    size_t startSize = 100 * 1024 * 1024;
    size_t maxBinnedSize = defaultMemPool->extMemPool.backend.getMaxBinnedSize();
    void*  origPtr = scalable_malloc(startSize);
    void*  reallocPtr = nullptr;

    // Realloc to a size 1MB smaller
    size_t origBlockSize = 42;
    size_t reallocBlockSize = 43;
    reallocPtr = reallocAndRetrieve(origPtr, startSize - 1 * 1024 * 1024, origBlockSize, reallocBlockSize);
    REQUIRE_MESSAGE(origBlockSize == reallocBlockSize, "Reallocated block size shouldn't change");
    REQUIRE_MESSAGE(reallocPtr == origPtr, "Original pointer shouldn't change");

    // Repeatedly reallocate to smaller sizes while above the max binned size
    size_t reallocSize = (startSize / 2) - 1000; // exact realloc
    while(reallocSize > maxBinnedSize) {

        // Prevent huge/large objects caching
        defaultMemPool->extMemPool.loc.cleanAll();
        // Prevent local large object caching
        TLSData *tls = defaultMemPool->getTLS(/*create=*/false);
        tls->lloc.externalCleanup(&defaultMemPool->extMemPool);

        size_t sysMemUsageBefore = getStabilizedMemUsage();
        size_t totalMemSizeBefore = defaultMemPool->extMemPool.backend.getTotalMemSize();

        reallocPtr = reallocAndRetrieve(origPtr, reallocSize, origBlockSize, reallocBlockSize);

        REQUIRE_MESSAGE(origBlockSize > reallocBlockSize, "Reallocated block size should decrease.");

        size_t sysMemUsageAfter = getStabilizedMemUsage();
        size_t totalMemSizeAfter = defaultMemPool->extMemPool.backend.getTotalMemSize();

        // Skip the check when backend caching occurred or system memory usage info could not be read
        if (totalMemSizeBefore > totalMemSizeAfter && sysMemUsageAfter != 0 && sysMemUsageBefore != 0) {
            REQUIRE_MESSAGE(sysMemUsageBefore > sysMemUsageAfter, "Memory was not released");
        }

        origPtr = reallocPtr;
        reallocSize = (reallocSize / 2) - 1000; // exact realloc
    }
    scalable_free(reallocPtr);

    /* TODO: Decreasing reallocation of large objects that fit the backend cache */
    /* TODO: Small objects decreasing reallocation test */
}
#if !__TBB_WIN8UI_SUPPORT && defined(_WIN32)

#include "../../src/tbbmalloc_proxy/function_replacement.cpp"
#include <string>
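// Unit tests for the malloc proxy replacement log: they exercise TBB_malloc_replacement_log()
// on an empty log, an overflowing log, a failed function search, and an unknown function
// prologue, checking both the returned status and the last log record.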
namespace FunctionReplacement {
    FunctionInfo funcInfo = { "funcname","dllname" };
    char **func_replacement_log;
    int status;

    void LogCleanup() {
        // Free all allocated memory
        for (unsigned i = 0; i < Log::record_number; i++){
            HeapFree(GetProcessHeap(), 0, Log::records[i]);
        }
        for (unsigned i = 0; i < Log::RECORDS_COUNT + 1; i++){
            Log::records[i] = nullptr;
        }
        Log::replacement_status = true;
        Log::record_number = 0;
    }

    void TestEmptyLog() {
        status = TBB_malloc_replacement_log(&func_replacement_log);

        REQUIRE_MESSAGE(status == -1, "Status is true, but log is empty");
        REQUIRE_MESSAGE(*func_replacement_log == nullptr, "Log must be empty");
    }

    void TestLogOverload() {
        for (int i = 0; i < 1000; i++)
            Log::record(funcInfo, "opcode string", true);

        status = TBB_malloc_replacement_log(&func_replacement_log);
        // Find the last record
        for (; *(func_replacement_log + 1) != 0; func_replacement_log++) {}

        std::string last_line(*func_replacement_log);
        REQUIRE_MESSAGE(status == 0, "False status, but all functions found");
        REQUIRE_MESSAGE(last_line.compare("Log was truncated.") == 0, "Log overflow was not handled");

        // Change status
        Log::record(funcInfo, "opcode string", false);
        status = TBB_malloc_replacement_log(nullptr);
        REQUIRE_MESSAGE(status == -1, "Status is true, but we have a false search case");

        LogCleanup();
    }

    void TestFalseSearchCase() {
        Log::record(funcInfo, "opcode string", false);
        std::string expected_line = "Fail: "+ std::string(funcInfo.funcName) + " (" +
                         std::string(funcInfo.dllName) + "), byte pattern: <opcode string>";

        status = TBB_malloc_replacement_log(&func_replacement_log);

        REQUIRE_MESSAGE(expected_line.compare(*func_replacement_log) == 0, "Wrong last string content");
        REQUIRE_MESSAGE(status == -1, "Status is true, but we have a false search case");
        LogCleanup();
    }

    void TestWrongFunctionInDll(){
        HMODULE ucrtbase_handle = GetModuleHandle("ucrtbase.dll");
        if (ucrtbase_handle) {
            IsPrologueKnown("ucrtbase.dll", "fake_function", nullptr, ucrtbase_handle);
            std::string expected_line = "Fail: fake_function (ucrtbase.dll), byte pattern: <unknown>";

            status = TBB_malloc_replacement_log(&func_replacement_log);

            REQUIRE_MESSAGE(expected_line.compare(*func_replacement_log) == 0, "Wrong last string content");
            REQUIRE_MESSAGE(status == -1, "Status is true, but we have a false search case");
            LogCleanup();
        } else {
            INFO("Cannot find ucrtbase.dll on the system, test skipped!\n");
        }
    }
}

void TestFunctionReplacementLog() {
    using namespace FunctionReplacement;
    // Do not reorder the test cases
    TestEmptyLog();
    TestLogOverload();
    TestFalseSearchCase();
    TestWrongFunctionInDll();
}

#endif /*!__TBB_WIN8UI_SUPPORT && defined(_WIN32)*/

#include <cmath> // pow function

// Huge objects cache: the formula Size = MinSize * 2^(Index / StepFactor) gives the bin size,
// but it does not match our sizeToIdx approximation algorithm, where the step sizes between major
// (power of 2) sizes are equal. Used internally for the test. Static cast to avoid warnings.
inline size_t hocIdxToSizeFormula(int idx) {
    return static_cast<size_t>(float(rml::internal::LargeObjectCache::maxLargeSize) *
        pow(2, float(idx) / float(rml::internal::LargeObjectCache::HugeBSProps::StepFactor)));
}
// Large objects cache bin sizes form an arithmetic progression
inline size_t locIdxToSizeFormula(int idx) {
    return rml::internal::LargeObjectCache::LargeBSProps::MinSize +
        (idx * rml::internal::LargeObjectCache::LargeBSProps::CacheStep);
}

template <typename CacheType>
void TestLOCacheBinsConverterImpl(int idx, size_t checkingSize) {
    size_t alignedSize = CacheType::alignToBin(checkingSize);
    REQUIRE_MESSAGE(alignedSize >= checkingSize, "Size is not correctly aligned");
    int calcIdx = CacheType::sizeToIdx(alignedSize);
    REQUIRE_MESSAGE(calcIdx == idx, "Index calculated from size is not correct");
}

void TestLOCacheBinsConverter(){
    typedef rml::internal::LargeObjectCache::LargeCacheType LargeCacheType;
    typedef rml::internal::LargeObjectCache::HugeCacheType HugeCacheType;

    size_t checkingSize = 0;
    for (int idx = 0; idx < LargeCacheType::numBins; idx++) {
        checkingSize = locIdxToSizeFormula(idx);
        TestLOCacheBinsConverterImpl<LargeCacheType>(idx, checkingSize);
    }
    for (int idx = 0; idx < HugeCacheType::numBins; idx++) {
        checkingSize = hocIdxToSizeFormula(idx);
        TestLOCacheBinsConverterImpl<HugeCacheType>(idx, checkingSize);
    }
}

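// Helper for the huge size threshold tests: fills cache bins around the sieve boundary
// (defaultMaxHugeSize) with one object each, then checks which bins survive no cleanup,
// a regular cleanup, or a hard cleanup, depending on the configured hugeSize threshold.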
struct HOThresholdTester {
    LargeObjectCache* loc;
    size_t hugeSize;

    static const size_t sieveSize = LargeObjectCache::defaultMaxHugeSize;
    // The sieve starts from 64MB (the 24-th cache bin), so checking a 4-bin radius around it is enough
    // while keeping memory consumption decent (especially for 32-bit architectures)
    static const int MIN_BIN_IDX = 21;
    static const int MAX_BIN_IDX = 27;

    enum CleanupType {
        NO_CLEANUP,
        REGULAR_CLEANUP,
        HARD_CLEANUP
    };

    void populateCache() {
        LargeMemoryBlock* loArray[MAX_BIN_IDX - MIN_BIN_IDX];
        // To avoid the consequences of backend::softCacheCleanup (cleanup by isLOCToolarge),
        // first allocate all objects and then cache them at once.
        // Moreover, because the first cached item is still dropped from the cache due to the lack of history,
        // redo the allocation twice.
        for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) {
            size_t allocationSize = alignedSizeFromIdx(idx);
            int localIdx = idx - MIN_BIN_IDX;
            loArray[localIdx] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize);
            REQUIRE_MESSAGE(loArray[localIdx], "Large object was not allocated.");
            loc->put(loArray[localIdx]);
            loArray[localIdx] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize);
        }
        for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) {
            loc->put(loArray[idx - MIN_BIN_IDX]);
        }
    }
    void clean(bool all) {
        if (all) {
            // Should avoid any threshold and clean all bins
            loc->cleanAll();
        } else {
            // Regular cleanup should do nothing for bins above the threshold. The decreasing option is used
            // in the test to be sure that all objects below defaultMaxHugeSize (sieveSize) were cleaned
            loc->regularCleanup();
            loc->decreasingCleanup();
        }
    }
    void check(CleanupType type) {
        for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) {
            size_t objectSize = alignedSizeFromIdx(idx);
            // Cached objects below the sieve threshold and above the huge object threshold should stay cached
            // (others should be sieved), unless the whole cache is dropped. Regular cleanup drops only objects
            // below the sieve size.
            if (type == NO_CLEANUP && sizeInCacheRange(objectSize)) {
                REQUIRE_MESSAGE(objectInCacheBin(idx, objectSize), "Object was released from cache, it shouldn't.");
            } else if (type == REGULAR_CLEANUP && (objectSize >= hugeSize)) {
                REQUIRE_MESSAGE(objectInCacheBin(idx, objectSize), "Object was released from cache, it shouldn't.");
            } else { // HARD_CLEANUP
                REQUIRE_MESSAGE(cacheBinEmpty(idx), "Object is still cached.");
            }
        }
    }

private:
    bool cacheBinEmpty(int idx) {
        return (loc->hugeCache.bin[idx].cachedSize.load(std::memory_order_relaxed) == 0 && loc->hugeCache.bin[idx].get() == nullptr);
    }
    bool objectInCacheBin(int idx, size_t size) {
        return (loc->hugeCache.bin[idx].cachedSize.load(std::memory_order_relaxed) != 0 &&
            loc->hugeCache.bin[idx].cachedSize.load(std::memory_order_relaxed) % size == 0);
    }
    bool sizeInCacheRange(size_t size) {
        return size <= sieveSize || size >= hugeSize;
    }
    size_t alignedSizeFromIdx(int idx) {
        return rml::internal::LargeObjectCache::alignToBin(hocIdxToSizeFormula(idx));
    }
};

// The TBBMALLOC_SET_HUGE_SIZE_THRESHOLD value should be set before the test,
// through the scalable API or the environment variable
void TestHugeSizeThresholdImpl(LargeObjectCache* loc, size_t hugeSize, bool fullTesting) {
    HOThresholdTester test = {loc, hugeSize};
    test.populateCache();
    // Check the default sieve value
    test.check(HOThresholdTester::NO_CLEANUP);

    if(fullTesting) {
        // Check that objects above the threshold stay in the cache after a regular cleanup
        test.clean(/*all*/false);
        test.check(HOThresholdTester::REGULAR_CLEANUP);
    }
    // Check that all objects are dropped from the cache after a hard cleanup (which ignores the huge objects threshold)
    test.clean(/*all*/true);
    test.check(HOThresholdTester::HARD_CLEANUP);
    // Restore previous settings
    loc->setHugeSizeThreshold(LargeObjectCache::maxHugeSize);
    loc->reset();
}

/*
 *  Test the default huge size and the behaviour when huge object settings are defined
 */
void TestHugeSizeThreshold() {
    // Clean up if something was allocated before the test and reset cache state
    scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr);
    LargeObjectCache* loc = &defaultMemPool->extMemPool.loc;
    // Restore default settings just in case
    loc->setHugeSizeThreshold(LargeObjectCache::maxHugeSize);
    loc->reset();
    // First, check the default huge size value (with the max huge object threshold).
    // Everything larger than this value should be released to the OS without caching.
    TestHugeSizeThresholdImpl(loc, loc->hugeSizeThreshold, false);
    // Then set the huge object threshold.
    // All objects with sizes above the threshold will be released only after a hard cleanup.
#if !__TBB_WIN8UI_SUPPORT
    // Unit testing for the environment variable
    utils::SetEnv("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD","67108864");
    // The large object cache reads the threshold environment variable during initialization.
    // Reset the value before the test.
    loc->hugeSizeThreshold = 0;
    // Reset logical time to prevent regular cleanup
    loc->cacheCurrTime = 0;
    loc->init(&defaultMemPool->extMemPool);
    TestHugeSizeThresholdImpl(loc, 64 * MByte, true);
#endif
    // Unit testing for scalable_allocation_mode
    scalable_allocation_mode(TBBMALLOC_SET_HUGE_SIZE_THRESHOLD, 56 * MByte);
    TestHugeSizeThresholdImpl(loc, 56 * MByte, true);
    // Verify that objects whose sizes align to maxHugeSize are not cached.
    size_t sz = LargeObjectCache::maxHugeSize;
    size_t aligned_sz = LargeObjectCache::alignToBin(sz);
    REQUIRE_MESSAGE(sz == aligned_sz, "maxHugeSize should be aligned.");
    REQUIRE_MESSAGE(!loc->sizeInCacheRange(sz), "Upper bound sized object shouldn't be cached.");
    REQUIRE_MESSAGE(loc->get(sz) == nullptr, "Upper bound sized object shouldn't be cached.");
}

//! \brief \ref error_guessing
TEST_CASE("Main test case") {
    scalable_allocation_mode(USE_HUGE_PAGES, 0);
#if !__TBB_WIN8UI_SUPPORT
    utils::SetEnv("TBB_MALLOC_USE_HUGE_PAGES","yes");
#endif
    checkNoHugePages();
    // backreference requires that initialization was done
    if(!isMallocInitialized()) doInitialization();
    checkNoHugePages();
    // to succeed, leak detection must be the 1st memory-intensive test
    TestBackRef();
    TestCleanAllBuffers<4*1024>();
    TestCleanAllBuffers<16*1024>();
    TestCleanThreadBuffers();
    TestPools();
    TestBackend();

#if MALLOC_CHECK_RECURSION
    for( int p=MaxThread; p>=MinThread; --p ) {
        TestStartupAlloc::initBarrier( p );
        utils::NativeParallelFor( p, TestStartupAlloc() );
        REQUIRE_MESSAGE(!firstStartupBlock, "Startup heap memory leak detected");
    }
#endif
    TestLargeObjectCache();
    TestObjectRecognition();
    TestBitMask();
    TestHeapLimit();
    TestLOC();
    TestSlabAlignment();
}

//! \brief \ref error_guessing
TEST_CASE("Decreasing reallocation") {
    if (!isMallocInitialized()) doInitialization();
    TestReallocDecreasing();
}

//! \brief \ref error_guessing
TEST_CASE("Large object cache bins converter") {
    if (!isMallocInitialized()) doInitialization();
    TestLOCacheBinsConverter();
}

//! \brief \ref error_guessing
TEST_CASE("Huge size threshold settings") {
    if (!isMallocInitialized()) doInitialization();
    TestHugeSizeThreshold();
}

#if __unix__
//! \brief \ref error_guessing
TEST_CASE("Transparent huge pages") {
    if (utils::isTHPEnabledOnMachine()) {
        if (!isMallocInitialized()) doInitialization();
        TestTHP();
    } else {
        INFO("Transparent Huge Pages are not supported on the system - the test is skipped\n");
    }
}
#endif

#if !__TBB_WIN8UI_SUPPORT && defined(_WIN32)
//! \brief \ref error_guessing
TEST_CASE("Function replacement log") {
    TestFunctionReplacementLog();
}
#endif