1 /*
2     Copyright (c) 2005-2020 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 //! \file test_malloc_whitebox.cpp
18 //! \brief Test for [memory_allocation] functionality
19 
// To prevent loading dynamic TBBmalloc at startup, which is not needed for the whitebox test
21 #define __TBB_SOURCE_DIRECTLY_INCLUDED 1
// Call the thread shutdown API when joining native threads
23 #define HARNESS_TBBMALLOC_THREAD_SHUTDOWN 1
24 
// According to the C99 standard, INTPTR_MIN is defined for C++ only if __STDC_LIMIT_MACROS is pre-defined
26 #define __STDC_LIMIT_MACROS 1
27 
// To avoid depending on ITT support
29 #ifdef DO_ITT_NOTIFY
30 #undef DO_ITT_NOTIFY
31 #endif
32 
33 #include "common/test.h"
34 
35 #include "common/utils.h"
36 #include "common/utils_assert.h"
37 #include "common/utils_env.h"
38 #include "common/spin_barrier.h"
39 
40 #include "oneapi/tbb/detail/_machine.h"
41 
42 #define __TBB_MALLOC_WHITEBOX_TEST 1 // to get access to allocator internals
// helps trigger a rare race condition
44 #define WhiteboxTestingYield() (tbb::detail::yield(), tbb::detail::yield(), tbb::detail::yield(), tbb::detail::yield())
45 
46 #if __INTEL_COMPILER && __TBB_MIC_OFFLOAD
// warning 2571: variable has not been declared with a compatible "target" attribute
// warning 3218: class/struct may fail when offloaded because this field is misaligned
//               or contains data that is misaligned
50     #pragma warning(push)
51     #pragma warning(disable:2571 3218)
52 #endif
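// Redefine access specifiers so the test can reach the internals of the allocator
// sources that are included directly below.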
53 #define protected public
54 #define private public
55 #include "../../src/tbbmalloc/frontend.cpp"
56 #undef protected
57 #undef private
58 #if __INTEL_COMPILER && __TBB_MIC_OFFLOAD
59     #pragma warning(pop)
60 #endif
61 #include "../../src/tbbmalloc/backend.cpp"
62 #include "../../src/tbbmalloc/backref.cpp"
63 
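// Counters used by the large object cache (LOC) tests below; they are expected to be
// updated by the directly included allocator sources when __TBB_MALLOC_WHITEBOX_TEST is defined.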
64 namespace tbbmalloc_whitebox {
65     size_t locGetProcessed = 0;
66     size_t locPutProcessed = 0;
67 }
68 #include "../../src/tbbmalloc/large_objects.cpp"
69 #include "../../src/tbbmalloc/tbbmalloc.cpp"
70 
71 const int LARGE_MEM_SIZES_NUM = 10;
72 static const int MinThread = 1;
73 static const int MaxThread = 4;
74 
75 class AllocInfo {
76     int *p;
77     int val;
78     int size;
79 public:
80     AllocInfo() : p(NULL), val(0), size(0) {}
81     explicit AllocInfo(int sz) : p((int*)scalable_malloc(sz*sizeof(int))),
82                                    val(rand()), size(sz) {
83         REQUIRE(p);
84         for (int k=0; k<size; k++)
85             p[k] = val;
86     }
87     void check() const {
88         for (int k=0; k<size; k++)
89             ASSERT(p[k] == val, NULL);
90     }
91     void clear() {
92         scalable_free(p);
93     }
94 };
95 
96 class SimpleBarrier: utils::NoAssign {
97 protected:
98     static utils::SpinBarrier barrier;
99 public:
100     static void initBarrier(unsigned thrds) { barrier.initialize(thrds); }
101 };
102 
103 utils::SpinBarrier SimpleBarrier::barrier;
104 
105 class TestLargeObjCache: public SimpleBarrier {
106 public:
107     static int largeMemSizes[LARGE_MEM_SIZES_NUM];
108 
109     TestLargeObjCache( ) {}
110 
111     void operator()( int /*mynum*/ ) const {
112         AllocInfo allocs[LARGE_MEM_SIZES_NUM];
113 
114         // push to maximal cache limit
115         for (int i=0; i<2; i++) {
116             const int sizes[] = { MByte/sizeof(int),
117                                   (MByte-2*LargeObjectCache::LargeBSProps::CacheStep)/sizeof(int) };
118             for (int q=0; q<2; q++) {
119                 size_t curr = 0;
120                 for (int j=0; j<LARGE_MEM_SIZES_NUM; j++, curr++)
121                     new (allocs+curr) AllocInfo(sizes[q]);
122 
123                 for (size_t j=0; j<curr; j++) {
124                     allocs[j].check();
125                     allocs[j].clear();
126                 }
127             }
128         }
129 
130         barrier.wait();
131 
132         // check caching correctness
133         for (int i=0; i<1000; i++) {
134             size_t curr = 0;
135             for (int j=0; j<LARGE_MEM_SIZES_NUM-1; j++, curr++)
136                 new (allocs+curr) AllocInfo(largeMemSizes[j]);
137 
138             new (allocs+curr)
139                 AllocInfo((int)(4*minLargeObjectSize +
140                                 2*minLargeObjectSize*(1.*rand()/RAND_MAX)));
141             curr++;
142 
143             for (size_t j=0; j<curr; j++) {
144                 allocs[j].check();
145                 allocs[j].clear();
146             }
147         }
148     }
149 };
150 
151 int TestLargeObjCache::largeMemSizes[LARGE_MEM_SIZES_NUM];
152 
153 void TestLargeObjectCache()
154 {
155     for (int i=0; i<LARGE_MEM_SIZES_NUM; i++)
156         TestLargeObjCache::largeMemSizes[i] =
157             (int)(minLargeObjectSize + 2*minLargeObjectSize*(1.*rand()/RAND_MAX));
158 
159     for( int p=MaxThread; p>=MinThread; --p ) {
160         TestLargeObjCache::initBarrier( p );
161         utils::NativeParallelFor( p, TestLargeObjCache() );
162     }
163 }
164 
165 #if MALLOC_CHECK_RECURSION
166 
167 class TestStartupAlloc: public SimpleBarrier {
168     struct TestBlock {
169         void *ptr;
170         size_t sz;
171     };
172     static const int ITERS = 100;
173 public:
174     TestStartupAlloc() {}
175     void operator()(int) const {
176         TestBlock blocks1[ITERS], blocks2[ITERS];
177 
178         barrier.wait();
179 
180         for (int i=0; i<ITERS; i++) {
181             blocks1[i].sz = rand() % minLargeObjectSize;
182             blocks1[i].ptr = StartupBlock::allocate(blocks1[i].sz);
183             REQUIRE((blocks1[i].ptr && StartupBlock::msize(blocks1[i].ptr)>=blocks1[i].sz
184                    && 0==(uintptr_t)blocks1[i].ptr % sizeof(void*)));
185             memset(blocks1[i].ptr, i, blocks1[i].sz);
186         }
187         for (int i=0; i<ITERS; i++) {
188             blocks2[i].sz = rand() % minLargeObjectSize;
189             blocks2[i].ptr = StartupBlock::allocate(blocks2[i].sz);
190             REQUIRE((blocks2[i].ptr && StartupBlock::msize(blocks2[i].ptr)>=blocks2[i].sz
191                    && 0==(uintptr_t)blocks2[i].ptr % sizeof(void*)));
192             memset(blocks2[i].ptr, i, blocks2[i].sz);
193 
194             for (size_t j=0; j<blocks1[i].sz; j++)
195                 REQUIRE(*((char*)blocks1[i].ptr+j) == i);
196             Block *block = (Block *)alignDown(blocks1[i].ptr, slabSize);
197             ((StartupBlock *)block)->free(blocks1[i].ptr);
198         }
199         for (int i=ITERS-1; i>=0; i--) {
200             for (size_t j=0; j<blocks2[i].sz; j++)
201                 REQUIRE(*((char*)blocks2[i].ptr+j) == i);
202             Block *block = (Block *)alignDown(blocks2[i].ptr, slabSize);
203             ((StartupBlock *)block)->free(blocks2[i].ptr);
204         }
205     }
206 };
207 
208 #endif /* MALLOC_CHECK_RECURSION */
209 
210 #include <deque>
211 
212 template<int ITERS>
213 class BackRefWork: utils::NoAssign {
214     struct TestBlock {
215         BackRefIdx idx;
216         char       data;
217         TestBlock(BackRefIdx idx_) : idx(idx_) {}
218     };
219 public:
220     BackRefWork() {}
221     void operator()(int) const {
222         size_t cnt;
        // it's important not to invalidate pointers to the contents of the container
224         std::deque<TestBlock> blocks;
225 
226         // for ITERS==0 consume all available backrefs
227         for (cnt=0; !ITERS || cnt<ITERS; cnt++) {
228             BackRefIdx idx = BackRefIdx::newBackRef(/*largeObj=*/false);
229             if (idx.isInvalid())
230                 break;
231             blocks.push_back(TestBlock(idx));
232             setBackRef(blocks.back().idx, &blocks.back().data);
233         }
234         for (size_t i=0; i<cnt; i++)
235             REQUIRE((Block*)&blocks[i].data == getBackRef(blocks[i].idx));
236         for (size_t i=cnt; i>0; i--)
237             removeBackRef(blocks[i-1].idx);
238     }
239 };
240 
241 class LocalCachesHit: utils::NoAssign {
242     // set ITERS to trigger possible leak of backreferences
243     // during cleanup on cache overflow and on thread termination
244     static const int ITERS = 2*(FreeBlockPool::POOL_HIGH_MARK +
245                                 LocalLOC::LOC_HIGH_MARK);
246 public:
247     LocalCachesHit() {}
248     void operator()(int) const {
249         void *objsSmall[ITERS], *objsLarge[ITERS];
250 
251         for (int i=0; i<ITERS; i++) {
252             objsSmall[i] = scalable_malloc(minLargeObjectSize-1);
253             objsLarge[i] = scalable_malloc(minLargeObjectSize);
254         }
255         for (int i=0; i<ITERS; i++) {
256             scalable_free(objsSmall[i]);
257             scalable_free(objsLarge[i]);
258         }
259     }
260 };
261 
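// Walk all backreference blocks registered in backRefMaster and sum their allocation
// counts; the tests below use this to detect backreference leaks.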
262 static size_t allocatedBackRefCount()
263 {
264     size_t cnt = 0;
265     for (int i=0; i<=backRefMaster.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed); i++)
266         cnt += backRefMaster.load(std::memory_order_relaxed)->backRefBl[i]->allocatedCount;
267     return cnt;
268 }
269 
270 class TestInvalidBackrefs: public SimpleBarrier {
271 #if __ANDROID__
    // Android requires fewer iterations due to the lack of virtual memory.
273     static const int BACKREF_GROWTH_ITERS = 50*1024;
274 #else
275     static const int BACKREF_GROWTH_ITERS = 200*1024;
276 #endif
277 
278     static std::atomic<bool> backrefGrowthDone;
279     static void *ptrs[BACKREF_GROWTH_ITERS];
280 public:
281     TestInvalidBackrefs() {}
282     void operator()(int id) const {
283 
284         if (!id) {
285             backrefGrowthDone = false;
286             barrier.wait();
287 
288             for (int i=0; i<BACKREF_GROWTH_ITERS; i++)
289                 ptrs[i] = scalable_malloc(minLargeObjectSize);
290             backrefGrowthDone = true;
291             for (int i=0; i<BACKREF_GROWTH_ITERS; i++)
292                 scalable_free(ptrs[i]);
293         } else {
294             void *p2 = scalable_malloc(minLargeObjectSize-1);
295             char *p1 = (char*)scalable_malloc(minLargeObjectSize-1);
296             LargeObjectHdr *hdr =
297                 (LargeObjectHdr*)(p1+minLargeObjectSize-1 - sizeof(LargeObjectHdr));
298             hdr->backRefIdx.master = 7;
299             hdr->backRefIdx.largeObj = 1;
300             hdr->backRefIdx.offset = 2000;
301 
302             barrier.wait();
303 
304             while (!backrefGrowthDone) {
305                 scalable_free(p2);
306                 p2 = scalable_malloc(minLargeObjectSize-1);
307             }
308             scalable_free(p1);
309             scalable_free(p2);
310         }
311     }
312 };
313 
314 std::atomic<bool> TestInvalidBackrefs::backrefGrowthDone;
315 void *TestInvalidBackrefs::ptrs[BACKREF_GROWTH_ITERS];
316 
317 void TestBackRef() {
318     size_t beforeNumBackRef, afterNumBackRef;
319 
320     beforeNumBackRef = allocatedBackRefCount();
321     for( int p=MaxThread; p>=MinThread; --p )
322         utils::NativeParallelFor( p, BackRefWork<2*BR_MAX_CNT+2>() );
323     afterNumBackRef = allocatedBackRefCount();
324     REQUIRE_MESSAGE(beforeNumBackRef==afterNumBackRef, "backreference leak detected");
325     // lastUsed marks peak resource consumption. As we allocate below the mark,
326     // it must not move up, otherwise there is a resource leak.
327     int sustLastUsed = backRefMaster.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed);
328     utils::NativeParallelFor( 1, BackRefWork<2*BR_MAX_CNT+2>() );
329     REQUIRE_MESSAGE(sustLastUsed == backRefMaster.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed), "backreference leak detected");
    // check for leaks of back references while per-thread caches are in use;
    // a warm-up run is needed to cover the bootStrapMalloc call
332     utils::NativeParallelFor( 1, LocalCachesHit() );
333     beforeNumBackRef = allocatedBackRefCount();
334     utils::NativeParallelFor( 2, LocalCachesHit() );
335     int res = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL);
336     REQUIRE(res == TBBMALLOC_OK);
337     afterNumBackRef = allocatedBackRefCount();
338     REQUIRE_MESSAGE(beforeNumBackRef>=afterNumBackRef, "backreference leak detected");
339 
    // This is a regression test against a race condition between backreference
    // extension and checking an invalid BackRefIdx.
    // While detecting whether an object is large or small, scalable_free first checks for
    // large objects, so there is a chance to prepend a small object with a
    // seemingly valid BackRefIdx for a large object, and thus trigger the bug.
345     TestInvalidBackrefs::initBarrier(MaxThread);
346     utils::NativeParallelFor( MaxThread, TestInvalidBackrefs() );
    // Consume all available backrefs and check that they work correctly.
    // For now, test 32-bit machines only, because memory consumption is too high for 64-bit.
349     if (sizeof(uintptr_t) == 4)
350         utils::NativeParallelFor( MaxThread, BackRefWork<0>() );
351 }
352 
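// Allocation callback for a fixed pool: carves memory out of a static buffer and never
// returns it to the OS; yields NULL once the buffer is exhausted.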
353 void *getMem(intptr_t /*pool_id*/, size_t &bytes)
354 {
355     const size_t BUF_SIZE = 8*1024*1024;
356     static char space[BUF_SIZE];
357     static size_t pos;
358 
359     if (pos + bytes > BUF_SIZE)
360         return NULL;
361 
362     void *ret = space + pos;
363     pos += bytes;
364 
365     return ret;
366 }
367 
368 int putMem(intptr_t /*pool_id*/, void* /*raw_ptr*/, size_t /*raw_bytes*/)
369 {
370     return 0;
371 }
372 
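// malloc-backed pool callbacks: getMallocMem prepends a header recording the raw pointer
// and the requested size, so putMallocMem can verify the size it receives and free the
// original allocation.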
373 struct MallocPoolHeader {
374     void  *rawPtr;
375     size_t userSize;
376 };
377 
378 void *getMallocMem(intptr_t /*pool_id*/, size_t &bytes)
379 {
380     void *rawPtr = malloc(bytes+sizeof(MallocPoolHeader));
381     void *ret = (void *)((uintptr_t)rawPtr+sizeof(MallocPoolHeader));
382 
383     MallocPoolHeader *hdr = (MallocPoolHeader*)ret-1;
384     hdr->rawPtr = rawPtr;
385     hdr->userSize = bytes;
386 
387     return ret;
388 }
389 
390 int putMallocMem(intptr_t /*pool_id*/, void *ptr, size_t bytes)
391 {
392     MallocPoolHeader *hdr = (MallocPoolHeader*)ptr-1;
393     ASSERT(bytes == hdr->userSize, "Invalid size in pool callback.");
394     free(hdr->rawPtr);
395 
396     return 0;
397 }
398 
399 class StressLOCacheWork: utils::NoAssign {
400     rml::MemoryPool *my_mallocPool;
401 public:
402     StressLOCacheWork(rml::MemoryPool *mallocPool) : my_mallocPool(mallocPool) {}
403     void operator()(int) const {
404         for (size_t sz=minLargeObjectSize; sz<1*1024*1024;
405              sz+=LargeObjectCache::LargeBSProps::CacheStep) {
406             void *ptr = pool_malloc(my_mallocPool, sz);
407             REQUIRE_MESSAGE(ptr, "Memory was not allocated");
408             memset(ptr, sz, sz);
409             pool_free(my_mallocPool, ptr);
410         }
411     }
412 };
413 
414 void TestPools() {
415     rml::MemPoolPolicy pol(getMem, putMem);
416     size_t beforeNumBackRef, afterNumBackRef;
417 
418     rml::MemoryPool *pool1;
419     rml::MemoryPool *pool2;
420     pool_create_v1(0, &pol, &pool1);
421     pool_create_v1(0, &pol, &pool2);
422     pool_destroy(pool1);
423     pool_destroy(pool2);
424 
425     scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL);
426     beforeNumBackRef = allocatedBackRefCount();
427     rml::MemoryPool *fixedPool;
428 
429     pool_create_v1(0, &pol, &fixedPool);
430     pol.pAlloc = getMallocMem;
431     pol.pFree = putMallocMem;
432     pol.granularity = 8;
433     rml::MemoryPool *mallocPool;
434 
435     pool_create_v1(0, &pol, &mallocPool);
/* Check that the large object cache (LOC) returns the correct size for cached objects.
   Objects of passBackendSz bytes are cached in the LOC but bypass the backend, so
   their memory is requested directly from the allocation callback.
   Objects of anotherLOCBinSz bytes must fit into another LOC bin,
   so that their allocation/releasing leads to cache cleanup.
   All this is expected to lead to the release of the passBackendSz-byte objects
   from the LOC during LOC cleanup, and putMallocMem checks that the returned size
   is correct.
*/
445     const size_t passBackendSz = Backend::maxBinned_HugePage+1,
446         anotherLOCBinSz = minLargeObjectSize+1;
447     for (int i=0; i<10; i++) { // run long enough to be cached
448         void *p = pool_malloc(mallocPool, passBackendSz);
449         REQUIRE_MESSAGE(p, "Memory was not allocated");
450         pool_free(mallocPool, p);
451     }
    // run long enough for the passBackendSz allocation to be cleaned from the cache
    // and returned back to putMallocMem for size checking
454     for (int i=0; i<1000; i++) {
455         void *p = pool_malloc(mallocPool, anotherLOCBinSz);
456         REQUIRE_MESSAGE(p, "Memory was not allocated");
457         pool_free(mallocPool, p);
458     }
459 
460     void *smallObj =  pool_malloc(fixedPool, 10);
461     REQUIRE_MESSAGE(smallObj, "Memory was not allocated");
462     memset(smallObj, 1, 10);
463     void *ptr = pool_malloc(fixedPool, 1024);
464     REQUIRE_MESSAGE(ptr, "Memory was not allocated");
465     memset(ptr, 1, 1024);
466     void *largeObj = pool_malloc(fixedPool, minLargeObjectSize);
467     REQUIRE_MESSAGE(largeObj, "Memory was not allocated");
468     memset(largeObj, 1, minLargeObjectSize);
469     ptr = pool_malloc(fixedPool, minLargeObjectSize);
470     REQUIRE_MESSAGE(ptr, "Memory was not allocated");
471     memset(ptr, minLargeObjectSize, minLargeObjectSize);
472     pool_malloc(fixedPool, 10*minLargeObjectSize); // no leak for unsuccessful allocations
473     pool_free(fixedPool, smallObj);
474     pool_free(fixedPool, largeObj);
475 
    // provoke large object cache cleanup and hope no leaks occur
477     for( int p=MaxThread; p>=MinThread; --p )
478         utils::NativeParallelFor( p, StressLOCacheWork(mallocPool) );
479     pool_destroy(mallocPool);
480     pool_destroy(fixedPool);
481 
482     scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL);
483     afterNumBackRef = allocatedBackRefCount();
484     REQUIRE_MESSAGE(beforeNumBackRef==afterNumBackRef, "backreference leak detected");
485 
486     {
487         // test usedSize/cachedSize and LOC bitmask correctness
488         void *p[5];
489         pool_create_v1(0, &pol, &mallocPool);
490         const LargeObjectCache *loc = &((rml::internal::MemoryPool*)mallocPool)->extMemPool.loc;
491         const int LargeCacheStep = LargeObjectCache::LargeBSProps::CacheStep;
492         p[3] = pool_malloc(mallocPool, minLargeObjectSize+2*LargeCacheStep);
493         for (int i=0; i<10; i++) {
494             p[0] = pool_malloc(mallocPool, minLargeObjectSize);
495             p[1] = pool_malloc(mallocPool, minLargeObjectSize+LargeCacheStep);
496             pool_free(mallocPool, p[0]);
497             pool_free(mallocPool, p[1]);
498         }
499         REQUIRE(loc->getUsedSize());
500         pool_free(mallocPool, p[3]);
501         REQUIRE(loc->getLOCSize() < 3*(minLargeObjectSize+LargeCacheStep));
502         const size_t maxLocalLOCSize = LocalLOCImpl<3,30>::getMaxSize();
503         REQUIRE(loc->getUsedSize() <= maxLocalLOCSize);
504         for (int i=0; i<3; i++)
505             p[i] = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep);
506         size_t currUser = loc->getUsedSize();
507         REQUIRE((!loc->getLOCSize() && currUser >= 3*(minLargeObjectSize+LargeCacheStep)));
508         p[4] = pool_malloc(mallocPool, minLargeObjectSize+3*LargeCacheStep);
509         REQUIRE(loc->getUsedSize() - currUser >= minLargeObjectSize+3*LargeCacheStep);
510         pool_free(mallocPool, p[4]);
511         REQUIRE(loc->getUsedSize() <= currUser+maxLocalLOCSize);
512         pool_reset(mallocPool);
513         REQUIRE((!loc->getLOCSize() && !loc->getUsedSize()));
514         pool_destroy(mallocPool);
515     }
    // To test the LOC we need bigger lists than are released by the LocalLOC
    //   used in production code. Create a special LocalLOC.
518     {
519         LocalLOCImpl<2, 20> lLOC;
520         pool_create_v1(0, &pol, &mallocPool);
521         rml::internal::ExtMemoryPool *mPool = &((rml::internal::MemoryPool*)mallocPool)->extMemPool;
522         const LargeObjectCache *loc = &((rml::internal::MemoryPool*)mallocPool)->extMemPool.loc;
523         const int LargeCacheStep = LargeObjectCache::LargeBSProps::CacheStep;
524         for (int i=0; i<22; i++) {
525             void *o = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep);
526             bool ret = lLOC.put(((LargeObjectHdr*)o - 1)->memoryBlock, mPool);
527             REQUIRE(ret);
528 
529             o = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep);
530             ret = lLOC.put(((LargeObjectHdr*)o - 1)->memoryBlock, mPool);
531             REQUIRE(ret);
532         }
533         lLOC.externalCleanup(mPool);
534         REQUIRE(!loc->getUsedSize());
535 
536         pool_destroy(mallocPool);
537     }
538 }
539 
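// Forge objects that look like small and large allocations but carry invalid
// backreferences, and check that __TBB_malloc_safer_msize rejects them while still
// recognizing genuine allocations.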
540 void TestObjectRecognition() {
541     size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr);
542     unsigned falseObjectSize = 113; // unsigned is the type expected by getObjectSize
543     size_t obtainedSize;
544 
545     REQUIRE_MESSAGE(sizeof(BackRefIdx)==sizeof(uintptr_t), "Unexpected size of BackRefIdx");
546     REQUIRE_MESSAGE(getObjectSize(falseObjectSize)!=falseObjectSize, "Error in test: bad choice for false object size");
547 
548     void* mem = scalable_malloc(2*slabSize);
549     REQUIRE_MESSAGE(mem, "Memory was not allocated");
550     Block* falseBlock = (Block*)alignUp((uintptr_t)mem, slabSize);
551     falseBlock->objectSize = falseObjectSize;
552     char* falseSO = (char*)falseBlock + falseObjectSize*7;
553     REQUIRE_MESSAGE(alignDown(falseSO, slabSize)==(void*)falseBlock, "Error in test: false object offset is too big");
554 
555     void* bufferLOH = scalable_malloc(2*slabSize + headersSize);
556     REQUIRE_MESSAGE(bufferLOH, "Memory was not allocated");
557     LargeObjectHdr* falseLO =
558         (LargeObjectHdr*)alignUp((uintptr_t)bufferLOH + headersSize, slabSize);
559     LargeObjectHdr* headerLO = (LargeObjectHdr*)falseLO-1;
560     headerLO->memoryBlock = (LargeMemoryBlock*)bufferLOH;
561     headerLO->memoryBlock->unalignedSize = 2*slabSize + headersSize;
562     headerLO->memoryBlock->objectSize = slabSize + headersSize;
563     headerLO->backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true);
564     setBackRef(headerLO->backRefIdx, headerLO);
565     REQUIRE_MESSAGE(scalable_msize(falseLO) == slabSize + headersSize,
566            "Error in test: LOH falsification failed");
567     removeBackRef(headerLO->backRefIdx);
568 
569     const int NUM_OF_IDX = BR_MAX_CNT+2;
570     BackRefIdx idxs[NUM_OF_IDX];
571     for (int cnt=0; cnt<2; cnt++) {
572         for (int master = -10; master<10; master++) {
573             falseBlock->backRefIdx.master = (uint16_t)master;
574             headerLO->backRefIdx.master = (uint16_t)master;
575 
576             for (int bl = -10; bl<BR_MAX_CNT+10; bl++) {
577                 falseBlock->backRefIdx.offset = (uint16_t)bl;
578                 headerLO->backRefIdx.offset = (uint16_t)bl;
579 
580                 for (int largeObj = 0; largeObj<2; largeObj++) {
581                     falseBlock->backRefIdx.largeObj = largeObj;
582                     headerLO->backRefIdx.largeObj = largeObj;
583 
584                     obtainedSize = __TBB_malloc_safer_msize(falseSO, NULL);
585                     REQUIRE_MESSAGE(obtainedSize==0, "Incorrect pointer accepted");
586                     obtainedSize = __TBB_malloc_safer_msize(falseLO, NULL);
587                     REQUIRE_MESSAGE(obtainedSize==0, "Incorrect pointer accepted");
588                 }
589             }
590         }
591         if (cnt == 1) {
592             for (int i=0; i<NUM_OF_IDX; i++)
593                 removeBackRef(idxs[i]);
594             break;
595         }
596         for (int i=0; i<NUM_OF_IDX; i++) {
597             idxs[i] = BackRefIdx::newBackRef(/*largeObj=*/false);
598             setBackRef(idxs[i], NULL);
599         }
600     }
601     char *smallPtr = (char*)scalable_malloc(falseObjectSize);
602     obtainedSize = __TBB_malloc_safer_msize(smallPtr, NULL);
603     REQUIRE_MESSAGE(obtainedSize==getObjectSize(falseObjectSize), "Correct pointer not accepted?");
604     scalable_free(smallPtr);
605 
606     obtainedSize = __TBB_malloc_safer_msize(mem, NULL);
607     REQUIRE_MESSAGE(obtainedSize>=2*slabSize, "Correct pointer not accepted?");
608     scalable_free(mem);
609     scalable_free(bufferLOH);
610 }
611 
612 class TestBackendWork: public SimpleBarrier {
613     struct TestBlock {
614         intptr_t   data;
615         BackRefIdx idx;
616     };
617     static const int ITERS = 20;
618 
619     rml::internal::Backend *backend;
620 public:
621     TestBackendWork(rml::internal::Backend *bknd) : backend(bknd) {}
622     void operator()(int) const {
623         barrier.wait();
624 
625         for (int i=0; i<ITERS; i++) {
626             BlockI *slabBlock = backend->getSlabBlock(1);
627             REQUIRE_MESSAGE(slabBlock, "Memory was not allocated");
628             uintptr_t prevBlock = (uintptr_t)slabBlock;
629             backend->putSlabBlock(slabBlock);
630 
631             LargeMemoryBlock *largeBlock = backend->getLargeBlock(16*1024);
632             REQUIRE_MESSAGE(largeBlock, "Memory was not allocated");
633             REQUIRE_MESSAGE((uintptr_t)largeBlock != prevBlock,
634                     "Large block cannot be reused from slab memory, only in fixed_pool case.");
635             backend->putLargeBlock(largeBlock);
636         }
637     }
638 };
639 
640 void TestBackend()
641 {
642     rml::MemPoolPolicy pol(getMallocMem, putMallocMem);
643     rml::MemoryPool *mPool;
644     pool_create_v1(0, &pol, &mPool);
645     rml::internal::ExtMemoryPool *ePool = &((rml::internal::MemoryPool*)mPool)->extMemPool;
646     rml::internal::Backend *backend = &ePool->backend;
647 
648     for( int p=MaxThread; p>=MinThread; --p ) {
        // regression test against a race condition in backend synchronization,
        // triggered only when the WhiteboxTestingYield() call actually yields
651         for (int i=0; i<100; i++) {
652             TestBackendWork::initBarrier(p);
653             utils::NativeParallelFor( p, TestBackendWork(backend) );
654         }
655     }
656 
657     BlockI *block = backend->getSlabBlock(1);
658     REQUIRE_MESSAGE(block, "Memory was not allocated");
659     backend->putSlabBlock(block);
660 
    // Check that the backend increases its total memory size when memory is allocated
    // and decreases it when the memory is released.
662     const size_t memSize0 = backend->getTotalMemSize();
663     LargeMemoryBlock *lmb = backend->getLargeBlock(4*MByte);
664     REQUIRE( lmb );
665 
666     const size_t memSize1 = backend->getTotalMemSize();
    REQUIRE_MESSAGE( (intptr_t)(memSize1-memSize0) >= 4*MByte, "The backend has not increased the amount of memory in use." );
668 
669     backend->putLargeBlock(lmb);
670     const size_t memSize2 = backend->getTotalMemSize();
    REQUIRE_MESSAGE( memSize2 == memSize0, "The backend has not decreased the amount of memory in use." );
672 
673     pool_destroy(mPool);
674 }
675 
676 void TestBitMask()
677 {
678     BitMaskMin<256> mask;
679 
680     mask.reset();
681     mask.set(10, 1);
682     mask.set(5, 1);
683     mask.set(1, 1);
684     REQUIRE(mask.getMinTrue(2) == 5);
685 
686     mask.reset();
687     mask.set(0, 1);
688     mask.set(64, 1);
689     mask.set(63, 1);
690     mask.set(200, 1);
691     mask.set(255, 1);
692     REQUIRE(mask.getMinTrue(0) == 0);
693     REQUIRE(mask.getMinTrue(1) == 63);
694     REQUIRE(mask.getMinTrue(63) == 63);
695     REQUIRE(mask.getMinTrue(64) == 64);
696     REQUIRE(mask.getMinTrue(101) == 200);
697     REQUIRE(mask.getMinTrue(201) == 255);
698     mask.set(255, 0);
699     REQUIRE(mask.getMinTrue(201) == -1);
700 }
701 
702 size_t getMemSize()
703 {
704     return defaultMemPool->extMemPool.backend.getTotalMemSize();
705 }
706 
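// Barrier functor: the first invocation records the current backend memory size, the
// next one checks that it has not changed, i.e. that nothing was cached.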
707 class CheckNotCached {
708     static size_t memSize;
709 public:
710     void operator() () const {
711         int res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1);
712         REQUIRE(res == TBBMALLOC_OK);
713         if (memSize==(size_t)-1) {
714             memSize = getMemSize();
715         } else {
716             REQUIRE(getMemSize() == memSize);
717             memSize=(size_t)-1;
718         }
719     }
720 };
721 
722 size_t CheckNotCached::memSize = (size_t)-1;
723 
724 class RunTestHeapLimit: public SimpleBarrier {
725 public:
726     void operator()( int /*mynum*/ ) const {
        // Provoke bootstrap heap initialization before recording memory size.
        // NOTE: The initialization should be done only with a "large" object,
        // because a "small" object allocation pins a slab as the active block,
        // and it is impossible to release it from a foreign thread.
732         scalable_free(scalable_malloc(minLargeObjectSize));
733         barrier.wait(CheckNotCached());
734         for (size_t n = minLargeObjectSize; n < 5*1024*1024; n += 128*1024)
735             scalable_free(scalable_malloc(n));
736         barrier.wait(CheckNotCached());
737     }
738 };
739 
740 void TestHeapLimit()
741 {
742     if(!isMallocInitialized()) doInitialization();
743     // tiny limit to stop caching
744     int res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1);
745     REQUIRE(res == TBBMALLOC_OK);
746      // Provoke bootstrap heap initialization before recording memory size.
747     scalable_free(scalable_malloc(8));
748     size_t n, sizeBefore = getMemSize();
749 
    // Try to provoke a call to the OS for memory to check that
    // requests are not fulfilled from caches.
    // A single call is not enough here because of backend fragmentation.
753     for (n = minLargeObjectSize; n < 10*1024*1024; n += 16*1024) {
754         void *p = scalable_malloc(n);
755         bool leave = (sizeBefore != getMemSize());
756         scalable_free(p);
757         if (leave)
758             break;
759         REQUIRE_MESSAGE(sizeBefore == getMemSize(), "No caching expected");
760     }
761     REQUIRE_MESSAGE(n < 10*1024*1024, "scalable_malloc doesn't provoke OS request for memory, "
762            "is some internal cache still used?");
763 
764     for( int p=MaxThread; p>=MinThread; --p ) {
765         RunTestHeapLimit::initBarrier( p );
766         utils::NativeParallelFor( p, RunTestHeapLimit() );
767     }
    // setting the soft limit also tries to fit the current usage under it, so call it here again
769     res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1);
770     REQUIRE(res == TBBMALLOC_OK);
771     size_t m = getMemSize();
772     REQUIRE(sizeBefore == m);
773     // restore default
774     res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 0);
775     REQUIRE(res == TBBMALLOC_OK);
776 }
777 
778 void checkNoHugePages()
779 {
780     REQUIRE_MESSAGE(!hugePages.isEnabled, "scalable_allocation_mode "
781            "must have priority over environment variable");
782 }
783 
784 /*---------------------------------------------------------------------------*/
// A regression test against bugs in the TBBMALLOC_CLEAN_ALL_BUFFERS allocation command.
// The idea is to allocate and deallocate a set of objects randomly in parallel.
// For large sizes (16K), it forces conflicts in the backend during coalescing.
// For small sizes (4K), it forces cross-thread deallocations and thus orphaned slabs.
// Global cleanup should process orphaned slabs and the queue of postponed coalescing
// requests, otherwise it will not be able to unmap all unused memory.
791 
792 const int num_allocs = 10*1024;
793 void *ptrs[num_allocs];
794 std::atomic<int> alloc_counter;
795 
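// The shared atomic counter hands out indices: threads allocate while it counts up and
// deallocate while it counts down, so an object is typically freed by a thread other
// than the one that allocated it.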
796 inline void multiThreadAlloc(size_t alloc_size) {
797     for( int i = alloc_counter++; i < num_allocs; i = alloc_counter++ ) {
798        ptrs[i] = scalable_malloc( alloc_size );
799        REQUIRE_MESSAGE( ptrs[i] != nullptr, "scalable_malloc returned zero." );
800     }
801 }
802 inline void crossThreadDealloc() {
803     for( int i = --alloc_counter; i >= 0; i = --alloc_counter ) {
804        if (i < num_allocs) scalable_free( ptrs[i] );
805     }
806 }
807 
808 template<int AllocSize>
809 struct TestCleanAllBuffersBody : public SimpleBarrier {
810     void operator() ( int ) const {
811         barrier.wait();
812         multiThreadAlloc(AllocSize);
813         barrier.wait();
814         crossThreadDealloc();
815     }
816 };
817 
818 template<int AllocSize>
819 void TestCleanAllBuffers() {
820     const int num_threads = 8;
821     // Clean up if something was allocated before the test
822     scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,0);
823 
824     size_t memory_in_use_before = getMemSize();
825     alloc_counter = 0;
826     TestCleanAllBuffersBody<AllocSize>::initBarrier(num_threads);
827 
828     utils::NativeParallelFor(num_threads, TestCleanAllBuffersBody<AllocSize>());
829     // TODO: reproduce the bug conditions more reliably
830     if ( defaultMemPool->extMemPool.backend.coalescQ.blocksToFree.load(std::memory_order_relaxed) == NULL ) {
831         INFO( "Warning: The queue of postponed coalescing requests is empty. ");
832         INFO( "Unable to create the condition for bug reproduction.\n" );
833     }
834     int result = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,0);
835     REQUIRE_MESSAGE( result == TBBMALLOC_OK, "The cleanup request has not cleaned anything." );
836     size_t memory_in_use_after = getMemSize();
837 
838     size_t memory_leak = memory_in_use_after - memory_in_use_before;
839     INFO( "memory_in_use_before = " <<  memory_in_use_before << ", memory_in_use_after = " << memory_in_use_after << "\n" );
840     REQUIRE_MESSAGE( memory_leak == 0, "Cleanup was unable to release all allocated memory." );
841 }
842 
//! Force cross-thread deallocation of small objects to create a set of privatizable slab blocks.
//! The TBBMALLOC_CLEAN_THREAD_BUFFERS command has to privatize all of these blocks.
845 struct TestCleanThreadBuffersBody : public SimpleBarrier {
846     void operator() ( int ) const {
847         barrier.wait();
848         multiThreadAlloc(2*1024);
849         barrier.wait();
850         crossThreadDealloc();
851         barrier.wait();
852         int result = scalable_allocation_command(TBBMALLOC_CLEAN_THREAD_BUFFERS,0);
853         if (result != TBBMALLOC_OK) {
854             REPORT("Warning: clean-up request for this particular thread has not cleaned anything.");
855         }
856 
857         // Check that TLS was cleaned fully
858         TLSData *tlsCurr = defaultMemPool->getTLS(/*create=*/false);
859         for (int i = 0; i < numBlockBinLimit; i++) {
860             REQUIRE_MESSAGE(!(tlsCurr->bin[i].activeBlk), "Some bin was not cleaned.");
861         }
862         REQUIRE_MESSAGE(!(tlsCurr->lloc.head.load(std::memory_order_relaxed)), "Local LOC was not cleaned.");
863         REQUIRE_MESSAGE(!(tlsCurr->freeSlabBlocks.head.load(std::memory_order_relaxed)), "Free Block pool was not cleaned.");
864     }
865 };
866 
867 void TestCleanThreadBuffers() {
868     const int num_threads = 8;
869     // Clean up if something was allocated before the test
870     scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,0);
871 
872     alloc_counter = 0;
873     TestCleanThreadBuffersBody::initBarrier(num_threads);
874     utils::NativeParallelFor(num_threads, TestCleanThreadBuffersBody());
875 }
876 
877 /*---------------------------------------------------------------------------*/
878 /*------------------------- Large Object Cache tests ------------------------*/
879 #if _MSC_VER==1600 || _MSC_VER==1500
880     // ignore C4275: non dll-interface class 'stdext::exception' used as
881     // base for dll-interface class 'std::bad_cast'
882     #pragma warning (disable: 4275)
883 #endif
884 #include <vector>
885 #include <list>
886 
887 // default constructor of CacheBin
888 template<typename Props>
889 rml::internal::LargeObjectCacheImpl<Props>::CacheBin::CacheBin() {}
890 
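// Shadow model of a single LOC cache bin: it replays get()/putList() operations together
// with the aging/cleanup heuristics, and check() compares every field against the real CacheBin.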
891 template<typename Props>
892 class CacheBinModel {
893 
894     typedef typename rml::internal::LargeObjectCacheImpl<Props>::CacheBin CacheBinType;
895 
896     // The emulated cache bin.
897     CacheBinType cacheBinModel;
898     // The reference to real cache bin inside the large object cache.
899     CacheBinType &cacheBin;
900 
901     const size_t size;
902 
    // for each cached object, store only the time when it was put into the cache
904     std::list<uintptr_t> objects;
905 
906     void doCleanup() {
907         if ( cacheBinModel.cachedSize > Props::TooLargeFactor*cacheBinModel.usedSize ) tooLargeLOC++;
908         else tooLargeLOC = 0;
909 
910         if (tooLargeLOC>3 && cacheBinModel.ageThreshold)
911             cacheBinModel.ageThreshold = (cacheBinModel.ageThreshold + cacheBinModel.meanHitRange)/2;
912 
913         uintptr_t currTime = cacheCurrTime;
914         while (!objects.empty() && (intptr_t)(currTime - objects.front()) > cacheBinModel.ageThreshold) {
915             cacheBinModel.cachedSize -= size;
916             cacheBinModel.lastCleanedAge = objects.front();
917             objects.pop_front();
918         }
919 
920         cacheBinModel.oldest = objects.empty() ? 0 : objects.front();
921     }
922 
923 public:
924     CacheBinModel(CacheBinType &_cacheBin, size_t allocSize) : cacheBin(_cacheBin), size(allocSize) {
925         cacheBinModel.oldest = cacheBin.oldest;
926         cacheBinModel.lastCleanedAge = cacheBin.lastCleanedAge;
927         cacheBinModel.ageThreshold = cacheBin.ageThreshold;
928         cacheBinModel.usedSize = cacheBin.usedSize;
929         cacheBinModel.cachedSize = cacheBin.cachedSize;
930         cacheBinModel.meanHitRange = cacheBin.meanHitRange;
931         cacheBinModel.lastGet = cacheBin.lastGet;
932     }
933     void get() {
934         uintptr_t currTime = ++cacheCurrTime;
935 
936         if ( objects.empty() ) {
937             const uintptr_t sinceLastGet = currTime - cacheBinModel.lastGet;
938             if ( ( cacheBinModel.ageThreshold && sinceLastGet > Props::LongWaitFactor*cacheBinModel.ageThreshold ) ||
939                  ( cacheBinModel.lastCleanedAge && sinceLastGet > Props::LongWaitFactor*(cacheBinModel.lastCleanedAge - cacheBinModel.lastGet) ) )
940                 cacheBinModel.lastCleanedAge = cacheBinModel.ageThreshold = 0;
941 
942             if (cacheBinModel.lastCleanedAge)
943                 cacheBinModel.ageThreshold = Props::OnMissFactor*(currTime - cacheBinModel.lastCleanedAge);
944         } else {
945             uintptr_t obj_age = objects.back();
946             objects.pop_back();
947             if ( objects.empty() ) cacheBinModel.oldest = 0;
948 
949             intptr_t hitRange = currTime - obj_age;
950             cacheBinModel.meanHitRange = cacheBinModel.meanHitRange? (cacheBinModel.meanHitRange + hitRange)/2 : hitRange;
951 
952             cacheBinModel.cachedSize -= size;
953         }
954 
955         cacheBinModel.usedSize += size;
956         cacheBinModel.lastGet = currTime;
957 
958         if ( currTime % rml::internal::cacheCleanupFreq == 0 ) doCleanup();
959     }
960 
961     void putList( int num ) {
962         uintptr_t currTime = cacheCurrTime;
963         cacheCurrTime += num;
964 
965         cacheBinModel.usedSize -= num*size;
966 
967         bool cleanUpNeeded = false;
968         if ( !cacheBinModel.lastCleanedAge ) {
969             cacheBinModel.lastCleanedAge = ++currTime;
970             cleanUpNeeded |= currTime % rml::internal::cacheCleanupFreq == 0;
971             num--;
972         }
973 
974         for ( int i=1; i<=num; ++i ) {
975             currTime+=1;
976             cleanUpNeeded |= currTime % rml::internal::cacheCleanupFreq == 0;
977             if ( objects.empty() )
978                 cacheBinModel.oldest = currTime;
979             objects.push_back(currTime);
980         }
981 
982         cacheBinModel.cachedSize += num*size;
983 
984         if ( cleanUpNeeded ) doCleanup();
985     }
986 
987     void check() {
988         REQUIRE(cacheBinModel.oldest == cacheBin.oldest);
989         REQUIRE(cacheBinModel.lastCleanedAge == cacheBin.lastCleanedAge);
990         REQUIRE(cacheBinModel.ageThreshold == cacheBin.ageThreshold);
991         REQUIRE(cacheBinModel.usedSize == cacheBin.usedSize);
992         REQUIRE(cacheBinModel.cachedSize == cacheBin.cachedSize);
993         REQUIRE(cacheBinModel.meanHitRange == cacheBin.meanHitRange);
994         REQUIRE(cacheBinModel.lastGet == cacheBin.lastGet);
995     }
996 
997     static uintptr_t cacheCurrTime;
998     static intptr_t tooLargeLOC;
999 };
1000 
1001 template<typename Props> uintptr_t CacheBinModel<Props>::cacheCurrTime;
1002 template<typename Props> intptr_t CacheBinModel<Props>::tooLargeLOC;
1003 
1004 template <typename Scenario>
1005 void LOCModelTester() {
1006     defaultMemPool->extMemPool.loc.cleanAll();
1007     defaultMemPool->extMemPool.loc.reset();
1008 
1009     const size_t size = 16 * 1024;
1010     const size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr);
1011     const size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment);
1012     const int binIdx = defaultMemPool->extMemPool.loc.largeCache.sizeToIdx( allocationSize );
1013 
1014     CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps>::cacheCurrTime = defaultMemPool->extMemPool.loc.cacheCurrTime;
1015     CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps>::tooLargeLOC = defaultMemPool->extMemPool.loc.largeCache.tooLargeLOC;
1016     CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps> cacheBinModel(defaultMemPool->extMemPool.loc.largeCache.bin[binIdx], allocationSize);
1017 
1018     Scenario scen;
1019     for (rml::internal::LargeMemoryBlock *lmb = scen.next(); (intptr_t)lmb != (intptr_t)-1; lmb = scen.next()) {
1020         if ( lmb ) {
1021             int num=1;
1022             for (rml::internal::LargeMemoryBlock *curr = lmb; curr->next; curr=curr->next) num+=1;
1023             defaultMemPool->extMemPool.freeLargeObject(lmb);
1024             cacheBinModel.putList(num);
1025         } else {
1026             scen.saveLmb(defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize));
1027             cacheBinModel.get();
1028         }
1029 
1030         cacheBinModel.check();
1031     }
1032 }
1033 
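// Scenario for LOCModelTester: first allocate 1000 large blocks (next() returning NULL
// means "allocate"), then free them one by one; returning -1 ends the run.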
1034 class TestBootstrap {
1035     bool allocating;
1036     std::vector<rml::internal::LargeMemoryBlock*> lmbArray;
1037 public:
1038     TestBootstrap() : allocating(true) {}
1039 
1040     rml::internal::LargeMemoryBlock* next() {
1041         if ( allocating )
1042             return NULL;
1043         if ( !lmbArray.empty() ) {
1044             rml::internal::LargeMemoryBlock *ret = lmbArray.back();
1045             lmbArray.pop_back();
1046             return ret;
1047         }
1048         return (rml::internal::LargeMemoryBlock*)-1;
1049     }
1050 
1051     void saveLmb( rml::internal::LargeMemoryBlock *lmb ) {
1052         lmb->next = NULL;
1053         lmbArray.push_back(lmb);
1054         if ( lmbArray.size() == 1000 ) allocating = false;
1055     }
1056 };
1057 
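// Scenario for LOCModelTester: performs 100000 random allocate/free operations, freeing
// a randomly chosen block among those currently kept.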
1058 class TestRandom {
1059     std::vector<rml::internal::LargeMemoryBlock*> lmbArray;
1060     int numOps;
1061 public:
1062     TestRandom() : numOps(100000) {
1063         srand(1234);
1064     }
1065 
1066     rml::internal::LargeMemoryBlock* next() {
1067         if ( numOps-- ) {
1068             if ( lmbArray.empty() || rand() / (RAND_MAX>>1) == 0 )
1069                 return NULL;
1070             size_t ind = rand()%lmbArray.size();
1071             if ( ind != lmbArray.size()-1 ) std::swap(lmbArray[ind],lmbArray[lmbArray.size()-1]);
1072             rml::internal::LargeMemoryBlock *lmb = lmbArray.back();
1073             lmbArray.pop_back();
1074             return lmb;
1075         }
1076         return (rml::internal::LargeMemoryBlock*)-1;
1077     }
1078 
1079     void saveLmb( rml::internal::LargeMemoryBlock *lmb ) {
1080         lmb->next = NULL;
1081         lmbArray.push_back(lmb);
1082     }
1083 };
1084 
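// Each thread repeatedly allocates and immediately frees an object of the same size; the
// LOC is expected to collapse such malloc/free pairs, so fewer operations than
// num_threads*NUM_ALLOCS should reach the cache bins.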
1085 class TestCollapsingMallocFree : public SimpleBarrier {
1086 public:
1087     static const int NUM_ALLOCS = 100000;
1088     const int num_threads;
1089 
1090     TestCollapsingMallocFree( int _num_threads ) : num_threads(_num_threads) {
1091         initBarrier( num_threads );
1092     }
1093 
1094     void operator() ( int ) const {
1095         const size_t size = 16 * 1024;
1096         const size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr);
1097         const size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment);
1098 
1099         barrier.wait();
1100         for ( int i=0; i<NUM_ALLOCS; ++i ) {
1101             defaultMemPool->extMemPool.freeLargeObject(
1102                 defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize) );
1103         }
1104     }
1105 
1106     void check() {
1107         REQUIRE( tbbmalloc_whitebox::locGetProcessed == tbbmalloc_whitebox::locPutProcessed);
        REQUIRE_MESSAGE( tbbmalloc_whitebox::locGetProcessed < num_threads*NUM_ALLOCS, "Not a single Malloc/Free pair was collapsed." );
1109     }
1110 };
1111 
1112 class TestCollapsingBootstrap : public SimpleBarrier {
1113     class CheckNumAllocs {
1114         const int num_threads;
1115     public:
1116         CheckNumAllocs( int _num_threads ) : num_threads(_num_threads) {}
1117         void operator()() const {
1118             REQUIRE( tbbmalloc_whitebox::locGetProcessed == num_threads*NUM_ALLOCS );
1119             REQUIRE( tbbmalloc_whitebox::locPutProcessed == 0 );
1120         }
1121     };
1122 public:
1123     static const int NUM_ALLOCS = 1000;
1124     const int num_threads;
1125 
1126     TestCollapsingBootstrap( int _num_threads ) : num_threads(_num_threads) {
1127         initBarrier( num_threads );
1128     }
1129 
1130     void operator() ( int ) const {
1131         const size_t size = 16 * 1024;
1132         size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr);
1133         size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment);
1134 
1135         barrier.wait();
1136         rml::internal::LargeMemoryBlock *lmbArray[NUM_ALLOCS];
1137         for ( int i=0; i<NUM_ALLOCS; ++i )
1138             lmbArray[i] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize);
1139 
1140         barrier.wait(CheckNumAllocs(num_threads));
1141         for ( int i=0; i<NUM_ALLOCS; ++i )
1142             defaultMemPool->extMemPool.freeLargeObject( lmbArray[i] );
1143     }
1144 
1145     void check() {
1146         REQUIRE( tbbmalloc_whitebox::locGetProcessed == tbbmalloc_whitebox::locPutProcessed );
1147         REQUIRE( tbbmalloc_whitebox::locGetProcessed == num_threads*NUM_ALLOCS );
1148     }
1149 };
1150 
1151 template <typename Scenario>
1152 void LOCCollapsingTester( int num_threads ) {
1153     tbbmalloc_whitebox::locGetProcessed = 0;
1154     tbbmalloc_whitebox::locPutProcessed = 0;
1155     defaultMemPool->extMemPool.loc.cleanAll();
1156     defaultMemPool->extMemPool.loc.reset();
1157 
1158     Scenario scen(num_threads);
1159     utils::NativeParallelFor(num_threads, scen);
1160 
1161     scen.check();
1162 }
1163 
1164 void TestLOC() {
1165     LOCModelTester<TestBootstrap>();
1166     LOCModelTester<TestRandom>();
1167 
1168     const int num_threads = 16;
1169     LOCCollapsingTester<TestCollapsingBootstrap>( num_threads );
1170     if ( num_threads > 1 ) {
1171         INFO( "num_threads = " << num_threads );
1172         LOCCollapsingTester<TestCollapsingMallocFree>( num_threads );
1173     } else {
1174         REPORT( "Warning: concurrency is too low for TestMallocFreeCollapsing ( num_threads = %d )\n", num_threads );
1175     }
1176 }
1177 /*---------------------------------------------------------------------------*/
1178 
1179 void *findCacheLine(void *p) {
1180     return (void*)alignDown((uintptr_t)p, estimatedCacheLineSize);
1181 }
1182 
1183 // test that internals of Block are at expected cache lines
1184 void TestSlabAlignment() {
1185     const size_t min_sz = 8;
1186     const int space = 2*16*1024; // fill at least 2 slabs
1187     void *pointers[space / min_sz];  // the worst case is min_sz byte object
1188 
1189     for (size_t sz = min_sz; sz <= 64; sz *= 2) {
1190         for (size_t i = 0; i < space/sz; i++) {
1191             pointers[i] = scalable_malloc(sz);
1192             Block *block = (Block *)alignDown(pointers[i], slabSize);
1193             REQUIRE_MESSAGE(findCacheLine(&block->isFull) != findCacheLine(pointers[i]),
1194                           "A user object must not share a cache line with slab control structures.");
1195             REQUIRE_MESSAGE(findCacheLine(&block->next) != findCacheLine(&block->nextPrivatizable),
1196                           "GlobalBlockFields and LocalBlockFields must be on different cache lines.");
1197         }
1198         for (size_t i = 0; i < space/sz; i++)
1199             scalable_free(pointers[i]);
1200     }
1201 }
1202 
1203 #include "common/memory_usage.h"
1204 
// TODO: Consider adding Huge Pages support on macOS (special mmap flag).
// Transparent Huge Pages support would need a different system-parsing mechanism,
// because there is no /proc/meminfo on macOS.
1208 #if __linux__
1209 void TestTHP() {
1210     // Get backend from default memory pool
1211     rml::internal::Backend *backend = &(defaultMemPool->extMemPool.backend);
1212 
1213     // Configure malloc to use huge pages
1214     scalable_allocation_mode(USE_HUGE_PAGES, 1);
1215     REQUIRE_MESSAGE(hugePages.isEnabled, "Huge pages should be enabled via scalable_allocation_mode");
1216 
1217     const int HUGE_PAGE_SIZE = 2 * 1024 * 1024;
1218 
1219     // allocCount transparent huge pages should be allocated
1220     const int allocCount = 10;
1221 
1222     // Allocate huge page aligned memory regions to track system
1223     // counters for transparent huge pages
1224     void*  allocPtrs[allocCount];
1225 
1226     // Wait for the system to update process memory info files after other tests
1227     utils::Sleep(4000);
1228 
1229     // Parse system info regarding current THP status
1230     size_t currentSystemTHPCount = utils::getSystemTHPCount();
1231     size_t currentSystemTHPAllocatedSize = utils::getSystemTHPAllocatedSize();
1232 
1233     for (int i = 0; i < allocCount; i++) {
        // The allocation size must end up aligned on the huge page size (done internally)
1235         size_t allocSize = HUGE_PAGE_SIZE - (i * 1000);
1236 
1237         // Map memory
1238         allocPtrs[i] = backend->allocRawMem(allocSize);
1239 
1240         REQUIRE_MESSAGE(allocPtrs[i], "Allocation not succeeded.");
        REQUIRE_MESSAGE(allocSize == HUGE_PAGE_SIZE,
            "Allocation size has to be aligned to the huge page size internally.");
1243 
        // First-touch policy: the OS does not allocate real pages until the region is accessed
1245         memset(allocPtrs[i], 1, allocSize);
1246 
        REQUIRE_MESSAGE(isAligned(allocPtrs[i], HUGE_PAGE_SIZE),
            "The pointer returned by allocRawMem is not aligned on the huge page size.");
1249     }
1250 
1251     // Wait for the system to update process memory info files after allocations
1252     utils::Sleep(4000);
1253 
    // Generally, the kernel tries to allocate transparent huge pages, but sometimes it cannot do this
    // (tested on SLES 11/12), so consider these system info checks a remark only.
    // Also, some systems can allocate more memory than needed in the background (tested on Ubuntu 14.04).
1257     size_t newSystemTHPCount = utils::getSystemTHPCount();
1258     size_t newSystemTHPAllocatedSize = utils::getSystemTHPAllocatedSize();
1259     if ((newSystemTHPCount - currentSystemTHPCount) < allocCount
1260             && (newSystemTHPAllocatedSize - currentSystemTHPAllocatedSize) / (2 * 1024) < allocCount) {
1261         REPORT( "Warning: the system didn't allocate needed amount of THPs.\n" );
1262     }
1263 
1264     // Test memory unmap
1265     for (int i = 0; i < allocCount; i++) {
1266         REQUIRE_MESSAGE(backend->freeRawMem(allocPtrs[i], HUGE_PAGE_SIZE),
1267                 "Something went wrong during raw memory free");
1268     }
1269 }
1270 #endif // __linux__
1271 
1272 inline size_t getStabilizedMemUsage() {
1273     for (int i = 0; i < 3; i++) utils::GetMemoryUsage();
1274     return utils::GetMemoryUsage();
1275 }
1276 
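// Helper: reallocates origPtr to reallocSize via reallocAligned and reports the
// unalignedSize of the original and of the reallocated LargeMemoryBlock.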
1277 inline void* reallocAndRetrieve(void* origPtr, size_t reallocSize, size_t& origBlockSize, size_t& reallocBlockSize) {
1278     rml::internal::LargeMemoryBlock* origLmb = ((rml::internal::LargeObjectHdr *)origPtr - 1)->memoryBlock;
1279     origBlockSize = origLmb->unalignedSize;
1280 
1281     void* reallocPtr = rml::internal::reallocAligned(defaultMemPool, origPtr, reallocSize, 0);
1282 
    // Retrieve the reallocated block information
1284     rml::internal::LargeMemoryBlock* reallocLmb = ((rml::internal::LargeObjectHdr *)reallocPtr - 1)->memoryBlock;
1285     reallocBlockSize = reallocLmb->unalignedSize;
1286 
1287     return reallocPtr;
1288 }
1289 
1290 void TestReallocDecreasing() {
1291 
1292     /* Testing that actual reallocation happens for large objects that do not fit the backend cache
1293        but decrease in size by a factor of >= 2. */
1294 
1295     size_t startSize = 100 * 1024 * 1024;
1296     size_t maxBinnedSize = defaultMemPool->extMemPool.backend.getMaxBinnedSize();
1297     void*  origPtr = scalable_malloc(startSize);
1298     void*  reallocPtr = NULL;
1299 
    // Realloc to a size 1 MB smaller
1301     size_t origBlockSize = 42;
1302     size_t reallocBlockSize = 43;
1303     reallocPtr = reallocAndRetrieve(origPtr, startSize - 1 * 1024 * 1024, origBlockSize, reallocBlockSize);
1304     REQUIRE_MESSAGE(origBlockSize == reallocBlockSize, "Reallocated block size shouldn't change");
1305     REQUIRE_MESSAGE(reallocPtr == origPtr, "Original pointer shouldn't change");
1306 
    // Repeat decreasing reallocation while the requested size exceeds the max binned size
1308     size_t reallocSize = (startSize / 2) - 1000; // exact realloc
1309     while(reallocSize > maxBinnedSize) {
1310 
1311         // Prevent huge/large objects caching
1312         defaultMemPool->extMemPool.loc.cleanAll();
1313         // Prevent local large object caching
1314         TLSData *tls = defaultMemPool->getTLS(/*create=*/false);
1315         tls->lloc.externalCleanup(&defaultMemPool->extMemPool);
1316 
1317         size_t sysMemUsageBefore = getStabilizedMemUsage();
1318         size_t totalMemSizeBefore = defaultMemPool->extMemPool.backend.getTotalMemSize();
1319 
1320         reallocPtr = reallocAndRetrieve(origPtr, reallocSize, origBlockSize, reallocBlockSize);
1321 
        REQUIRE_MESSAGE(origBlockSize > reallocBlockSize, "Reallocated block size should decrease.");
1323 
1324         size_t sysMemUsageAfter = getStabilizedMemUsage();
1325         size_t totalMemSizeAfter = defaultMemPool->extMemPool.backend.getTotalMemSize();
1326 
        // Prevent a false check when backend caching occurred or system memory usage info could not be read
        if (totalMemSizeBefore > totalMemSizeAfter && sysMemUsageAfter != 0 && sysMemUsageBefore != 0) {
            REQUIRE_MESSAGE(sysMemUsageBefore > sysMemUsageAfter, "Memory was not released");
1330         }
1331 
1332         origPtr = reallocPtr;
1333         reallocSize = (reallocSize / 2) - 1000; // exact realloc
1334     }
1335     scalable_free(reallocPtr);
1336 
1337     /* TODO: Decreasing reallocation of large objects that fit backend cache */
1338     /* TODO: Small objects decreasing reallocation test */
1339 }
1340 #if !__TBB_WIN8UI_SUPPORT && defined(_WIN32)
1341 
1342 #include "../../src/tbbmalloc_proxy/function_replacement.cpp"
1343 #include <string>
1344 namespace FunctionReplacement {
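    // As exercised by the tests below: TBB_malloc_replacement_log() fills a NULL-terminated
    // array of log record strings (the pointer may be NULL if only the status is needed) and
    // returns 0 when every function was successfully replaced, -1 otherwise.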
1345     FunctionInfo funcInfo = { "funcname","dllname" };
1346     char **func_replacement_log;
1347     int status;
1348 
1349     void LogCleanup() {
1350         // Free all allocated memory
1351         for (unsigned i = 0; i < Log::record_number; i++){
1352             HeapFree(GetProcessHeap(), 0, Log::records[i]);
1353         }
1354         for (unsigned i = 0; i < Log::RECORDS_COUNT + 1; i++){
1355             Log::records[i] = NULL;
1356         }
1357         Log::replacement_status = true;
1358         Log::record_number = 0;
1359     }
1360 
1361     void TestEmptyLog() {
1362         status = TBB_malloc_replacement_log(&func_replacement_log);
1363 
        REQUIRE_MESSAGE(status == -1, "Status reports success, but the log is empty");
1365         REQUIRE_MESSAGE(*func_replacement_log == nullptr, "Log must be empty");
1366     }
1367 
1368     void TestLogOverload() {
1369         for (int i = 0; i < 1000; i++)
1370             Log::record(funcInfo, "opcode string", true);
1371 
1372         status = TBB_malloc_replacement_log(&func_replacement_log);
1373         // Find last record
1374         for (; *(func_replacement_log + 1) != 0; func_replacement_log++) {}
1375 
1376         std::string last_line(*func_replacement_log);
        REQUIRE_MESSAGE(status == 0, "Status reports failure, but all functions were found");
1378         REQUIRE_MESSAGE(last_line.compare("Log was truncated.") == 0, "Log overflow was not handled");
1379 
        // Add a failed record to change the overall status
        Log::record(funcInfo, "opcode string", false);
        status = TBB_malloc_replacement_log(NULL);
        REQUIRE_MESSAGE(status == -1, "Status reports success, but there is a failed search case");
1384 
1385         LogCleanup();
1386     }
1387 
1388     void TestFalseSearchCase() {
1389         Log::record(funcInfo, "opcode string", false);
1390         std::string expected_line = "Fail: "+ std::string(funcInfo.funcName) + " (" +
1391                          std::string(funcInfo.dllName) + "), byte pattern: <opcode string>";
1392 
1393         status = TBB_malloc_replacement_log(&func_replacement_log);
1394 
        REQUIRE_MESSAGE(expected_line.compare(*func_replacement_log) == 0, "Wrong last string content");
        REQUIRE_MESSAGE(status == -1, "Status reports success, but there is a failed search case");
1397         LogCleanup();
1398     }
1399 
1400     void TestWrongFunctionInDll(){
1401         HMODULE ucrtbase_handle = GetModuleHandle("ucrtbase.dll");
1402         if (ucrtbase_handle) {
1403             IsPrologueKnown("ucrtbase.dll", "fake_function", NULL, ucrtbase_handle);
1404             std::string expected_line = "Fail: fake_function (ucrtbase.dll), byte pattern: <unknown>";
1405 
1406             status = TBB_malloc_replacement_log(&func_replacement_log);
1407 
            REQUIRE_MESSAGE(expected_line.compare(*func_replacement_log) == 0, "Wrong last string content");
            REQUIRE_MESSAGE(status == -1, "Status reports success, but there is a failed search case");
1410             LogCleanup();
1411         } else {
            INFO("Cannot find ucrtbase.dll on the system, test skipped!\n");
1413         }
1414     }
1415 }
1416 
void TestFunctionReplacementLog() {
1418     using namespace FunctionReplacement;
1419     // Do not reorder the test cases
1420     TestEmptyLog();
1421     TestLogOverload();
1422     TestFalseSearchCase();
1423     TestWrongFunctionInDll();
1424 }
1425 
1426 #endif /*!__TBB_WIN8UI_SUPPORT && defined(_WIN32)*/
1427 
1428 #include <cmath> // pow function
1429 
// Huge objects cache: the formula Size = MinSize * 2^(Index / StepFactor) gives the bin size,
// but it does not match our sizeToIdx approximation algorithm, where the steps between major
// (power of 2) sizes are equal. Used internally by the test. Static cast to avoid warnings.
1433 inline size_t hocIdxToSizeFormula(int idx) {
1434     return static_cast<size_t>(float(rml::internal::LargeObjectCache::maxLargeSize) *
1435         pow(2, float(idx) / float(rml::internal::LargeObjectCache::HugeBSProps::StepFactor)));
1436 }
// Large objects cache: bin sizes form an arithmetic progression
1438 inline size_t locIdxToSizeFormula(int idx) {
1439     return rml::internal::LargeObjectCache::LargeBSProps::MinSize +
1440         (idx * rml::internal::LargeObjectCache::LargeBSProps::CacheStep);
1441 }
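// For illustration: by these formulas the huge-object bin size doubles every StepFactor indices
// (hocIdxToSizeFormula(idx + StepFactor) == 2 * hocIdxToSizeFormula(idx), up to floating-point rounding),
// while the large-object bin sizes grow linearly by CacheStep per index.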
1442 
1443 template <typename CacheType>
1444 void TestLOCacheBinsConverterImpl(int idx, size_t checkingSize) {
1445     size_t alignedSize = CacheType::alignToBin(checkingSize);
1446     REQUIRE_MESSAGE(alignedSize >= checkingSize, "Size is not correctly aligned");
1447     int calcIdx = CacheType::sizeToIdx(alignedSize);
    REQUIRE_MESSAGE(calcIdx == idx, "Index calculated from size is not correct");
1449 }
1450 
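// Walk every bin of both the large and the huge object caches and check that the
// size -> alignToBin -> sizeToIdx round trip maps back to the original bin index.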
1451 void TestLOCacheBinsConverter(){
1452     typedef rml::internal::LargeObjectCache::LargeCacheType LargeCacheType;
1453     typedef rml::internal::LargeObjectCache::HugeCacheType HugeCacheType;
1454 
1455     size_t checkingSize = 0;
1456     for (int idx = 0; idx < LargeCacheType::numBins; idx++) {
1457         checkingSize = locIdxToSizeFormula(idx);
1458         TestLOCacheBinsConverterImpl<LargeCacheType>(idx, checkingSize);
1459     }
1460     for (int idx = 0; idx < HugeCacheType::numBins; idx++) {
1461         checkingSize = hocIdxToSizeFormula(idx);
1462         TestLOCacheBinsConverterImpl<HugeCacheType>(idx, checkingSize);
1463     }
1464 }
1465 
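// Helper for the huge size threshold test: fills the huge objects cache with one block per bin
// in [MIN_BIN_IDX, MAX_BIN_IDX) and then checks which bins survive no cleanup, a regular cleanup,
// or a hard cleanup, relative to the sieve size and the configured huge size threshold.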
1466 struct HOThresholdTester {
1467     LargeObjectCache* loc;
1468     size_t hugeSize;
1469 
1470     static const size_t sieveSize = LargeObjectCache::defaultMaxHugeSize;
    // The sieve starts at 64MB (the 24th cache bin); checking a few bins on either side of it is enough
    // and keeps memory consumption reasonable (especially on 32-bit architectures)
1473     static const int MIN_BIN_IDX = 21;
1474     static const int MAX_BIN_IDX = 27;
1475 
    enum CleanupType {
        NO_CLEANUP,      // cache populated, no cleanup performed
        REGULAR_CLEANUP, // regularCleanup() and decreasingCleanup() were run
        HARD_CLEANUP     // cleanAll() was run
    };
1481 
1482     void populateCache() {
1483         LargeMemoryBlock* loArray[MAX_BIN_IDX - MIN_BIN_IDX];
        // To avoid the consequences of backend::softCacheCleanup (cleanup by isLOCToolarge),
        // first allocate all objects and only then cache them all at once.
        // Moreover, because the first cached item is still dropped from the cache due to the lack of history,
        // the allocation is done twice.
1488         for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) {
1489             size_t allocationSize = alignedSizeFromIdx(idx);
1490             int localIdx = idx - MIN_BIN_IDX;
1491             loArray[localIdx] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize);
1492             REQUIRE_MESSAGE(loArray[localIdx], "Large object was not allocated.");
1493             loc->put(loArray[localIdx]);
1494             loArray[localIdx] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize);
1495         }
1496         for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) {
1497             loc->put(loArray[idx - MIN_BIN_IDX]);
1498         }
1499     }
1500     void clean(bool all) {
1501         if (all) {
            // Bypasses any threshold and cleans all bins
1503             loc->cleanAll();
1504         } else {
            // Regular cleanup should do nothing for bins above the threshold. The decreasing cleanup is used
            // in the test to make sure that all objects below defaultMaxHugeSize (sieveSize) are cleaned
1507             loc->regularCleanup();
1508             loc->decreasingCleanup();
1509         }
1510     }
1511     void check(CleanupType type) {
1512         for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) {
1513             size_t objectSize = alignedSizeFromIdx(idx);
            // Objects below the sieve threshold and above the huge object threshold should stay cached
            // (others should be sieved), unless the whole cache is dropped. Regular cleanup drops objects only below the sieve size.
1516             if (type == NO_CLEANUP && sizeInCacheRange(objectSize)) {
1517                 REQUIRE_MESSAGE(objectInCacheBin(idx, objectSize), "Object was released from cache, it shouldn't.");
1518             } else if (type == REGULAR_CLEANUP && (objectSize >= hugeSize)) {
1519                 REQUIRE_MESSAGE(objectInCacheBin(idx, objectSize), "Object was released from cache, it shouldn't.");
1520             } else { // HARD_CLEANUP
1521                 REQUIRE_MESSAGE(cacheBinEmpty(idx), "Object is still cached.");
1522             }
1523         }
1524     }
1525 
1526 private:
1527     bool cacheBinEmpty(int idx) {
1528         return (loc->hugeCache.bin[idx].cachedSize == 0 && loc->hugeCache.bin[idx].get() == NULL);
1529     }
1530     bool objectInCacheBin(int idx, size_t size) {
1531         return (loc->hugeCache.bin[idx].cachedSize != 0 && loc->hugeCache.bin[idx].cachedSize % size == 0);
1532     }
1533     bool sizeInCacheRange(size_t size) {
1534         return size <= sieveSize || size >= hugeSize;
1535     }
1536     size_t alignedSizeFromIdx(int idx) {
1537         return rml::internal::LargeObjectCache::alignToBin(hocIdxToSizeFormula(idx));
1538     }
1539 };
1540 
// The TBBMALLOC_SET_HUGE_SIZE_THRESHOLD value should be set before the test,
// either through the scalable allocation API or via the environment variable
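// For reference, the two mechanisms exercised below (sketch, the sizes are arbitrary):
//     scalable_allocation_mode(TBBMALLOC_SET_HUGE_SIZE_THRESHOLD, 56 * MByte);
//     utils::SetEnv("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD", "67108864"); // must be set before cache initialization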
1543 void TestHugeSizeThresholdImpl(LargeObjectCache* loc, size_t hugeSize, bool fullTesting) {
1544     HOThresholdTester test = {loc, hugeSize};
1545     test.populateCache();
1546     // Check the default sieve value
1547     test.check(HOThresholdTester::NO_CLEANUP);
1548 
1549     if(fullTesting) {
1550         // Check that objects above threshold stay in cache after regular cleanup
1551         test.clean(/*all*/false);
1552         test.check(HOThresholdTester::REGULAR_CLEANUP);
1553     }
    // Check that all objects are dropped from the cache after the hard cleanup (ignoring the huge objects threshold)
1555     test.clean(/*all*/true);
1556     test.check(HOThresholdTester::HARD_CLEANUP);
1557     // Restore previous settings
1558     loc->setHugeSizeThreshold(LargeObjectCache::maxHugeSize);
1559     loc->reset();
1560 }
1561 
/*
 *  Test the default huge size threshold and the behaviour when huge object settings are defined
 */
1565 void TestHugeSizeThreshold() {
1566     // Clean up if something was allocated before the test and reset cache state
1567     scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, 0);
1568     LargeObjectCache* loc = &defaultMemPool->extMemPool.loc;
1569     // Restore default settings just in case
1570     loc->setHugeSizeThreshold(LargeObjectCache::maxHugeSize);
1571     loc->reset();
    // First, check the default huge size value (with the maximum huge object threshold).
    // Everything larger than this value should be released to the OS without caching.
1574     TestHugeSizeThresholdImpl(loc, loc->hugeSizeThreshold, false);
    // Then set the huge object threshold.
    // All objects with sizes above the threshold will be released only after a hard cleanup.
1577 #if !__TBB_WIN8UI_SUPPORT
1578     // Unit testing for environment variable
1579     utils::SetEnv("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD","67108864");
    // The large object cache reads the threshold environment variable during initialization.
    // Reset the value before the test.
1582     loc->hugeSizeThreshold = 0;
1583     loc->init(&defaultMemPool->extMemPool);
1584     TestHugeSizeThresholdImpl(loc, 64 * MByte, true);
1585 #endif
1586     // Unit testing for scalable_allocation_command
1587     scalable_allocation_mode(TBBMALLOC_SET_HUGE_SIZE_THRESHOLD, 56 * MByte);
1588     TestHugeSizeThresholdImpl(loc, 56 * MByte, true);
1589 }
1590 
1591 //! \brief \ref error_guessing
1592 TEST_CASE("Main test case") {
1593     scalable_allocation_mode(USE_HUGE_PAGES, 0);
1594 #if !__TBB_WIN8UI_SUPPORT
1595     utils::SetEnv("TBB_MALLOC_USE_HUGE_PAGES","yes");
1596 #endif
1597     checkNoHugePages();
    // backreference requires that initialization has been done
1599     if(!isMallocInitialized()) doInitialization();
1600     checkNoHugePages();
1601     // to succeed, leak detection must be the 1st memory-intensive test
1602     TestBackRef();
1603     TestCleanAllBuffers<4*1024>();
1604     TestCleanAllBuffers<16*1024>();
1605     TestCleanThreadBuffers();
1606     TestPools();
1607     TestBackend();
1608 
1609 #if MALLOC_CHECK_RECURSION
1610     for( int p=MaxThread; p>=MinThread; --p ) {
1611         TestStartupAlloc::initBarrier( p );
1612         utils::NativeParallelFor( p, TestStartupAlloc() );
1613         REQUIRE_MESSAGE(!firstStartupBlock, "Startup heap memory leak detected");
1614     }
1615 #endif
1616     TestLargeObjectCache();
1617     TestObjectRecognition();
1618     TestBitMask();
1619     TestHeapLimit();
1620     TestLOC();
1621     TestSlabAlignment();
1622 }
1623 
1624 //! \brief \ref error_guessing
1625 TEST_CASE("Decreasing reallocation") {
1626     if (!isMallocInitialized()) doInitialization();
1627     TestReallocDecreasing();
1628 }
1629 
1630 //! \brief \ref error_guessing
1631 TEST_CASE("Large object cache bins converter") {
1632     if (!isMallocInitialized()) doInitialization();
1633     TestLOCacheBinsConverter();
1634 }
1635 
1636 //! \brief \ref error_guessing
1637 TEST_CASE("Huge size threshold settings") {
1638     if (!isMallocInitialized()) doInitialization();
1639     TestHugeSizeThreshold();
1640 }
1641 
1642 #if __linux__
1643 //! \brief \ref error_guessing
1644 TEST_CASE("Transparent huge pages") {
1645     if (utils::isTHPEnabledOnMachine()) {
1646         if (!isMallocInitialized()) doInitialization();
1647         TestTHP();
1648     } else {
        INFO("Transparent Huge Pages are not supported on the system - test skipped\n");
1650     }
1651 }
1652 #endif
1653 
1654 #if !__TBB_WIN8UI_SUPPORT && defined(_WIN32)
1655 //! \brief \ref error_guessing
1656 TEST_CASE("Function replacement log") {
    TestFunctionReplacementLog();
1658 }
1659 #endif
1660