1 /* 2 Copyright (c) 2005-2021 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 //! \file test_malloc_whitebox.cpp 18 //! \brief Test for [memory_allocation] functionality 19 20 // To prevent loading dynamic TBBmalloc at startup, that is not needed for the whitebox test 21 #define __TBB_SOURCE_DIRECTLY_INCLUDED 1 22 // Call thread shutdown API for native threads join 23 #define HARNESS_TBBMALLOC_THREAD_SHUTDOWN 1 24 25 // According to C99 standard INTPTR_MIN defined for C++ if __STDC_LIMIT_MACROS pre-defined 26 #define __STDC_LIMIT_MACROS 1 27 28 // To not depends on ITT support stuff 29 #ifdef DO_ITT_NOTIFY 30 #undef DO_ITT_NOTIFY 31 #endif 32 33 #include "common/test.h" 34 35 #include "common/utils.h" 36 #include "common/utils_assert.h" 37 #include "common/utils_env.h" 38 #include "common/spin_barrier.h" 39 40 #include "oneapi/tbb/detail/_machine.h" 41 42 #define __TBB_MALLOC_WHITEBOX_TEST 1 // to get access to allocator internals 43 // help trigger rare race condition 44 #define WhiteboxTestingYield() (tbb::detail::yield(), tbb::detail::yield(), tbb::detail::yield(), tbb::detail::yield()) 45 46 #if __INTEL_COMPILER && __TBB_MIC_OFFLOAD 47 // 2571 is variable has not been declared with compatible "target" attribute 48 // 3218 is class/struct may fail when offloaded because this field is misaligned 49 // or contains data that is misaligned 50 #pragma warning(push) 51 #pragma warning(disable:2571 3218) 52 #endif 53 #define protected public 54 #define private public 55 #include "../../src/tbbmalloc/frontend.cpp" 56 #undef protected 57 #undef private 58 #if __INTEL_COMPILER && __TBB_MIC_OFFLOAD 59 #pragma warning(pop) 60 #endif 61 #include "../../src/tbbmalloc/backend.cpp" 62 #include "../../src/tbbmalloc/backref.cpp" 63 64 namespace tbbmalloc_whitebox { 65 size_t locGetProcessed = 0; 66 size_t locPutProcessed = 0; 67 } 68 #include "../../src/tbbmalloc/large_objects.cpp" 69 #include "../../src/tbbmalloc/tbbmalloc.cpp" 70 71 const int LARGE_MEM_SIZES_NUM = 10; 72 static const int MinThread = 1; 73 static const int MaxThread = 4; 74 75 class AllocInfo { 76 int *p; 77 int val; 78 int size; 79 public: 80 AllocInfo() : p(NULL), val(0), size(0) {} 81 explicit AllocInfo(int sz) : p((int*)scalable_malloc(sz*sizeof(int))), 82 val(rand()), size(sz) { 83 REQUIRE(p); 84 for (int k=0; k<size; k++) 85 p[k] = val; 86 } 87 void check() const { 88 for (int k=0; k<size; k++) 89 ASSERT(p[k] == val, NULL); 90 } 91 void clear() { 92 scalable_free(p); 93 } 94 }; 95 96 class SimpleBarrier: utils::NoAssign { 97 protected: 98 static utils::SpinBarrier barrier; 99 public: 100 static void initBarrier(unsigned thrds) { barrier.initialize(thrds); } 101 }; 102 103 utils::SpinBarrier SimpleBarrier::barrier; 104 105 class TestLargeObjCache: public SimpleBarrier { 106 public: 107 static int largeMemSizes[LARGE_MEM_SIZES_NUM]; 108 109 TestLargeObjCache( ) {} 110 111 void operator()( int /*mynum*/ ) const { 112 AllocInfo allocs[LARGE_MEM_SIZES_NUM]; 113 114 // push to maximal cache limit 115 for (int i=0; i<2; i++) { 116 const int sizes[] = { MByte/sizeof(int), 117 (MByte-2*LargeObjectCache::LargeBSProps::CacheStep)/sizeof(int) }; 118 for (int q=0; q<2; q++) { 119 size_t curr = 0; 120 for (int j=0; j<LARGE_MEM_SIZES_NUM; j++, curr++) 121 new (allocs+curr) AllocInfo(sizes[q]); 122 123 for (size_t j=0; j<curr; j++) { 124 allocs[j].check(); 125 allocs[j].clear(); 126 } 127 } 128 } 129 130 barrier.wait(); 131 132 // check caching correctness 133 for (int i=0; i<1000; i++) { 134 size_t curr = 0; 135 for (int j=0; j<LARGE_MEM_SIZES_NUM-1; j++, curr++) 136 new (allocs+curr) AllocInfo(largeMemSizes[j]); 137 138 new (allocs+curr) 139 AllocInfo((int)(4*minLargeObjectSize + 140 2*minLargeObjectSize*(1.*rand()/RAND_MAX))); 141 curr++; 142 143 for (size_t j=0; j<curr; j++) { 144 allocs[j].check(); 145 allocs[j].clear(); 146 } 147 } 148 } 149 }; 150 151 int TestLargeObjCache::largeMemSizes[LARGE_MEM_SIZES_NUM]; 152 153 void TestLargeObjectCache() 154 { 155 for (int i=0; i<LARGE_MEM_SIZES_NUM; i++) 156 TestLargeObjCache::largeMemSizes[i] = 157 (int)(minLargeObjectSize + 2*minLargeObjectSize*(1.*rand()/RAND_MAX)); 158 159 for( int p=MaxThread; p>=MinThread; --p ) { 160 TestLargeObjCache::initBarrier( p ); 161 utils::NativeParallelFor( p, TestLargeObjCache() ); 162 } 163 } 164 165 #if MALLOC_CHECK_RECURSION 166 167 class TestStartupAlloc: public SimpleBarrier { 168 struct TestBlock { 169 void *ptr; 170 size_t sz; 171 }; 172 static const int ITERS = 100; 173 public: 174 TestStartupAlloc() {} 175 void operator()(int) const { 176 TestBlock blocks1[ITERS], blocks2[ITERS]; 177 178 barrier.wait(); 179 180 for (int i=0; i<ITERS; i++) { 181 blocks1[i].sz = rand() % minLargeObjectSize; 182 blocks1[i].ptr = StartupBlock::allocate(blocks1[i].sz); 183 REQUIRE((blocks1[i].ptr && StartupBlock::msize(blocks1[i].ptr)>=blocks1[i].sz 184 && 0==(uintptr_t)blocks1[i].ptr % sizeof(void*))); 185 memset(blocks1[i].ptr, i, blocks1[i].sz); 186 } 187 for (int i=0; i<ITERS; i++) { 188 blocks2[i].sz = rand() % minLargeObjectSize; 189 blocks2[i].ptr = StartupBlock::allocate(blocks2[i].sz); 190 REQUIRE((blocks2[i].ptr && StartupBlock::msize(blocks2[i].ptr)>=blocks2[i].sz 191 && 0==(uintptr_t)blocks2[i].ptr % sizeof(void*))); 192 memset(blocks2[i].ptr, i, blocks2[i].sz); 193 194 for (size_t j=0; j<blocks1[i].sz; j++) 195 REQUIRE(*((char*)blocks1[i].ptr+j) == i); 196 Block *block = (Block *)alignDown(blocks1[i].ptr, slabSize); 197 ((StartupBlock *)block)->free(blocks1[i].ptr); 198 } 199 for (int i=ITERS-1; i>=0; i--) { 200 for (size_t j=0; j<blocks2[i].sz; j++) 201 REQUIRE(*((char*)blocks2[i].ptr+j) == i); 202 Block *block = (Block *)alignDown(blocks2[i].ptr, slabSize); 203 ((StartupBlock *)block)->free(blocks2[i].ptr); 204 } 205 } 206 }; 207 208 #endif /* MALLOC_CHECK_RECURSION */ 209 210 #include <deque> 211 212 template<int ITERS> 213 class BackRefWork: utils::NoAssign { 214 struct TestBlock { 215 BackRefIdx idx; 216 char data; 217 TestBlock(BackRefIdx idx_) : idx(idx_) {} 218 }; 219 public: 220 BackRefWork() {} 221 void operator()(int) const { 222 size_t cnt; 223 // it's important to not invalidate pointers to the contents of the container 224 std::deque<TestBlock> blocks; 225 226 // for ITERS==0 consume all available backrefs 227 for (cnt=0; !ITERS || cnt<ITERS; cnt++) { 228 BackRefIdx idx = BackRefIdx::newBackRef(/*largeObj=*/false); 229 if (idx.isInvalid()) 230 break; 231 blocks.push_back(TestBlock(idx)); 232 setBackRef(blocks.back().idx, &blocks.back().data); 233 } 234 for (size_t i=0; i<cnt; i++) 235 REQUIRE((Block*)&blocks[i].data == getBackRef(blocks[i].idx)); 236 for (size_t i=cnt; i>0; i--) 237 removeBackRef(blocks[i-1].idx); 238 } 239 }; 240 241 class LocalCachesHit: utils::NoAssign { 242 // set ITERS to trigger possible leak of backreferences 243 // during cleanup on cache overflow and on thread termination 244 static const int ITERS = 2*(FreeBlockPool::POOL_HIGH_MARK + 245 LocalLOC::LOC_HIGH_MARK); 246 public: 247 LocalCachesHit() {} 248 void operator()(int) const { 249 void *objsSmall[ITERS], *objsLarge[ITERS]; 250 251 for (int i=0; i<ITERS; i++) { 252 objsSmall[i] = scalable_malloc(minLargeObjectSize-1); 253 objsLarge[i] = scalable_malloc(minLargeObjectSize); 254 } 255 for (int i=0; i<ITERS; i++) { 256 scalable_free(objsSmall[i]); 257 scalable_free(objsLarge[i]); 258 } 259 } 260 }; 261 262 static size_t allocatedBackRefCount() 263 { 264 size_t cnt = 0; 265 for (int i=0; i<=backRefMaster.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed); i++) 266 cnt += backRefMaster.load(std::memory_order_relaxed)->backRefBl[i]->allocatedCount; 267 return cnt; 268 } 269 270 class TestInvalidBackrefs: public SimpleBarrier { 271 #if __ANDROID__ 272 // Android requires lower iters due to lack of virtual memory. 273 static const int BACKREF_GROWTH_ITERS = 50*1024; 274 #else 275 static const int BACKREF_GROWTH_ITERS = 200*1024; 276 #endif 277 278 static std::atomic<bool> backrefGrowthDone; 279 static void *ptrs[BACKREF_GROWTH_ITERS]; 280 public: 281 TestInvalidBackrefs() {} 282 void operator()(int id) const { 283 284 if (!id) { 285 backrefGrowthDone = false; 286 barrier.wait(); 287 288 for (int i=0; i<BACKREF_GROWTH_ITERS; i++) 289 ptrs[i] = scalable_malloc(minLargeObjectSize); 290 backrefGrowthDone = true; 291 for (int i=0; i<BACKREF_GROWTH_ITERS; i++) 292 scalable_free(ptrs[i]); 293 } else { 294 void *p2 = scalable_malloc(minLargeObjectSize-1); 295 char *p1 = (char*)scalable_malloc(minLargeObjectSize-1); 296 LargeObjectHdr *hdr = 297 (LargeObjectHdr*)(p1+minLargeObjectSize-1 - sizeof(LargeObjectHdr)); 298 hdr->backRefIdx.master = 7; 299 hdr->backRefIdx.largeObj = 1; 300 hdr->backRefIdx.offset = 2000; 301 302 barrier.wait(); 303 304 while (!backrefGrowthDone) { 305 scalable_free(p2); 306 p2 = scalable_malloc(minLargeObjectSize-1); 307 } 308 scalable_free(p1); 309 scalable_free(p2); 310 } 311 } 312 }; 313 314 std::atomic<bool> TestInvalidBackrefs::backrefGrowthDone; 315 void *TestInvalidBackrefs::ptrs[BACKREF_GROWTH_ITERS]; 316 317 void TestBackRef() { 318 size_t beforeNumBackRef, afterNumBackRef; 319 320 beforeNumBackRef = allocatedBackRefCount(); 321 for( int p=MaxThread; p>=MinThread; --p ) 322 utils::NativeParallelFor( p, BackRefWork<2*BR_MAX_CNT+2>() ); 323 afterNumBackRef = allocatedBackRefCount(); 324 REQUIRE_MESSAGE(beforeNumBackRef==afterNumBackRef, "backreference leak detected"); 325 // lastUsed marks peak resource consumption. As we allocate below the mark, 326 // it must not move up, otherwise there is a resource leak. 327 int sustLastUsed = backRefMaster.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed); 328 utils::NativeParallelFor( 1, BackRefWork<2*BR_MAX_CNT+2>() ); 329 REQUIRE_MESSAGE(sustLastUsed == backRefMaster.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed), "backreference leak detected"); 330 // check leak of back references while per-thread caches are in use 331 // warm up needed to cover bootStrapMalloc call 332 utils::NativeParallelFor( 1, LocalCachesHit() ); 333 beforeNumBackRef = allocatedBackRefCount(); 334 utils::NativeParallelFor( 2, LocalCachesHit() ); 335 int res = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL); 336 REQUIRE(res == TBBMALLOC_OK); 337 afterNumBackRef = allocatedBackRefCount(); 338 REQUIRE_MESSAGE(beforeNumBackRef>=afterNumBackRef, "backreference leak detected"); 339 340 // This is a regression test against race condition between backreference 341 // extension and checking invalid BackRefIdx. 342 // While detecting is object large or small, scalable_free 1st check for 343 // large objects, so there is a chance to prepend small object with 344 // seems valid BackRefIdx for large objects, and thus trigger the bug. 345 TestInvalidBackrefs::initBarrier(MaxThread); 346 utils::NativeParallelFor( MaxThread, TestInvalidBackrefs() ); 347 // Consume all available backrefs and check they work correctly. 348 // For now test 32-bit machines only, because for 64-bit memory consumption is too high. 349 if (sizeof(uintptr_t) == 4) 350 utils::NativeParallelFor( MaxThread, BackRefWork<0>() ); 351 } 352 353 void *getMem(intptr_t /*pool_id*/, size_t &bytes) 354 { 355 const size_t BUF_SIZE = 8*1024*1024; 356 static char space[BUF_SIZE]; 357 static size_t pos; 358 359 if (pos + bytes > BUF_SIZE) 360 return NULL; 361 362 void *ret = space + pos; 363 pos += bytes; 364 365 return ret; 366 } 367 368 int putMem(intptr_t /*pool_id*/, void* /*raw_ptr*/, size_t /*raw_bytes*/) 369 { 370 return 0; 371 } 372 373 struct MallocPoolHeader { 374 void *rawPtr; 375 size_t userSize; 376 }; 377 378 void *getMallocMem(intptr_t /*pool_id*/, size_t &bytes) 379 { 380 void *rawPtr = malloc(bytes+sizeof(MallocPoolHeader)); 381 void *ret = (void *)((uintptr_t)rawPtr+sizeof(MallocPoolHeader)); 382 383 MallocPoolHeader *hdr = (MallocPoolHeader*)ret-1; 384 hdr->rawPtr = rawPtr; 385 hdr->userSize = bytes; 386 387 return ret; 388 } 389 390 int putMallocMem(intptr_t /*pool_id*/, void *ptr, size_t bytes) 391 { 392 MallocPoolHeader *hdr = (MallocPoolHeader*)ptr-1; 393 ASSERT(bytes == hdr->userSize, "Invalid size in pool callback."); 394 free(hdr->rawPtr); 395 396 return 0; 397 } 398 399 class StressLOCacheWork: utils::NoAssign { 400 rml::MemoryPool *my_mallocPool; 401 public: 402 StressLOCacheWork(rml::MemoryPool *mallocPool) : my_mallocPool(mallocPool) {} 403 void operator()(int) const { 404 for (size_t sz=minLargeObjectSize; sz<1*1024*1024; 405 sz+=LargeObjectCache::LargeBSProps::CacheStep) { 406 void *ptr = pool_malloc(my_mallocPool, sz); 407 REQUIRE_MESSAGE(ptr, "Memory was not allocated"); 408 memset(ptr, sz, sz); 409 pool_free(my_mallocPool, ptr); 410 } 411 } 412 }; 413 414 void TestPools() { 415 rml::MemPoolPolicy pol(getMem, putMem); 416 size_t beforeNumBackRef, afterNumBackRef; 417 418 rml::MemoryPool *pool1; 419 rml::MemoryPool *pool2; 420 pool_create_v1(0, &pol, &pool1); 421 pool_create_v1(0, &pol, &pool2); 422 pool_destroy(pool1); 423 pool_destroy(pool2); 424 425 scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL); 426 beforeNumBackRef = allocatedBackRefCount(); 427 rml::MemoryPool *fixedPool; 428 429 pool_create_v1(0, &pol, &fixedPool); 430 pol.pAlloc = getMallocMem; 431 pol.pFree = putMallocMem; 432 pol.granularity = 8; 433 rml::MemoryPool *mallocPool; 434 435 pool_create_v1(0, &pol, &mallocPool); 436 /* check that large object cache (LOC) returns correct size for cached objects 437 passBackendSz Byte objects are cached in LOC, but bypassed the backend, so 438 memory requested directly from allocation callback. 439 nextPassBackendSz Byte objects must fit to another LOC bin, 440 so that their allocation/realeasing leads to cache cleanup. 441 All this is expecting to lead to releasing of passBackendSz Byte object 442 from LOC during LOC cleanup, and putMallocMem checks that returned size 443 is correct. 444 */ 445 const size_t passBackendSz = Backend::maxBinned_HugePage+1, 446 anotherLOCBinSz = minLargeObjectSize+1; 447 for (int i=0; i<10; i++) { // run long enough to be cached 448 void *p = pool_malloc(mallocPool, passBackendSz); 449 REQUIRE_MESSAGE(p, "Memory was not allocated"); 450 pool_free(mallocPool, p); 451 } 452 // run long enough to passBackendSz allocation was cleaned from cache 453 // and returned back to putMallocMem for size checking 454 for (int i=0; i<1000; i++) { 455 void *p = pool_malloc(mallocPool, anotherLOCBinSz); 456 REQUIRE_MESSAGE(p, "Memory was not allocated"); 457 pool_free(mallocPool, p); 458 } 459 460 void *smallObj = pool_malloc(fixedPool, 10); 461 REQUIRE_MESSAGE(smallObj, "Memory was not allocated"); 462 memset(smallObj, 1, 10); 463 void *ptr = pool_malloc(fixedPool, 1024); 464 REQUIRE_MESSAGE(ptr, "Memory was not allocated"); 465 memset(ptr, 1, 1024); 466 void *largeObj = pool_malloc(fixedPool, minLargeObjectSize); 467 REQUIRE_MESSAGE(largeObj, "Memory was not allocated"); 468 memset(largeObj, 1, minLargeObjectSize); 469 ptr = pool_malloc(fixedPool, minLargeObjectSize); 470 REQUIRE_MESSAGE(ptr, "Memory was not allocated"); 471 memset(ptr, minLargeObjectSize, minLargeObjectSize); 472 pool_malloc(fixedPool, 10*minLargeObjectSize); // no leak for unsuccessful allocations 473 pool_free(fixedPool, smallObj); 474 pool_free(fixedPool, largeObj); 475 476 // provoke large object cache cleanup and hope no leaks occurs 477 for( int p=MaxThread; p>=MinThread; --p ) 478 utils::NativeParallelFor( p, StressLOCacheWork(mallocPool) ); 479 pool_destroy(mallocPool); 480 pool_destroy(fixedPool); 481 482 scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL); 483 afterNumBackRef = allocatedBackRefCount(); 484 REQUIRE_MESSAGE(beforeNumBackRef==afterNumBackRef, "backreference leak detected"); 485 486 { 487 // test usedSize/cachedSize and LOC bitmask correctness 488 void *p[5]; 489 pool_create_v1(0, &pol, &mallocPool); 490 const LargeObjectCache *loc = &((rml::internal::MemoryPool*)mallocPool)->extMemPool.loc; 491 const int LargeCacheStep = LargeObjectCache::LargeBSProps::CacheStep; 492 p[3] = pool_malloc(mallocPool, minLargeObjectSize+2*LargeCacheStep); 493 for (int i=0; i<10; i++) { 494 p[0] = pool_malloc(mallocPool, minLargeObjectSize); 495 p[1] = pool_malloc(mallocPool, minLargeObjectSize+LargeCacheStep); 496 pool_free(mallocPool, p[0]); 497 pool_free(mallocPool, p[1]); 498 } 499 REQUIRE(loc->getUsedSize()); 500 pool_free(mallocPool, p[3]); 501 REQUIRE(loc->getLOCSize() < 3*(minLargeObjectSize+LargeCacheStep)); 502 const size_t maxLocalLOCSize = LocalLOCImpl<3,30>::getMaxSize(); 503 REQUIRE(loc->getUsedSize() <= maxLocalLOCSize); 504 for (int i=0; i<3; i++) 505 p[i] = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep); 506 size_t currUser = loc->getUsedSize(); 507 REQUIRE((!loc->getLOCSize() && currUser >= 3*(minLargeObjectSize+LargeCacheStep))); 508 p[4] = pool_malloc(mallocPool, minLargeObjectSize+3*LargeCacheStep); 509 REQUIRE(loc->getUsedSize() - currUser >= minLargeObjectSize+3*LargeCacheStep); 510 pool_free(mallocPool, p[4]); 511 REQUIRE(loc->getUsedSize() <= currUser+maxLocalLOCSize); 512 pool_reset(mallocPool); 513 REQUIRE((!loc->getLOCSize() && !loc->getUsedSize())); 514 pool_destroy(mallocPool); 515 } 516 // To test LOC we need bigger lists than released by current LocalLOC 517 // in production code. Create special LocalLOC. 518 { 519 LocalLOCImpl<2, 20> lLOC; 520 pool_create_v1(0, &pol, &mallocPool); 521 rml::internal::ExtMemoryPool *mPool = &((rml::internal::MemoryPool*)mallocPool)->extMemPool; 522 const LargeObjectCache *loc = &((rml::internal::MemoryPool*)mallocPool)->extMemPool.loc; 523 const int LargeCacheStep = LargeObjectCache::LargeBSProps::CacheStep; 524 for (int i=0; i<22; i++) { 525 void *o = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep); 526 bool ret = lLOC.put(((LargeObjectHdr*)o - 1)->memoryBlock, mPool); 527 REQUIRE(ret); 528 529 o = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep); 530 ret = lLOC.put(((LargeObjectHdr*)o - 1)->memoryBlock, mPool); 531 REQUIRE(ret); 532 } 533 lLOC.externalCleanup(mPool); 534 REQUIRE(!loc->getUsedSize()); 535 536 pool_destroy(mallocPool); 537 } 538 } 539 540 void TestObjectRecognition() { 541 size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr); 542 unsigned falseObjectSize = 113; // unsigned is the type expected by getObjectSize 543 size_t obtainedSize; 544 545 REQUIRE_MESSAGE(sizeof(BackRefIdx)==sizeof(uintptr_t), "Unexpected size of BackRefIdx"); 546 REQUIRE_MESSAGE(getObjectSize(falseObjectSize)!=falseObjectSize, "Error in test: bad choice for false object size"); 547 548 void* mem = scalable_malloc(2*slabSize); 549 REQUIRE_MESSAGE(mem, "Memory was not allocated"); 550 Block* falseBlock = (Block*)alignUp((uintptr_t)mem, slabSize); 551 falseBlock->objectSize = falseObjectSize; 552 char* falseSO = (char*)falseBlock + falseObjectSize*7; 553 REQUIRE_MESSAGE(alignDown(falseSO, slabSize)==(void*)falseBlock, "Error in test: false object offset is too big"); 554 555 void* bufferLOH = scalable_malloc(2*slabSize + headersSize); 556 REQUIRE_MESSAGE(bufferLOH, "Memory was not allocated"); 557 LargeObjectHdr* falseLO = 558 (LargeObjectHdr*)alignUp((uintptr_t)bufferLOH + headersSize, slabSize); 559 LargeObjectHdr* headerLO = (LargeObjectHdr*)falseLO-1; 560 headerLO->memoryBlock = (LargeMemoryBlock*)bufferLOH; 561 headerLO->memoryBlock->unalignedSize = 2*slabSize + headersSize; 562 headerLO->memoryBlock->objectSize = slabSize + headersSize; 563 headerLO->backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true); 564 setBackRef(headerLO->backRefIdx, headerLO); 565 REQUIRE_MESSAGE(scalable_msize(falseLO) == slabSize + headersSize, 566 "Error in test: LOH falsification failed"); 567 removeBackRef(headerLO->backRefIdx); 568 569 const int NUM_OF_IDX = BR_MAX_CNT+2; 570 BackRefIdx idxs[NUM_OF_IDX]; 571 for (int cnt=0; cnt<2; cnt++) { 572 for (int master = -10; master<10; master++) { 573 falseBlock->backRefIdx.master = (uint16_t)master; 574 headerLO->backRefIdx.master = (uint16_t)master; 575 576 for (int bl = -10; bl<BR_MAX_CNT+10; bl++) { 577 falseBlock->backRefIdx.offset = (uint16_t)bl; 578 headerLO->backRefIdx.offset = (uint16_t)bl; 579 580 for (int largeObj = 0; largeObj<2; largeObj++) { 581 falseBlock->backRefIdx.largeObj = largeObj; 582 headerLO->backRefIdx.largeObj = largeObj; 583 584 obtainedSize = __TBB_malloc_safer_msize(falseSO, NULL); 585 REQUIRE_MESSAGE(obtainedSize==0, "Incorrect pointer accepted"); 586 obtainedSize = __TBB_malloc_safer_msize(falseLO, NULL); 587 REQUIRE_MESSAGE(obtainedSize==0, "Incorrect pointer accepted"); 588 } 589 } 590 } 591 if (cnt == 1) { 592 for (int i=0; i<NUM_OF_IDX; i++) 593 removeBackRef(idxs[i]); 594 break; 595 } 596 for (int i=0; i<NUM_OF_IDX; i++) { 597 idxs[i] = BackRefIdx::newBackRef(/*largeObj=*/false); 598 setBackRef(idxs[i], NULL); 599 } 600 } 601 char *smallPtr = (char*)scalable_malloc(falseObjectSize); 602 obtainedSize = __TBB_malloc_safer_msize(smallPtr, NULL); 603 REQUIRE_MESSAGE(obtainedSize==getObjectSize(falseObjectSize), "Correct pointer not accepted?"); 604 scalable_free(smallPtr); 605 606 obtainedSize = __TBB_malloc_safer_msize(mem, NULL); 607 REQUIRE_MESSAGE(obtainedSize>=2*slabSize, "Correct pointer not accepted?"); 608 scalable_free(mem); 609 scalable_free(bufferLOH); 610 } 611 612 class TestBackendWork: public SimpleBarrier { 613 struct TestBlock { 614 intptr_t data; 615 BackRefIdx idx; 616 }; 617 static const int ITERS = 20; 618 619 rml::internal::Backend *backend; 620 public: 621 TestBackendWork(rml::internal::Backend *bknd) : backend(bknd) {} 622 void operator()(int) const { 623 barrier.wait(); 624 625 for (int i=0; i<ITERS; i++) { 626 BlockI *slabBlock = backend->getSlabBlock(1); 627 REQUIRE_MESSAGE(slabBlock, "Memory was not allocated"); 628 uintptr_t prevBlock = (uintptr_t)slabBlock; 629 backend->putSlabBlock(slabBlock); 630 631 LargeMemoryBlock *largeBlock = backend->getLargeBlock(16*1024); 632 REQUIRE_MESSAGE(largeBlock, "Memory was not allocated"); 633 REQUIRE_MESSAGE((uintptr_t)largeBlock != prevBlock, 634 "Large block cannot be reused from slab memory, only in fixed_pool case."); 635 backend->putLargeBlock(largeBlock); 636 } 637 } 638 }; 639 640 void TestBackend() 641 { 642 rml::MemPoolPolicy pol(getMallocMem, putMallocMem); 643 rml::MemoryPool *mPool; 644 pool_create_v1(0, &pol, &mPool); 645 rml::internal::ExtMemoryPool *ePool = &((rml::internal::MemoryPool*)mPool)->extMemPool; 646 rml::internal::Backend *backend = &ePool->backend; 647 648 for( int p=MaxThread; p>=MinThread; --p ) { 649 // regression test against an race condition in backend synchronization, 650 // triggered only when WhiteboxTestingYield() call yields 651 for (int i=0; i<100; i++) { 652 TestBackendWork::initBarrier(p); 653 utils::NativeParallelFor( p, TestBackendWork(backend) ); 654 } 655 } 656 657 BlockI *block = backend->getSlabBlock(1); 658 REQUIRE_MESSAGE(block, "Memory was not allocated"); 659 backend->putSlabBlock(block); 660 661 // Checks if the backend increases and decreases the amount of allocated memory when memory is allocated. 662 const size_t memSize0 = backend->getTotalMemSize(); 663 LargeMemoryBlock *lmb = backend->getLargeBlock(4*MByte); 664 REQUIRE( lmb ); 665 666 const size_t memSize1 = backend->getTotalMemSize(); 667 REQUIRE_MESSAGE( (intptr_t)(memSize1-memSize0) >= 4*MByte, "The backend has not increased the amount of using memory." ); 668 669 backend->putLargeBlock(lmb); 670 const size_t memSize2 = backend->getTotalMemSize(); 671 REQUIRE_MESSAGE( memSize2 == memSize0, "The backend has not decreased the amount of using memory." ); 672 673 pool_destroy(mPool); 674 } 675 676 void TestBitMask() 677 { 678 BitMaskMin<256> mask; 679 680 mask.reset(); 681 mask.set(10, 1); 682 mask.set(5, 1); 683 mask.set(1, 1); 684 REQUIRE(mask.getMinTrue(2) == 5); 685 686 mask.reset(); 687 mask.set(0, 1); 688 mask.set(64, 1); 689 mask.set(63, 1); 690 mask.set(200, 1); 691 mask.set(255, 1); 692 REQUIRE(mask.getMinTrue(0) == 0); 693 REQUIRE(mask.getMinTrue(1) == 63); 694 REQUIRE(mask.getMinTrue(63) == 63); 695 REQUIRE(mask.getMinTrue(64) == 64); 696 REQUIRE(mask.getMinTrue(101) == 200); 697 REQUIRE(mask.getMinTrue(201) == 255); 698 mask.set(255, 0); 699 REQUIRE(mask.getMinTrue(201) == -1); 700 } 701 702 size_t getMemSize() 703 { 704 return defaultMemPool->extMemPool.backend.getTotalMemSize(); 705 } 706 707 class CheckNotCached { 708 static size_t memSize; 709 public: 710 void operator() () const { 711 int res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1); 712 REQUIRE(res == TBBMALLOC_OK); 713 if (memSize==(size_t)-1) { 714 memSize = getMemSize(); 715 } else { 716 REQUIRE(getMemSize() == memSize); 717 memSize=(size_t)-1; 718 } 719 } 720 }; 721 722 size_t CheckNotCached::memSize = (size_t)-1; 723 724 class RunTestHeapLimit: public SimpleBarrier { 725 public: 726 void operator()( int /*mynum*/ ) const { 727 // Provoke bootstrap heap initialization before recording memory size. 728 // NOTE: The initialization should be processed only with a "large" 729 // object. Since the "small" object allocation lead to blocking of a 730 // slab as an active block and it is impossible to release it with 731 // foreign thread. 732 scalable_free(scalable_malloc(minLargeObjectSize)); 733 barrier.wait(CheckNotCached()); 734 for (size_t n = minLargeObjectSize; n < 5*1024*1024; n += 128*1024) 735 scalable_free(scalable_malloc(n)); 736 barrier.wait(CheckNotCached()); 737 } 738 }; 739 740 void TestHeapLimit() 741 { 742 if(!isMallocInitialized()) doInitialization(); 743 // tiny limit to stop caching 744 int res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1); 745 REQUIRE(res == TBBMALLOC_OK); 746 // Provoke bootstrap heap initialization before recording memory size. 747 scalable_free(scalable_malloc(8)); 748 size_t n, sizeBefore = getMemSize(); 749 750 // Try to provoke call to OS for memory to check that 751 // requests are not fulfilled from caches. 752 // Single call is not enough here because of backend fragmentation. 753 for (n = minLargeObjectSize; n < 10*1024*1024; n += 16*1024) { 754 void *p = scalable_malloc(n); 755 bool leave = (sizeBefore != getMemSize()); 756 scalable_free(p); 757 if (leave) 758 break; 759 REQUIRE_MESSAGE(sizeBefore == getMemSize(), "No caching expected"); 760 } 761 REQUIRE_MESSAGE(n < 10*1024*1024, "scalable_malloc doesn't provoke OS request for memory, " 762 "is some internal cache still used?"); 763 764 for( int p=MaxThread; p>=MinThread; --p ) { 765 RunTestHeapLimit::initBarrier( p ); 766 utils::NativeParallelFor( p, RunTestHeapLimit() ); 767 } 768 // it's try to match limit as well as set limit, so call here 769 res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1); 770 REQUIRE(res == TBBMALLOC_OK); 771 size_t m = getMemSize(); 772 REQUIRE(sizeBefore == m); 773 // restore default 774 res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 0); 775 REQUIRE(res == TBBMALLOC_OK); 776 } 777 778 void checkNoHugePages() 779 { 780 REQUIRE_MESSAGE(!hugePages.isEnabled, "scalable_allocation_mode " 781 "must have priority over environment variable"); 782 } 783 784 /*---------------------------------------------------------------------------*/ 785 // The regression test against bugs in TBBMALLOC_CLEAN_ALL_BUFFERS allocation command. 786 // The idea is to allocate and deallocate a set of objects randomly in parallel. 787 // For large sizes (16K), it forces conflicts in backend during coalescing. 788 // For small sizes (4K), it forces cross-thread deallocations and then orphaned slabs. 789 // Global cleanup should process orphaned slabs and the queue of postponed coalescing 790 // requests, otherwise it will not be able to unmap all unused memory. 791 792 const int num_allocs = 10*1024; 793 void *ptrs[num_allocs]; 794 std::atomic<int> alloc_counter; 795 static thread_local bool free_was_called = false; 796 797 inline void multiThreadAlloc(size_t alloc_size) { 798 for( int i = alloc_counter++; i < num_allocs; i = alloc_counter++ ) { 799 ptrs[i] = scalable_malloc( alloc_size ); 800 REQUIRE_MESSAGE( ptrs[i] != nullptr, "scalable_malloc returned zero." ); 801 } 802 } 803 inline void crossThreadDealloc() { 804 free_was_called = false; 805 for( int i = --alloc_counter; i >= 0; i = --alloc_counter ) { 806 if (i < num_allocs) { 807 scalable_free(ptrs[i]); 808 free_was_called = true; 809 } 810 } 811 } 812 813 template<int AllocSize> 814 struct TestCleanAllBuffersBody : public SimpleBarrier { 815 void operator() ( int ) const { 816 barrier.wait(); 817 multiThreadAlloc(AllocSize); 818 barrier.wait(); 819 crossThreadDealloc(); 820 } 821 }; 822 823 template<int AllocSize> 824 void TestCleanAllBuffers() { 825 const int num_threads = 8; 826 // Clean up if something was allocated before the test 827 scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,0); 828 829 size_t memory_in_use_before = getMemSize(); 830 alloc_counter = 0; 831 TestCleanAllBuffersBody<AllocSize>::initBarrier(num_threads); 832 833 utils::NativeParallelFor(num_threads, TestCleanAllBuffersBody<AllocSize>()); 834 // TODO: reproduce the bug conditions more reliably 835 if ( defaultMemPool->extMemPool.backend.coalescQ.blocksToFree.load(std::memory_order_relaxed) == NULL ) { 836 INFO( "Warning: The queue of postponed coalescing requests is empty. "); 837 INFO( "Unable to create the condition for bug reproduction.\n" ); 838 } 839 int result = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,0); 840 REQUIRE_MESSAGE( result == TBBMALLOC_OK, "The cleanup request has not cleaned anything." ); 841 size_t memory_in_use_after = getMemSize(); 842 843 size_t memory_leak = memory_in_use_after - memory_in_use_before; 844 INFO( "memory_in_use_before = " << memory_in_use_before << ", memory_in_use_after = " << memory_in_use_after << "\n" ); 845 REQUIRE_MESSAGE( memory_leak == 0, "Cleanup was unable to release all allocated memory." ); 846 } 847 848 //! Force cross thread deallocation of small objects to create a set of privatizable slab blocks. 849 //! TBBMALLOC_CLEAN_THREAD_BUFFERS command have to privatize all the block. 850 struct TestCleanThreadBuffersBody : public SimpleBarrier { 851 void operator() ( int ) const { 852 barrier.wait(); 853 multiThreadAlloc(2*1024); 854 barrier.wait(); 855 crossThreadDealloc(); 856 barrier.wait(); 857 int result = scalable_allocation_command(TBBMALLOC_CLEAN_THREAD_BUFFERS,0); 858 if (result != TBBMALLOC_OK && free_was_called) { 859 REPORT("Warning: clean-up request for this particular thread has not cleaned anything."); 860 } 861 862 // Check that TLS was cleaned fully 863 TLSData *tlsCurr = defaultMemPool->getTLS(/*create=*/false); 864 if (tlsCurr) { 865 for (int i = 0; i < numBlockBinLimit; i++) { 866 REQUIRE_MESSAGE(!(tlsCurr->bin[i].activeBlk), "Some bin was not cleaned."); 867 } 868 REQUIRE_MESSAGE(!(tlsCurr->lloc.head.load(std::memory_order_relaxed)), "Local LOC was not cleaned."); 869 REQUIRE_MESSAGE(!(tlsCurr->freeSlabBlocks.head.load(std::memory_order_relaxed)), "Free Block pool was not cleaned."); 870 } 871 } 872 }; 873 874 void TestCleanThreadBuffers() { 875 const int num_threads = 8; 876 // Clean up if something was allocated before the test 877 scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,0); 878 879 alloc_counter = 0; 880 TestCleanThreadBuffersBody::initBarrier(num_threads); 881 utils::NativeParallelFor(num_threads, TestCleanThreadBuffersBody()); 882 } 883 884 /*---------------------------------------------------------------------------*/ 885 /*------------------------- Large Object Cache tests ------------------------*/ 886 #if _MSC_VER==1600 || _MSC_VER==1500 887 // ignore C4275: non dll-interface class 'stdext::exception' used as 888 // base for dll-interface class 'std::bad_cast' 889 #pragma warning (disable: 4275) 890 #endif 891 #include <vector> 892 #include <list> 893 894 // default constructor of CacheBin 895 template<typename Props> 896 rml::internal::LargeObjectCacheImpl<Props>::CacheBin::CacheBin() {} 897 898 template<typename Props> 899 class CacheBinModel { 900 901 typedef typename rml::internal::LargeObjectCacheImpl<Props>::CacheBin CacheBinType; 902 903 // The emulated cache bin. 904 CacheBinType cacheBinModel; 905 // The reference to real cache bin inside the large object cache. 906 CacheBinType &cacheBin; 907 908 const size_t size; 909 910 // save only current time 911 std::list<uintptr_t> objects; 912 913 void doCleanup() { 914 if ( cacheBinModel.cachedSize > Props::TooLargeFactor*cacheBinModel.usedSize ) tooLargeLOC++; 915 else tooLargeLOC = 0; 916 917 if (tooLargeLOC>3 && cacheBinModel.ageThreshold) 918 cacheBinModel.ageThreshold = (cacheBinModel.ageThreshold + cacheBinModel.meanHitRange)/2; 919 920 uintptr_t currTime = cacheCurrTime; 921 while (!objects.empty() && (intptr_t)(currTime - objects.front()) > cacheBinModel.ageThreshold) { 922 cacheBinModel.cachedSize -= size; 923 cacheBinModel.lastCleanedAge = objects.front(); 924 objects.pop_front(); 925 } 926 927 cacheBinModel.oldest = objects.empty() ? 0 : objects.front(); 928 } 929 930 public: 931 CacheBinModel(CacheBinType &_cacheBin, size_t allocSize) : cacheBin(_cacheBin), size(allocSize) { 932 cacheBinModel.oldest = cacheBin.oldest; 933 cacheBinModel.lastCleanedAge = cacheBin.lastCleanedAge; 934 cacheBinModel.ageThreshold = cacheBin.ageThreshold; 935 cacheBinModel.usedSize = cacheBin.usedSize; 936 cacheBinModel.cachedSize = cacheBin.cachedSize; 937 cacheBinModel.meanHitRange = cacheBin.meanHitRange; 938 cacheBinModel.lastGet = cacheBin.lastGet; 939 } 940 void get() { 941 uintptr_t currTime = ++cacheCurrTime; 942 943 if ( objects.empty() ) { 944 const uintptr_t sinceLastGet = currTime - cacheBinModel.lastGet; 945 if ( ( cacheBinModel.ageThreshold && sinceLastGet > Props::LongWaitFactor*cacheBinModel.ageThreshold ) || 946 ( cacheBinModel.lastCleanedAge && sinceLastGet > Props::LongWaitFactor*(cacheBinModel.lastCleanedAge - cacheBinModel.lastGet) ) ) 947 cacheBinModel.lastCleanedAge = cacheBinModel.ageThreshold = 0; 948 949 if (cacheBinModel.lastCleanedAge) 950 cacheBinModel.ageThreshold = Props::OnMissFactor*(currTime - cacheBinModel.lastCleanedAge); 951 } else { 952 uintptr_t obj_age = objects.back(); 953 objects.pop_back(); 954 if ( objects.empty() ) cacheBinModel.oldest = 0; 955 956 intptr_t hitRange = currTime - obj_age; 957 cacheBinModel.meanHitRange = cacheBinModel.meanHitRange? (cacheBinModel.meanHitRange + hitRange)/2 : hitRange; 958 959 cacheBinModel.cachedSize -= size; 960 } 961 962 cacheBinModel.usedSize += size; 963 cacheBinModel.lastGet = currTime; 964 965 if ( currTime % rml::internal::cacheCleanupFreq == 0 ) doCleanup(); 966 } 967 968 void putList( int num ) { 969 uintptr_t currTime = cacheCurrTime; 970 cacheCurrTime += num; 971 972 cacheBinModel.usedSize -= num*size; 973 974 bool cleanUpNeeded = false; 975 if ( !cacheBinModel.lastCleanedAge ) { 976 cacheBinModel.lastCleanedAge = ++currTime; 977 cleanUpNeeded |= currTime % rml::internal::cacheCleanupFreq == 0; 978 num--; 979 } 980 981 for ( int i=1; i<=num; ++i ) { 982 currTime+=1; 983 cleanUpNeeded |= currTime % rml::internal::cacheCleanupFreq == 0; 984 if ( objects.empty() ) 985 cacheBinModel.oldest = currTime; 986 objects.push_back(currTime); 987 } 988 989 cacheBinModel.cachedSize += num*size; 990 991 if ( cleanUpNeeded ) doCleanup(); 992 } 993 994 void check() { 995 REQUIRE(cacheBinModel.oldest == cacheBin.oldest); 996 REQUIRE(cacheBinModel.lastCleanedAge == cacheBin.lastCleanedAge); 997 REQUIRE(cacheBinModel.ageThreshold == cacheBin.ageThreshold); 998 REQUIRE(cacheBinModel.usedSize == cacheBin.usedSize); 999 REQUIRE(cacheBinModel.cachedSize == cacheBin.cachedSize); 1000 REQUIRE(cacheBinModel.meanHitRange == cacheBin.meanHitRange); 1001 REQUIRE(cacheBinModel.lastGet == cacheBin.lastGet); 1002 } 1003 1004 static uintptr_t cacheCurrTime; 1005 static intptr_t tooLargeLOC; 1006 }; 1007 1008 template<typename Props> uintptr_t CacheBinModel<Props>::cacheCurrTime; 1009 template<typename Props> intptr_t CacheBinModel<Props>::tooLargeLOC; 1010 1011 template <typename Scenario> 1012 void LOCModelTester() { 1013 defaultMemPool->extMemPool.loc.cleanAll(); 1014 defaultMemPool->extMemPool.loc.reset(); 1015 1016 const size_t size = 16 * 1024; 1017 const size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr); 1018 const size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment); 1019 const int binIdx = defaultMemPool->extMemPool.loc.largeCache.sizeToIdx( allocationSize ); 1020 1021 CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps>::cacheCurrTime = defaultMemPool->extMemPool.loc.cacheCurrTime; 1022 CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps>::tooLargeLOC = defaultMemPool->extMemPool.loc.largeCache.tooLargeLOC; 1023 CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps> cacheBinModel(defaultMemPool->extMemPool.loc.largeCache.bin[binIdx], allocationSize); 1024 1025 Scenario scen; 1026 for (rml::internal::LargeMemoryBlock *lmb = scen.next(); (intptr_t)lmb != (intptr_t)-1; lmb = scen.next()) { 1027 if ( lmb ) { 1028 int num=1; 1029 for (rml::internal::LargeMemoryBlock *curr = lmb; curr->next; curr=curr->next) num+=1; 1030 defaultMemPool->extMemPool.freeLargeObject(lmb); 1031 cacheBinModel.putList(num); 1032 } else { 1033 scen.saveLmb(defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize)); 1034 cacheBinModel.get(); 1035 } 1036 1037 cacheBinModel.check(); 1038 } 1039 } 1040 1041 class TestBootstrap { 1042 bool allocating; 1043 std::vector<rml::internal::LargeMemoryBlock*> lmbArray; 1044 public: 1045 TestBootstrap() : allocating(true) {} 1046 1047 rml::internal::LargeMemoryBlock* next() { 1048 if ( allocating ) 1049 return NULL; 1050 if ( !lmbArray.empty() ) { 1051 rml::internal::LargeMemoryBlock *ret = lmbArray.back(); 1052 lmbArray.pop_back(); 1053 return ret; 1054 } 1055 return (rml::internal::LargeMemoryBlock*)-1; 1056 } 1057 1058 void saveLmb( rml::internal::LargeMemoryBlock *lmb ) { 1059 lmb->next = NULL; 1060 lmbArray.push_back(lmb); 1061 if ( lmbArray.size() == 1000 ) allocating = false; 1062 } 1063 }; 1064 1065 class TestRandom { 1066 std::vector<rml::internal::LargeMemoryBlock*> lmbArray; 1067 int numOps; 1068 public: 1069 TestRandom() : numOps(100000) { 1070 srand(1234); 1071 } 1072 1073 rml::internal::LargeMemoryBlock* next() { 1074 if ( numOps-- ) { 1075 if ( lmbArray.empty() || rand() / (RAND_MAX>>1) == 0 ) 1076 return NULL; 1077 size_t ind = rand()%lmbArray.size(); 1078 if ( ind != lmbArray.size()-1 ) std::swap(lmbArray[ind],lmbArray[lmbArray.size()-1]); 1079 rml::internal::LargeMemoryBlock *lmb = lmbArray.back(); 1080 lmbArray.pop_back(); 1081 return lmb; 1082 } 1083 return (rml::internal::LargeMemoryBlock*)-1; 1084 } 1085 1086 void saveLmb( rml::internal::LargeMemoryBlock *lmb ) { 1087 lmb->next = NULL; 1088 lmbArray.push_back(lmb); 1089 } 1090 }; 1091 1092 class TestCollapsingMallocFree : public SimpleBarrier { 1093 public: 1094 static const int NUM_ALLOCS = 100000; 1095 const int num_threads; 1096 1097 TestCollapsingMallocFree( int _num_threads ) : num_threads(_num_threads) { 1098 initBarrier( num_threads ); 1099 } 1100 1101 void operator() ( int ) const { 1102 const size_t size = 16 * 1024; 1103 const size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr); 1104 const size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment); 1105 1106 barrier.wait(); 1107 for ( int i=0; i<NUM_ALLOCS; ++i ) { 1108 defaultMemPool->extMemPool.freeLargeObject( 1109 defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize) ); 1110 } 1111 } 1112 1113 void check() { 1114 REQUIRE( tbbmalloc_whitebox::locGetProcessed == tbbmalloc_whitebox::locPutProcessed); 1115 REQUIRE_MESSAGE( tbbmalloc_whitebox::locGetProcessed < num_threads*NUM_ALLOCS, "No one Malloc/Free pair was collapsed." ); 1116 } 1117 }; 1118 1119 class TestCollapsingBootstrap : public SimpleBarrier { 1120 class CheckNumAllocs { 1121 const int num_threads; 1122 public: 1123 CheckNumAllocs( int _num_threads ) : num_threads(_num_threads) {} 1124 void operator()() const { 1125 REQUIRE( tbbmalloc_whitebox::locGetProcessed == num_threads*NUM_ALLOCS ); 1126 REQUIRE( tbbmalloc_whitebox::locPutProcessed == 0 ); 1127 } 1128 }; 1129 public: 1130 static const int NUM_ALLOCS = 1000; 1131 const int num_threads; 1132 1133 TestCollapsingBootstrap( int _num_threads ) : num_threads(_num_threads) { 1134 initBarrier( num_threads ); 1135 } 1136 1137 void operator() ( int ) const { 1138 const size_t size = 16 * 1024; 1139 size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr); 1140 size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment); 1141 1142 barrier.wait(); 1143 rml::internal::LargeMemoryBlock *lmbArray[NUM_ALLOCS]; 1144 for ( int i=0; i<NUM_ALLOCS; ++i ) 1145 lmbArray[i] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize); 1146 1147 barrier.wait(CheckNumAllocs(num_threads)); 1148 for ( int i=0; i<NUM_ALLOCS; ++i ) 1149 defaultMemPool->extMemPool.freeLargeObject( lmbArray[i] ); 1150 } 1151 1152 void check() { 1153 REQUIRE( tbbmalloc_whitebox::locGetProcessed == tbbmalloc_whitebox::locPutProcessed ); 1154 REQUIRE( tbbmalloc_whitebox::locGetProcessed == num_threads*NUM_ALLOCS ); 1155 } 1156 }; 1157 1158 template <typename Scenario> 1159 void LOCCollapsingTester( int num_threads ) { 1160 tbbmalloc_whitebox::locGetProcessed = 0; 1161 tbbmalloc_whitebox::locPutProcessed = 0; 1162 defaultMemPool->extMemPool.loc.cleanAll(); 1163 defaultMemPool->extMemPool.loc.reset(); 1164 1165 Scenario scen(num_threads); 1166 utils::NativeParallelFor(num_threads, scen); 1167 1168 scen.check(); 1169 } 1170 1171 void TestLOC() { 1172 LOCModelTester<TestBootstrap>(); 1173 LOCModelTester<TestRandom>(); 1174 1175 const int num_threads = 16; 1176 LOCCollapsingTester<TestCollapsingBootstrap>( num_threads ); 1177 if ( num_threads > 1 ) { 1178 INFO( "num_threads = " << num_threads ); 1179 LOCCollapsingTester<TestCollapsingMallocFree>( num_threads ); 1180 } else { 1181 REPORT( "Warning: concurrency is too low for TestMallocFreeCollapsing ( num_threads = %d )\n", num_threads ); 1182 } 1183 } 1184 /*---------------------------------------------------------------------------*/ 1185 1186 void *findCacheLine(void *p) { 1187 return (void*)alignDown((uintptr_t)p, estimatedCacheLineSize); 1188 } 1189 1190 // test that internals of Block are at expected cache lines 1191 void TestSlabAlignment() { 1192 const size_t min_sz = 8; 1193 const int space = 2*16*1024; // fill at least 2 slabs 1194 void *pointers[space / min_sz]; // the worst case is min_sz byte object 1195 1196 for (size_t sz = min_sz; sz <= 64; sz *= 2) { 1197 for (size_t i = 0; i < space/sz; i++) { 1198 pointers[i] = scalable_malloc(sz); 1199 Block *block = (Block *)alignDown(pointers[i], slabSize); 1200 REQUIRE_MESSAGE(findCacheLine(&block->isFull) != findCacheLine(pointers[i]), 1201 "A user object must not share a cache line with slab control structures."); 1202 REQUIRE_MESSAGE(findCacheLine(&block->next) != findCacheLine(&block->nextPrivatizable), 1203 "GlobalBlockFields and LocalBlockFields must be on different cache lines."); 1204 } 1205 for (size_t i = 0; i < space/sz; i++) 1206 scalable_free(pointers[i]); 1207 } 1208 } 1209 1210 #include "common/memory_usage.h" 1211 1212 // TODO: Consider adding Huge Pages support on macOS (special mmap flag). 1213 // Transparent Huge pages support could be enabled by different system parsing mechanism, 1214 // because there is no /proc/meminfo on macOS 1215 #if __linux__ 1216 void TestTHP() { 1217 // Get backend from default memory pool 1218 rml::internal::Backend *backend = &(defaultMemPool->extMemPool.backend); 1219 1220 // Configure malloc to use huge pages 1221 scalable_allocation_mode(USE_HUGE_PAGES, 1); 1222 REQUIRE_MESSAGE(hugePages.isEnabled, "Huge pages should be enabled via scalable_allocation_mode"); 1223 1224 const int HUGE_PAGE_SIZE = 2 * 1024 * 1024; 1225 1226 // allocCount transparent huge pages should be allocated 1227 const int allocCount = 10; 1228 1229 // Allocate huge page aligned memory regions to track system 1230 // counters for transparent huge pages 1231 void* allocPtrs[allocCount]; 1232 1233 // Wait for the system to update process memory info files after other tests 1234 utils::Sleep(4000); 1235 1236 // Parse system info regarding current THP status 1237 size_t currentSystemTHPCount = utils::getSystemTHPCount(); 1238 size_t currentSystemTHPAllocatedSize = utils::getSystemTHPAllocatedSize(); 1239 1240 for (int i = 0; i < allocCount; i++) { 1241 // Allocation size have to be aligned on page size 1242 size_t allocSize = HUGE_PAGE_SIZE - (i * 1000); 1243 1244 // Map memory 1245 allocPtrs[i] = backend->allocRawMem(allocSize); 1246 1247 REQUIRE_MESSAGE(allocPtrs[i], "Allocation not succeeded."); 1248 REQUIRE_MESSAGE(allocSize == HUGE_PAGE_SIZE, 1249 "Allocation size have to be aligned on Huge Page size internally."); 1250 1251 // First touch policy - no real pages allocated by OS without accessing the region 1252 memset(allocPtrs[i], 1, allocSize); 1253 1254 REQUIRE_MESSAGE(isAligned(allocPtrs[i], HUGE_PAGE_SIZE), 1255 "The pointer returned by scalable_malloc is not aligned on huge page size."); 1256 } 1257 1258 // Wait for the system to update process memory info files after allocations 1259 utils::Sleep(4000); 1260 1261 // Generally, kernel tries to allocate transparent huge pages, but sometimes it cannot do this 1262 // (tested on SLES 11/12), so consider this system info checks as a remark. 1263 // Also, some systems can allocate more memory then needed in background (tested on Ubuntu 14.04) 1264 size_t newSystemTHPCount = utils::getSystemTHPCount(); 1265 size_t newSystemTHPAllocatedSize = utils::getSystemTHPAllocatedSize(); 1266 if ((newSystemTHPCount - currentSystemTHPCount) < allocCount 1267 && (newSystemTHPAllocatedSize - currentSystemTHPAllocatedSize) / (2 * 1024) < allocCount) { 1268 REPORT( "Warning: the system didn't allocate needed amount of THPs.\n" ); 1269 } 1270 1271 // Test memory unmap 1272 for (int i = 0; i < allocCount; i++) { 1273 REQUIRE_MESSAGE(backend->freeRawMem(allocPtrs[i], HUGE_PAGE_SIZE), 1274 "Something went wrong during raw memory free"); 1275 } 1276 } 1277 #endif // __linux__ 1278 1279 inline size_t getStabilizedMemUsage() { 1280 for (int i = 0; i < 3; i++) utils::GetMemoryUsage(); 1281 return utils::GetMemoryUsage(); 1282 } 1283 1284 inline void* reallocAndRetrieve(void* origPtr, size_t reallocSize, size_t& origBlockSize, size_t& reallocBlockSize) { 1285 rml::internal::LargeMemoryBlock* origLmb = ((rml::internal::LargeObjectHdr *)origPtr - 1)->memoryBlock; 1286 origBlockSize = origLmb->unalignedSize; 1287 1288 void* reallocPtr = rml::internal::reallocAligned(defaultMemPool, origPtr, reallocSize, 0); 1289 1290 // Retrieved reallocated block information 1291 rml::internal::LargeMemoryBlock* reallocLmb = ((rml::internal::LargeObjectHdr *)reallocPtr - 1)->memoryBlock; 1292 reallocBlockSize = reallocLmb->unalignedSize; 1293 1294 return reallocPtr; 1295 } 1296 1297 void TestReallocDecreasing() { 1298 1299 /* Testing that actual reallocation happens for large objects that do not fit the backend cache 1300 but decrease in size by a factor of >= 2. */ 1301 1302 size_t startSize = 100 * 1024 * 1024; 1303 size_t maxBinnedSize = defaultMemPool->extMemPool.backend.getMaxBinnedSize(); 1304 void* origPtr = scalable_malloc(startSize); 1305 void* reallocPtr = NULL; 1306 1307 // Realloc on 1MB less size 1308 size_t origBlockSize = 42; 1309 size_t reallocBlockSize = 43; 1310 reallocPtr = reallocAndRetrieve(origPtr, startSize - 1 * 1024 * 1024, origBlockSize, reallocBlockSize); 1311 REQUIRE_MESSAGE(origBlockSize == reallocBlockSize, "Reallocated block size shouldn't change"); 1312 REQUIRE_MESSAGE(reallocPtr == origPtr, "Original pointer shouldn't change"); 1313 1314 // Repeated decreasing reallocation while max cache bin size reached 1315 size_t reallocSize = (startSize / 2) - 1000; // exact realloc 1316 while(reallocSize > maxBinnedSize) { 1317 1318 // Prevent huge/large objects caching 1319 defaultMemPool->extMemPool.loc.cleanAll(); 1320 // Prevent local large object caching 1321 TLSData *tls = defaultMemPool->getTLS(/*create=*/false); 1322 tls->lloc.externalCleanup(&defaultMemPool->extMemPool); 1323 1324 size_t sysMemUsageBefore = getStabilizedMemUsage(); 1325 size_t totalMemSizeBefore = defaultMemPool->extMemPool.backend.getTotalMemSize(); 1326 1327 reallocPtr = reallocAndRetrieve(origPtr, reallocSize, origBlockSize, reallocBlockSize); 1328 1329 REQUIRE_MESSAGE(origBlockSize > reallocBlockSize, "Reallocated block size should descrease."); 1330 1331 size_t sysMemUsageAfter = getStabilizedMemUsage(); 1332 size_t totalMemSizeAfter = defaultMemPool->extMemPool.backend.getTotalMemSize(); 1333 1334 // Prevent false checking when backend caching occurred or could not read system memory usage info 1335 if (totalMemSizeBefore > totalMemSizeAfter && sysMemUsageAfter != 0 && sysMemUsageBefore != 0) { 1336 REQUIRE_MESSAGE(sysMemUsageBefore > sysMemUsageAfter, "Memory were not released"); 1337 } 1338 1339 origPtr = reallocPtr; 1340 reallocSize = (reallocSize / 2) - 1000; // exact realloc 1341 } 1342 scalable_free(reallocPtr); 1343 1344 /* TODO: Decreasing reallocation of large objects that fit backend cache */ 1345 /* TODO: Small objects decreasing reallocation test */ 1346 } 1347 #if !__TBB_WIN8UI_SUPPORT && defined(_WIN32) 1348 1349 #include "../../src/tbbmalloc_proxy/function_replacement.cpp" 1350 #include <string> 1351 namespace FunctionReplacement { 1352 FunctionInfo funcInfo = { "funcname","dllname" }; 1353 char **func_replacement_log; 1354 int status; 1355 1356 void LogCleanup() { 1357 // Free all allocated memory 1358 for (unsigned i = 0; i < Log::record_number; i++){ 1359 HeapFree(GetProcessHeap(), 0, Log::records[i]); 1360 } 1361 for (unsigned i = 0; i < Log::RECORDS_COUNT + 1; i++){ 1362 Log::records[i] = NULL; 1363 } 1364 Log::replacement_status = true; 1365 Log::record_number = 0; 1366 } 1367 1368 void TestEmptyLog() { 1369 status = TBB_malloc_replacement_log(&func_replacement_log); 1370 1371 REQUIRE_MESSAGE(status == -1, "Status is true, but log is empty"); 1372 REQUIRE_MESSAGE(*func_replacement_log == nullptr, "Log must be empty"); 1373 } 1374 1375 void TestLogOverload() { 1376 for (int i = 0; i < 1000; i++) 1377 Log::record(funcInfo, "opcode string", true); 1378 1379 status = TBB_malloc_replacement_log(&func_replacement_log); 1380 // Find last record 1381 for (; *(func_replacement_log + 1) != 0; func_replacement_log++) {} 1382 1383 std::string last_line(*func_replacement_log); 1384 REQUIRE_MESSAGE(status == 0, "False status, but all functions found"); 1385 REQUIRE_MESSAGE(last_line.compare("Log was truncated.") == 0, "Log overflow was not handled"); 1386 1387 // Change status 1388 Log::record(funcInfo, "opcode string", false); 1389 status = TBB_malloc_replacement_log(NULL); 1390 REQUIRE_MESSAGE(status == -1, "Status is true, but we have false search case"); 1391 1392 LogCleanup(); 1393 } 1394 1395 void TestFalseSearchCase() { 1396 Log::record(funcInfo, "opcode string", false); 1397 std::string expected_line = "Fail: "+ std::string(funcInfo.funcName) + " (" + 1398 std::string(funcInfo.dllName) + "), byte pattern: <opcode string>"; 1399 1400 status = TBB_malloc_replacement_log(&func_replacement_log); 1401 1402 REQUIRE_MESSAGE(expected_line.compare(*func_replacement_log) == 0, "Wrong last string contnent"); 1403 REQUIRE_MESSAGE(status == -1, "Status is true, but we have false search case"); 1404 LogCleanup(); 1405 } 1406 1407 void TestWrongFunctionInDll(){ 1408 HMODULE ucrtbase_handle = GetModuleHandle("ucrtbase.dll"); 1409 if (ucrtbase_handle) { 1410 IsPrologueKnown("ucrtbase.dll", "fake_function", NULL, ucrtbase_handle); 1411 std::string expected_line = "Fail: fake_function (ucrtbase.dll), byte pattern: <unknown>"; 1412 1413 status = TBB_malloc_replacement_log(&func_replacement_log); 1414 1415 REQUIRE_MESSAGE(expected_line.compare(*func_replacement_log) == 0, "Wrong last string contnent"); 1416 REQUIRE_MESSAGE(status == -1, "Status is true, but we have false search case"); 1417 LogCleanup(); 1418 } else { 1419 INFO("Cannot found ucrtbase.dll on system, test skipped!\n"); 1420 } 1421 } 1422 } 1423 1424 void TesFunctionReplacementLog() { 1425 using namespace FunctionReplacement; 1426 // Do not reorder the test cases 1427 TestEmptyLog(); 1428 TestLogOverload(); 1429 TestFalseSearchCase(); 1430 TestWrongFunctionInDll(); 1431 } 1432 1433 #endif /*!__TBB_WIN8UI_SUPPORT && defined(_WIN32)*/ 1434 1435 #include <cmath> // pow function 1436 1437 // Huge objects cache: Size = MinSize * (2 ^ (Index / StepFactor) formula gives value for the bin size, 1438 // but it is not matched with our sizeToIdx approximation algorithm, where step sizes between major 1439 // (power of 2) sizes are equal. Used internally for the test. Static cast to avoid warnings. 1440 inline size_t hocIdxToSizeFormula(int idx) { 1441 return static_cast<size_t>(float(rml::internal::LargeObjectCache::maxLargeSize) * 1442 pow(2, float(idx) / float(rml::internal::LargeObjectCache::HugeBSProps::StepFactor))); 1443 } 1444 // Large objects cache arithmetic progression 1445 inline size_t locIdxToSizeFormula(int idx) { 1446 return rml::internal::LargeObjectCache::LargeBSProps::MinSize + 1447 (idx * rml::internal::LargeObjectCache::LargeBSProps::CacheStep); 1448 } 1449 1450 template <typename CacheType> 1451 void TestLOCacheBinsConverterImpl(int idx, size_t checkingSize) { 1452 size_t alignedSize = CacheType::alignToBin(checkingSize); 1453 REQUIRE_MESSAGE(alignedSize >= checkingSize, "Size is not correctly aligned"); 1454 int calcIdx = CacheType::sizeToIdx(alignedSize); 1455 REQUIRE_MESSAGE(calcIdx == idx, "Index from size calculated not correctly"); 1456 } 1457 1458 void TestLOCacheBinsConverter(){ 1459 typedef rml::internal::LargeObjectCache::LargeCacheType LargeCacheType; 1460 typedef rml::internal::LargeObjectCache::HugeCacheType HugeCacheType; 1461 1462 size_t checkingSize = 0; 1463 for (int idx = 0; idx < LargeCacheType::numBins; idx++) { 1464 checkingSize = locIdxToSizeFormula(idx); 1465 TestLOCacheBinsConverterImpl<LargeCacheType>(idx, checkingSize); 1466 } 1467 for (int idx = 0; idx < HugeCacheType::numBins; idx++) { 1468 checkingSize = hocIdxToSizeFormula(idx); 1469 TestLOCacheBinsConverterImpl<HugeCacheType>(idx, checkingSize); 1470 } 1471 } 1472 1473 struct HOThresholdTester { 1474 LargeObjectCache* loc; 1475 size_t hugeSize; 1476 1477 static const size_t sieveSize = LargeObjectCache::defaultMaxHugeSize; 1478 // Sieve starts from 64MB (24-th cache bin), enough to check 4 bins radius range 1479 // for decent memory consumption (especially for 32-bit arch) 1480 static const int MIN_BIN_IDX = 21; 1481 static const int MAX_BIN_IDX = 27; 1482 1483 enum CleanupType { 1484 NO_CLEANUP, 1485 REGULAR_CLEANUP, 1486 HARD_CLEANUP 1487 }; 1488 1489 void populateCache() { 1490 LargeMemoryBlock* loArray[MAX_BIN_IDX - MIN_BIN_IDX]; 1491 // To avoid backend::softCacheCleanup consequences (cleanup by isLOCToolarge), 1492 // firstly allocate all objects and then cache them at once. 1493 // Morevover, just because first cache item will still be dropped from cache because of the lack of history, 1494 // redo allocation 2 times. 1495 for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) { 1496 size_t allocationSize = alignedSizeFromIdx(idx); 1497 int localIdx = idx - MIN_BIN_IDX; 1498 loArray[localIdx] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize); 1499 REQUIRE_MESSAGE(loArray[localIdx], "Large object was not allocated."); 1500 loc->put(loArray[localIdx]); 1501 loArray[localIdx] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize); 1502 } 1503 for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) { 1504 loc->put(loArray[idx - MIN_BIN_IDX]); 1505 } 1506 } 1507 void clean(bool all) { 1508 if (all) { 1509 // Should avoid any threshold and clean all bins 1510 loc->cleanAll(); 1511 } else { 1512 // Regular cleanup should do nothing for bins above threshold. Decreasing option used 1513 // for the test to be sure that all objects below defaultMaxHugeSize (sieveSize) were cleaned 1514 loc->regularCleanup(); 1515 loc->decreasingCleanup(); 1516 } 1517 } 1518 void check(CleanupType type) { 1519 for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) { 1520 size_t objectSize = alignedSizeFromIdx(idx); 1521 // Cache object below sieve threshold and above huge object threshold should be cached 1522 // (other should be sieved). Unless all cache is dropped. Regular cleanup drops object only below sieve size. 1523 if (type == NO_CLEANUP && sizeInCacheRange(objectSize)) { 1524 REQUIRE_MESSAGE(objectInCacheBin(idx, objectSize), "Object was released from cache, it shouldn't."); 1525 } else if (type == REGULAR_CLEANUP && (objectSize >= hugeSize)) { 1526 REQUIRE_MESSAGE(objectInCacheBin(idx, objectSize), "Object was released from cache, it shouldn't."); 1527 } else { // HARD_CLEANUP 1528 REQUIRE_MESSAGE(cacheBinEmpty(idx), "Object is still cached."); 1529 } 1530 } 1531 } 1532 1533 private: 1534 bool cacheBinEmpty(int idx) { 1535 return (loc->hugeCache.bin[idx].cachedSize == 0 && loc->hugeCache.bin[idx].get() == NULL); 1536 } 1537 bool objectInCacheBin(int idx, size_t size) { 1538 return (loc->hugeCache.bin[idx].cachedSize != 0 && loc->hugeCache.bin[idx].cachedSize % size == 0); 1539 } 1540 bool sizeInCacheRange(size_t size) { 1541 return size <= sieveSize || size >= hugeSize; 1542 } 1543 size_t alignedSizeFromIdx(int idx) { 1544 return rml::internal::LargeObjectCache::alignToBin(hocIdxToSizeFormula(idx)); 1545 } 1546 }; 1547 1548 // TBBMALLOC_SET_HUGE_OBJECT_THRESHOLD value should be set before the test, 1549 // through scalable API or env variable 1550 void TestHugeSizeThresholdImpl(LargeObjectCache* loc, size_t hugeSize, bool fullTesting) { 1551 HOThresholdTester test = {loc, hugeSize}; 1552 test.populateCache(); 1553 // Check the default sieve value 1554 test.check(HOThresholdTester::NO_CLEANUP); 1555 1556 if(fullTesting) { 1557 // Check that objects above threshold stay in cache after regular cleanup 1558 test.clean(/*all*/false); 1559 test.check(HOThresholdTester::REGULAR_CLEANUP); 1560 } 1561 // Check that all objects dropped from cache after hard cleanup (ignore huge obects threshold) 1562 test.clean(/*all*/true); 1563 test.check(HOThresholdTester::HARD_CLEANUP); 1564 // Restore previous settings 1565 loc->setHugeSizeThreshold(LargeObjectCache::maxHugeSize); 1566 loc->reset(); 1567 } 1568 1569 /* 1570 * Test for default huge size and behaviour when huge object settings defined 1571 */ 1572 void TestHugeSizeThreshold() { 1573 // Clean up if something was allocated before the test and reset cache state 1574 scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, 0); 1575 LargeObjectCache* loc = &defaultMemPool->extMemPool.loc; 1576 // Restore default settings just in case 1577 loc->setHugeSizeThreshold(LargeObjectCache::maxHugeSize); 1578 loc->reset(); 1579 // Firstly check default huge size value (with max huge object threshold). 1580 // Everything that more then this value should be released to OS without caching. 1581 TestHugeSizeThresholdImpl(loc, loc->hugeSizeThreshold, false); 1582 // Then set huge object threshold. 1583 // All objects with sizes after threshold will be released only after the hard cleanup. 1584 #if !__TBB_WIN8UI_SUPPORT 1585 // Unit testing for environment variable 1586 utils::SetEnv("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD","67108864"); 1587 // Large object cache reads threshold environment during initialization. 1588 // Reset the value before the test. 1589 loc->hugeSizeThreshold = 0; 1590 // Reset logical time to prevent regular cleanup 1591 loc->cacheCurrTime = 0; 1592 loc->init(&defaultMemPool->extMemPool); 1593 TestHugeSizeThresholdImpl(loc, 64 * MByte, true); 1594 #endif 1595 // Unit testing for scalable_allocation_command 1596 scalable_allocation_mode(TBBMALLOC_SET_HUGE_SIZE_THRESHOLD, 56 * MByte); 1597 TestHugeSizeThresholdImpl(loc, 56 * MByte, true); 1598 } 1599 1600 //! \brief \ref error_guessing 1601 TEST_CASE("Main test case") { 1602 scalable_allocation_mode(USE_HUGE_PAGES, 0); 1603 #if !__TBB_WIN8UI_SUPPORT 1604 utils::SetEnv("TBB_MALLOC_USE_HUGE_PAGES","yes"); 1605 #endif 1606 checkNoHugePages(); 1607 // backreference requires that initialization was done 1608 if(!isMallocInitialized()) doInitialization(); 1609 checkNoHugePages(); 1610 // to succeed, leak detection must be the 1st memory-intensive test 1611 TestBackRef(); 1612 TestCleanAllBuffers<4*1024>(); 1613 TestCleanAllBuffers<16*1024>(); 1614 TestCleanThreadBuffers(); 1615 TestPools(); 1616 TestBackend(); 1617 1618 #if MALLOC_CHECK_RECURSION 1619 for( int p=MaxThread; p>=MinThread; --p ) { 1620 TestStartupAlloc::initBarrier( p ); 1621 utils::NativeParallelFor( p, TestStartupAlloc() ); 1622 REQUIRE_MESSAGE(!firstStartupBlock, "Startup heap memory leak detected"); 1623 } 1624 #endif 1625 TestLargeObjectCache(); 1626 TestObjectRecognition(); 1627 TestBitMask(); 1628 TestHeapLimit(); 1629 TestLOC(); 1630 TestSlabAlignment(); 1631 } 1632 1633 //! \brief \ref error_guessing 1634 TEST_CASE("Decreasing reallocation") { 1635 if (!isMallocInitialized()) doInitialization(); 1636 TestReallocDecreasing(); 1637 } 1638 1639 //! \brief \ref error_guessing 1640 TEST_CASE("Large object cache bins converter") { 1641 if (!isMallocInitialized()) doInitialization(); 1642 TestLOCacheBinsConverter(); 1643 } 1644 1645 //! \brief \ref error_guessing 1646 TEST_CASE("Huge size threshold settings") { 1647 if (!isMallocInitialized()) doInitialization(); 1648 TestHugeSizeThreshold(); 1649 } 1650 1651 #if __linux__ 1652 //! \brief \ref error_guessing 1653 TEST_CASE("Transparent huge pages") { 1654 if (utils::isTHPEnabledOnMachine()) { 1655 if (!isMallocInitialized()) doInitialization(); 1656 TestTHP(); 1657 } else { 1658 INFO("Transparent Huge Pages is not supported on the system - skipped the test\n"); 1659 } 1660 } 1661 #endif 1662 1663 #if !__TBB_WIN8UI_SUPPORT && defined(_WIN32) 1664 //! \brief \ref error_guessing 1665 TEST_CASE("Function replacement log") { 1666 TesFunctionReplacementLog(); 1667 } 1668 #endif 1669