1 /* 2 Copyright (c) 2005-2022 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 //! \file test_malloc_whitebox.cpp 18 //! \brief Test for [memory_allocation] functionality 19 20 #if _WIN32 || _WIN64 21 #define _CRT_SECURE_NO_WARNINGS 22 #endif 23 24 // To prevent loading dynamic TBBmalloc at startup, that is not needed for the whitebox test 25 #define __TBB_SOURCE_DIRECTLY_INCLUDED 1 26 // Call thread shutdown API for native threads join 27 #define HARNESS_TBBMALLOC_THREAD_SHUTDOWN 1 28 29 // According to C99 standard INTPTR_MIN defined for C++ if __STDC_LIMIT_MACROS pre-defined 30 #define __STDC_LIMIT_MACROS 1 31 32 // To not depends on ITT support stuff 33 #ifdef DO_ITT_NOTIFY 34 #undef DO_ITT_NOTIFY 35 #endif 36 37 #include "common/test.h" 38 39 #include "common/utils.h" 40 #include "common/utils_assert.h" 41 #include "common/utils_env.h" 42 #include "common/spin_barrier.h" 43 44 #include "oneapi/tbb/detail/_machine.h" 45 46 #define __TBB_MALLOC_WHITEBOX_TEST 1 // to get access to allocator internals 47 // help trigger rare race condition 48 #define WhiteboxTestingYield() (tbb::detail::yield(), tbb::detail::yield(), tbb::detail::yield(), tbb::detail::yield()) 49 50 #if __INTEL_COMPILER && __TBB_MIC_OFFLOAD 51 // 2571 is variable has not been declared with compatible "target" attribute 52 // 3218 is class/struct may fail when offloaded because this field is misaligned 53 // or contains data that is misaligned 54 #pragma warning(push) 55 #pragma warning(disable:2571 3218) 56 #endif 57 #define protected public 58 #define private public 59 #include "../../src/tbbmalloc/frontend.cpp" 60 #undef protected 61 #undef private 62 #if __INTEL_COMPILER && __TBB_MIC_OFFLOAD 63 #pragma warning(pop) 64 #endif 65 #include "../../src/tbbmalloc/backend.cpp" 66 #include "../../src/tbbmalloc/backref.cpp" 67 68 namespace tbbmalloc_whitebox { 69 std::atomic<size_t> locGetProcessed{}; 70 std::atomic<size_t> locPutProcessed{}; 71 } 72 #include "../../src/tbbmalloc/large_objects.cpp" 73 #include "../../src/tbbmalloc/tbbmalloc.cpp" 74 75 const int LARGE_MEM_SIZES_NUM = 10; 76 static const int MinThread = 1; 77 static const int MaxThread = 4; 78 79 class AllocInfo { 80 int *p; 81 int val; 82 int size; 83 public: 84 AllocInfo() : p(nullptr), val(0), size(0) {} 85 explicit AllocInfo(int sz) : p((int*)scalable_malloc(sz*sizeof(int))), 86 val(rand()), size(sz) { 87 REQUIRE(p); 88 for (int k=0; k<size; k++) 89 p[k] = val; 90 } 91 void check() const { 92 for (int k=0; k<size; k++) 93 ASSERT(p[k] == val, nullptr); 94 } 95 void clear() { 96 scalable_free(p); 97 } 98 }; 99 100 // Test struct to call ProcessShutdown after all tests 101 struct ShutdownTest { 102 ~ShutdownTest() { 103 #if _WIN32 || _WIN64 104 __TBB_mallocProcessShutdownNotification(true); 105 #else 106 __TBB_mallocProcessShutdownNotification(false); 107 #endif 108 } 109 }; 110 111 static ShutdownTest shutdownTest; 112 113 class SimpleBarrier: utils::NoAssign { 114 protected: 115 static utils::SpinBarrier barrier; 116 public: 117 static void initBarrier(unsigned thrds) { barrier.initialize(thrds); } 118 }; 119 120 utils::SpinBarrier SimpleBarrier::barrier; 121 122 class TestLargeObjCache: public SimpleBarrier { 123 public: 124 static int largeMemSizes[LARGE_MEM_SIZES_NUM]; 125 126 TestLargeObjCache( ) {} 127 128 void operator()( int /*mynum*/ ) const { 129 AllocInfo allocs[LARGE_MEM_SIZES_NUM]; 130 131 // push to maximal cache limit 132 for (int i=0; i<2; i++) { 133 const int sizes[] = { MByte/sizeof(int), 134 (MByte-2*LargeObjectCache::LargeBSProps::CacheStep)/sizeof(int) }; 135 for (int q=0; q<2; q++) { 136 size_t curr = 0; 137 for (int j=0; j<LARGE_MEM_SIZES_NUM; j++, curr++) 138 new (allocs+curr) AllocInfo(sizes[q]); 139 140 for (size_t j=0; j<curr; j++) { 141 allocs[j].check(); 142 allocs[j].clear(); 143 } 144 } 145 } 146 147 barrier.wait(); 148 149 // check caching correctness 150 for (int i=0; i<1000; i++) { 151 size_t curr = 0; 152 for (int j=0; j<LARGE_MEM_SIZES_NUM-1; j++, curr++) 153 new (allocs+curr) AllocInfo(largeMemSizes[j]); 154 155 new (allocs+curr) 156 AllocInfo((int)(4*minLargeObjectSize + 157 2*minLargeObjectSize*(1.*rand()/RAND_MAX))); 158 curr++; 159 160 for (size_t j=0; j<curr; j++) { 161 allocs[j].check(); 162 allocs[j].clear(); 163 } 164 } 165 } 166 }; 167 168 int TestLargeObjCache::largeMemSizes[LARGE_MEM_SIZES_NUM]; 169 170 void TestLargeObjectCache() 171 { 172 for (int i=0; i<LARGE_MEM_SIZES_NUM; i++) 173 TestLargeObjCache::largeMemSizes[i] = 174 (int)(minLargeObjectSize + 2*minLargeObjectSize*(1.*rand()/RAND_MAX)); 175 176 for( int p=MaxThread; p>=MinThread; --p ) { 177 TestLargeObjCache::initBarrier( p ); 178 utils::NativeParallelFor( p, TestLargeObjCache() ); 179 } 180 } 181 182 #if MALLOC_CHECK_RECURSION 183 184 class TestStartupAlloc: public SimpleBarrier { 185 struct TestBlock { 186 void *ptr; 187 size_t sz; 188 }; 189 static const int ITERS = 100; 190 public: 191 TestStartupAlloc() {} 192 void operator()(int) const { 193 TestBlock blocks1[ITERS], blocks2[ITERS]; 194 195 barrier.wait(); 196 197 for (int i=0; i<ITERS; i++) { 198 blocks1[i].sz = rand() % minLargeObjectSize; 199 blocks1[i].ptr = StartupBlock::allocate(blocks1[i].sz); 200 REQUIRE((blocks1[i].ptr && StartupBlock::msize(blocks1[i].ptr)>=blocks1[i].sz 201 && 0==(uintptr_t)blocks1[i].ptr % sizeof(void*))); 202 memset(blocks1[i].ptr, i, blocks1[i].sz); 203 } 204 for (int i=0; i<ITERS; i++) { 205 blocks2[i].sz = rand() % minLargeObjectSize; 206 blocks2[i].ptr = StartupBlock::allocate(blocks2[i].sz); 207 REQUIRE((blocks2[i].ptr && StartupBlock::msize(blocks2[i].ptr)>=blocks2[i].sz 208 && 0==(uintptr_t)blocks2[i].ptr % sizeof(void*))); 209 memset(blocks2[i].ptr, i, blocks2[i].sz); 210 211 for (size_t j=0; j<blocks1[i].sz; j++) 212 REQUIRE(*((char*)blocks1[i].ptr+j) == i); 213 Block *block = (Block *)alignDown(blocks1[i].ptr, slabSize); 214 ((StartupBlock *)block)->free(blocks1[i].ptr); 215 } 216 for (int i=ITERS-1; i>=0; i--) { 217 for (size_t j=0; j<blocks2[i].sz; j++) 218 REQUIRE(*((char*)blocks2[i].ptr+j) == i); 219 Block *block = (Block *)alignDown(blocks2[i].ptr, slabSize); 220 ((StartupBlock *)block)->free(blocks2[i].ptr); 221 } 222 } 223 }; 224 225 #endif /* MALLOC_CHECK_RECURSION */ 226 227 #include <deque> 228 229 template<int ITERS> 230 class BackRefWork: utils::NoAssign { 231 struct TestBlock { 232 BackRefIdx idx; 233 char data; 234 TestBlock(BackRefIdx idx_) : idx(idx_) {} 235 }; 236 public: 237 BackRefWork() {} 238 void operator()(int) const { 239 size_t cnt; 240 // it's important to not invalidate pointers to the contents of the container 241 std::deque<TestBlock> blocks; 242 243 // for ITERS==0 consume all available backrefs 244 for (cnt=0; !ITERS || cnt<ITERS; cnt++) { 245 BackRefIdx idx = BackRefIdx::newBackRef(/*largeObj=*/false); 246 if (idx.isInvalid()) 247 break; 248 blocks.push_back(TestBlock(idx)); 249 setBackRef(blocks.back().idx, &blocks.back().data); 250 } 251 for (size_t i=0; i<cnt; i++) 252 REQUIRE((Block*)&blocks[i].data == getBackRef(blocks[i].idx)); 253 for (size_t i=cnt; i>0; i--) 254 removeBackRef(blocks[i-1].idx); 255 } 256 }; 257 258 class LocalCachesHit: utils::NoAssign { 259 // set ITERS to trigger possible leak of backreferences 260 // during cleanup on cache overflow and on thread termination 261 static const int ITERS = 2*(FreeBlockPool::POOL_HIGH_MARK + 262 LocalLOC::LOC_HIGH_MARK); 263 public: 264 LocalCachesHit() {} 265 void operator()(int) const { 266 void *objsSmall[ITERS], *objsLarge[ITERS]; 267 268 for (int i=0; i<ITERS; i++) { 269 objsSmall[i] = scalable_malloc(minLargeObjectSize-1); 270 objsLarge[i] = scalable_malloc(minLargeObjectSize); 271 } 272 for (int i=0; i<ITERS; i++) { 273 scalable_free(objsSmall[i]); 274 scalable_free(objsLarge[i]); 275 } 276 } 277 }; 278 279 static size_t allocatedBackRefCount() 280 { 281 size_t cnt = 0; 282 for (int i=0; i<=backRefMain.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed); i++) 283 cnt += backRefMain.load(std::memory_order_relaxed)->backRefBl[i]->allocatedCount; 284 return cnt; 285 } 286 287 class TestInvalidBackrefs: public SimpleBarrier { 288 #if __ANDROID__ 289 // Android requires lower iters due to lack of virtual memory. 290 static const int BACKREF_GROWTH_ITERS = 50*1024; 291 #else 292 static const int BACKREF_GROWTH_ITERS = 200*1024; 293 #endif 294 295 static std::atomic<bool> backrefGrowthDone; 296 static void *ptrs[BACKREF_GROWTH_ITERS]; 297 public: 298 TestInvalidBackrefs() {} 299 void operator()(int id) const { 300 301 if (!id) { 302 backrefGrowthDone = false; 303 barrier.wait(); 304 305 for (int i=0; i<BACKREF_GROWTH_ITERS; i++) 306 ptrs[i] = scalable_malloc(minLargeObjectSize); 307 backrefGrowthDone = true; 308 for (int i=0; i<BACKREF_GROWTH_ITERS; i++) 309 scalable_free(ptrs[i]); 310 } else { 311 void *p2 = scalable_malloc(minLargeObjectSize-1); 312 char *p1 = (char*)scalable_malloc(minLargeObjectSize-1); 313 LargeObjectHdr *hdr = 314 (LargeObjectHdr*)(p1+minLargeObjectSize-1 - sizeof(LargeObjectHdr)); 315 hdr->backRefIdx.main = 7; 316 hdr->backRefIdx.largeObj = 1; 317 hdr->backRefIdx.offset = 2000; 318 319 barrier.wait(); 320 321 int yield_count = 0; 322 while (!backrefGrowthDone) { 323 scalable_free(p2); 324 p2 = scalable_malloc(minLargeObjectSize-1); 325 if (yield_count++ == 100) { 326 yield_count = 0; 327 std::this_thread::yield(); 328 } 329 } 330 scalable_free(p1); 331 scalable_free(p2); 332 } 333 } 334 }; 335 336 std::atomic<bool> TestInvalidBackrefs::backrefGrowthDone; 337 void *TestInvalidBackrefs::ptrs[BACKREF_GROWTH_ITERS]; 338 339 void TestBackRef() { 340 size_t beforeNumBackRef, afterNumBackRef; 341 342 beforeNumBackRef = allocatedBackRefCount(); 343 for( int p=MaxThread; p>=MinThread; --p ) 344 utils::NativeParallelFor( p, BackRefWork<2*BR_MAX_CNT+2>() ); 345 afterNumBackRef = allocatedBackRefCount(); 346 REQUIRE_MESSAGE(beforeNumBackRef==afterNumBackRef, "backreference leak detected"); 347 // lastUsed marks peak resource consumption. As we allocate below the mark, 348 // it must not move up, otherwise there is a resource leak. 349 int sustLastUsed = backRefMain.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed); 350 utils::NativeParallelFor( 1, BackRefWork<2*BR_MAX_CNT+2>() ); 351 REQUIRE_MESSAGE(sustLastUsed == backRefMain.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed), "backreference leak detected"); 352 // check leak of back references while per-thread caches are in use 353 // warm up needed to cover bootStrapMalloc call 354 utils::NativeParallelFor( 1, LocalCachesHit() ); 355 beforeNumBackRef = allocatedBackRefCount(); 356 utils::NativeParallelFor( 2, LocalCachesHit() ); 357 int res = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr); 358 REQUIRE(res == TBBMALLOC_OK); 359 afterNumBackRef = allocatedBackRefCount(); 360 REQUIRE_MESSAGE(beforeNumBackRef>=afterNumBackRef, "backreference leak detected"); 361 362 // This is a regression test against race condition between backreference 363 // extension and checking invalid BackRefIdx. 364 // While detecting is object large or small, scalable_free 1st check for 365 // large objects, so there is a chance to prepend small object with 366 // seems valid BackRefIdx for large objects, and thus trigger the bug. 367 TestInvalidBackrefs::initBarrier(MaxThread); 368 utils::NativeParallelFor( MaxThread, TestInvalidBackrefs() ); 369 // Consume all available backrefs and check they work correctly. 370 // For now test 32-bit machines only, because for 64-bit memory consumption is too high. 371 if (sizeof(uintptr_t) == 4) 372 utils::NativeParallelFor( MaxThread, BackRefWork<0>() ); 373 } 374 375 void *getMem(intptr_t /*pool_id*/, size_t &bytes) 376 { 377 const size_t BUF_SIZE = 8*1024*1024; 378 static char space[BUF_SIZE]; 379 static size_t pos; 380 381 if (pos + bytes > BUF_SIZE) 382 return nullptr; 383 384 void *ret = space + pos; 385 pos += bytes; 386 387 return ret; 388 } 389 390 int putMem(intptr_t /*pool_id*/, void* /*raw_ptr*/, size_t /*raw_bytes*/) 391 { 392 return 0; 393 } 394 395 struct MallocPoolHeader { 396 void *rawPtr; 397 size_t userSize; 398 }; 399 400 void *getMallocMem(intptr_t /*pool_id*/, size_t &bytes) 401 { 402 void *rawPtr = malloc(bytes+sizeof(MallocPoolHeader)); 403 void *ret = (void *)((uintptr_t)rawPtr+sizeof(MallocPoolHeader)); 404 405 MallocPoolHeader *hdr = (MallocPoolHeader*)ret-1; 406 hdr->rawPtr = rawPtr; 407 hdr->userSize = bytes; 408 409 return ret; 410 } 411 412 int putMallocMem(intptr_t /*pool_id*/, void *ptr, size_t bytes) 413 { 414 MallocPoolHeader *hdr = (MallocPoolHeader*)ptr-1; 415 ASSERT(bytes == hdr->userSize, "Invalid size in pool callback."); 416 free(hdr->rawPtr); 417 418 return 0; 419 } 420 421 class StressLOCacheWork: utils::NoAssign { 422 rml::MemoryPool *my_mallocPool; 423 public: 424 StressLOCacheWork(rml::MemoryPool *mallocPool) : my_mallocPool(mallocPool) {} 425 void operator()(int) const { 426 for (size_t sz=minLargeObjectSize; sz<1*1024*1024; 427 sz+=LargeObjectCache::LargeBSProps::CacheStep) { 428 void *ptr = pool_malloc(my_mallocPool, sz); 429 REQUIRE_MESSAGE(ptr, "Memory was not allocated"); 430 memset(ptr, sz, sz); 431 pool_free(my_mallocPool, ptr); 432 } 433 } 434 }; 435 436 void TestPools() { 437 rml::MemPoolPolicy pol(getMem, putMem); 438 size_t beforeNumBackRef, afterNumBackRef; 439 440 rml::MemoryPool *pool1; 441 rml::MemoryPool *pool2; 442 pool_create_v1(0, &pol, &pool1); 443 pool_create_v1(0, &pol, &pool2); 444 pool_destroy(pool1); 445 pool_destroy(pool2); 446 447 scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr); 448 beforeNumBackRef = allocatedBackRefCount(); 449 rml::MemoryPool *fixedPool; 450 451 pool_create_v1(0, &pol, &fixedPool); 452 pol.pAlloc = getMallocMem; 453 pol.pFree = putMallocMem; 454 pol.granularity = 8; 455 rml::MemoryPool *mallocPool; 456 457 pool_create_v1(0, &pol, &mallocPool); 458 /* check that large object cache (LOC) returns correct size for cached objects 459 passBackendSz Byte objects are cached in LOC, but bypassed the backend, so 460 memory requested directly from allocation callback. 461 nextPassBackendSz Byte objects must fit to another LOC bin, 462 so that their allocation/realeasing leads to cache cleanup. 463 All this is expecting to lead to releasing of passBackendSz Byte object 464 from LOC during LOC cleanup, and putMallocMem checks that returned size 465 is correct. 466 */ 467 const size_t passBackendSz = Backend::maxBinned_HugePage+1, 468 anotherLOCBinSz = minLargeObjectSize+1; 469 for (int i=0; i<10; i++) { // run long enough to be cached 470 void *p = pool_malloc(mallocPool, passBackendSz); 471 REQUIRE_MESSAGE(p, "Memory was not allocated"); 472 pool_free(mallocPool, p); 473 } 474 // run long enough to passBackendSz allocation was cleaned from cache 475 // and returned back to putMallocMem for size checking 476 for (int i=0; i<1000; i++) { 477 void *p = pool_malloc(mallocPool, anotherLOCBinSz); 478 REQUIRE_MESSAGE(p, "Memory was not allocated"); 479 pool_free(mallocPool, p); 480 } 481 482 void *smallObj = pool_malloc(fixedPool, 10); 483 REQUIRE_MESSAGE(smallObj, "Memory was not allocated"); 484 memset(smallObj, 1, 10); 485 void *ptr = pool_malloc(fixedPool, 1024); 486 REQUIRE_MESSAGE(ptr, "Memory was not allocated"); 487 memset(ptr, 1, 1024); 488 void *largeObj = pool_malloc(fixedPool, minLargeObjectSize); 489 REQUIRE_MESSAGE(largeObj, "Memory was not allocated"); 490 memset(largeObj, 1, minLargeObjectSize); 491 ptr = pool_malloc(fixedPool, minLargeObjectSize); 492 REQUIRE_MESSAGE(ptr, "Memory was not allocated"); 493 memset(ptr, minLargeObjectSize, minLargeObjectSize); 494 pool_malloc(fixedPool, 10*minLargeObjectSize); // no leak for unsuccessful allocations 495 pool_free(fixedPool, smallObj); 496 pool_free(fixedPool, largeObj); 497 498 // provoke large object cache cleanup and hope no leaks occurs 499 for( int p=MaxThread; p>=MinThread; --p ) 500 utils::NativeParallelFor( p, StressLOCacheWork(mallocPool) ); 501 pool_destroy(mallocPool); 502 pool_destroy(fixedPool); 503 504 scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr); 505 afterNumBackRef = allocatedBackRefCount(); 506 REQUIRE_MESSAGE(beforeNumBackRef==afterNumBackRef, "backreference leak detected"); 507 508 { 509 // test usedSize/cachedSize and LOC bitmask correctness 510 void *p[5]; 511 pool_create_v1(0, &pol, &mallocPool); 512 const LargeObjectCache *loc = &((rml::internal::MemoryPool*)mallocPool)->extMemPool.loc; 513 const int LargeCacheStep = LargeObjectCache::LargeBSProps::CacheStep; 514 p[3] = pool_malloc(mallocPool, minLargeObjectSize+2*LargeCacheStep); 515 for (int i=0; i<10; i++) { 516 p[0] = pool_malloc(mallocPool, minLargeObjectSize); 517 p[1] = pool_malloc(mallocPool, minLargeObjectSize+LargeCacheStep); 518 pool_free(mallocPool, p[0]); 519 pool_free(mallocPool, p[1]); 520 } 521 REQUIRE(loc->getUsedSize()); 522 pool_free(mallocPool, p[3]); 523 REQUIRE(loc->getLOCSize() < 3*(minLargeObjectSize+LargeCacheStep)); 524 const size_t maxLocalLOCSize = LocalLOCImpl<3,30>::getMaxSize(); 525 REQUIRE(loc->getUsedSize() <= maxLocalLOCSize); 526 for (int i=0; i<3; i++) 527 p[i] = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep); 528 size_t currUser = loc->getUsedSize(); 529 REQUIRE((!loc->getLOCSize() && currUser >= 3*(minLargeObjectSize+LargeCacheStep))); 530 p[4] = pool_malloc(mallocPool, minLargeObjectSize+3*LargeCacheStep); 531 REQUIRE(loc->getUsedSize() - currUser >= minLargeObjectSize+3*LargeCacheStep); 532 pool_free(mallocPool, p[4]); 533 REQUIRE(loc->getUsedSize() <= currUser+maxLocalLOCSize); 534 pool_reset(mallocPool); 535 REQUIRE((!loc->getLOCSize() && !loc->getUsedSize())); 536 pool_destroy(mallocPool); 537 } 538 // To test LOC we need bigger lists than released by current LocalLOC 539 // in production code. Create special LocalLOC. 540 { 541 LocalLOCImpl<2, 20> lLOC; 542 pool_create_v1(0, &pol, &mallocPool); 543 rml::internal::ExtMemoryPool *mPool = &((rml::internal::MemoryPool*)mallocPool)->extMemPool; 544 const LargeObjectCache *loc = &((rml::internal::MemoryPool*)mallocPool)->extMemPool.loc; 545 const int LargeCacheStep = LargeObjectCache::LargeBSProps::CacheStep; 546 for (int i=0; i<22; i++) { 547 void *o = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep); 548 bool ret = lLOC.put(((LargeObjectHdr*)o - 1)->memoryBlock, mPool); 549 REQUIRE(ret); 550 551 o = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep); 552 ret = lLOC.put(((LargeObjectHdr*)o - 1)->memoryBlock, mPool); 553 REQUIRE(ret); 554 } 555 lLOC.externalCleanup(mPool); 556 REQUIRE(!loc->getUsedSize()); 557 558 pool_destroy(mallocPool); 559 } 560 } 561 562 void TestObjectRecognition() { 563 size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr); 564 unsigned falseObjectSize = 113; // unsigned is the type expected by getObjectSize 565 size_t obtainedSize; 566 567 REQUIRE_MESSAGE(sizeof(BackRefIdx)==sizeof(uintptr_t), "Unexpected size of BackRefIdx"); 568 REQUIRE_MESSAGE(getObjectSize(falseObjectSize)!=falseObjectSize, "Error in test: bad choice for false object size"); 569 570 void* mem = scalable_malloc(2*slabSize); 571 REQUIRE_MESSAGE(mem, "Memory was not allocated"); 572 Block* falseBlock = (Block*)alignUp((uintptr_t)mem, slabSize); 573 falseBlock->objectSize = falseObjectSize; 574 char* falseSO = (char*)falseBlock + falseObjectSize*7; 575 REQUIRE_MESSAGE(alignDown(falseSO, slabSize)==(void*)falseBlock, "Error in test: false object offset is too big"); 576 577 void* bufferLOH = scalable_malloc(2*slabSize + headersSize); 578 REQUIRE_MESSAGE(bufferLOH, "Memory was not allocated"); 579 LargeObjectHdr* falseLO = 580 (LargeObjectHdr*)alignUp((uintptr_t)bufferLOH + headersSize, slabSize); 581 LargeObjectHdr* headerLO = (LargeObjectHdr*)falseLO-1; 582 headerLO->memoryBlock = (LargeMemoryBlock*)bufferLOH; 583 headerLO->memoryBlock->unalignedSize = 2*slabSize + headersSize; 584 headerLO->memoryBlock->objectSize = slabSize + headersSize; 585 headerLO->backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true); 586 setBackRef(headerLO->backRefIdx, headerLO); 587 REQUIRE_MESSAGE(scalable_msize(falseLO) == slabSize + headersSize, 588 "Error in test: LOH falsification failed"); 589 removeBackRef(headerLO->backRefIdx); 590 591 const int NUM_OF_IDX = BR_MAX_CNT+2; 592 BackRefIdx idxs[NUM_OF_IDX]; 593 for (int cnt=0; cnt<2; cnt++) { 594 for (int main = -10; main<10; main++) { 595 falseBlock->backRefIdx.main = (uint16_t)main; 596 headerLO->backRefIdx.main = (uint16_t)main; 597 598 for (int bl = -10; bl<BR_MAX_CNT+10; bl++) { 599 falseBlock->backRefIdx.offset = (uint16_t)bl; 600 headerLO->backRefIdx.offset = (uint16_t)bl; 601 602 for (int largeObj = 0; largeObj<2; largeObj++) { 603 falseBlock->backRefIdx.largeObj = largeObj; 604 headerLO->backRefIdx.largeObj = largeObj; 605 606 obtainedSize = __TBB_malloc_safer_msize(falseSO, nullptr); 607 REQUIRE_MESSAGE(obtainedSize==0, "Incorrect pointer accepted"); 608 obtainedSize = __TBB_malloc_safer_msize(falseLO, nullptr); 609 REQUIRE_MESSAGE(obtainedSize==0, "Incorrect pointer accepted"); 610 } 611 } 612 } 613 if (cnt == 1) { 614 for (int i=0; i<NUM_OF_IDX; i++) 615 removeBackRef(idxs[i]); 616 break; 617 } 618 for (int i=0; i<NUM_OF_IDX; i++) { 619 idxs[i] = BackRefIdx::newBackRef(/*largeObj=*/false); 620 setBackRef(idxs[i], nullptr); 621 } 622 } 623 char *smallPtr = (char*)scalable_malloc(falseObjectSize); 624 obtainedSize = __TBB_malloc_safer_msize(smallPtr, nullptr); 625 REQUIRE_MESSAGE(obtainedSize==getObjectSize(falseObjectSize), "Correct pointer not accepted?"); 626 scalable_free(smallPtr); 627 628 obtainedSize = __TBB_malloc_safer_msize(mem, nullptr); 629 REQUIRE_MESSAGE(obtainedSize>=2*slabSize, "Correct pointer not accepted?"); 630 scalable_free(mem); 631 scalable_free(bufferLOH); 632 } 633 634 class TestBackendWork: public SimpleBarrier { 635 struct TestBlock { 636 intptr_t data; 637 BackRefIdx idx; 638 }; 639 static const int ITERS = 20; 640 641 rml::internal::Backend *backend; 642 public: 643 TestBackendWork(rml::internal::Backend *bknd) : backend(bknd) {} 644 void operator()(int) const { 645 barrier.wait(); 646 647 for (int i=0; i<ITERS; i++) { 648 BlockI *slabBlock = backend->getSlabBlock(1); 649 REQUIRE_MESSAGE(slabBlock, "Memory was not allocated"); 650 uintptr_t prevBlock = (uintptr_t)slabBlock; 651 backend->putSlabBlock(slabBlock); 652 653 LargeMemoryBlock *largeBlock = backend->getLargeBlock(16*1024); 654 REQUIRE_MESSAGE(largeBlock, "Memory was not allocated"); 655 REQUIRE_MESSAGE((uintptr_t)largeBlock != prevBlock, 656 "Large block cannot be reused from slab memory, only in fixed_pool case."); 657 backend->putLargeBlock(largeBlock); 658 } 659 } 660 }; 661 662 void TestBackend() 663 { 664 rml::MemPoolPolicy pol(getMallocMem, putMallocMem); 665 rml::MemoryPool *mPool; 666 pool_create_v1(0, &pol, &mPool); 667 rml::internal::ExtMemoryPool *ePool = &((rml::internal::MemoryPool*)mPool)->extMemPool; 668 rml::internal::Backend *backend = &ePool->backend; 669 670 for( int p=MaxThread; p>=MinThread; --p ) { 671 // regression test against an race condition in backend synchronization, 672 // triggered only when WhiteboxTestingYield() call yields 673 #if TBB_USE_DEBUG 674 int num_iters = 10; 675 #else 676 int num_iters = 100; 677 #endif 678 for (int i = 0; i < num_iters; i++) { 679 TestBackendWork::initBarrier(p); 680 utils::NativeParallelFor( p, TestBackendWork(backend) ); 681 } 682 } 683 684 BlockI *block = backend->getSlabBlock(1); 685 REQUIRE_MESSAGE(block, "Memory was not allocated"); 686 backend->putSlabBlock(block); 687 688 // Checks if the backend increases and decreases the amount of allocated memory when memory is allocated. 689 const size_t memSize0 = backend->getTotalMemSize(); 690 LargeMemoryBlock *lmb = backend->getLargeBlock(4*MByte); 691 REQUIRE( lmb ); 692 693 const size_t memSize1 = backend->getTotalMemSize(); 694 REQUIRE_MESSAGE( (intptr_t)(memSize1-memSize0) >= 4*MByte, "The backend has not increased the amount of using memory." ); 695 696 backend->putLargeBlock(lmb); 697 const size_t memSize2 = backend->getTotalMemSize(); 698 REQUIRE_MESSAGE( memSize2 == memSize0, "The backend has not decreased the amount of using memory." ); 699 700 pool_destroy(mPool); 701 } 702 703 void TestBitMask() 704 { 705 BitMaskMin<256> mask; 706 707 mask.reset(); 708 mask.set(10, 1); 709 mask.set(5, 1); 710 mask.set(1, 1); 711 REQUIRE(mask.getMinTrue(2) == 5); 712 713 mask.reset(); 714 mask.set(0, 1); 715 mask.set(64, 1); 716 mask.set(63, 1); 717 mask.set(200, 1); 718 mask.set(255, 1); 719 REQUIRE(mask.getMinTrue(0) == 0); 720 REQUIRE(mask.getMinTrue(1) == 63); 721 REQUIRE(mask.getMinTrue(63) == 63); 722 REQUIRE(mask.getMinTrue(64) == 64); 723 REQUIRE(mask.getMinTrue(101) == 200); 724 REQUIRE(mask.getMinTrue(201) == 255); 725 mask.set(255, 0); 726 REQUIRE(mask.getMinTrue(201) == -1); 727 } 728 729 size_t getMemSize() 730 { 731 return defaultMemPool->extMemPool.backend.getTotalMemSize(); 732 } 733 734 class CheckNotCached { 735 static size_t memSize; 736 public: 737 void operator() () const { 738 int res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1); 739 REQUIRE(res == TBBMALLOC_OK); 740 if (memSize==(size_t)-1) { 741 memSize = getMemSize(); 742 } else { 743 REQUIRE(getMemSize() == memSize); 744 memSize=(size_t)-1; 745 } 746 } 747 }; 748 749 size_t CheckNotCached::memSize = (size_t)-1; 750 751 class RunTestHeapLimit: public SimpleBarrier { 752 public: 753 void operator()( int /*mynum*/ ) const { 754 // Provoke bootstrap heap initialization before recording memory size. 755 // NOTE: The initialization should be processed only with a "large" 756 // object. Since the "small" object allocation lead to blocking of a 757 // slab as an active block and it is impossible to release it with 758 // foreign thread. 759 scalable_free(scalable_malloc(minLargeObjectSize)); 760 barrier.wait(CheckNotCached()); 761 for (size_t n = minLargeObjectSize; n < 5*1024*1024; n += 128*1024) 762 scalable_free(scalable_malloc(n)); 763 barrier.wait(CheckNotCached()); 764 } 765 }; 766 767 void TestHeapLimit() 768 { 769 if(!isMallocInitialized()) doInitialization(); 770 // tiny limit to stop caching 771 int res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1); 772 REQUIRE(res == TBBMALLOC_OK); 773 // Provoke bootstrap heap initialization before recording memory size. 774 scalable_free(scalable_malloc(8)); 775 size_t n, sizeBefore = getMemSize(); 776 777 // Try to provoke call to OS for memory to check that 778 // requests are not fulfilled from caches. 779 // Single call is not enough here because of backend fragmentation. 780 for (n = minLargeObjectSize; n < 10*1024*1024; n += 16*1024) { 781 void *p = scalable_malloc(n); 782 bool leave = (sizeBefore != getMemSize()); 783 scalable_free(p); 784 if (leave) 785 break; 786 REQUIRE_MESSAGE(sizeBefore == getMemSize(), "No caching expected"); 787 } 788 REQUIRE_MESSAGE(n < 10*1024*1024, "scalable_malloc doesn't provoke OS request for memory, " 789 "is some internal cache still used?"); 790 791 for( int p=MaxThread; p>=MinThread; --p ) { 792 RunTestHeapLimit::initBarrier( p ); 793 utils::NativeParallelFor( p, RunTestHeapLimit() ); 794 } 795 // it's try to match limit as well as set limit, so call here 796 res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1); 797 REQUIRE(res == TBBMALLOC_OK); 798 size_t m = getMemSize(); 799 REQUIRE(sizeBefore == m); 800 // restore default 801 res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 0); 802 REQUIRE(res == TBBMALLOC_OK); 803 } 804 805 void checkNoHugePages() 806 { 807 REQUIRE_MESSAGE(!hugePages.isEnabled, "scalable_allocation_mode " 808 "must have priority over environment variable"); 809 } 810 811 /*---------------------------------------------------------------------------*/ 812 // The regression test against bugs in TBBMALLOC_CLEAN_ALL_BUFFERS allocation command. 813 // The idea is to allocate and deallocate a set of objects randomly in parallel. 814 // For large sizes (16K), it forces conflicts in backend during coalescing. 815 // For small sizes (4K), it forces cross-thread deallocations and then orphaned slabs. 816 // Global cleanup should process orphaned slabs and the queue of postponed coalescing 817 // requests, otherwise it will not be able to unmap all unused memory. 818 819 const int num_allocs = 10*1024; 820 void *ptrs[num_allocs]; 821 std::atomic<int> alloc_counter; 822 static thread_local bool free_was_called = false; 823 824 inline void multiThreadAlloc(size_t alloc_size) { 825 for( int i = alloc_counter++; i < num_allocs; i = alloc_counter++ ) { 826 ptrs[i] = scalable_malloc( alloc_size ); 827 REQUIRE_MESSAGE( ptrs[i] != nullptr, "scalable_malloc returned zero." ); 828 } 829 } 830 inline void crossThreadDealloc() { 831 free_was_called = false; 832 for( int i = --alloc_counter; i >= 0; i = --alloc_counter ) { 833 if (i < num_allocs) { 834 scalable_free(ptrs[i]); 835 free_was_called = true; 836 } 837 } 838 } 839 840 template<int AllocSize> 841 struct TestCleanAllBuffersBody : public SimpleBarrier { 842 void operator() ( int ) const { 843 barrier.wait(); 844 multiThreadAlloc(AllocSize); 845 barrier.wait(); 846 crossThreadDealloc(); 847 } 848 }; 849 850 template<int AllocSize> 851 void TestCleanAllBuffers() { 852 const int num_threads = 8; 853 // Clean up if something was allocated before the test 854 scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,nullptr); 855 856 size_t memory_in_use_before = getMemSize(); 857 alloc_counter = 0; 858 TestCleanAllBuffersBody<AllocSize>::initBarrier(num_threads); 859 860 utils::NativeParallelFor(num_threads, TestCleanAllBuffersBody<AllocSize>()); 861 // TODO: reproduce the bug conditions more reliably 862 if ( defaultMemPool->extMemPool.backend.coalescQ.blocksToFree.load(std::memory_order_relaxed) == nullptr ) { 863 INFO( "Warning: The queue of postponed coalescing requests is empty. "); 864 INFO( "Unable to create the condition for bug reproduction.\n" ); 865 } 866 int result = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,nullptr); 867 REQUIRE_MESSAGE( result == TBBMALLOC_OK, "The cleanup request has not cleaned anything." ); 868 size_t memory_in_use_after = getMemSize(); 869 870 size_t memory_leak = memory_in_use_after - memory_in_use_before; 871 INFO( "memory_in_use_before = " << memory_in_use_before << ", memory_in_use_after = " << memory_in_use_after << "\n" ); 872 REQUIRE_MESSAGE( memory_leak == 0, "Cleanup was unable to release all allocated memory." ); 873 } 874 875 //! Force cross thread deallocation of small objects to create a set of privatizable slab blocks. 876 //! TBBMALLOC_CLEAN_THREAD_BUFFERS command have to privatize all the block. 877 struct TestCleanThreadBuffersBody : public SimpleBarrier { 878 void operator() ( int ) const { 879 barrier.wait(); 880 multiThreadAlloc(2*1024); 881 barrier.wait(); 882 crossThreadDealloc(); 883 barrier.wait(); 884 int result = scalable_allocation_command(TBBMALLOC_CLEAN_THREAD_BUFFERS,nullptr); 885 if (result != TBBMALLOC_OK && free_was_called) { 886 REPORT("Warning: clean-up request for this particular thread has not cleaned anything."); 887 } 888 889 // Check that TLS was cleaned fully 890 TLSData *tlsCurr = defaultMemPool->getTLS(/*create=*/false); 891 if (tlsCurr) { 892 for (int i = 0; i < numBlockBinLimit; i++) { 893 REQUIRE_MESSAGE(!(tlsCurr->bin[i].activeBlk), "Some bin was not cleaned."); 894 } 895 REQUIRE_MESSAGE(!(tlsCurr->lloc.head.load(std::memory_order_relaxed)), "Local LOC was not cleaned."); 896 REQUIRE_MESSAGE(!(tlsCurr->freeSlabBlocks.head.load(std::memory_order_relaxed)), "Free Block pool was not cleaned."); 897 } 898 } 899 }; 900 901 void TestCleanThreadBuffers() { 902 const int num_threads = 8; 903 // Clean up if something was allocated before the test 904 scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,nullptr); 905 906 alloc_counter = 0; 907 TestCleanThreadBuffersBody::initBarrier(num_threads); 908 utils::NativeParallelFor(num_threads, TestCleanThreadBuffersBody()); 909 } 910 911 /*---------------------------------------------------------------------------*/ 912 /*------------------------- Large Object Cache tests ------------------------*/ 913 #if _MSC_VER==1600 || _MSC_VER==1500 914 // ignore C4275: non dll-interface class 'stdext::exception' used as 915 // base for dll-interface class 'std::bad_cast' 916 #pragma warning (disable: 4275) 917 #endif 918 #include <vector> 919 #include <list> 920 921 // default constructor of CacheBin 922 template<typename Props> 923 rml::internal::LargeObjectCacheImpl<Props>::CacheBin::CacheBin() {} 924 925 template<typename Props> 926 class CacheBinModel { 927 928 typedef typename rml::internal::LargeObjectCacheImpl<Props>::CacheBin CacheBinType; 929 930 // The emulated cache bin. 931 CacheBinType cacheBinModel; 932 // The reference to real cache bin inside the large object cache. 933 CacheBinType &cacheBin; 934 935 const size_t size; 936 937 // save only current time 938 std::list<uintptr_t> objects; 939 940 void doCleanup() { 941 if ( cacheBinModel.cachedSize.load(std::memory_order_relaxed) > 942 Props::TooLargeFactor*cacheBinModel.usedSize.load(std::memory_order_relaxed)) tooLargeLOC++; 943 else tooLargeLOC = 0; 944 945 intptr_t threshold = cacheBinModel.ageThreshold.load(std::memory_order_relaxed); 946 if (tooLargeLOC > 3 && threshold) { 947 threshold = (threshold + cacheBinModel.meanHitRange.load(std::memory_order_relaxed)) / 2; 948 cacheBinModel.ageThreshold.store(threshold, std::memory_order_relaxed); 949 } 950 951 uintptr_t currTime = cacheCurrTime; 952 while (!objects.empty() && (intptr_t)(currTime - objects.front()) > threshold) { 953 cacheBinModel.cachedSize.store(cacheBinModel.cachedSize.load(std::memory_order_relaxed) - size, std::memory_order_relaxed); 954 cacheBinModel.lastCleanedAge = objects.front(); 955 objects.pop_front(); 956 } 957 958 cacheBinModel.oldest.store(objects.empty() ? 0 : objects.front(), std::memory_order_relaxed); 959 } 960 961 public: 962 CacheBinModel(CacheBinType &_cacheBin, size_t allocSize) : cacheBin(_cacheBin), size(allocSize) { 963 cacheBinModel.oldest.store(cacheBin.oldest.load(std::memory_order_relaxed), std::memory_order_relaxed); 964 cacheBinModel.lastCleanedAge = cacheBin.lastCleanedAge; 965 cacheBinModel.ageThreshold.store(cacheBin.ageThreshold.load(std::memory_order_relaxed), std::memory_order_relaxed); 966 cacheBinModel.usedSize.store(cacheBin.usedSize.load(std::memory_order_relaxed), std::memory_order_relaxed); 967 cacheBinModel.cachedSize.store(cacheBin.cachedSize.load(std::memory_order_relaxed), std::memory_order_relaxed); 968 cacheBinModel.meanHitRange.store(cacheBin.meanHitRange.load(std::memory_order_relaxed), std::memory_order_relaxed); 969 cacheBinModel.lastGet = cacheBin.lastGet; 970 } 971 void get() { 972 uintptr_t currTime = ++cacheCurrTime; 973 974 if ( objects.empty() ) { 975 const uintptr_t sinceLastGet = currTime - cacheBinModel.lastGet; 976 intptr_t threshold = cacheBinModel.ageThreshold.load(std::memory_order_relaxed); 977 if ((threshold && sinceLastGet > Props::LongWaitFactor * threshold) || 978 (cacheBinModel.lastCleanedAge && sinceLastGet > Props::LongWaitFactor * (cacheBinModel.lastCleanedAge - cacheBinModel.lastGet))) { 979 cacheBinModel.lastCleanedAge = 0; 980 cacheBinModel.ageThreshold.store(0, std::memory_order_relaxed); 981 } 982 983 if (cacheBinModel.lastCleanedAge) 984 cacheBinModel.ageThreshold.store(Props::OnMissFactor * (currTime - cacheBinModel.lastCleanedAge), std::memory_order_relaxed); 985 } else { 986 uintptr_t obj_age = objects.back(); 987 objects.pop_back(); 988 if (objects.empty()) cacheBinModel.oldest.store(0, std::memory_order_relaxed); 989 990 intptr_t hitRange = currTime - obj_age; 991 intptr_t mean = cacheBinModel.meanHitRange.load(std::memory_order_relaxed); 992 mean = mean ? (mean + hitRange) / 2 : hitRange; 993 cacheBinModel.meanHitRange.store(mean, std::memory_order_relaxed); 994 995 cacheBinModel.cachedSize.store(cacheBinModel.cachedSize.load(std::memory_order_relaxed) - size, std::memory_order_relaxed); 996 } 997 998 cacheBinModel.usedSize.store(cacheBinModel.usedSize.load(std::memory_order_relaxed) + size, std::memory_order_relaxed); 999 cacheBinModel.lastGet = currTime; 1000 1001 if ( currTime % rml::internal::cacheCleanupFreq == 0 ) doCleanup(); 1002 } 1003 1004 void putList( int num ) { 1005 uintptr_t currTime = cacheCurrTime; 1006 cacheCurrTime += num; 1007 1008 cacheBinModel.usedSize.store(cacheBinModel.usedSize.load(std::memory_order_relaxed) - num * size, std::memory_order_relaxed); 1009 1010 bool cleanUpNeeded = false; 1011 if ( !cacheBinModel.lastCleanedAge ) { 1012 cacheBinModel.lastCleanedAge = ++currTime; 1013 cleanUpNeeded |= currTime % rml::internal::cacheCleanupFreq == 0; 1014 num--; 1015 } 1016 1017 for ( int i=1; i<=num; ++i ) { 1018 currTime+=1; 1019 cleanUpNeeded |= currTime % rml::internal::cacheCleanupFreq == 0; 1020 if (objects.empty()) 1021 cacheBinModel.oldest.store(currTime, std::memory_order_relaxed); 1022 objects.push_back(currTime); 1023 } 1024 1025 cacheBinModel.cachedSize.store(cacheBinModel.cachedSize.load(std::memory_order_relaxed) + num * size, std::memory_order_relaxed); 1026 1027 if ( cleanUpNeeded ) doCleanup(); 1028 } 1029 1030 void check() { 1031 CHECK_FAST(cacheBinModel.oldest.load(std::memory_order_relaxed) == cacheBin.oldest.load(std::memory_order_relaxed)); 1032 CHECK_FAST(cacheBinModel.lastCleanedAge == cacheBin.lastCleanedAge); 1033 CHECK_FAST(cacheBinModel.ageThreshold.load(std::memory_order_relaxed) == cacheBin.ageThreshold.load(std::memory_order_relaxed)); 1034 CHECK_FAST(cacheBinModel.usedSize.load(std::memory_order_relaxed) == cacheBin.usedSize.load(std::memory_order_relaxed)); 1035 CHECK_FAST(cacheBinModel.cachedSize.load(std::memory_order_relaxed) == cacheBin.cachedSize.load(std::memory_order_relaxed)); 1036 CHECK_FAST(cacheBinModel.meanHitRange.load(std::memory_order_relaxed) == cacheBin.meanHitRange.load(std::memory_order_relaxed)); 1037 CHECK_FAST(cacheBinModel.lastGet == cacheBin.lastGet); 1038 } 1039 1040 static uintptr_t cacheCurrTime; 1041 static intptr_t tooLargeLOC; 1042 }; 1043 1044 template<typename Props> uintptr_t CacheBinModel<Props>::cacheCurrTime; 1045 template<typename Props> intptr_t CacheBinModel<Props>::tooLargeLOC; 1046 1047 template <typename Scenario> 1048 void LOCModelTester() { 1049 defaultMemPool->extMemPool.loc.cleanAll(); 1050 defaultMemPool->extMemPool.loc.reset(); 1051 1052 const size_t size = 16 * 1024; 1053 const size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr); 1054 const size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment); 1055 const int binIdx = defaultMemPool->extMemPool.loc.largeCache.sizeToIdx( allocationSize ); 1056 1057 CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps>::cacheCurrTime = defaultMemPool->extMemPool.loc.cacheCurrTime; 1058 CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps>::tooLargeLOC = defaultMemPool->extMemPool.loc.largeCache.tooLargeLOC; 1059 CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps> cacheBinModel(defaultMemPool->extMemPool.loc.largeCache.bin[binIdx], allocationSize); 1060 1061 Scenario scen; 1062 for (rml::internal::LargeMemoryBlock *lmb = scen.next(); (intptr_t)lmb != (intptr_t)-1; lmb = scen.next()) { 1063 if ( lmb ) { 1064 int num=1; 1065 for (rml::internal::LargeMemoryBlock *curr = lmb; curr->next; curr=curr->next) num+=1; 1066 defaultMemPool->extMemPool.freeLargeObject(lmb); 1067 cacheBinModel.putList(num); 1068 } else { 1069 scen.saveLmb(defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize)); 1070 cacheBinModel.get(); 1071 } 1072 1073 cacheBinModel.check(); 1074 } 1075 } 1076 1077 class TestBootstrap { 1078 bool allocating; 1079 std::vector<rml::internal::LargeMemoryBlock*> lmbArray; 1080 public: 1081 TestBootstrap() : allocating(true) {} 1082 1083 rml::internal::LargeMemoryBlock* next() { 1084 if ( allocating ) 1085 return nullptr; 1086 if ( !lmbArray.empty() ) { 1087 rml::internal::LargeMemoryBlock *ret = lmbArray.back(); 1088 lmbArray.pop_back(); 1089 return ret; 1090 } 1091 return (rml::internal::LargeMemoryBlock*)-1; 1092 } 1093 1094 void saveLmb( rml::internal::LargeMemoryBlock *lmb ) { 1095 lmb->next = nullptr; 1096 lmbArray.push_back(lmb); 1097 if ( lmbArray.size() == 1000 ) allocating = false; 1098 } 1099 }; 1100 1101 class TestRandom { 1102 std::vector<rml::internal::LargeMemoryBlock*> lmbArray; 1103 int numOps; 1104 public: 1105 TestRandom() : numOps(100000) { 1106 srand(1234); 1107 } 1108 1109 rml::internal::LargeMemoryBlock* next() { 1110 if ( numOps-- ) { 1111 if ( lmbArray.empty() || rand() / (RAND_MAX>>1) == 0 ) 1112 return nullptr; 1113 size_t ind = rand()%lmbArray.size(); 1114 if ( ind != lmbArray.size()-1 ) std::swap(lmbArray[ind],lmbArray[lmbArray.size()-1]); 1115 rml::internal::LargeMemoryBlock *lmb = lmbArray.back(); 1116 lmbArray.pop_back(); 1117 return lmb; 1118 } 1119 return (rml::internal::LargeMemoryBlock*)-1; 1120 } 1121 1122 void saveLmb( rml::internal::LargeMemoryBlock *lmb ) { 1123 lmb->next = nullptr; 1124 lmbArray.push_back(lmb); 1125 } 1126 }; 1127 1128 class TestCollapsingMallocFree : public SimpleBarrier { 1129 public: 1130 static const int NUM_ALLOCS = 100000; 1131 const int num_threads; 1132 1133 TestCollapsingMallocFree( int _num_threads ) : num_threads(_num_threads) { 1134 initBarrier( num_threads ); 1135 } 1136 1137 void operator() ( int ) const { 1138 const size_t size = 16 * 1024; 1139 const size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr); 1140 const size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment); 1141 1142 barrier.wait(); 1143 for ( int i=0; i<NUM_ALLOCS; ++i ) { 1144 defaultMemPool->extMemPool.freeLargeObject( 1145 defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize) ); 1146 } 1147 } 1148 1149 void check() { 1150 REQUIRE( tbbmalloc_whitebox::locGetProcessed == tbbmalloc_whitebox::locPutProcessed); 1151 REQUIRE_MESSAGE( tbbmalloc_whitebox::locGetProcessed < num_threads*NUM_ALLOCS, "No one Malloc/Free pair was collapsed." ); 1152 } 1153 }; 1154 1155 class TestCollapsingBootstrap : public SimpleBarrier { 1156 class CheckNumAllocs { 1157 const int num_threads; 1158 public: 1159 CheckNumAllocs( int _num_threads ) : num_threads(_num_threads) {} 1160 void operator()() const { 1161 REQUIRE( tbbmalloc_whitebox::locGetProcessed == num_threads*NUM_ALLOCS ); 1162 REQUIRE( tbbmalloc_whitebox::locPutProcessed == 0 ); 1163 } 1164 }; 1165 public: 1166 static const int NUM_ALLOCS = 1000; 1167 const int num_threads; 1168 1169 TestCollapsingBootstrap( int _num_threads ) : num_threads(_num_threads) { 1170 initBarrier( num_threads ); 1171 } 1172 1173 void operator() ( int ) const { 1174 const size_t size = 16 * 1024; 1175 size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr); 1176 size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment); 1177 1178 barrier.wait(); 1179 rml::internal::LargeMemoryBlock *lmbArray[NUM_ALLOCS]; 1180 for ( int i=0; i<NUM_ALLOCS; ++i ) 1181 lmbArray[i] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize); 1182 1183 barrier.wait(CheckNumAllocs(num_threads)); 1184 for ( int i=0; i<NUM_ALLOCS; ++i ) 1185 defaultMemPool->extMemPool.freeLargeObject( lmbArray[i] ); 1186 } 1187 1188 void check() { 1189 REQUIRE( tbbmalloc_whitebox::locGetProcessed == tbbmalloc_whitebox::locPutProcessed ); 1190 REQUIRE( tbbmalloc_whitebox::locGetProcessed == num_threads*NUM_ALLOCS ); 1191 } 1192 }; 1193 1194 template <typename Scenario> 1195 void LOCCollapsingTester( int num_threads ) { 1196 tbbmalloc_whitebox::locGetProcessed = 0; 1197 tbbmalloc_whitebox::locPutProcessed = 0; 1198 defaultMemPool->extMemPool.loc.cleanAll(); 1199 defaultMemPool->extMemPool.loc.reset(); 1200 1201 Scenario scen(num_threads); 1202 utils::NativeParallelFor(num_threads, scen); 1203 1204 scen.check(); 1205 } 1206 1207 void TestLOC() { 1208 LOCModelTester<TestBootstrap>(); 1209 LOCModelTester<TestRandom>(); 1210 1211 const int num_threads = 16; 1212 LOCCollapsingTester<TestCollapsingBootstrap>( num_threads ); 1213 if ( num_threads > 1 ) { 1214 INFO( "num_threads = " << num_threads ); 1215 LOCCollapsingTester<TestCollapsingMallocFree>( num_threads ); 1216 } else { 1217 REPORT( "Warning: concurrency is too low for TestMallocFreeCollapsing ( num_threads = %d )\n", num_threads ); 1218 } 1219 } 1220 /*---------------------------------------------------------------------------*/ 1221 1222 void *findCacheLine(void *p) { 1223 return (void*)alignDown((uintptr_t)p, estimatedCacheLineSize); 1224 } 1225 1226 // test that internals of Block are at expected cache lines 1227 void TestSlabAlignment() { 1228 const size_t min_sz = 8; 1229 const int space = 2*16*1024; // fill at least 2 slabs 1230 void *pointers[space / min_sz]; // the worst case is min_sz byte object 1231 1232 for (size_t sz = min_sz; sz <= 64; sz *= 2) { 1233 for (size_t i = 0; i < space/sz; i++) { 1234 pointers[i] = scalable_malloc(sz); 1235 Block *block = (Block *)alignDown(pointers[i], slabSize); 1236 REQUIRE_MESSAGE(findCacheLine(&block->isFull) != findCacheLine(pointers[i]), 1237 "A user object must not share a cache line with slab control structures."); 1238 REQUIRE_MESSAGE(findCacheLine(&block->next) != findCacheLine(&block->nextPrivatizable), 1239 "GlobalBlockFields and LocalBlockFields must be on different cache lines."); 1240 } 1241 for (size_t i = 0; i < space/sz; i++) 1242 scalable_free(pointers[i]); 1243 } 1244 } 1245 1246 #include "common/memory_usage.h" 1247 1248 // TODO: Consider adding Huge Pages support on macOS (special mmap flag). 1249 // Transparent Huge pages support could be enabled by different system parsing mechanism, 1250 // because there is no /proc/meminfo on macOS 1251 #if __unix__ 1252 void TestTHP() { 1253 // Get backend from default memory pool 1254 rml::internal::Backend *backend = &(defaultMemPool->extMemPool.backend); 1255 1256 // Configure malloc to use huge pages 1257 scalable_allocation_mode(USE_HUGE_PAGES, 1); 1258 REQUIRE_MESSAGE(hugePages.isEnabled, "Huge pages should be enabled via scalable_allocation_mode"); 1259 1260 const int HUGE_PAGE_SIZE = 2 * 1024 * 1024; 1261 1262 // allocCount transparent huge pages should be allocated 1263 const int allocCount = 10; 1264 1265 // Allocate huge page aligned memory regions to track system 1266 // counters for transparent huge pages 1267 void* allocPtrs[allocCount]; 1268 1269 // Wait for the system to update process memory info files after other tests 1270 utils::Sleep(4000); 1271 1272 // Parse system info regarding current THP status 1273 size_t currentSystemTHPCount = utils::getSystemTHPCount(); 1274 size_t currentSystemTHPAllocatedSize = utils::getSystemTHPAllocatedSize(); 1275 1276 for (int i = 0; i < allocCount; i++) { 1277 // Allocation size have to be aligned on page size 1278 size_t allocSize = HUGE_PAGE_SIZE - (i * 1000); 1279 1280 // Map memory 1281 allocPtrs[i] = backend->allocRawMem(allocSize); 1282 1283 REQUIRE_MESSAGE(allocPtrs[i], "Allocation not succeeded."); 1284 REQUIRE_MESSAGE(allocSize == HUGE_PAGE_SIZE, 1285 "Allocation size have to be aligned on Huge Page size internally."); 1286 1287 // First touch policy - no real pages allocated by OS without accessing the region 1288 memset(allocPtrs[i], 1, allocSize); 1289 1290 REQUIRE_MESSAGE(isAligned(allocPtrs[i], HUGE_PAGE_SIZE), 1291 "The pointer returned by scalable_malloc is not aligned on huge page size."); 1292 } 1293 1294 // Wait for the system to update process memory info files after allocations 1295 utils::Sleep(4000); 1296 1297 // Generally, kernel tries to allocate transparent huge pages, but sometimes it cannot do this 1298 // (tested on SLES 11/12), so consider this system info checks as a remark. 1299 // Also, some systems can allocate more memory then needed in background (tested on Ubuntu 14.04) 1300 size_t newSystemTHPCount = utils::getSystemTHPCount(); 1301 size_t newSystemTHPAllocatedSize = utils::getSystemTHPAllocatedSize(); 1302 if ((newSystemTHPCount - currentSystemTHPCount) < allocCount 1303 && (newSystemTHPAllocatedSize - currentSystemTHPAllocatedSize) / (2 * 1024) < allocCount) { 1304 REPORT( "Warning: the system didn't allocate needed amount of THPs.\n" ); 1305 } 1306 1307 // Test memory unmap 1308 for (int i = 0; i < allocCount; i++) { 1309 REQUIRE_MESSAGE(backend->freeRawMem(allocPtrs[i], HUGE_PAGE_SIZE), 1310 "Something went wrong during raw memory free"); 1311 } 1312 } 1313 #endif // __unix__ 1314 1315 inline size_t getStabilizedMemUsage() { 1316 for (int i = 0; i < 3; i++) utils::GetMemoryUsage(); 1317 return utils::GetMemoryUsage(); 1318 } 1319 1320 inline void* reallocAndRetrieve(void* origPtr, size_t reallocSize, size_t& origBlockSize, size_t& reallocBlockSize) { 1321 rml::internal::LargeMemoryBlock* origLmb = ((rml::internal::LargeObjectHdr *)origPtr - 1)->memoryBlock; 1322 origBlockSize = origLmb->unalignedSize; 1323 1324 void* reallocPtr = rml::internal::reallocAligned(defaultMemPool, origPtr, reallocSize, 0); 1325 1326 // Retrieved reallocated block information 1327 rml::internal::LargeMemoryBlock* reallocLmb = ((rml::internal::LargeObjectHdr *)reallocPtr - 1)->memoryBlock; 1328 reallocBlockSize = reallocLmb->unalignedSize; 1329 1330 return reallocPtr; 1331 } 1332 1333 void TestReallocDecreasing() { 1334 1335 /* Testing that actual reallocation happens for large objects that do not fit the backend cache 1336 but decrease in size by a factor of >= 2. */ 1337 1338 size_t startSize = 100 * 1024 * 1024; 1339 size_t maxBinnedSize = defaultMemPool->extMemPool.backend.getMaxBinnedSize(); 1340 void* origPtr = scalable_malloc(startSize); 1341 void* reallocPtr = nullptr; 1342 1343 // Realloc on 1MB less size 1344 size_t origBlockSize = 42; 1345 size_t reallocBlockSize = 43; 1346 reallocPtr = reallocAndRetrieve(origPtr, startSize - 1 * 1024 * 1024, origBlockSize, reallocBlockSize); 1347 REQUIRE_MESSAGE(origBlockSize == reallocBlockSize, "Reallocated block size shouldn't change"); 1348 REQUIRE_MESSAGE(reallocPtr == origPtr, "Original pointer shouldn't change"); 1349 1350 // Repeated decreasing reallocation while max cache bin size reached 1351 size_t reallocSize = (startSize / 2) - 1000; // exact realloc 1352 while(reallocSize > maxBinnedSize) { 1353 1354 // Prevent huge/large objects caching 1355 defaultMemPool->extMemPool.loc.cleanAll(); 1356 // Prevent local large object caching 1357 TLSData *tls = defaultMemPool->getTLS(/*create=*/false); 1358 tls->lloc.externalCleanup(&defaultMemPool->extMemPool); 1359 1360 size_t sysMemUsageBefore = getStabilizedMemUsage(); 1361 size_t totalMemSizeBefore = defaultMemPool->extMemPool.backend.getTotalMemSize(); 1362 1363 reallocPtr = reallocAndRetrieve(origPtr, reallocSize, origBlockSize, reallocBlockSize); 1364 1365 REQUIRE_MESSAGE(origBlockSize > reallocBlockSize, "Reallocated block size should descrease."); 1366 1367 size_t sysMemUsageAfter = getStabilizedMemUsage(); 1368 size_t totalMemSizeAfter = defaultMemPool->extMemPool.backend.getTotalMemSize(); 1369 1370 // Prevent false checking when backend caching occurred or could not read system memory usage info 1371 if (totalMemSizeBefore > totalMemSizeAfter && sysMemUsageAfter != 0 && sysMemUsageBefore != 0) { 1372 REQUIRE_MESSAGE(sysMemUsageBefore > sysMemUsageAfter, "Memory were not released"); 1373 } 1374 1375 origPtr = reallocPtr; 1376 reallocSize = (reallocSize / 2) - 1000; // exact realloc 1377 } 1378 scalable_free(reallocPtr); 1379 1380 /* TODO: Decreasing reallocation of large objects that fit backend cache */ 1381 /* TODO: Small objects decreasing reallocation test */ 1382 } 1383 #if !__TBB_WIN8UI_SUPPORT && defined(_WIN32) 1384 1385 #include "../../src/tbbmalloc_proxy/function_replacement.cpp" 1386 #include <string> 1387 namespace FunctionReplacement { 1388 FunctionInfo funcInfo = { "funcname","dllname" }; 1389 char **func_replacement_log; 1390 int status; 1391 1392 void LogCleanup() { 1393 // Free all allocated memory 1394 for (unsigned i = 0; i < Log::record_number; i++){ 1395 HeapFree(GetProcessHeap(), 0, Log::records[i]); 1396 } 1397 for (unsigned i = 0; i < Log::RECORDS_COUNT + 1; i++){ 1398 Log::records[i] = nullptr; 1399 } 1400 Log::replacement_status = true; 1401 Log::record_number = 0; 1402 } 1403 1404 void TestEmptyLog() { 1405 status = TBB_malloc_replacement_log(&func_replacement_log); 1406 1407 REQUIRE_MESSAGE(status == -1, "Status is true, but log is empty"); 1408 REQUIRE_MESSAGE(*func_replacement_log == nullptr, "Log must be empty"); 1409 } 1410 1411 void TestLogOverload() { 1412 for (int i = 0; i < 1000; i++) 1413 Log::record(funcInfo, "opcode string", true); 1414 1415 status = TBB_malloc_replacement_log(&func_replacement_log); 1416 // Find last record 1417 for (; *(func_replacement_log + 1) != 0; func_replacement_log++) {} 1418 1419 std::string last_line(*func_replacement_log); 1420 REQUIRE_MESSAGE(status == 0, "False status, but all functions found"); 1421 REQUIRE_MESSAGE(last_line.compare("Log was truncated.") == 0, "Log overflow was not handled"); 1422 1423 // Change status 1424 Log::record(funcInfo, "opcode string", false); 1425 status = TBB_malloc_replacement_log(nullptr); 1426 REQUIRE_MESSAGE(status == -1, "Status is true, but we have false search case"); 1427 1428 LogCleanup(); 1429 } 1430 1431 void TestFalseSearchCase() { 1432 Log::record(funcInfo, "opcode string", false); 1433 std::string expected_line = "Fail: "+ std::string(funcInfo.funcName) + " (" + 1434 std::string(funcInfo.dllName) + "), byte pattern: <opcode string>"; 1435 1436 status = TBB_malloc_replacement_log(&func_replacement_log); 1437 1438 REQUIRE_MESSAGE(expected_line.compare(*func_replacement_log) == 0, "Wrong last string contnent"); 1439 REQUIRE_MESSAGE(status == -1, "Status is true, but we have false search case"); 1440 LogCleanup(); 1441 } 1442 1443 void TestWrongFunctionInDll(){ 1444 HMODULE ucrtbase_handle = GetModuleHandle("ucrtbase.dll"); 1445 if (ucrtbase_handle) { 1446 IsPrologueKnown("ucrtbase.dll", "fake_function", nullptr, ucrtbase_handle); 1447 std::string expected_line = "Fail: fake_function (ucrtbase.dll), byte pattern: <unknown>"; 1448 1449 status = TBB_malloc_replacement_log(&func_replacement_log); 1450 1451 REQUIRE_MESSAGE(expected_line.compare(*func_replacement_log) == 0, "Wrong last string contnent"); 1452 REQUIRE_MESSAGE(status == -1, "Status is true, but we have false search case"); 1453 LogCleanup(); 1454 } else { 1455 INFO("Cannot found ucrtbase.dll on system, test skipped!\n"); 1456 } 1457 } 1458 } 1459 1460 void TesFunctionReplacementLog() { 1461 using namespace FunctionReplacement; 1462 // Do not reorder the test cases 1463 TestEmptyLog(); 1464 TestLogOverload(); 1465 TestFalseSearchCase(); 1466 TestWrongFunctionInDll(); 1467 } 1468 1469 #endif /*!__TBB_WIN8UI_SUPPORT && defined(_WIN32)*/ 1470 1471 #include <cmath> // pow function 1472 1473 // Huge objects cache: Size = MinSize * (2 ^ (Index / StepFactor) formula gives value for the bin size, 1474 // but it is not matched with our sizeToIdx approximation algorithm, where step sizes between major 1475 // (power of 2) sizes are equal. Used internally for the test. Static cast to avoid warnings. 1476 inline size_t hocIdxToSizeFormula(int idx) { 1477 return static_cast<size_t>(float(rml::internal::LargeObjectCache::maxLargeSize) * 1478 pow(2, float(idx) / float(rml::internal::LargeObjectCache::HugeBSProps::StepFactor))); 1479 } 1480 // Large objects cache arithmetic progression 1481 inline size_t locIdxToSizeFormula(int idx) { 1482 return rml::internal::LargeObjectCache::LargeBSProps::MinSize + 1483 (idx * rml::internal::LargeObjectCache::LargeBSProps::CacheStep); 1484 } 1485 1486 template <typename CacheType> 1487 void TestLOCacheBinsConverterImpl(int idx, size_t checkingSize) { 1488 size_t alignedSize = CacheType::alignToBin(checkingSize); 1489 REQUIRE_MESSAGE(alignedSize >= checkingSize, "Size is not correctly aligned"); 1490 int calcIdx = CacheType::sizeToIdx(alignedSize); 1491 REQUIRE_MESSAGE(calcIdx == idx, "Index from size calculated not correctly"); 1492 } 1493 1494 void TestLOCacheBinsConverter(){ 1495 typedef rml::internal::LargeObjectCache::LargeCacheType LargeCacheType; 1496 typedef rml::internal::LargeObjectCache::HugeCacheType HugeCacheType; 1497 1498 size_t checkingSize = 0; 1499 for (int idx = 0; idx < LargeCacheType::numBins; idx++) { 1500 checkingSize = locIdxToSizeFormula(idx); 1501 TestLOCacheBinsConverterImpl<LargeCacheType>(idx, checkingSize); 1502 } 1503 for (int idx = 0; idx < HugeCacheType::numBins; idx++) { 1504 checkingSize = hocIdxToSizeFormula(idx); 1505 TestLOCacheBinsConverterImpl<HugeCacheType>(idx, checkingSize); 1506 } 1507 } 1508 1509 struct HOThresholdTester { 1510 LargeObjectCache* loc; 1511 size_t hugeSize; 1512 1513 static const size_t sieveSize = LargeObjectCache::defaultMaxHugeSize; 1514 // Sieve starts from 64MB (24-th cache bin), enough to check 4 bins radius range 1515 // for decent memory consumption (especially for 32-bit arch) 1516 static const int MIN_BIN_IDX = 21; 1517 static const int MAX_BIN_IDX = 27; 1518 1519 enum CleanupType { 1520 NO_CLEANUP, 1521 REGULAR_CLEANUP, 1522 HARD_CLEANUP 1523 }; 1524 1525 void populateCache() { 1526 LargeMemoryBlock* loArray[MAX_BIN_IDX - MIN_BIN_IDX]; 1527 // To avoid backend::softCacheCleanup consequences (cleanup by isLOCToolarge), 1528 // firstly allocate all objects and then cache them at once. 1529 // Morevover, just because first cache item will still be dropped from cache because of the lack of history, 1530 // redo allocation 2 times. 1531 for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) { 1532 size_t allocationSize = alignedSizeFromIdx(idx); 1533 int localIdx = idx - MIN_BIN_IDX; 1534 loArray[localIdx] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize); 1535 REQUIRE_MESSAGE(loArray[localIdx], "Large object was not allocated."); 1536 loc->put(loArray[localIdx]); 1537 loArray[localIdx] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize); 1538 } 1539 for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) { 1540 loc->put(loArray[idx - MIN_BIN_IDX]); 1541 } 1542 } 1543 void clean(bool all) { 1544 if (all) { 1545 // Should avoid any threshold and clean all bins 1546 loc->cleanAll(); 1547 } else { 1548 // Regular cleanup should do nothing for bins above threshold. Decreasing option used 1549 // for the test to be sure that all objects below defaultMaxHugeSize (sieveSize) were cleaned 1550 loc->regularCleanup(); 1551 loc->decreasingCleanup(); 1552 } 1553 } 1554 void check(CleanupType type) { 1555 for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) { 1556 size_t objectSize = alignedSizeFromIdx(idx); 1557 // Cache object below sieve threshold and above huge object threshold should be cached 1558 // (other should be sieved). Unless all cache is dropped. Regular cleanup drops object only below sieve size. 1559 if (type == NO_CLEANUP && sizeInCacheRange(objectSize)) { 1560 REQUIRE_MESSAGE(objectInCacheBin(idx, objectSize), "Object was released from cache, it shouldn't."); 1561 } else if (type == REGULAR_CLEANUP && (objectSize >= hugeSize)) { 1562 REQUIRE_MESSAGE(objectInCacheBin(idx, objectSize), "Object was released from cache, it shouldn't."); 1563 } else { // HARD_CLEANUP 1564 REQUIRE_MESSAGE(cacheBinEmpty(idx), "Object is still cached."); 1565 } 1566 } 1567 } 1568 1569 private: 1570 bool cacheBinEmpty(int idx) { 1571 return (loc->hugeCache.bin[idx].cachedSize.load(std::memory_order_relaxed) == 0 && loc->hugeCache.bin[idx].get() == nullptr); 1572 } 1573 bool objectInCacheBin(int idx, size_t size) { 1574 return (loc->hugeCache.bin[idx].cachedSize.load(std::memory_order_relaxed) != 0 && 1575 loc->hugeCache.bin[idx].cachedSize.load(std::memory_order_relaxed) % size == 0); 1576 } 1577 bool sizeInCacheRange(size_t size) { 1578 return size <= sieveSize || size >= hugeSize; 1579 } 1580 size_t alignedSizeFromIdx(int idx) { 1581 return rml::internal::LargeObjectCache::alignToBin(hocIdxToSizeFormula(idx)); 1582 } 1583 }; 1584 1585 // TBBMALLOC_SET_HUGE_OBJECT_THRESHOLD value should be set before the test, 1586 // through scalable API or env variable 1587 void TestHugeSizeThresholdImpl(LargeObjectCache* loc, size_t hugeSize, bool fullTesting) { 1588 HOThresholdTester test = {loc, hugeSize}; 1589 test.populateCache(); 1590 // Check the default sieve value 1591 test.check(HOThresholdTester::NO_CLEANUP); 1592 1593 if(fullTesting) { 1594 // Check that objects above threshold stay in cache after regular cleanup 1595 test.clean(/*all*/false); 1596 test.check(HOThresholdTester::REGULAR_CLEANUP); 1597 } 1598 // Check that all objects dropped from cache after hard cleanup (ignore huge obects threshold) 1599 test.clean(/*all*/true); 1600 test.check(HOThresholdTester::HARD_CLEANUP); 1601 // Restore previous settings 1602 loc->setHugeSizeThreshold(LargeObjectCache::maxHugeSize); 1603 loc->reset(); 1604 } 1605 1606 /* 1607 * Test for default huge size and behaviour when huge object settings defined 1608 */ 1609 void TestHugeSizeThreshold() { 1610 // Clean up if something was allocated before the test and reset cache state 1611 scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, nullptr); 1612 LargeObjectCache* loc = &defaultMemPool->extMemPool.loc; 1613 // Restore default settings just in case 1614 loc->setHugeSizeThreshold(LargeObjectCache::maxHugeSize); 1615 loc->reset(); 1616 // Firstly check default huge size value (with max huge object threshold). 1617 // Everything that more then this value should be released to OS without caching. 1618 TestHugeSizeThresholdImpl(loc, loc->hugeSizeThreshold, false); 1619 // Then set huge object threshold. 1620 // All objects with sizes after threshold will be released only after the hard cleanup. 1621 #if !__TBB_WIN8UI_SUPPORT 1622 // Unit testing for environment variable 1623 utils::SetEnv("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD","67108864"); 1624 // Large object cache reads threshold environment during initialization. 1625 // Reset the value before the test. 1626 loc->hugeSizeThreshold = 0; 1627 // Reset logical time to prevent regular cleanup 1628 loc->cacheCurrTime = 0; 1629 loc->init(&defaultMemPool->extMemPool); 1630 TestHugeSizeThresholdImpl(loc, 64 * MByte, true); 1631 #endif 1632 // Unit testing for scalable_allocation_command 1633 scalable_allocation_mode(TBBMALLOC_SET_HUGE_SIZE_THRESHOLD, 56 * MByte); 1634 TestHugeSizeThresholdImpl(loc, 56 * MByte, true); 1635 // Verify that objects whose sizes align to maxHugeSize are not cached. 1636 size_t sz = LargeObjectCache::maxHugeSize; 1637 size_t aligned_sz = LargeObjectCache::alignToBin(sz); 1638 REQUIRE_MESSAGE(sz == aligned_sz, "maxHugeSize should be aligned."); 1639 REQUIRE_MESSAGE(!loc->sizeInCacheRange(sz), "Upper bound sized object shouldn't be cached."); 1640 REQUIRE_MESSAGE(loc->get(sz) == nullptr, "Upper bound sized object shouldn't be cached."); 1641 } 1642 1643 //! \brief \ref error_guessing 1644 TEST_CASE("Main test case") { 1645 scalable_allocation_mode(USE_HUGE_PAGES, 0); 1646 #if !__TBB_WIN8UI_SUPPORT 1647 utils::SetEnv("TBB_MALLOC_USE_HUGE_PAGES","yes"); 1648 #endif 1649 checkNoHugePages(); 1650 // backreference requires that initialization was done 1651 if(!isMallocInitialized()) doInitialization(); 1652 checkNoHugePages(); 1653 // to succeed, leak detection must be the 1st memory-intensive test 1654 TestBackRef(); 1655 TestCleanAllBuffers<4*1024>(); 1656 TestCleanAllBuffers<16*1024>(); 1657 TestCleanThreadBuffers(); 1658 TestPools(); 1659 TestBackend(); 1660 1661 #if MALLOC_CHECK_RECURSION 1662 for( int p=MaxThread; p>=MinThread; --p ) { 1663 TestStartupAlloc::initBarrier( p ); 1664 utils::NativeParallelFor( p, TestStartupAlloc() ); 1665 REQUIRE_MESSAGE(!firstStartupBlock, "Startup heap memory leak detected"); 1666 } 1667 #endif 1668 TestLargeObjectCache(); 1669 TestObjectRecognition(); 1670 TestBitMask(); 1671 TestHeapLimit(); 1672 TestLOC(); 1673 TestSlabAlignment(); 1674 } 1675 1676 //! \brief \ref error_guessing 1677 TEST_CASE("Decreasing reallocation") { 1678 if (!isMallocInitialized()) doInitialization(); 1679 TestReallocDecreasing(); 1680 } 1681 1682 //! \brief \ref error_guessing 1683 TEST_CASE("Large object cache bins converter") { 1684 if (!isMallocInitialized()) doInitialization(); 1685 TestLOCacheBinsConverter(); 1686 } 1687 1688 //! \brief \ref error_guessing 1689 TEST_CASE("Huge size threshold settings") { 1690 if (!isMallocInitialized()) doInitialization(); 1691 TestHugeSizeThreshold(); 1692 } 1693 1694 #if __unix__ 1695 //! \brief \ref error_guessing 1696 TEST_CASE("Transparent huge pages") { 1697 if (utils::isTHPEnabledOnMachine()) { 1698 if (!isMallocInitialized()) doInitialization(); 1699 TestTHP(); 1700 } else { 1701 INFO("Transparent Huge Pages is not supported on the system - skipped the test\n"); 1702 } 1703 } 1704 #endif 1705 1706 #if !__TBB_WIN8UI_SUPPORT && defined(_WIN32) 1707 //! \brief \ref error_guessing 1708 TEST_CASE("Function replacement log") { 1709 TesFunctionReplacementLog(); 1710 } 1711 #endif 1712