151c0b2f7Stbbdev /* 2*b15aabb3Stbbdev Copyright (c) 2005-2021 Intel Corporation 351c0b2f7Stbbdev 451c0b2f7Stbbdev Licensed under the Apache License, Version 2.0 (the "License"); 551c0b2f7Stbbdev you may not use this file except in compliance with the License. 651c0b2f7Stbbdev You may obtain a copy of the License at 751c0b2f7Stbbdev 851c0b2f7Stbbdev http://www.apache.org/licenses/LICENSE-2.0 951c0b2f7Stbbdev 1051c0b2f7Stbbdev Unless required by applicable law or agreed to in writing, software 1151c0b2f7Stbbdev distributed under the License is distributed on an "AS IS" BASIS, 1251c0b2f7Stbbdev WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1351c0b2f7Stbbdev See the License for the specific language governing permissions and 1451c0b2f7Stbbdev limitations under the License. 1551c0b2f7Stbbdev */ 1651c0b2f7Stbbdev 1751c0b2f7Stbbdev #define __TBB_NO_IMPLICIT_LINKAGE 1 1851c0b2f7Stbbdev 1951c0b2f7Stbbdev #include "common/test.h" 2051c0b2f7Stbbdev #include "common/utils.h" 2151c0b2f7Stbbdev #include "common/spin_barrier.h" 2249e08aacStbbdev #include "oneapi/tbb/detail/_utils.h" 2351c0b2f7Stbbdev #include "tbb/scalable_allocator.h" 2451c0b2f7Stbbdev #include <thread> 2551c0b2f7Stbbdev 2651c0b2f7Stbbdev static constexpr std::size_t MaxTasks = 16; 2751c0b2f7Stbbdev std::atomic<std::size_t> FinishedTasks; 2851c0b2f7Stbbdev 2951c0b2f7Stbbdev static constexpr std::size_t MaxThread = 4; 3051c0b2f7Stbbdev 3151c0b2f7Stbbdev /*--------------------------------------------------------------------*/ 3251c0b2f7Stbbdev // The regression test against a bug triggered when malloc initialization 3351c0b2f7Stbbdev // and thread shutdown were called simultaneously, in which case 3451c0b2f7Stbbdev // Windows dynamic loader lock and allocator initialization/termination lock 3551c0b2f7Stbbdev // were taken in different order. 3651c0b2f7Stbbdev 3751c0b2f7Stbbdev 3851c0b2f7Stbbdev 3951c0b2f7Stbbdev class TestFunc1 { 4051c0b2f7Stbbdev utils::SpinBarrier* my_barr; 4151c0b2f7Stbbdev public: 4251c0b2f7Stbbdev TestFunc1 (utils::SpinBarrier& barr) : my_barr(&barr) {} 4351c0b2f7Stbbdev void operator() (bool do_malloc) const { 4451c0b2f7Stbbdev my_barr->wait(); 4551c0b2f7Stbbdev if (do_malloc) scalable_malloc(10); 4651c0b2f7Stbbdev ++FinishedTasks; 4751c0b2f7Stbbdev } 4851c0b2f7Stbbdev }; 4951c0b2f7Stbbdev 5051c0b2f7Stbbdev void Test1 () { 5151c0b2f7Stbbdev std::size_t NTasks = utils::min(MaxTasks, utils::max(std::size_t(2), MaxThread)); 5251c0b2f7Stbbdev utils::SpinBarrier barr(NTasks); 5351c0b2f7Stbbdev TestFunc1 tf(barr); 5451c0b2f7Stbbdev FinishedTasks = 0; 5551c0b2f7Stbbdev 5651c0b2f7Stbbdev utils::NativeParallelFor(NTasks, [&] (std::size_t thread_idx) { 5751c0b2f7Stbbdev tf(thread_idx % 2 == 0); 58*b15aabb3Stbbdev while (FinishedTasks != NTasks) utils::yield(); 5951c0b2f7Stbbdev }); 6051c0b2f7Stbbdev } 6151c0b2f7Stbbdev 6251c0b2f7Stbbdev /*--------------------------------------------------------------------*/ 6351c0b2f7Stbbdev // The regression test against a bug when cross-thread deallocation 6451c0b2f7Stbbdev // caused livelock at thread shutdown. 6551c0b2f7Stbbdev 6651c0b2f7Stbbdev std::atomic<void*> gPtr(nullptr); 6751c0b2f7Stbbdev 6851c0b2f7Stbbdev class TestFunc2a { 6951c0b2f7Stbbdev utils::SpinBarrier* my_barr; 7051c0b2f7Stbbdev public: 7151c0b2f7Stbbdev TestFunc2a (utils::SpinBarrier& barr) : my_barr(&barr) {} 7251c0b2f7Stbbdev void operator() (std::size_t) const { 7351c0b2f7Stbbdev gPtr = scalable_malloc(8); 7451c0b2f7Stbbdev my_barr->wait(); 7551c0b2f7Stbbdev ++FinishedTasks; 7651c0b2f7Stbbdev } 7751c0b2f7Stbbdev }; 7851c0b2f7Stbbdev 7951c0b2f7Stbbdev class TestFunc2b { 8051c0b2f7Stbbdev utils::SpinBarrier* my_barr; 8151c0b2f7Stbbdev std::thread& my_ward; 8251c0b2f7Stbbdev public: 8351c0b2f7Stbbdev TestFunc2b (utils::SpinBarrier& barr, std::thread& t) : my_barr(&barr), my_ward(t) {} 8451c0b2f7Stbbdev void operator() (std::size_t) const { 8551c0b2f7Stbbdev utils::SpinWaitWhileEq(gPtr, (void*)nullptr); 8651c0b2f7Stbbdev scalable_free(gPtr); 8751c0b2f7Stbbdev my_barr->wait(); 8851c0b2f7Stbbdev my_ward.join(); 8951c0b2f7Stbbdev ++FinishedTasks; 9051c0b2f7Stbbdev } 9151c0b2f7Stbbdev }; 9251c0b2f7Stbbdev void Test2() { 9351c0b2f7Stbbdev utils::SpinBarrier barr(2); 9451c0b2f7Stbbdev TestFunc2a func2a(barr); 9551c0b2f7Stbbdev std::thread t2a; 9651c0b2f7Stbbdev TestFunc2b func2b(barr, t2a); 9751c0b2f7Stbbdev FinishedTasks = 0; 9851c0b2f7Stbbdev t2a = std::thread(func2a, std::size_t(0)); 9951c0b2f7Stbbdev std::thread t2b(func2b, std::size_t(1)); 100*b15aabb3Stbbdev 101*b15aabb3Stbbdev while (FinishedTasks != 2) utils::yield(); 10251c0b2f7Stbbdev 10351c0b2f7Stbbdev t2b.join(); // t2a is monitored by t2b 10451c0b2f7Stbbdev 10551c0b2f7Stbbdev if (t2a.joinable()) t2a.join(); 10651c0b2f7Stbbdev } 10751c0b2f7Stbbdev 10851c0b2f7Stbbdev #if _WIN32||_WIN64 10951c0b2f7Stbbdev 11051c0b2f7Stbbdev void TestKeyDtor() {} 11151c0b2f7Stbbdev 11251c0b2f7Stbbdev #else 11351c0b2f7Stbbdev 11451c0b2f7Stbbdev void *currSmall, *prevSmall, *currLarge, *prevLarge; 11551c0b2f7Stbbdev 11651c0b2f7Stbbdev extern "C" void threadDtor(void*) { 11751c0b2f7Stbbdev // First, release memory that was allocated before; 11851c0b2f7Stbbdev // it will not re-initialize the thread-local data if already deleted 11951c0b2f7Stbbdev prevSmall = currSmall; 12051c0b2f7Stbbdev scalable_free(currSmall); 12151c0b2f7Stbbdev prevLarge = currLarge; 12251c0b2f7Stbbdev scalable_free(currLarge); 12351c0b2f7Stbbdev // Then, allocate more memory. 12451c0b2f7Stbbdev // It will re-initialize the allocator data in the thread. 12551c0b2f7Stbbdev scalable_free(scalable_malloc(8)); 12651c0b2f7Stbbdev } 12751c0b2f7Stbbdev 12851c0b2f7Stbbdev inline bool intersectingObjects(const void *p1, const void *p2, size_t n) 12951c0b2f7Stbbdev { 13051c0b2f7Stbbdev return p1>p2 ? ((uintptr_t)p1-(uintptr_t)p2)<n : ((uintptr_t)p2-(uintptr_t)p1)<n; 13151c0b2f7Stbbdev } 13251c0b2f7Stbbdev 13351c0b2f7Stbbdev struct TestThread: utils::NoAssign { 13451c0b2f7Stbbdev TestThread(int ) {} 13551c0b2f7Stbbdev 13651c0b2f7Stbbdev void operator()( std::size_t /*id*/ ) const { 13751c0b2f7Stbbdev pthread_key_t key; 13851c0b2f7Stbbdev 13951c0b2f7Stbbdev currSmall = scalable_malloc(8); 14051c0b2f7Stbbdev REQUIRE_MESSAGE((!prevSmall || currSmall==prevSmall), "Possible memory leak"); 14151c0b2f7Stbbdev currLarge = scalable_malloc(32*1024); 14251c0b2f7Stbbdev // intersectingObjects takes into account object shuffle 14351c0b2f7Stbbdev REQUIRE_MESSAGE((!prevLarge || intersectingObjects(currLarge, prevLarge, 32*1024)), "Possible memory leak"); 14451c0b2f7Stbbdev pthread_key_create( &key, &threadDtor ); 14551c0b2f7Stbbdev pthread_setspecific(key, (const void*)42); 14651c0b2f7Stbbdev } 14751c0b2f7Stbbdev }; 14851c0b2f7Stbbdev 14951c0b2f7Stbbdev // test releasing memory from pthread key destructor 15051c0b2f7Stbbdev void TestKeyDtor() { 15151c0b2f7Stbbdev // Allocate region for large objects to prevent whole region release 15251c0b2f7Stbbdev // on scalable_free(currLarge) call, which result in wrong assert inside intersectingObjects check 15351c0b2f7Stbbdev void* preventLargeRelease = scalable_malloc(32*1024); 15451c0b2f7Stbbdev for (int i=0; i<4; i++) 15551c0b2f7Stbbdev utils::NativeParallelFor( 1, TestThread(1) ); 15651c0b2f7Stbbdev scalable_free(preventLargeRelease); 15751c0b2f7Stbbdev } 15851c0b2f7Stbbdev 15951c0b2f7Stbbdev #endif // _WIN32||_WIN64 16051c0b2f7Stbbdev 16151c0b2f7Stbbdev 16251c0b2f7Stbbdev //! \brief \ref error_guessing 16351c0b2f7Stbbdev TEST_CASE("test1") { 16451c0b2f7Stbbdev Test1(); // requires malloc initialization so should be first 16551c0b2f7Stbbdev } 16651c0b2f7Stbbdev 16751c0b2f7Stbbdev //! \brief \ref error_guessing 16851c0b2f7Stbbdev TEST_CASE("test2") { 16951c0b2f7Stbbdev Test2(); 17051c0b2f7Stbbdev } 17151c0b2f7Stbbdev 17251c0b2f7Stbbdev //! \brief \ref error_guessing 17351c0b2f7Stbbdev TEST_CASE("test key dtor") { 17451c0b2f7Stbbdev TestKeyDtor(); 17551c0b2f7Stbbdev } 176