xref: /oneTBB/src/tbb/governor.cpp (revision f9fd1bee)
/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "governor.h"
#include "threading_control.h"
#include "main.h"
#include "thread_data.h"
#include "market.h"
#include "arena.h"
#include "dynamic_link.h"
#include "concurrent_monitor.h"
#include "thread_dispatcher.h"

#include "oneapi/tbb/task_group.h"
#include "oneapi/tbb/global_control.h"
#include "oneapi/tbb/tbb_allocator.h"
#include "oneapi/tbb/info.h"

#include "task_dispatcher.h"

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <atomic>
#include <algorithm>

namespace tbb {
namespace detail {
namespace r1 {

void clear_address_waiter_table();

//! Defined in global_control.cpp
bool remove_and_check_if_empty(d1::global_control& gc);
bool is_present(d1::global_control& gc);

namespace rml {
tbb_server* make_private_server( tbb_client& client );
} // namespace rml

namespace system_topology {
    void destroy();
}

//------------------------------------------------------------------------
// governor
//------------------------------------------------------------------------

void governor::acquire_resources () {
#if __TBB_USE_POSIX
    int status = theTLS.create(auto_terminate);
#else
    int status = theTLS.create();
#endif
    if( status )
        handle_perror(status, "TBB failed to initialize task scheduler TLS\n");
    detect_cpu_features(cpu_features);

    is_rethrow_broken = gcc_rethrow_exception_broken();
}

void governor::release_resources () {
    theRMLServerFactory.close();
    destroy_process_mask();

    __TBB_ASSERT(!(__TBB_InitOnce::initialization_done() && theTLS.get()), "TBB is unloaded while thread data is still alive?");

    int status = theTLS.destroy();
    if( status )
        runtime_warning("failed to destroy task scheduler TLS: %s", std::strerror(status));
    clear_address_waiter_table();

    system_topology::destroy();
    dynamic_unlink_all();
}

rml::tbb_server* governor::create_rml_server ( rml::tbb_client& client ) {
    rml::tbb_server* server = nullptr;
    if( !UsePrivateRML ) {
        ::rml::factory::status_type status = theRMLServerFactory.make_server( server, client );
        if( status != ::rml::factory::st_success ) {
            UsePrivateRML = true;
            runtime_warning( "rml::tbb_factory::make_server failed with status %x, falling back on private rml", status );
        }
    }
    if ( !server ) {
        __TBB_ASSERT( UsePrivateRML, nullptr);
        server = rml::make_private_server( client );
    }
    __TBB_ASSERT( server, "Failed to create RML server" );
    return server;
}

void governor::one_time_init() {
    if ( !__TBB_InitOnce::initialization_done() ) {
        DoOneTimeInitialization();
    }
}

bool governor::does_client_join_workers(const rml::tbb_client &client) {
    return ((const thread_dispatcher&)client).must_join_workers();
}

/*
    There is no portable way to get the stack base address in POSIX; however, modern
    Linux versions provide the pthread_getattr_np API that can be used to obtain a
    thread's stack size and base address. Unfortunately, even this function does not
    provide enough information for the main thread on the IA-64 architecture (the RSE
    spill area and the memory stack are allocated as two separate discontinuous chunks
    of memory), and there is no portable way to discern the main and the secondary
    threads. Thus for macOS* and for the IA-64 architecture on Linux* OS we use the TBB
    worker stack size for all threads and use the current stack top as the stack base.
    This simplified approach is based on the following assumptions:
    1) If the default stack size is insufficient for the user app needs, the
    required amount will be explicitly specified by the user at the point of the
    TBB scheduler initialization (as an argument to the tbb::task_scheduler_init
    constructor).
    2) When an external thread initializes the scheduler, it has enough space on its
    stack. Here "enough" means "at least as much as worker threads have".
    3) If the user app strives to conserve memory by cutting the stack size, it
    should do this for TBB workers too (as in #1).
*/
static std::uintptr_t get_stack_base(std::size_t stack_size) {
    // Stacks grow top-down. The highest address is called the "stack base",
    // and the lowest is the "stack limit".
#if __TBB_USE_WINAPI
    suppress_unused_warning(stack_size);
    NT_TIB* pteb = (NT_TIB*)NtCurrentTeb();
    __TBB_ASSERT(&pteb < pteb->StackBase && &pteb > pteb->StackLimit, "invalid stack info in TEB");
    return reinterpret_cast<std::uintptr_t>(pteb->StackBase);
#else
    // There is no portable way to get the stack base address in POSIX, so we use
    // a non-portable method (available on all modern Linux versions) or the simplified
    // approach based on the common-sense assumptions. The most important assumption
    // is that the main thread's stack size is not less than that of other threads.

    // Points to the lowest addressable byte of a stack.
    void* stack_limit = nullptr;
#if __linux__ && !__bg__
    size_t np_stack_size = 0;
    pthread_attr_t np_attr_stack;
    if (0 == pthread_getattr_np(pthread_self(), &np_attr_stack)) {
        if (0 == pthread_attr_getstack(&np_attr_stack, &stack_limit, &np_stack_size)) {
            __TBB_ASSERT( &stack_limit > stack_limit, "stack size must be positive" );
        }
        pthread_attr_destroy(&np_attr_stack);
    }
#endif /* __linux__ */
    std::uintptr_t stack_base{};
    if (stack_limit) {
        stack_base = reinterpret_cast<std::uintptr_t>(stack_limit) + stack_size;
    } else {
        // Use an anchor as a base stack address.
        int anchor{};
        stack_base = reinterpret_cast<std::uintptr_t>(&anchor);
    }
    return stack_base;
#endif /* __TBB_USE_WINAPI */
}
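
// Illustrative sketch (not part of this translation unit): the "anchor" fallback
// above, shown standalone. Taking the address of a local variable approximates the
// current stack top; the 8 MB stack size below is a hypothetical default chosen
// only for demonstration.
//
//     #include <cstddef>
//     #include <cstdint>
//     #include <cstdio>
//
//     int main() {
//         const std::size_t assumed_stack_size = 8u * 1024u * 1024u; // hypothetical
//         int anchor{};  // a local variable lives near the top of the current stack
//         std::uintptr_t stack_base  = reinterpret_cast<std::uintptr_t>(&anchor);
//         std::uintptr_t stack_limit = stack_base - assumed_stack_size;
//         std::printf("approx. base: %p, approx. limit: %p\n",
//                     reinterpret_cast<void*>(stack_base),
//                     reinterpret_cast<void*>(stack_limit));
//     }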

#if (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED
static void register_external_thread_destructor() {
    struct thread_destructor {
        ~thread_destructor() {
            governor::terminate_external_thread();
        }
    };
    // ~thread_destructor() will be called during the calling thread's termination
    static thread_local thread_destructor thr_destructor;
}
#endif // (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED
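
// Illustrative sketch (not part of this translation unit): the generic
// thread_local-destructor idiom used above. `on_thread_exit` is a hypothetical
// callback; any side-effecting function can be hooked to thread termination
// this way.
//
//     #include <cstdio>
//     #include <thread>
//
//     void on_thread_exit() { std::puts("thread is terminating"); }
//
//     void register_thread_exit_hook() {
//         struct hook {
//             ~hook() { on_thread_exit(); }  // runs when the calling thread exits
//         };
//         static thread_local hook h;        // constructed once per calling thread
//     }
//
//     int main() {
//         std::thread t([] { register_thread_exit_hook(); });
//         t.join();  // prints "thread is terminating" on the way out
//     }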

void governor::init_external_thread() {
    one_time_init();
    // Create a new scheduler instance with an arena
    int num_slots = default_num_threads();
    // TODO_REVAMP: support an external thread without an implicit arena
    int num_reserved_slots = 1;
    unsigned arena_priority_level = 1; // corresponds to tbb::task_arena::priority::normal
    std::size_t stack_size = 0;
    threading_control* thr_control = threading_control::register_public_reference();
    arena& a = arena::create(thr_control, num_slots, num_reserved_slots, arena_priority_level);
    // An external thread always occupies the first slot
    thread_data& td = *new(cache_aligned_allocate(sizeof(thread_data))) thread_data(0, false);
    td.attach_arena(a, /*slot index*/ 0);
    __TBB_ASSERT(td.my_inbox.is_idle_state(false), nullptr);

    stack_size = a.my_threading_control->worker_stack_size();
    std::uintptr_t stack_base = get_stack_base(stack_size);
    task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher();
    td.enter_task_dispatcher(task_disp, calculate_stealing_threshold(stack_base, stack_size));

    td.my_arena_slot->occupy();
    thr_control->register_thread(td);
    set_thread_data(td);
#if (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED
    // The external thread destructor is called from DllMain, which is not available with a static build.
    // Therefore, we need to register the current thread to call the destructor during thread termination.
    register_external_thread_destructor();
#endif
}

void governor::auto_terminate(void* tls) {
    __TBB_ASSERT(get_thread_data_if_initialized() == nullptr ||
        get_thread_data_if_initialized() == tls, nullptr);
    if (tls) {
        thread_data* td = static_cast<thread_data*>(tls);

        auto clear_tls = [td] {
            td->~thread_data();
            cache_aligned_deallocate(td);
            clear_thread_data();
        };

        // Only an external thread can be inside an arena during termination.
        if (td->my_arena_slot) {
            arena* a = td->my_arena;
            threading_control* thr_control = a->my_threading_control;

            // If the TLS slot is already cleared by the OS or the underlying concurrency
            // runtime, restore its value to properly clean up the arena
            if (!is_thread_data_set(td)) {
                set_thread_data(*td);
            }

            a->my_observers.notify_exit_observers(td->my_last_observer, td->my_is_worker);

            td->leave_task_dispatcher();
            td->my_arena_slot->release();
            // Release the arena
            a->on_thread_leaving(arena::ref_external);

            thr_control->unregister_thread(*td);

            // The TLS should be cleared before releasing the public reference because
            // the threading control can destroy the TLS key if we keep the last reference
            clear_tls();

            // If there was an associated arena, it added a public reference to the threading control
            thr_control->unregister_public_reference(/* blocking terminate =*/ false);
        } else {
            clear_tls();
        }
    }
    __TBB_ASSERT(get_thread_data_if_initialized() == nullptr, nullptr);
}

void governor::initialize_rml_factory () {
    ::rml::factory::status_type res = theRMLServerFactory.open();
    UsePrivateRML = res != ::rml::factory::st_success;
}

void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle& handle) {
    handle.m_ctl = new(allocate_memory(sizeof(global_control))) global_control(global_control::scheduler_handle, 1);
}

void release_impl(d1::task_scheduler_handle& handle) {
    if (handle.m_ctl != nullptr) {
        handle.m_ctl->~global_control();
        deallocate_memory(handle.m_ctl);
        handle.m_ctl = nullptr;
    }
}

bool finalize_impl(d1::task_scheduler_handle& handle) {
    __TBB_ASSERT_RELEASE(handle, "trying to finalize with null handle");
    __TBB_ASSERT(is_present(*handle.m_ctl), "finalize or release was already called on this object");

    bool ok = true; // ok if threading_control does not exist yet
    if (threading_control::is_present()) {
        thread_data* td = governor::get_thread_data_if_initialized();
        if (td) {
            task_dispatcher* task_disp = td->my_task_dispatcher;
            __TBB_ASSERT(task_disp, nullptr);
            if (task_disp->m_properties.outermost && !td->my_is_worker) { // is not inside a parallel region
                governor::auto_terminate(td);
            }
        }

        if (remove_and_check_if_empty(*handle.m_ctl)) {
            ok = threading_control::unregister_lifetime_control(/*blocking_terminate*/ true);
        } else {
            ok = false;
        }
    }

    return ok;
}

bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle& handle, std::intptr_t mode) {
    if (mode == d1::release_nothrowing) {
        release_impl(handle);
        return true;
    } else {
        bool ok = finalize_impl(handle);
        // TODO: it is unsafe if finalize is called concurrently with a subsequent library unload
        release_impl(handle);
        if (mode == d1::finalize_throwing && !ok) {
            throw_exception(exception_id::unsafe_wait);
        }
        return ok;
    }
}
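
// Illustrative user-side sketch (not part of this translation unit): how the
// finalize() entry point above is typically reached through the public API. The
// construction syntax for task_scheduler_handle varies across oneTBB versions;
// tbb::attach{} is assumed here.
//
//     #include <oneapi/tbb/global_control.h>
//     #include <oneapi/tbb/parallel_for.h>
//
//     int main() {
//         oneapi::tbb::task_scheduler_handle handle{ oneapi::tbb::attach{} };
//         oneapi::tbb::parallel_for(0, 100, [](int) { /* some work */ });
//         // Blocks until worker threads are joined; on failure, the throwing form
//         // maps to d1::finalize_throwing above and raises an exception.
//         oneapi::tbb::finalize(handle);
//     }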

#if __TBB_ARENA_BINDING

#if __TBB_WEAK_SYMBOLS_PRESENT
#pragma weak __TBB_internal_initialize_system_topology
#pragma weak __TBB_internal_destroy_system_topology
#pragma weak __TBB_internal_allocate_binding_handler
#pragma weak __TBB_internal_deallocate_binding_handler
#pragma weak __TBB_internal_apply_affinity
#pragma weak __TBB_internal_restore_affinity
#pragma weak __TBB_internal_get_default_concurrency

extern "C" {
void __TBB_internal_initialize_system_topology(
    size_t groups_num,
    int& numa_nodes_count, int*& numa_indexes_list,
    int& core_types_count, int*& core_types_indexes_list
);
void __TBB_internal_destroy_system_topology( );

// TODO: consider renaming to `create_binding_handler` and `destroy_binding_handler`
binding_handler* __TBB_internal_allocate_binding_handler( int slot_num, int numa_id, int core_type_id, int max_threads_per_core );
void __TBB_internal_deallocate_binding_handler( binding_handler* handler_ptr );

void __TBB_internal_apply_affinity( binding_handler* handler_ptr, int slot_num );
void __TBB_internal_restore_affinity( binding_handler* handler_ptr, int slot_num );

int __TBB_internal_get_default_concurrency( int numa_id, int core_type_id, int max_threads_per_core );
}
#endif /* __TBB_WEAK_SYMBOLS_PRESENT */

// Stubs that are used if the TBBbind library is unavailable.
static void dummy_destroy_system_topology ( ) { }
static binding_handler* dummy_allocate_binding_handler ( int, int, int, int ) { return nullptr; }
static void dummy_deallocate_binding_handler ( binding_handler* ) { }
static void dummy_apply_affinity ( binding_handler*, int ) { }
static void dummy_restore_affinity ( binding_handler*, int ) { }
static int dummy_get_default_concurrency( int, int, int ) { return governor::default_num_threads(); }

// Handlers for communication with TBBbind
static void (*initialize_system_topology_ptr)(
    size_t groups_num,
    int& numa_nodes_count, int*& numa_indexes_list,
    int& core_types_count, int*& core_types_indexes_list
) = nullptr;
static void (*destroy_system_topology_ptr)( ) = dummy_destroy_system_topology;

static binding_handler* (*allocate_binding_handler_ptr)( int slot_num, int numa_id, int core_type_id, int max_threads_per_core )
    = dummy_allocate_binding_handler;
static void (*deallocate_binding_handler_ptr)( binding_handler* handler_ptr )
    = dummy_deallocate_binding_handler;
static void (*apply_affinity_ptr)( binding_handler* handler_ptr, int slot_num )
    = dummy_apply_affinity;
static void (*restore_affinity_ptr)( binding_handler* handler_ptr, int slot_num )
    = dummy_restore_affinity;
int (*get_default_concurrency_ptr)( int numa_id, int core_type_id, int max_threads_per_core )
    = dummy_get_default_concurrency;

#if _WIN32 || _WIN64 || __unix__ || __APPLE__

// Table describing how to link the handlers.
static const dynamic_link_descriptor TbbBindLinkTable[] = {
    DLD(__TBB_internal_initialize_system_topology, initialize_system_topology_ptr),
    DLD(__TBB_internal_destroy_system_topology, destroy_system_topology_ptr),
#if __TBB_CPUBIND_PRESENT
    DLD(__TBB_internal_allocate_binding_handler, allocate_binding_handler_ptr),
    DLD(__TBB_internal_deallocate_binding_handler, deallocate_binding_handler_ptr),
    DLD(__TBB_internal_apply_affinity, apply_affinity_ptr),
    DLD(__TBB_internal_restore_affinity, restore_affinity_ptr),
#endif
    DLD(__TBB_internal_get_default_concurrency, get_default_concurrency_ptr)
};

static const unsigned LinkTableSize = sizeof(TbbBindLinkTable) / sizeof(dynamic_link_descriptor);

#if TBB_USE_DEBUG
#define DEBUG_SUFFIX "_debug"
#else
#define DEBUG_SUFFIX
#endif /* TBB_USE_DEBUG */

#if _WIN32 || _WIN64
#define LIBRARY_EXTENSION ".dll"
#define LIBRARY_PREFIX
#elif __APPLE__
#define LIBRARY_EXTENSION __TBB_STRING(.3.dylib)
#define LIBRARY_PREFIX "lib"
#elif __unix__
#define LIBRARY_EXTENSION __TBB_STRING(.so.3)
#define LIBRARY_PREFIX "lib"
#endif /* __unix__ */

#define TBBBIND_NAME LIBRARY_PREFIX "tbbbind" DEBUG_SUFFIX LIBRARY_EXTENSION
#define TBBBIND_2_0_NAME LIBRARY_PREFIX "tbbbind_2_0" DEBUG_SUFFIX LIBRARY_EXTENSION

#define TBBBIND_2_5_NAME LIBRARY_PREFIX "tbbbind_2_5" DEBUG_SUFFIX LIBRARY_EXTENSION
#endif /* _WIN32 || _WIN64 || __unix__ || __APPLE__ */

// Representation of the system hardware topology information on the TBB side.
// The system topology may be initialized by a third-party component (e.g. hwloc)
// or just filled in with default stubs.
namespace system_topology {

constexpr int automatic = -1;

static std::atomic<do_once_state> initialization_state;

namespace {
int  numa_nodes_count = 0;
int* numa_nodes_indexes = nullptr;

int  core_types_count = 0;
int* core_types_indexes = nullptr;

const char* load_tbbbind_shared_object() {
#if _WIN32 || _WIN64 || __unix__ || __APPLE__
#if _WIN32 && !_WIN64
    // For 32-bit Windows applications, process affinity masks can only support up to 32 logical CPUs.
    SYSTEM_INFO si;
    GetNativeSystemInfo(&si);
    if (si.dwNumberOfProcessors > 32) return nullptr;
#endif /* _WIN32 && !_WIN64 */
    for (const auto& tbbbind_version : {TBBBIND_2_5_NAME, TBBBIND_2_0_NAME, TBBBIND_NAME}) {
        if (dynamic_link(tbbbind_version, TbbBindLinkTable, LinkTableSize, nullptr, DYNAMIC_LINK_LOCAL_BINDING)) {
            return tbbbind_version;
        }
    }
#endif /* _WIN32 || _WIN64 || __unix__ || __APPLE__ */
    return nullptr;
}
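
// Illustrative sketch (not part of this translation unit): what dynamic_link()
// does in spirit, reduced to plain POSIX dlopen/dlsym for a single handler. This
// is a simplification under stated assumptions; the real implementation also
// covers Windows, weak symbols, and rollback when only part of the table links.
//
//     #include <dlfcn.h>
//
//     typedef int (*get_concurrency_fn)(int, int, int);
//
//     bool link_one(const char* lib, const char* sym, get_concurrency_fn& target) {
//         void* handle = dlopen(lib, RTLD_NOW | RTLD_LOCAL);
//         if (!handle) return false;             // library not found: keep the stub
//         void* addr = dlsym(handle, sym);
//         if (!addr) { dlclose(handle); return false; }
//         target = reinterpret_cast<get_concurrency_fn>(addr);
//         return true;                           // handler now points into the library
//     }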

int processor_groups_num() {
#if _WIN32
    return NumberOfProcessorGroups();
#else
    // Stub to improve code readability by reducing the number of compile-time conditions
    return 1;
#endif
}
} // anonymous namespace

// Tries to load the TBBbind library API; on success, gets the NUMA topology
// information from it, otherwise fills the NUMA topology with stubs.
void initialization_impl() {
    governor::one_time_init();

    if (const char* tbbbind_name = load_tbbbind_shared_object()) {
        initialize_system_topology_ptr(
            processor_groups_num(),
            numa_nodes_count, numa_nodes_indexes,
            core_types_count, core_types_indexes
        );

        PrintExtraVersionInfo("TBBBIND", tbbbind_name);
        return;
    }

    static int dummy_index = automatic;

    numa_nodes_count = 1;
    numa_nodes_indexes = &dummy_index;

    core_types_count = 1;
    core_types_indexes = &dummy_index;

    PrintExtraVersionInfo("TBBBIND", "UNAVAILABLE");
}

void initialize() {
    atomic_do_once(initialization_impl, initialization_state);
}

void destroy() {
    destroy_system_topology_ptr();
}
} // namespace system_topology

binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core) {
    system_topology::initialize();
    return allocate_binding_handler_ptr(slot_num, numa_id, core_type_id, max_threads_per_core);
}

void destroy_binding_handler(binding_handler* handler_ptr) {
    __TBB_ASSERT(deallocate_binding_handler_ptr, "tbbbind loading was not performed");
    deallocate_binding_handler_ptr(handler_ptr);
}

void apply_affinity_mask(binding_handler* handler_ptr, int slot_index) {
    __TBB_ASSERT(slot_index >= 0, "Negative thread index");
    __TBB_ASSERT(apply_affinity_ptr, "tbbbind loading was not performed");
    apply_affinity_ptr(handler_ptr, slot_index);
}

void restore_affinity_mask(binding_handler* handler_ptr, int slot_index) {
    __TBB_ASSERT(slot_index >= 0, "Negative thread index");
    __TBB_ASSERT(restore_affinity_ptr, "tbbbind loading was not performed");
    restore_affinity_ptr(handler_ptr, slot_index);
}

unsigned __TBB_EXPORTED_FUNC numa_node_count() {
    system_topology::initialize();
    return system_topology::numa_nodes_count;
}

void __TBB_EXPORTED_FUNC fill_numa_indices(int* index_array) {
    system_topology::initialize();
    std::memcpy(index_array, system_topology::numa_nodes_indexes, system_topology::numa_nodes_count * sizeof(int));
}

int __TBB_EXPORTED_FUNC numa_default_concurrency(int node_id) {
    if (node_id >= 0) {
        system_topology::initialize();
        int result = get_default_concurrency_ptr(
            node_id,
            /*core_type*/system_topology::automatic,
            /*threads_per_core*/system_topology::automatic
        );
        if (result > 0) return result;
    }
    return governor::default_num_threads();
}
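
// Illustrative user-side sketch (not part of this translation unit): the public
// tbb::info API that is backed by numa_node_count(), fill_numa_indices(), and
// numa_default_concurrency() above. Building one task_arena per NUMA node is a
// common pattern.
//
//     #include <oneapi/tbb/info.h>
//     #include <oneapi/tbb/task_arena.h>
//     #include <vector>
//
//     int main() {
//         std::vector<oneapi::tbb::numa_node_id> nodes = oneapi::tbb::info::numa_nodes();
//         std::vector<oneapi::tbb::task_arena> arenas(nodes.size());
//         for (std::size_t i = 0; i < nodes.size(); ++i) {
//             // Constrain each arena to one node; its default concurrency comes
//             // from numa_default_concurrency(nodes[i]) under the hood.
//             arenas[i].initialize(oneapi::tbb::task_arena::constraints(nodes[i]));
//         }
//     }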

unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t /*reserved*/) {
    system_topology::initialize();
    return system_topology::core_types_count;
}

void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t /*reserved*/) {
    system_topology::initialize();
    std::memcpy(index_array, system_topology::core_types_indexes, system_topology::core_types_count * sizeof(int));
}

void constraints_assertion(d1::constraints c) {
    bool is_topology_initialized = system_topology::initialization_state == do_once_state::initialized;
    __TBB_ASSERT_RELEASE(c.max_threads_per_core == system_topology::automatic || c.max_threads_per_core > 0,
        "Wrong max_threads_per_core constraints field value.");

    auto numa_nodes_begin = system_topology::numa_nodes_indexes;
    auto numa_nodes_end = system_topology::numa_nodes_indexes + system_topology::numa_nodes_count;
    __TBB_ASSERT_RELEASE(
        c.numa_id == system_topology::automatic ||
        (is_topology_initialized && std::find(numa_nodes_begin, numa_nodes_end, c.numa_id) != numa_nodes_end),
        "The constraints::numa_id value is not known to the library. Use tbb::info::numa_nodes() to get the list of possible values.");

    int* core_types_begin = system_topology::core_types_indexes;
    int* core_types_end = system_topology::core_types_indexes + system_topology::core_types_count;
    __TBB_ASSERT_RELEASE(c.core_type == system_topology::automatic ||
        (is_topology_initialized && std::find(core_types_begin, core_types_end, c.core_type) != core_types_end),
        "The constraints::core_type value is not known to the library. Use tbb::info::core_types() to get the list of possible values.");
}

int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t /*reserved*/) {
    constraints_assertion(c);

    if (c.numa_id >= 0 || c.core_type >= 0 || c.max_threads_per_core > 0) {
        system_topology::initialize();
        return get_default_concurrency_ptr(c.numa_id, c.core_type, c.max_threads_per_core);
    }
    return governor::default_num_threads();
}
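
// Illustrative user-side sketch (not part of this translation unit): core-type
// constraints that end up in constraints_default_concurrency() above. The setters
// shown are the public constraints API; the assumption that info::core_types()
// lists the most performant core type last follows the oneTBB documentation.
//
//     #include <oneapi/tbb/info.h>
//     #include <oneapi/tbb/task_arena.h>
//
//     int main() {
//         auto core_types = oneapi::tbb::info::core_types();
//         oneapi::tbb::task_arena::constraints c;
//         c.set_core_type(core_types.back())  // e.g. performance cores only
//          .set_max_threads_per_core(1);      // one thread per physical core
//         oneapi::tbb::task_arena arena(c);
//         arena.execute([] { /* work bound to the chosen cores */ });
//     }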

int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints&, intptr_t /*reserved*/) {
    return system_topology::automatic;
}
#endif /* __TBB_ARENA_BINDING */

} // namespace r1
} // namespace detail
} // namespace tbb
586