151c0b2f7Stbbdev /*
2c4568449SPavel Kumbrasev Copyright (c) 2005-2023 Intel Corporation
351c0b2f7Stbbdev
451c0b2f7Stbbdev Licensed under the Apache License, Version 2.0 (the "License");
551c0b2f7Stbbdev you may not use this file except in compliance with the License.
651c0b2f7Stbbdev You may obtain a copy of the License at
751c0b2f7Stbbdev
851c0b2f7Stbbdev http://www.apache.org/licenses/LICENSE-2.0
951c0b2f7Stbbdev
1051c0b2f7Stbbdev Unless required by applicable law or agreed to in writing, software
1151c0b2f7Stbbdev distributed under the License is distributed on an "AS IS" BASIS,
1251c0b2f7Stbbdev WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1351c0b2f7Stbbdev See the License for the specific language governing permissions and
1451c0b2f7Stbbdev limitations under the License.
1551c0b2f7Stbbdev */
1651c0b2f7Stbbdev
1751c0b2f7Stbbdev #include "governor.h"
18c4568449SPavel Kumbrasev #include "threading_control.h"
1951c0b2f7Stbbdev #include "main.h"
2051c0b2f7Stbbdev #include "thread_data.h"
2151c0b2f7Stbbdev #include "market.h"
2251c0b2f7Stbbdev #include "arena.h"
2351c0b2f7Stbbdev #include "dynamic_link.h"
244523a761Stbbdev #include "concurrent_monitor.h"
25c4568449SPavel Kumbrasev #include "thread_dispatcher.h"
2651c0b2f7Stbbdev
2749e08aacStbbdev #include "oneapi/tbb/task_group.h"
2849e08aacStbbdev #include "oneapi/tbb/global_control.h"
2949e08aacStbbdev #include "oneapi/tbb/tbb_allocator.h"
30b15aabb3Stbbdev #include "oneapi/tbb/info.h"
3151c0b2f7Stbbdev
3251c0b2f7Stbbdev #include "task_dispatcher.h"
3351c0b2f7Stbbdev
3451c0b2f7Stbbdev #include <cstdio>
3551c0b2f7Stbbdev #include <cstdlib>
3651c0b2f7Stbbdev #include <cstring>
3751c0b2f7Stbbdev #include <atomic>
38b15aabb3Stbbdev #include <algorithm>
3951c0b2f7Stbbdev
4051c0b2f7Stbbdev namespace tbb {
4151c0b2f7Stbbdev namespace detail {
4251c0b2f7Stbbdev namespace r1 {
4351c0b2f7Stbbdev
444523a761Stbbdev void clear_address_waiter_table();
454523a761Stbbdev
4651c0b2f7Stbbdev //! global_control.cpp contains definition
4751c0b2f7Stbbdev bool remove_and_check_if_empty(d1::global_control& gc);
4851c0b2f7Stbbdev bool is_present(d1::global_control& gc);
4951c0b2f7Stbbdev
5051c0b2f7Stbbdev namespace rml {
5151c0b2f7Stbbdev tbb_server* make_private_server( tbb_client& client );
5251c0b2f7Stbbdev } // namespace rml
5351c0b2f7Stbbdev
54edc30c82SIvan Kochin namespace system_topology {
55edc30c82SIvan Kochin void destroy();
56edc30c82SIvan Kochin }
57edc30c82SIvan Kochin
5851c0b2f7Stbbdev //------------------------------------------------------------------------
5951c0b2f7Stbbdev // governor
6051c0b2f7Stbbdev //------------------------------------------------------------------------
6151c0b2f7Stbbdev
acquire_resources()6251c0b2f7Stbbdev void governor::acquire_resources () {
6351c0b2f7Stbbdev #if __TBB_USE_POSIX
6451c0b2f7Stbbdev int status = theTLS.create(auto_terminate);
6551c0b2f7Stbbdev #else
6651c0b2f7Stbbdev int status = theTLS.create();
6751c0b2f7Stbbdev #endif
6851c0b2f7Stbbdev if( status )
6951c0b2f7Stbbdev handle_perror(status, "TBB failed to initialize task scheduler TLS\n");
7051c0b2f7Stbbdev detect_cpu_features(cpu_features);
714523a761Stbbdev
7251c0b2f7Stbbdev is_rethrow_broken = gcc_rethrow_exception_broken();
7351c0b2f7Stbbdev }
7451c0b2f7Stbbdev
release_resources()7551c0b2f7Stbbdev void governor::release_resources () {
7651c0b2f7Stbbdev theRMLServerFactory.close();
7751c0b2f7Stbbdev destroy_process_mask();
7851c0b2f7Stbbdev
7951c0b2f7Stbbdev __TBB_ASSERT(!(__TBB_InitOnce::initialization_done() && theTLS.get()), "TBB is unloaded while thread data still alive?");
8051c0b2f7Stbbdev
8151c0b2f7Stbbdev int status = theTLS.destroy();
8251c0b2f7Stbbdev if( status )
8351c0b2f7Stbbdev runtime_warning("failed to destroy task scheduler TLS: %s", std::strerror(status));
844523a761Stbbdev clear_address_waiter_table();
854523a761Stbbdev
86edc30c82SIvan Kochin system_topology::destroy();
8751c0b2f7Stbbdev dynamic_unlink_all();
8851c0b2f7Stbbdev }
8951c0b2f7Stbbdev
create_rml_server(rml::tbb_client & client)9051c0b2f7Stbbdev rml::tbb_server* governor::create_rml_server ( rml::tbb_client& client ) {
9157f524caSIlya Isaev rml::tbb_server* server = nullptr;
9251c0b2f7Stbbdev if( !UsePrivateRML ) {
9351c0b2f7Stbbdev ::rml::factory::status_type status = theRMLServerFactory.make_server( server, client );
9451c0b2f7Stbbdev if( status != ::rml::factory::st_success ) {
9551c0b2f7Stbbdev UsePrivateRML = true;
9651c0b2f7Stbbdev runtime_warning( "rml::tbb_factory::make_server failed with status %x, falling back on private rml", status );
9751c0b2f7Stbbdev }
9851c0b2f7Stbbdev }
9951c0b2f7Stbbdev if ( !server ) {
10057f524caSIlya Isaev __TBB_ASSERT( UsePrivateRML, nullptr);
10151c0b2f7Stbbdev server = rml::make_private_server( client );
10251c0b2f7Stbbdev }
10351c0b2f7Stbbdev __TBB_ASSERT( server, "Failed to create RML server" );
10451c0b2f7Stbbdev return server;
10551c0b2f7Stbbdev }
10651c0b2f7Stbbdev
one_time_init()10751c0b2f7Stbbdev void governor::one_time_init() {
10851c0b2f7Stbbdev if ( !__TBB_InitOnce::initialization_done() ) {
10951c0b2f7Stbbdev DoOneTimeInitialization();
11051c0b2f7Stbbdev }
11151c0b2f7Stbbdev }
11251c0b2f7Stbbdev
does_client_join_workers(const rml::tbb_client & client)113c4568449SPavel Kumbrasev bool governor::does_client_join_workers(const rml::tbb_client &client) {
114c4568449SPavel Kumbrasev return ((const thread_dispatcher&)client).must_join_workers();
115c4568449SPavel Kumbrasev }
116c4568449SPavel Kumbrasev
11751c0b2f7Stbbdev /*
11851c0b2f7Stbbdev There is no portable way to get stack base address in Posix, however the modern
11951c0b2f7Stbbdev Linux versions provide pthread_attr_np API that can be used to obtain thread's
12051c0b2f7Stbbdev stack size and base address. Unfortunately even this function does not provide
12151c0b2f7Stbbdev enough information for the main thread on IA-64 architecture (RSE spill area
12251c0b2f7Stbbdev and memory stack are allocated as two separate discontinuous chunks of memory),
12351c0b2f7Stbbdev and there is no portable way to discern the main and the secondary threads.
12451c0b2f7Stbbdev Thus for macOS* and IA-64 architecture for Linux* OS we use the TBB worker stack size for
12551c0b2f7Stbbdev all threads and use the current stack top as the stack base. This simplified
12651c0b2f7Stbbdev approach is based on the following assumptions:
12751c0b2f7Stbbdev 1) If the default stack size is insufficient for the user app needs, the
12851c0b2f7Stbbdev required amount will be explicitly specified by the user at the point of the
12951c0b2f7Stbbdev TBB scheduler initialization (as an argument to tbb::task_scheduler_init
13051c0b2f7Stbbdev constructor).
131b15aabb3Stbbdev 2) When an external thread initializes the scheduler, it has enough space on its
13251c0b2f7Stbbdev stack. Here "enough" means "at least as much as worker threads have".
13351c0b2f7Stbbdev 3) If the user app strives to conserve the memory by cutting stack size, it
13451c0b2f7Stbbdev should do this for TBB workers too (as in the #1).
13551c0b2f7Stbbdev */
//! Compute the highest address ("base") of the calling thread's stack.
//! @param stack_size Fallback stack extent used when the OS cannot report
//!        the real one (ignored on Windows, where the TEB has exact data).
//! @return The stack base address of the downward-growing stack.
static std::uintptr_t get_stack_base(std::size_t stack_size) {
    // Stacks are growing top-down. Highest address is called "stack base",
    // and the lowest is "stack limit".
#if __TBB_USE_WINAPI
    suppress_unused_warning(stack_size);
    // The Thread Environment Block records the exact stack boundaries.
    NT_TIB* pteb = (NT_TIB*)NtCurrentTeb();
    // A local variable lives on the current stack, so its address must lie
    // strictly between the recorded limit and base.
    __TBB_ASSERT(&pteb < pteb->StackBase && &pteb > pteb->StackLimit, "invalid stack info in TEB");
    return reinterpret_cast<std::uintptr_t>(pteb->StackBase);
#else
    // There is no portable way to get stack base address in Posix, so we use
    // non-portable method (on all modern Linux) or the simplified approach
    // based on the common sense assumptions. The most important assumption
    // is that the main thread's stack size is not less than that of other threads.

    // Points to the lowest addressable byte of a stack.
    void* stack_limit = nullptr;
#if __linux__ && !__bg__
    size_t np_stack_size = 0;
    pthread_attr_t np_attr_stack;
    if (0 == pthread_getattr_np(pthread_self(), &np_attr_stack)) {
        if (0 == pthread_attr_getstack(&np_attr_stack, &stack_limit, &np_stack_size)) {
            // The local is on this stack, hence its address is above the limit.
            __TBB_ASSERT( &stack_limit > stack_limit, "stack size must be positive" );
        }
        pthread_attr_destroy(&np_attr_stack);
    }
#endif /* __linux__ */
    std::uintptr_t stack_base{};
    if (stack_limit) {
        // NOTE(review): the caller-supplied stack_size is added rather than the
        // np_stack_size queried above — this matches the simplified approach
        // described in the file comment, which assumes the real stack is at
        // least as large as the TBB worker stack.
        stack_base = reinterpret_cast<std::uintptr_t>(stack_limit) + stack_size;
    } else {
        // Use an anchor as a base stack address.
        int anchor{};
        stack_base = reinterpret_cast<std::uintptr_t>(&anchor);
    }
    return stack_base;
#endif /* __TBB_USE_WINAPI */
}
17351c0b2f7Stbbdev
1749924f9e8SIlya Isaev #if (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED
register_external_thread_destructor()1759924f9e8SIlya Isaev static void register_external_thread_destructor() {
1769924f9e8SIlya Isaev struct thread_destructor {
1779924f9e8SIlya Isaev ~thread_destructor() {
1789924f9e8SIlya Isaev governor::terminate_external_thread();
1799924f9e8SIlya Isaev }
1809924f9e8SIlya Isaev };
1819924f9e8SIlya Isaev // ~thread_destructor() will be call during the calling thread termination
1829924f9e8SIlya Isaev static thread_local thread_destructor thr_destructor;
1839924f9e8SIlya Isaev }
1849924f9e8SIlya Isaev #endif // (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED
1859924f9e8SIlya Isaev
//! Attach the calling external (non-worker) thread to the scheduler:
//! creates its implicit arena, thread_data, and task dispatcher, and
//! publishes the thread_data via TLS.
void governor::init_external_thread() {
    one_time_init();
    // Create new scheduler instance with arena
    int num_slots = default_num_threads();
    // TODO_REVAMP: support an external thread without an implicit arena
    int num_reserved_slots = 1;
    unsigned arena_priority_level = 1; // corresponds to tbb::task_arena::priority::normal
    std::size_t stack_size = 0;
    // Take a public reference so the threading control outlives this thread.
    threading_control* thr_control = threading_control::register_public_reference();
    arena& a = arena::create(thr_control, num_slots, num_reserved_slots, arena_priority_level);
    // External thread always occupies the first slot
    thread_data& td = *new(cache_aligned_allocate(sizeof(thread_data))) thread_data(0, false);
    td.attach_arena(a, /*slot index*/ 0);
    __TBB_ASSERT(td.my_inbox.is_idle_state(false), nullptr);

    stack_size = a.my_threading_control->worker_stack_size();
    std::uintptr_t stack_base = get_stack_base(stack_size);
    task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher();
    // Enter the dispatcher with a stealing threshold derived from this
    // thread's stack extent.
    td.enter_task_dispatcher(task_disp, calculate_stealing_threshold(stack_base, stack_size));

    td.my_arena_slot->occupy();
    thr_control->register_thread(td);
    set_thread_data(td);
#if (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED
    // The external thread destructor is called from dllMain but it is not available with a static build.
    // Therefore, we need to register the current thread to call the destructor during thread termination.
    register_external_thread_destructor();
#endif
}
21551c0b2f7Stbbdev
//! Thread-termination callback: detaches the terminating thread from the
//! scheduler and destroys its thread_data.
//! @param tls The thread_data pointer that was stored in this thread's TLS
//!        slot; nullptr when the thread never attached to the scheduler.
void governor::auto_terminate(void* tls) {
    __TBB_ASSERT(get_thread_data_if_initialized() == nullptr ||
        get_thread_data_if_initialized() == tls, nullptr);
    if (tls) {
        thread_data* td = static_cast<thread_data*>(tls);

        // Destroys the thread_data and resets the TLS slot.
        auto clear_tls = [td] {
            td->~thread_data();
            cache_aligned_deallocate(td);
            clear_thread_data();
        };

        // Only external thread can be inside an arena during termination.
        if (td->my_arena_slot) {
            arena* a = td->my_arena;
            threading_control* thr_control = a->my_threading_control;

            // If the TLS slot is already cleared by OS or underlying concurrency
            // runtime, restore its value to properly clean up arena
            if (!is_thread_data_set(td)) {
                set_thread_data(*td);
            }

            a->my_observers.notify_exit_observers(td->my_last_observer, td->my_is_worker);

            // Leave the dispatcher before releasing the slot; the slot must be
            // free before the arena reference is dropped.
            td->leave_task_dispatcher();
            td->my_arena_slot->release();
            // Release an arena
            a->on_thread_leaving(arena::ref_external);

            thr_control->unregister_thread(*td);

            // The tls should be cleared before market::release because
            // market can destroy the tls key if we keep the last reference
            clear_tls();

            // If there was an associated arena, it added a public market reference
            thr_control->unregister_public_reference(/* blocking terminate =*/ false);
        } else {
            clear_tls();
        }
    }
    __TBB_ASSERT(get_thread_data_if_initialized() == nullptr, nullptr);
}
26051c0b2f7Stbbdev
initialize_rml_factory()26151c0b2f7Stbbdev void governor::initialize_rml_factory () {
26251c0b2f7Stbbdev ::rml::factory::status_type res = theRMLServerFactory.open();
26351c0b2f7Stbbdev UsePrivateRML = res != ::rml::factory::st_success;
26451c0b2f7Stbbdev }
26551c0b2f7Stbbdev
get(d1::task_scheduler_handle & handle)26651c0b2f7Stbbdev void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle& handle) {
26751c0b2f7Stbbdev handle.m_ctl = new(allocate_memory(sizeof(global_control))) global_control(global_control::scheduler_handle, 1);
26851c0b2f7Stbbdev }
26951c0b2f7Stbbdev
release_impl(d1::task_scheduler_handle & handle)27051c0b2f7Stbbdev void release_impl(d1::task_scheduler_handle& handle) {
27151c0b2f7Stbbdev if (handle.m_ctl != nullptr) {
27251c0b2f7Stbbdev handle.m_ctl->~global_control();
27351c0b2f7Stbbdev deallocate_memory(handle.m_ctl);
27451c0b2f7Stbbdev handle.m_ctl = nullptr;
27551c0b2f7Stbbdev }
27651c0b2f7Stbbdev }
27751c0b2f7Stbbdev
finalize_impl(d1::task_scheduler_handle & handle)27851c0b2f7Stbbdev bool finalize_impl(d1::task_scheduler_handle& handle) {
279478de5b1Stbbdev __TBB_ASSERT_RELEASE(handle, "trying to finalize with null handle");
28051c0b2f7Stbbdev __TBB_ASSERT(is_present(*handle.m_ctl), "finalize or release was already called on this object");
281c4568449SPavel Kumbrasev
282c4568449SPavel Kumbrasev bool ok = true; // ok if threading_control does not exist yet
283c4568449SPavel Kumbrasev if (threading_control::is_present()) {
28451c0b2f7Stbbdev thread_data* td = governor::get_thread_data_if_initialized();
28551c0b2f7Stbbdev if (td) {
28651c0b2f7Stbbdev task_dispatcher* task_disp = td->my_task_dispatcher;
28751c0b2f7Stbbdev __TBB_ASSERT(task_disp, nullptr);
28851c0b2f7Stbbdev if (task_disp->m_properties.outermost && !td->my_is_worker) { // is not inside a parallel region
28951c0b2f7Stbbdev governor::auto_terminate(td);
29051c0b2f7Stbbdev }
29151c0b2f7Stbbdev }
292c4568449SPavel Kumbrasev
29351c0b2f7Stbbdev if (remove_and_check_if_empty(*handle.m_ctl)) {
294c4568449SPavel Kumbrasev ok = threading_control::unregister_lifetime_control(/*blocking_terminate*/ true);
29551c0b2f7Stbbdev } else {
29651c0b2f7Stbbdev ok = false;
29751c0b2f7Stbbdev }
29851c0b2f7Stbbdev }
299c4568449SPavel Kumbrasev
30051c0b2f7Stbbdev return ok;
30151c0b2f7Stbbdev }
30251c0b2f7Stbbdev
finalize(d1::task_scheduler_handle & handle,std::intptr_t mode)30351c0b2f7Stbbdev bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle& handle, std::intptr_t mode) {
30451c0b2f7Stbbdev if (mode == d1::release_nothrowing) {
30551c0b2f7Stbbdev release_impl(handle);
30651c0b2f7Stbbdev return true;
30751c0b2f7Stbbdev } else {
30851c0b2f7Stbbdev bool ok = finalize_impl(handle);
30951c0b2f7Stbbdev // TODO: it is unsafe when finalize is called concurrently and further library unload
31051c0b2f7Stbbdev release_impl(handle);
31151c0b2f7Stbbdev if (mode == d1::finalize_throwing && !ok) {
31251c0b2f7Stbbdev throw_exception(exception_id::unsafe_wait);
31351c0b2f7Stbbdev }
31451c0b2f7Stbbdev return ok;
31551c0b2f7Stbbdev }
31651c0b2f7Stbbdev }
31751c0b2f7Stbbdev
318b15aabb3Stbbdev #if __TBB_ARENA_BINDING
31951c0b2f7Stbbdev
32051c0b2f7Stbbdev #if __TBB_WEAK_SYMBOLS_PRESENT
321b15aabb3Stbbdev #pragma weak __TBB_internal_initialize_system_topology
322edc30c82SIvan Kochin #pragma weak __TBB_internal_destroy_system_topology
32351c0b2f7Stbbdev #pragma weak __TBB_internal_allocate_binding_handler
32451c0b2f7Stbbdev #pragma weak __TBB_internal_deallocate_binding_handler
325b15aabb3Stbbdev #pragma weak __TBB_internal_apply_affinity
32651c0b2f7Stbbdev #pragma weak __TBB_internal_restore_affinity
327b15aabb3Stbbdev #pragma weak __TBB_internal_get_default_concurrency
32851c0b2f7Stbbdev
32951c0b2f7Stbbdev extern "C" {
330b15aabb3Stbbdev void __TBB_internal_initialize_system_topology(
331b15aabb3Stbbdev size_t groups_num,
332b15aabb3Stbbdev int& numa_nodes_count, int*& numa_indexes_list,
333b15aabb3Stbbdev int& core_types_count, int*& core_types_indexes_list
334b15aabb3Stbbdev );
335edc30c82SIvan Kochin void __TBB_internal_destroy_system_topology( );
33651c0b2f7Stbbdev
33751c0b2f7Stbbdev //TODO: consider renaming to `create_binding_handler` and `destroy_binding_handler`
338b15aabb3Stbbdev binding_handler* __TBB_internal_allocate_binding_handler( int slot_num, int numa_id, int core_type_id, int max_threads_per_core );
33951c0b2f7Stbbdev void __TBB_internal_deallocate_binding_handler( binding_handler* handler_ptr );
34051c0b2f7Stbbdev
341b15aabb3Stbbdev void __TBB_internal_apply_affinity( binding_handler* handler_ptr, int slot_num );
34251c0b2f7Stbbdev void __TBB_internal_restore_affinity( binding_handler* handler_ptr, int slot_num );
343b15aabb3Stbbdev
344b15aabb3Stbbdev int __TBB_internal_get_default_concurrency( int numa_id, int core_type_id, int max_threads_per_core );
34551c0b2f7Stbbdev }
34651c0b2f7Stbbdev #endif /* __TBB_WEAK_SYMBOLS_PRESENT */
34751c0b2f7Stbbdev
348b15aabb3Stbbdev // Stubs that will be used if TBBbind library is unavailable.
// Each stub is a no-op except the concurrency query, which reports the
// machine-wide default number of threads.
static void dummy_destroy_system_topology ( ) { }
static binding_handler* dummy_allocate_binding_handler ( int, int, int, int ) { return nullptr; }
static void dummy_deallocate_binding_handler ( binding_handler* ) { }
static void dummy_apply_affinity ( binding_handler*, int ) { }
static void dummy_restore_affinity ( binding_handler*, int ) { }
static int dummy_get_default_concurrency( int, int, int ) { return governor::default_num_threads(); }
35551c0b2f7Stbbdev
356b15aabb3Stbbdev // Handlers for communication with TBBbind
// Topology initialization entry point; stays nullptr until TBBbind is linked.
static void (*initialize_system_topology_ptr)(
    size_t groups_num,
    int& numa_nodes_count, int*& numa_indexes_list,
    int& core_types_count, int*& core_types_indexes_list
) = nullptr;
static void (*destroy_system_topology_ptr)( ) = dummy_destroy_system_topology;

// Affinity entry points default to the no-op stubs above; dynamic_link()
// overwrites them when a suitable TBBbind library is found.
static binding_handler* (*allocate_binding_handler_ptr)( int slot_num, int numa_id, int core_type_id, int max_threads_per_core )
    = dummy_allocate_binding_handler;
static void (*deallocate_binding_handler_ptr)( binding_handler* handler_ptr )
    = dummy_deallocate_binding_handler;
static void (*apply_affinity_ptr)( binding_handler* handler_ptr, int slot_num )
    = dummy_apply_affinity;
static void (*restore_affinity_ptr)( binding_handler* handler_ptr, int slot_num )
    = dummy_restore_affinity;
int (*get_default_concurrency_ptr)( int numa_id, int core_type_id, int max_threads_per_core )
    = dummy_get_default_concurrency;
37451c0b2f7Stbbdev
375*f9fd1beeSIlya Isaev #if _WIN32 || _WIN64 || __unix__ || __APPLE__
376*f9fd1beeSIlya Isaev
// Table describing how to link the handlers.
// dynamic_link() resolves each symbol into the corresponding pointer above.
static const dynamic_link_descriptor TbbBindLinkTable[] = {
    DLD(__TBB_internal_initialize_system_topology, initialize_system_topology_ptr),
    DLD(__TBB_internal_destroy_system_topology, destroy_system_topology_ptr),
#if __TBB_CPUBIND_PRESENT
    // Affinity entry points are linked only when CPU binding support is compiled in.
    DLD(__TBB_internal_allocate_binding_handler, allocate_binding_handler_ptr),
    DLD(__TBB_internal_deallocate_binding_handler, deallocate_binding_handler_ptr),
    DLD(__TBB_internal_apply_affinity, apply_affinity_ptr),
    DLD(__TBB_internal_restore_affinity, restore_affinity_ptr),
#endif
    DLD(__TBB_internal_get_default_concurrency, get_default_concurrency_ptr)
};

static const unsigned LinkTableSize = sizeof(TbbBindLinkTable) / sizeof(dynamic_link_descriptor);
39151c0b2f7Stbbdev
39251c0b2f7Stbbdev #if TBB_USE_DEBUG
39351c0b2f7Stbbdev #define DEBUG_SUFFIX "_debug"
39451c0b2f7Stbbdev #else
39551c0b2f7Stbbdev #define DEBUG_SUFFIX
39651c0b2f7Stbbdev #endif /* TBB_USE_DEBUG */
39751c0b2f7Stbbdev
39851c0b2f7Stbbdev #if _WIN32 || _WIN64
399d86ed7fbStbbdev #define LIBRARY_EXTENSION ".dll"
400d86ed7fbStbbdev #define LIBRARY_PREFIX
401*f9fd1beeSIlya Isaev #elif __APPLE__
402*f9fd1beeSIlya Isaev #define LIBRARY_EXTENSION __TBB_STRING(.3.dylib)
403*f9fd1beeSIlya Isaev #define LIBRARY_PREFIX "lib"
404734f0bc0SPablo Romero #elif __unix__
405d86ed7fbStbbdev #define LIBRARY_EXTENSION __TBB_STRING(.so.3)
406d86ed7fbStbbdev #define LIBRARY_PREFIX "lib"
407734f0bc0SPablo Romero #endif /* __unix__ */
408d86ed7fbStbbdev
409d86ed7fbStbbdev #define TBBBIND_NAME LIBRARY_PREFIX "tbbbind" DEBUG_SUFFIX LIBRARY_EXTENSION
410d86ed7fbStbbdev #define TBBBIND_2_0_NAME LIBRARY_PREFIX "tbbbind_2_0" DEBUG_SUFFIX LIBRARY_EXTENSION
411734f0bc0SPablo Romero
412e96dbf4bSIvan Kochin #define TBBBIND_2_5_NAME LIBRARY_PREFIX "tbbbind_2_5" DEBUG_SUFFIX LIBRARY_EXTENSION
413734f0bc0SPablo Romero #endif /* _WIN32 || _WIN64 || __unix__ */
41451c0b2f7Stbbdev
415b15aabb3Stbbdev // Representation of system hardware topology information on the TBB side.
416b15aabb3Stbbdev // System topology may be initialized by third-party component (e.g. hwloc)
417b15aabb3Stbbdev // or just filled in with default stubs.
418b15aabb3Stbbdev namespace system_topology {
41951c0b2f7Stbbdev
420b15aabb3Stbbdev constexpr int automatic = -1;
421b15aabb3Stbbdev
422b15aabb3Stbbdev static std::atomic<do_once_state> initialization_state;
423b15aabb3Stbbdev
42451c0b2f7Stbbdev namespace {
// Cached topology query results; filled either by TBBbind or by the
// single-node fallback in initialization_impl().
int numa_nodes_count = 0;
int* numa_nodes_indexes = nullptr;

int core_types_count = 0;
int* core_types_indexes = nullptr;
43051c0b2f7Stbbdev
// Tries to dynamically link one of the known TBBbind versions, newest first.
// Returns the name of the successfully loaded library, or nullptr when no
// version could be linked (or the platform rules binding out).
const char* load_tbbbind_shared_object() {
#if _WIN32 || _WIN64 || __unix__ || __APPLE__
#if _WIN32 && !_WIN64
    // For 32-bit Windows applications, process affinity masks can only support up to 32 logical CPUs.
    SYSTEM_INFO si;
    GetNativeSystemInfo(&si);
    if (si.dwNumberOfProcessors > 32) return nullptr;
#endif /* _WIN32 && !_WIN64 */
    // Prefer the most recent TBBbind ABI; fall back to older ones in order.
    for (const auto& tbbbind_version : {TBBBIND_2_5_NAME, TBBBIND_2_0_NAME, TBBBIND_NAME}) {
        if (dynamic_link(tbbbind_version, TbbBindLinkTable, LinkTableSize, nullptr, DYNAMIC_LINK_LOCAL_BINDING)) {
            return tbbbind_version;
        }
    }
#endif /* _WIN32 || _WIN64 || __unix__ || __APPLE__ */
    return nullptr;
}
447d86ed7fbStbbdev
// Number of Windows processor groups; always 1 on other platforms.
int processor_groups_num() {
#if _WIN32
    return NumberOfProcessorGroups();
#else
    // Stub to improve code readability by reducing number of the compile-time conditions
    return 1;
#endif
}
456d86ed7fbStbbdev } // internal namespace
457d86ed7fbStbbdev
// Tries to load TBBbind library API, if success, gets NUMA topology information from it,
// in another case, fills NUMA topology by stubs.
void initialization_impl() {
    governor::one_time_init();

    if (const char* tbbbind_name = load_tbbbind_shared_object()) {
        // TBBbind is available: let it discover the real topology and fill
        // the cached counts/index lists.
        initialize_system_topology_ptr(
            processor_groups_num(),
            numa_nodes_count, numa_nodes_indexes,
            core_types_count, core_types_indexes
        );

        PrintExtraVersionInfo("TBBBIND", tbbbind_name);
        return;
    }

    // Fallback: expose a single NUMA node and a single core type, both with
    // the "automatic" index, meaning no binding information is available.
    static int dummy_index = automatic;

    numa_nodes_count = 1;
    numa_nodes_indexes = &dummy_index;

    core_types_count = 1;
    core_types_indexes = &dummy_index;

    PrintExtraVersionInfo("TBBBIND", "UNAVAILABLE");
}
48451c0b2f7Stbbdev
// Performs the topology initialization exactly once, thread-safely.
void initialize() {
    atomic_do_once(initialization_impl, initialization_state);
}
488edc30c82SIvan Kochin
// Releases topology resources via TBBbind; a no-op when it was never loaded.
void destroy() {
    destroy_system_topology_ptr();
}
492b15aabb3Stbbdev } // namespace system_topology
49351c0b2f7Stbbdev
// Creates a binding handler for the given slot count and constraints.
// Returns nullptr when affinity support is unavailable (stub in effect).
binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core) {
    system_topology::initialize();
    return allocate_binding_handler_ptr(slot_num, numa_id, core_type_id, max_threads_per_core);
}
49851c0b2f7Stbbdev
// Releases a handler produced by construct_binding_handler.
void destroy_binding_handler(binding_handler* handler_ptr) {
    __TBB_ASSERT(deallocate_binding_handler_ptr, "tbbbind loading was not performed");
    deallocate_binding_handler_ptr(handler_ptr);
}
50351c0b2f7Stbbdev
// Delegates to TBBbind (or the no-op stub) to apply the affinity mask
// associated with the given arena slot.
void apply_affinity_mask(binding_handler* handler_ptr, int slot_index) {
    __TBB_ASSERT(slot_index >= 0, "Negative thread index");
    __TBB_ASSERT(apply_affinity_ptr, "tbbbind loading was not performed");
    apply_affinity_ptr(handler_ptr, slot_index);
}
50951c0b2f7Stbbdev
// Delegates to TBBbind (or the no-op stub) to restore the affinity that
// was in effect before apply_affinity_mask for the given slot.
void restore_affinity_mask(binding_handler* handler_ptr, int slot_index) {
    __TBB_ASSERT(slot_index >= 0, "Negative thread index");
    __TBB_ASSERT(restore_affinity_ptr, "tbbbind loading was not performed");
    restore_affinity_ptr(handler_ptr, slot_index);
}
51551c0b2f7Stbbdev
// Number of NUMA nodes discovered (1 when TBBbind is unavailable).
unsigned __TBB_EXPORTED_FUNC numa_node_count() {
    system_topology::initialize();
    return system_topology::numa_nodes_count;
}
520b15aabb3Stbbdev
// Copies the NUMA node index list into the caller's array, which must hold
// at least numa_node_count() ints.
void __TBB_EXPORTED_FUNC fill_numa_indices(int* index_array) {
    system_topology::initialize();
    std::memcpy(index_array, system_topology::numa_nodes_indexes, system_topology::numa_nodes_count * sizeof(int));
}
525b15aabb3Stbbdev
numa_default_concurrency(int node_id)52651c0b2f7Stbbdev int __TBB_EXPORTED_FUNC numa_default_concurrency(int node_id) {
52751c0b2f7Stbbdev if (node_id >= 0) {
528b15aabb3Stbbdev system_topology::initialize();
529b15aabb3Stbbdev int result = get_default_concurrency_ptr(
530b15aabb3Stbbdev node_id,
531b15aabb3Stbbdev /*core_type*/system_topology::automatic,
532b15aabb3Stbbdev /*threads_per_core*/system_topology::automatic
533b15aabb3Stbbdev );
534b15aabb3Stbbdev if (result > 0) return result;
53551c0b2f7Stbbdev }
53651c0b2f7Stbbdev return governor::default_num_threads();
53751c0b2f7Stbbdev }
538b15aabb3Stbbdev
// Number of distinct core types discovered (1 when TBBbind is unavailable).
unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t /*reserved*/) {
    system_topology::initialize();
    return system_topology::core_types_count;
}
543b15aabb3Stbbdev
// Copies the core type index list into the caller's array, which must hold
// at least core_type_count() ints.
void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t /*reserved*/) {
    system_topology::initialize();
    std::memcpy(index_array, system_topology::core_types_indexes, system_topology::core_types_count * sizeof(int));
}
548b15aabb3Stbbdev
constraints_assertion(d1::constraints c)549b15aabb3Stbbdev void constraints_assertion(d1::constraints c) {
550b15aabb3Stbbdev bool is_topology_initialized = system_topology::initialization_state == do_once_state::initialized;
551b15aabb3Stbbdev __TBB_ASSERT_RELEASE(c.max_threads_per_core == system_topology::automatic || c.max_threads_per_core > 0,
552b15aabb3Stbbdev "Wrong max_threads_per_core constraints field value.");
553b15aabb3Stbbdev
554b15aabb3Stbbdev auto numa_nodes_begin = system_topology::numa_nodes_indexes;
555b15aabb3Stbbdev auto numa_nodes_end = system_topology::numa_nodes_indexes + system_topology::numa_nodes_count;
556b15aabb3Stbbdev __TBB_ASSERT_RELEASE(
557b15aabb3Stbbdev c.numa_id == system_topology::automatic ||
558b15aabb3Stbbdev (is_topology_initialized && std::find(numa_nodes_begin, numa_nodes_end, c.numa_id) != numa_nodes_end),
559b15aabb3Stbbdev "The constraints::numa_id value is not known to the library. Use tbb::info::numa_nodes() to get the list of possible values.");
560b15aabb3Stbbdev
561b15aabb3Stbbdev int* core_types_begin = system_topology::core_types_indexes;
562b15aabb3Stbbdev int* core_types_end = system_topology::core_types_indexes + system_topology::core_types_count;
563b15aabb3Stbbdev __TBB_ASSERT_RELEASE(c.core_type == system_topology::automatic ||
564b15aabb3Stbbdev (is_topology_initialized && std::find(core_types_begin, core_types_end, c.core_type) != core_types_end),
565b15aabb3Stbbdev "The constraints::core_type value is not known to the library. Use tbb::info::core_types() to get the list of possible values.");
566b15aabb3Stbbdev }
567b15aabb3Stbbdev
constraints_default_concurrency(const d1::constraints & c,intptr_t)568b15aabb3Stbbdev int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t /*reserved*/) {
569b15aabb3Stbbdev constraints_assertion(c);
570b15aabb3Stbbdev
571b15aabb3Stbbdev if (c.numa_id >= 0 || c.core_type >= 0 || c.max_threads_per_core > 0) {
572b15aabb3Stbbdev system_topology::initialize();
573b15aabb3Stbbdev return get_default_concurrency_ptr(c.numa_id, c.core_type, c.max_threads_per_core);
574b15aabb3Stbbdev }
575b15aabb3Stbbdev return governor::default_num_threads();
576b15aabb3Stbbdev }
577b15aabb3Stbbdev
// Reserved query: the library always reports the "automatic"
// (unconstrained) threads-per-core marker here.
int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints&, intptr_t /*reserved*/) {
    return system_topology::automatic;
}
581b15aabb3Stbbdev #endif /* __TBB_ARENA_BINDING */
58251c0b2f7Stbbdev
58351c0b2f7Stbbdev } // namespace r1
58451c0b2f7Stbbdev } // namespace detail
58551c0b2f7Stbbdev } // namespace tbb
586