/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "governor.h"
#include "main.h"
#include "thread_data.h"
#include "market.h"
#include "arena.h"
#include "dynamic_link.h"
#include "concurrent_monitor.h"

#include "oneapi/tbb/task_group.h"
#include "oneapi/tbb/global_control.h"
#include "oneapi/tbb/tbb_allocator.h"
#include "oneapi/tbb/info.h"

#include "task_dispatcher.h"

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <atomic>
#include <algorithm>

namespace tbb {
namespace detail {
namespace r1 {

void clear_address_waiter_table();

#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
//! Defined in global_control.cpp
bool remove_and_check_if_empty(d1::global_control& gc);
bool is_present(d1::global_control& gc);
#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE

namespace rml {
tbb_server* make_private_server( tbb_client& client );
} // namespace rml

namespace system_topology {
    void destroy();
}

//------------------------------------------------------------------------
// governor
//------------------------------------------------------------------------

void governor::acquire_resources () {
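    // On POSIX builds, auto_terminate() is registered as the TLS destructor so that
    // a thread's scheduler data is cleaned up automatically when the thread exits.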
#if __TBB_USE_POSIX
    int status = theTLS.create(auto_terminate);
#else
    int status = theTLS.create();
#endif
    if( status )
        handle_perror(status, "TBB failed to initialize task scheduler TLS\n");
    detect_cpu_features(cpu_features);

    is_rethrow_broken = gcc_rethrow_exception_broken();
}

void governor::release_resources () {
    theRMLServerFactory.close();
    destroy_process_mask();

    __TBB_ASSERT(!(__TBB_InitOnce::initialization_done() && theTLS.get()), "TBB is unloaded while thread data still alive?");

    int status = theTLS.destroy();
    if( status )
        runtime_warning("failed to destroy task scheduler TLS: %s", std::strerror(status));
    clear_address_waiter_table();

    system_topology::destroy();
    dynamic_unlink_all();
}

rml::tbb_server* governor::create_rml_server ( rml::tbb_client& client ) {
    rml::tbb_server* server = nullptr;
    if( !UsePrivateRML ) {
        ::rml::factory::status_type status = theRMLServerFactory.make_server( server, client );
        if( status != ::rml::factory::st_success ) {
            UsePrivateRML = true;
            runtime_warning( "rml::tbb_factory::make_server failed with status %x, falling back on private rml", status );
        }
    }
    if ( !server ) {
        __TBB_ASSERT( UsePrivateRML, nullptr );
        server = rml::make_private_server( client );
    }
    __TBB_ASSERT( server, "Failed to create RML server" );
    return server;
}

void governor::one_time_init() {
    if ( !__TBB_InitOnce::initialization_done() ) {
        DoOneTimeInitialization();
    }
}

/*
    There is no portable way to get the stack base address in POSIX; however, modern
    Linux versions provide the pthread_getattr_np API that can be used to obtain a
    thread's stack size and base address. Unfortunately, even this function does not
    provide enough information for the main thread on the IA-64 architecture (the RSE
    spill area and the memory stack are allocated as two separate discontiguous chunks
    of memory), and there is no portable way to discern the main thread from secondary
    threads. Thus for macOS* and for the IA-64 architecture on Linux* OS we use the TBB
    worker stack size for all threads and use the current stack top as the stack base.
    This simplified approach is based on the following assumptions:
    1) If the default stack size is insufficient for the user app's needs, the
    required amount will be explicitly specified by the user at the point of
    TBB scheduler initialization (as an argument to the tbb::task_scheduler_init
    constructor).
    2) When an external thread initializes the scheduler, it has enough space on its
    stack. Here "enough" means "at least as much as worker threads have".
    3) If the user app strives to conserve memory by cutting the stack size, it
    should do this for TBB workers too (as in #1).
*/
static std::uintptr_t get_stack_base(std::size_t stack_size) {
    // Stacks grow top-down. The highest address is called the "stack base",
    // and the lowest is the "stack limit".
#if __TBB_USE_WINAPI
    suppress_unused_warning(stack_size);
    NT_TIB* pteb = (NT_TIB*)NtCurrentTeb();
    __TBB_ASSERT(&pteb < pteb->StackBase && &pteb > pteb->StackLimit, "invalid stack info in TEB");
    return reinterpret_cast<std::uintptr_t>(pteb->StackBase);
#else
    // There is no portable way to get the stack base address in POSIX, so we use
    // a non-portable method (available on all modern Linux systems) or the simplified
    // approach based on common-sense assumptions. The most important assumption
    // is that the main thread's stack size is not less than that of other threads.

    // Points to the lowest addressable byte of a stack.
    void* stack_limit = nullptr;
#if __linux__ && !__bg__
    size_t np_stack_size = 0;
    pthread_attr_t np_attr_stack;
    if (0 == pthread_getattr_np(pthread_self(), &np_attr_stack)) {
        if (0 == pthread_attr_getstack(&np_attr_stack, &stack_limit, &np_stack_size)) {
            __TBB_ASSERT( &stack_limit > stack_limit, "stack size must be positive" );
        }
        pthread_attr_destroy(&np_attr_stack);
    }
#endif /* __linux__ */
    std::uintptr_t stack_base{};
    if (stack_limit) {
        stack_base = reinterpret_cast<std::uintptr_t>(stack_limit) + stack_size;
    } else {
        // Use a local anchor as the base stack address.
        int anchor{};
        stack_base = reinterpret_cast<std::uintptr_t>(&anchor);
    }
    return stack_base;
#endif /* __TBB_USE_WINAPI */
}

#if (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED
static void register_external_thread_destructor() {
    struct thread_destructor {
        ~thread_destructor() {
            governor::terminate_external_thread();
        }
    };
    // ~thread_destructor() will be called when the calling thread terminates
    static thread_local thread_destructor thr_destructor;
}
#endif // (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED

void governor::init_external_thread() {
    one_time_init();
    // Create a new scheduler instance with an arena
    int num_slots = default_num_threads();
    // TODO_REVAMP: support an external thread without an implicit arena
    int num_reserved_slots = 1;
    unsigned arena_priority_level = 1; // corresponds to tbb::task_arena::priority::normal
    std::size_t stack_size = 0;
    arena& a = *market::create_arena(num_slots, num_reserved_slots, arena_priority_level, stack_size);
    // We need an internal reference to the market. TODO: is it legacy?
    market::global_market(false);
    // The external thread always occupies the first slot
    thread_data& td = *new(cache_aligned_allocate(sizeof(thread_data))) thread_data(0, false);
    td.attach_arena(a, /*slot index*/ 0);
    __TBB_ASSERT(td.my_inbox.is_idle_state(false), nullptr);

    stack_size = a.my_market->worker_stack_size();
    std::uintptr_t stack_base = get_stack_base(stack_size);
    task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher();
    task_disp.set_stealing_threshold(calculate_stealing_threshold(stack_base, stack_size));
    td.attach_task_dispatcher(task_disp);

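    // Publish the slot as occupied and register the thread with the market before
    // making the thread data reachable through TLS.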
    td.my_arena_slot->occupy();
    a.my_market->add_external_thread(td);
    set_thread_data(td);
#if (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED
    // The external thread destructor is normally called from DllMain, which is not
    // available in a static build. Therefore, register the current thread so that
    // the destructor runs at thread termination.
    register_external_thread_destructor();
#endif
}

void governor::auto_terminate(void* tls) {
    __TBB_ASSERT(get_thread_data_if_initialized() == nullptr ||
        get_thread_data_if_initialized() == tls, nullptr);
    if (tls) {
        thread_data* td = static_cast<thread_data*>(tls);

        // Only an external thread can be inside an arena during termination.
        if (td->my_arena_slot) {
            arena* a = td->my_arena;
            market* m = a->my_market;

            a->my_observers.notify_exit_observers(td->my_last_observer, td->my_is_worker);

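            // Reset the stealing threshold (originally derived from this thread's
            // stack bounds) before detaching the task dispatcher.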
            td->my_task_dispatcher->m_stealing_threshold = 0;
            td->detach_task_dispatcher();
            td->my_arena_slot->release();
            // Release an arena
            a->on_thread_leaving<arena::ref_external>();

            m->remove_external_thread(*td);
            // If there was an associated arena, it added a public market reference
            m->release( /*is_public*/ true, /*blocking_terminate*/ false);
        }

        td->~thread_data();
        cache_aligned_deallocate(td);

        clear_thread_data();
    }
    __TBB_ASSERT(get_thread_data_if_initialized() == nullptr, nullptr);
}

void governor::initialize_rml_factory () {
    ::rml::factory::status_type res = theRMLServerFactory.open();
    UsePrivateRML = res != ::rml::factory::st_success;
}

#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle& handle) {
    handle.m_ctl = new(allocate_memory(sizeof(global_control))) global_control(global_control::scheduler_handle, 1);
}

void release_impl(d1::task_scheduler_handle& handle) {
    if (handle.m_ctl != nullptr) {
        handle.m_ctl->~global_control();
        deallocate_memory(handle.m_ctl);
        handle.m_ctl = nullptr;
    }
}

bool finalize_impl(d1::task_scheduler_handle& handle) {
    __TBB_ASSERT_RELEASE(handle, "trying to finalize with null handle");
    market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex );
    bool ok = true; // ok if theMarket does not exist yet
    market* m = market::theMarket; // read the state of theMarket
    if (m != nullptr) {
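        // Note: the scheduler handle's global_control is expected to hold a public
        // market reference (see global_control.cpp), so `m` should stay valid after
        // the global mutex is released here.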
        lock.release();
        __TBB_ASSERT(is_present(*handle.m_ctl), "finalize or release was already called on this object");
        thread_data* td = governor::get_thread_data_if_initialized();
        if (td) {
            task_dispatcher* task_disp = td->my_task_dispatcher;
            __TBB_ASSERT(task_disp, nullptr);
            if (task_disp->m_properties.outermost && !td->my_is_worker) { // is not inside a parallel region
                governor::auto_terminate(td);
            }
        }
        if (remove_and_check_if_empty(*handle.m_ctl)) {
            ok = m->release(/*is_public*/ true, /*blocking_terminate*/ true);
        } else {
            ok = false;
        }
    }
    return ok;
}

bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle& handle, std::intptr_t mode) {
    if (mode == d1::release_nothrowing) {
        release_impl(handle);
        return true;
    } else {
        bool ok = finalize_impl(handle);
        // TODO: this is unsafe if finalize is called concurrently with subsequent library unload
        release_impl(handle);
        if (mode == d1::finalize_throwing && !ok) {
            throw_exception(exception_id::unsafe_wait);
        }
        return ok;
    }
}
#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE

#if __TBB_ARENA_BINDING

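// When weak symbols are present, the TBBbind entry points are declared so that the
// DLD() descriptors below can pair each symbol with its pointer; this presumably
// lets dynamic_link() bind symbols already resolved by the linker before falling
// back to loading the shared object at run time.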
#if __TBB_WEAK_SYMBOLS_PRESENT
#pragma weak __TBB_internal_initialize_system_topology
#pragma weak __TBB_internal_destroy_system_topology
#pragma weak __TBB_internal_allocate_binding_handler
#pragma weak __TBB_internal_deallocate_binding_handler
#pragma weak __TBB_internal_apply_affinity
#pragma weak __TBB_internal_restore_affinity
#pragma weak __TBB_internal_get_default_concurrency

extern "C" {
void __TBB_internal_initialize_system_topology(
    size_t groups_num,
    int& numa_nodes_count, int*& numa_indexes_list,
    int& core_types_count, int*& core_types_indexes_list
);
void __TBB_internal_destroy_system_topology( );

// TODO: consider renaming to `create_binding_handler` and `destroy_binding_handler`
binding_handler* __TBB_internal_allocate_binding_handler( int slot_num, int numa_id, int core_type_id, int max_threads_per_core );
void __TBB_internal_deallocate_binding_handler( binding_handler* handler_ptr );

void __TBB_internal_apply_affinity( binding_handler* handler_ptr, int slot_num );
void __TBB_internal_restore_affinity( binding_handler* handler_ptr, int slot_num );

int __TBB_internal_get_default_concurrency( int numa_id, int core_type_id, int max_threads_per_core );
}
#endif /* __TBB_WEAK_SYMBOLS_PRESENT */

// Stubs that will be used if the TBBbind library is unavailable.
static void dummy_destroy_system_topology ( ) { }
static binding_handler* dummy_allocate_binding_handler ( int, int, int, int ) { return nullptr; }
static void dummy_deallocate_binding_handler ( binding_handler* ) { }
static void dummy_apply_affinity ( binding_handler*, int ) { }
static void dummy_restore_affinity ( binding_handler*, int ) { }
static int dummy_get_default_concurrency( int, int, int ) { return governor::default_num_threads(); }

// Pointers through which TBB communicates with TBBbind
static void (*initialize_system_topology_ptr)(
    size_t groups_num,
    int& numa_nodes_count, int*& numa_indexes_list,
    int& core_types_count, int*& core_types_indexes_list
) = nullptr;
static void (*destroy_system_topology_ptr)( ) = dummy_destroy_system_topology;

static binding_handler* (*allocate_binding_handler_ptr)( int slot_num, int numa_id, int core_type_id, int max_threads_per_core )
    = dummy_allocate_binding_handler;
static void (*deallocate_binding_handler_ptr)( binding_handler* handler_ptr )
    = dummy_deallocate_binding_handler;
static void (*apply_affinity_ptr)( binding_handler* handler_ptr, int slot_num )
    = dummy_apply_affinity;
static void (*restore_affinity_ptr)( binding_handler* handler_ptr, int slot_num )
    = dummy_restore_affinity;
int (*get_default_concurrency_ptr)( int numa_id, int core_type_id, int max_threads_per_core )
    = dummy_get_default_concurrency;

#if _WIN32 || _WIN64 || __unix__
// Table describing how to link the handlers.
static const dynamic_link_descriptor TbbBindLinkTable[] = {
    DLD(__TBB_internal_initialize_system_topology, initialize_system_topology_ptr),
    DLD(__TBB_internal_destroy_system_topology, destroy_system_topology_ptr),
    DLD(__TBB_internal_allocate_binding_handler, allocate_binding_handler_ptr),
    DLD(__TBB_internal_deallocate_binding_handler, deallocate_binding_handler_ptr),
    DLD(__TBB_internal_apply_affinity, apply_affinity_ptr),
    DLD(__TBB_internal_restore_affinity, restore_affinity_ptr),
    DLD(__TBB_internal_get_default_concurrency, get_default_concurrency_ptr)
};

static const unsigned LinkTableSize = sizeof(TbbBindLinkTable) / sizeof(dynamic_link_descriptor);

#if TBB_USE_DEBUG
#define DEBUG_SUFFIX "_debug"
#else
#define DEBUG_SUFFIX
#endif /* TBB_USE_DEBUG */

#if _WIN32 || _WIN64
#define LIBRARY_EXTENSION ".dll"
#define LIBRARY_PREFIX
#elif __unix__
#define LIBRARY_EXTENSION __TBB_STRING(.so.3)
#define LIBRARY_PREFIX "lib"
#endif /* __unix__ */

#define TBBBIND_NAME LIBRARY_PREFIX "tbbbind" DEBUG_SUFFIX LIBRARY_EXTENSION
#define TBBBIND_2_0_NAME LIBRARY_PREFIX "tbbbind_2_0" DEBUG_SUFFIX LIBRARY_EXTENSION
#define TBBBIND_2_5_NAME LIBRARY_PREFIX "tbbbind_2_5" DEBUG_SUFFIX LIBRARY_EXTENSION
#endif /* _WIN32 || _WIN64 || __unix__ */

// Representation of system hardware topology information on the TBB side.
// The system topology may be initialized by a third-party component (e.g. hwloc)
// or just filled in with default stubs.
namespace system_topology {

constexpr int automatic = -1;

static std::atomic<do_once_state> initialization_state;

namespace {
int  numa_nodes_count = 0;
int* numa_nodes_indexes = nullptr;

int  core_types_count = 0;
int* core_types_indexes = nullptr;

const char* load_tbbbind_shared_object() {
#if _WIN32 || _WIN64 || __unix__
#if _WIN32 && !_WIN64
    // For 32-bit Windows applications, process affinity masks can only support up to 32 logical CPUs.
    SYSTEM_INFO si;
    GetNativeSystemInfo(&si);
    if (si.dwNumberOfProcessors > 32) return nullptr;
#endif /* _WIN32 && !_WIN64 */
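    // Probe the newest supported TBBbind version first, falling back to older ones.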
    for (const auto& tbbbind_version : {TBBBIND_2_5_NAME, TBBBIND_2_0_NAME, TBBBIND_NAME}) {
        if (dynamic_link(tbbbind_version, TbbBindLinkTable, LinkTableSize, nullptr, DYNAMIC_LINK_LOCAL_BINDING)) {
            return tbbbind_version;
        }
    }
#endif /* _WIN32 || _WIN64 || __unix__ */
    return nullptr;
}

int processor_groups_num() {
#if _WIN32
    return NumberOfProcessorGroups();
#else
    // Stub to improve code readability by reducing the number of compile-time conditions
    return 1;
#endif
}
} // anonymous namespace

// Tries to load the TBBbind library API; on success, obtains the NUMA topology
// information from it, otherwise fills the NUMA topology with stubs.
void initialization_impl() {
    governor::one_time_init();

    if (const char* tbbbind_name = load_tbbbind_shared_object()) {
        initialize_system_topology_ptr(
            processor_groups_num(),
            numa_nodes_count, numa_nodes_indexes,
            core_types_count, core_types_indexes
        );

        PrintExtraVersionInfo("TBBBIND", tbbbind_name);
        return;
    }

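    // TBBbind is unavailable: report a single "automatic" NUMA node and core type
    // so that the public topology queries still return meaningful defaults.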
    static int dummy_index = automatic;

    numa_nodes_count = 1;
    numa_nodes_indexes = &dummy_index;

    core_types_count = 1;
    core_types_indexes = &dummy_index;

    PrintExtraVersionInfo("TBBBIND", "UNAVAILABLE");
}

void initialize() {
    atomic_do_once(initialization_impl, initialization_state);
}

void destroy() {
    destroy_system_topology_ptr();
}
} // namespace system_topology

binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core) {
    system_topology::initialize();
    return allocate_binding_handler_ptr(slot_num, numa_id, core_type_id, max_threads_per_core);
}

void destroy_binding_handler(binding_handler* handler_ptr) {
    __TBB_ASSERT(deallocate_binding_handler_ptr, "tbbbind loading was not performed");
    deallocate_binding_handler_ptr(handler_ptr);
}

void apply_affinity_mask(binding_handler* handler_ptr, int slot_index) {
    __TBB_ASSERT(slot_index >= 0, "Negative thread index");
    __TBB_ASSERT(apply_affinity_ptr, "tbbbind loading was not performed");
    apply_affinity_ptr(handler_ptr, slot_index);
}

void restore_affinity_mask(binding_handler* handler_ptr, int slot_index) {
    __TBB_ASSERT(slot_index >= 0, "Negative thread index");
    __TBB_ASSERT(restore_affinity_ptr, "tbbbind loading was not performed");
    restore_affinity_ptr(handler_ptr, slot_index);
}

unsigned __TBB_EXPORTED_FUNC numa_node_count() {
    system_topology::initialize();
    return system_topology::numa_nodes_count;
}

void __TBB_EXPORTED_FUNC fill_numa_indices(int* index_array) {
    system_topology::initialize();
    std::memcpy(index_array, system_topology::numa_nodes_indexes, system_topology::numa_nodes_count * sizeof(int));
}
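
// Note: numa_node_count() and fill_numa_indices() appear to back the public
// tbb::info::numa_nodes() query declared in oneapi/tbb/info.h.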

int __TBB_EXPORTED_FUNC numa_default_concurrency(int node_id) {
    if (node_id >= 0) {
        system_topology::initialize();
        int result = get_default_concurrency_ptr(
            node_id,
            /*core_type*/system_topology::automatic,
            /*threads_per_core*/system_topology::automatic
        );
        if (result > 0) return result;
    }
    return governor::default_num_threads();
}
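
// A minimal usage sketch of the public API that ends up in this entry point
// (user code; assumes the interfaces declared in oneapi/tbb/info.h):
//
//   std::vector<tbb::numa_node_id> nodes = tbb::info::numa_nodes();
//   int concurrency = tbb::info::default_concurrency(nodes.front());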

unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t /*reserved*/) {
    system_topology::initialize();
    return system_topology::core_types_count;
}

void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t /*reserved*/) {
    system_topology::initialize();
    std::memcpy(index_array, system_topology::core_types_indexes, system_topology::core_types_count * sizeof(int));
}
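
// Similarly, core_type_count() and fill_core_type_indices() appear to back the
// public tbb::info::core_types() query.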

void constraints_assertion(d1::constraints c) {
    bool is_topology_initialized = system_topology::initialization_state == do_once_state::initialized;
    __TBB_ASSERT_RELEASE(c.max_threads_per_core == system_topology::automatic || c.max_threads_per_core > 0,
        "Wrong max_threads_per_core constraints field value.");

    auto numa_nodes_begin = system_topology::numa_nodes_indexes;
    auto numa_nodes_end = system_topology::numa_nodes_indexes + system_topology::numa_nodes_count;
    __TBB_ASSERT_RELEASE(
        c.numa_id == system_topology::automatic ||
        (is_topology_initialized && std::find(numa_nodes_begin, numa_nodes_end, c.numa_id) != numa_nodes_end),
        "The constraints::numa_id value is not known to the library. Use tbb::info::numa_nodes() to get the list of possible values.");

    int* core_types_begin = system_topology::core_types_indexes;
    int* core_types_end = system_topology::core_types_indexes + system_topology::core_types_count;
    __TBB_ASSERT_RELEASE(c.core_type == system_topology::automatic ||
        (is_topology_initialized && std::find(core_types_begin, core_types_end, c.core_type) != core_types_end),
        "The constraints::core_type value is not known to the library. Use tbb::info::core_types() to get the list of possible values.");
}

int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t /*reserved*/) {
    constraints_assertion(c);

    if (c.numa_id >= 0 || c.core_type >= 0 || c.max_threads_per_core > 0) {
        system_topology::initialize();
        return get_default_concurrency_ptr(c.numa_id, c.core_type, c.max_threads_per_core);
    }
    return governor::default_num_threads();
}

int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints&, intptr_t /*reserved*/) {
    return system_topology::automatic;
}
#endif /* __TBB_ARENA_BINDING */

} // namespace r1
} // namespace detail
} // namespace tbb