1 /* 2 Copyright (c) 2005-2023 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 #include "governor.h" 18 #include "threading_control.h" 19 #include "main.h" 20 #include "thread_data.h" 21 #include "market.h" 22 #include "arena.h" 23 #include "dynamic_link.h" 24 #include "concurrent_monitor.h" 25 #include "thread_dispatcher.h" 26 27 #include "oneapi/tbb/task_group.h" 28 #include "oneapi/tbb/global_control.h" 29 #include "oneapi/tbb/tbb_allocator.h" 30 #include "oneapi/tbb/info.h" 31 32 #include "task_dispatcher.h" 33 34 #include <cstdio> 35 #include <cstdlib> 36 #include <cstring> 37 #include <atomic> 38 #include <algorithm> 39 40 namespace tbb { 41 namespace detail { 42 namespace r1 { 43 44 void clear_address_waiter_table(); 45 46 //! global_control.cpp contains definition 47 bool remove_and_check_if_empty(d1::global_control& gc); 48 bool is_present(d1::global_control& gc); 49 50 namespace rml { 51 tbb_server* make_private_server( tbb_client& client ); 52 } // namespace rml 53 54 namespace system_topology { 55 void destroy(); 56 } 57 58 //------------------------------------------------------------------------ 59 // governor 60 //------------------------------------------------------------------------ 61 62 void governor::acquire_resources () { 63 #if __TBB_USE_POSIX 64 int status = theTLS.create(auto_terminate); 65 #else 66 int status = theTLS.create(); 67 #endif 68 if( status ) 69 handle_perror(status, "TBB failed to initialize task scheduler TLS\n"); 70 detect_cpu_features(cpu_features); 71 72 is_rethrow_broken = gcc_rethrow_exception_broken(); 73 } 74 75 void governor::release_resources () { 76 theRMLServerFactory.close(); 77 destroy_process_mask(); 78 79 __TBB_ASSERT(!(__TBB_InitOnce::initialization_done() && theTLS.get()), "TBB is unloaded while thread data still alive?"); 80 81 int status = theTLS.destroy(); 82 if( status ) 83 runtime_warning("failed to destroy task scheduler TLS: %s", std::strerror(status)); 84 clear_address_waiter_table(); 85 86 system_topology::destroy(); 87 dynamic_unlink_all(); 88 } 89 90 rml::tbb_server* governor::create_rml_server ( rml::tbb_client& client ) { 91 rml::tbb_server* server = nullptr; 92 if( !UsePrivateRML ) { 93 ::rml::factory::status_type status = theRMLServerFactory.make_server( server, client ); 94 if( status != ::rml::factory::st_success ) { 95 UsePrivateRML = true; 96 runtime_warning( "rml::tbb_factory::make_server failed with status %x, falling back on private rml", status ); 97 } 98 } 99 if ( !server ) { 100 __TBB_ASSERT( UsePrivateRML, nullptr); 101 server = rml::make_private_server( client ); 102 } 103 __TBB_ASSERT( server, "Failed to create RML server" ); 104 return server; 105 } 106 107 void governor::one_time_init() { 108 if ( !__TBB_InitOnce::initialization_done() ) { 109 DoOneTimeInitialization(); 110 } 111 } 112 113 bool governor::does_client_join_workers(const rml::tbb_client &client) { 114 return ((const thread_dispatcher&)client).must_join_workers(); 115 } 116 117 /* 118 There is no portable way to get stack base address in Posix, however the modern 119 Linux versions provide pthread_attr_np API that can be used to obtain thread's 120 stack size and base address. Unfortunately even this function does not provide 121 enough information for the main thread on IA-64 architecture (RSE spill area 122 and memory stack are allocated as two separate discontinuous chunks of memory), 123 and there is no portable way to discern the main and the secondary threads. 124 Thus for macOS* and IA-64 architecture for Linux* OS we use the TBB worker stack size for 125 all threads and use the current stack top as the stack base. This simplified 126 approach is based on the following assumptions: 127 1) If the default stack size is insufficient for the user app needs, the 128 required amount will be explicitly specified by the user at the point of the 129 TBB scheduler initialization (as an argument to tbb::task_scheduler_init 130 constructor). 131 2) When an external thread initializes the scheduler, it has enough space on its 132 stack. Here "enough" means "at least as much as worker threads have". 133 3) If the user app strives to conserve the memory by cutting stack size, it 134 should do this for TBB workers too (as in the #1). 135 */ 136 static std::uintptr_t get_stack_base(std::size_t stack_size) { 137 // Stacks are growing top-down. Highest address is called "stack base", 138 // and the lowest is "stack limit". 139 #if __TBB_USE_WINAPI 140 suppress_unused_warning(stack_size); 141 NT_TIB* pteb = (NT_TIB*)NtCurrentTeb(); 142 __TBB_ASSERT(&pteb < pteb->StackBase && &pteb > pteb->StackLimit, "invalid stack info in TEB"); 143 return reinterpret_cast<std::uintptr_t>(pteb->StackBase); 144 #else 145 // There is no portable way to get stack base address in Posix, so we use 146 // non-portable method (on all modern Linux) or the simplified approach 147 // based on the common sense assumptions. The most important assumption 148 // is that the main thread's stack size is not less than that of other threads. 149 150 // Points to the lowest addressable byte of a stack. 151 void* stack_limit = nullptr; 152 #if __linux__ && !__bg__ 153 size_t np_stack_size = 0; 154 pthread_attr_t np_attr_stack; 155 if (0 == pthread_getattr_np(pthread_self(), &np_attr_stack)) { 156 if (0 == pthread_attr_getstack(&np_attr_stack, &stack_limit, &np_stack_size)) { 157 __TBB_ASSERT( &stack_limit > stack_limit, "stack size must be positive" ); 158 } 159 pthread_attr_destroy(&np_attr_stack); 160 } 161 #endif /* __linux__ */ 162 std::uintptr_t stack_base{}; 163 if (stack_limit) { 164 stack_base = reinterpret_cast<std::uintptr_t>(stack_limit) + stack_size; 165 } else { 166 // Use an anchor as a base stack address. 167 int anchor{}; 168 stack_base = reinterpret_cast<std::uintptr_t>(&anchor); 169 } 170 return stack_base; 171 #endif /* __TBB_USE_WINAPI */ 172 } 173 174 #if (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED 175 static void register_external_thread_destructor() { 176 struct thread_destructor { 177 ~thread_destructor() { 178 governor::terminate_external_thread(); 179 } 180 }; 181 // ~thread_destructor() will be call during the calling thread termination 182 static thread_local thread_destructor thr_destructor; 183 } 184 #endif // (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED 185 186 void governor::init_external_thread() { 187 one_time_init(); 188 // Create new scheduler instance with arena 189 int num_slots = default_num_threads(); 190 // TODO_REVAMP: support an external thread without an implicit arena 191 int num_reserved_slots = 1; 192 unsigned arena_priority_level = 1; // corresponds to tbb::task_arena::priority::normal 193 std::size_t stack_size = 0; 194 threading_control* thr_control = threading_control::register_public_reference(); 195 arena& a = arena::create(thr_control, num_slots, num_reserved_slots, arena_priority_level); 196 // External thread always occupies the first slot 197 thread_data& td = *new(cache_aligned_allocate(sizeof(thread_data))) thread_data(0, false); 198 td.attach_arena(a, /*slot index*/ 0); 199 __TBB_ASSERT(td.my_inbox.is_idle_state(false), nullptr); 200 201 stack_size = a.my_threading_control->worker_stack_size(); 202 std::uintptr_t stack_base = get_stack_base(stack_size); 203 task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher(); 204 td.enter_task_dispatcher(task_disp, calculate_stealing_threshold(stack_base, stack_size)); 205 206 td.my_arena_slot->occupy(); 207 thr_control->register_thread(td); 208 set_thread_data(td); 209 #if (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED 210 // The external thread destructor is called from dllMain but it is not available with a static build. 211 // Therefore, we need to register the current thread to call the destructor during thread termination. 212 register_external_thread_destructor(); 213 #endif 214 } 215 216 void governor::auto_terminate(void* tls) { 217 __TBB_ASSERT(get_thread_data_if_initialized() == nullptr || 218 get_thread_data_if_initialized() == tls, nullptr); 219 if (tls) { 220 thread_data* td = static_cast<thread_data*>(tls); 221 222 auto clear_tls = [td] { 223 td->~thread_data(); 224 cache_aligned_deallocate(td); 225 clear_thread_data(); 226 }; 227 228 // Only external thread can be inside an arena during termination. 229 if (td->my_arena_slot) { 230 arena* a = td->my_arena; 231 threading_control* thr_control = a->my_threading_control; 232 233 // If the TLS slot is already cleared by OS or underlying concurrency 234 // runtime, restore its value to properly clean up arena 235 if (!is_thread_data_set(td)) { 236 set_thread_data(*td); 237 } 238 239 a->my_observers.notify_exit_observers(td->my_last_observer, td->my_is_worker); 240 241 td->leave_task_dispatcher(); 242 td->my_arena_slot->release(); 243 // Release an arena 244 a->on_thread_leaving(arena::ref_external); 245 246 thr_control->unregister_thread(*td); 247 248 // The tls should be cleared before market::release because 249 // market can destroy the tls key if we keep the last reference 250 clear_tls(); 251 252 // If there was an associated arena, it added a public market reference 253 thr_control->unregister_public_reference(/* blocking terminate =*/ false); 254 } else { 255 clear_tls(); 256 } 257 } 258 __TBB_ASSERT(get_thread_data_if_initialized() == nullptr, nullptr); 259 } 260 261 void governor::initialize_rml_factory () { 262 ::rml::factory::status_type res = theRMLServerFactory.open(); 263 UsePrivateRML = res != ::rml::factory::st_success; 264 } 265 266 void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle& handle) { 267 handle.m_ctl = new(allocate_memory(sizeof(global_control))) global_control(global_control::scheduler_handle, 1); 268 } 269 270 void release_impl(d1::task_scheduler_handle& handle) { 271 if (handle.m_ctl != nullptr) { 272 handle.m_ctl->~global_control(); 273 deallocate_memory(handle.m_ctl); 274 handle.m_ctl = nullptr; 275 } 276 } 277 278 bool finalize_impl(d1::task_scheduler_handle& handle) { 279 __TBB_ASSERT_RELEASE(handle, "trying to finalize with null handle"); 280 __TBB_ASSERT(is_present(*handle.m_ctl), "finalize or release was already called on this object"); 281 282 bool ok = true; // ok if threading_control does not exist yet 283 if (threading_control::is_present()) { 284 thread_data* td = governor::get_thread_data_if_initialized(); 285 if (td) { 286 task_dispatcher* task_disp = td->my_task_dispatcher; 287 __TBB_ASSERT(task_disp, nullptr); 288 if (task_disp->m_properties.outermost && !td->my_is_worker) { // is not inside a parallel region 289 governor::auto_terminate(td); 290 } 291 } 292 293 if (remove_and_check_if_empty(*handle.m_ctl)) { 294 ok = threading_control::unregister_lifetime_control(/*blocking_terminate*/ true); 295 } else { 296 ok = false; 297 } 298 } 299 300 return ok; 301 } 302 303 bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle& handle, std::intptr_t mode) { 304 if (mode == d1::release_nothrowing) { 305 release_impl(handle); 306 return true; 307 } else { 308 bool ok = finalize_impl(handle); 309 // TODO: it is unsafe when finalize is called concurrently and further library unload 310 release_impl(handle); 311 if (mode == d1::finalize_throwing && !ok) { 312 throw_exception(exception_id::unsafe_wait); 313 } 314 return ok; 315 } 316 } 317 318 #if __TBB_ARENA_BINDING 319 320 #if __TBB_WEAK_SYMBOLS_PRESENT 321 #pragma weak __TBB_internal_initialize_system_topology 322 #pragma weak __TBB_internal_destroy_system_topology 323 #pragma weak __TBB_internal_allocate_binding_handler 324 #pragma weak __TBB_internal_deallocate_binding_handler 325 #pragma weak __TBB_internal_apply_affinity 326 #pragma weak __TBB_internal_restore_affinity 327 #pragma weak __TBB_internal_get_default_concurrency 328 329 extern "C" { 330 void __TBB_internal_initialize_system_topology( 331 size_t groups_num, 332 int& numa_nodes_count, int*& numa_indexes_list, 333 int& core_types_count, int*& core_types_indexes_list 334 ); 335 void __TBB_internal_destroy_system_topology( ); 336 337 //TODO: consider renaming to `create_binding_handler` and `destroy_binding_handler` 338 binding_handler* __TBB_internal_allocate_binding_handler( int slot_num, int numa_id, int core_type_id, int max_threads_per_core ); 339 void __TBB_internal_deallocate_binding_handler( binding_handler* handler_ptr ); 340 341 void __TBB_internal_apply_affinity( binding_handler* handler_ptr, int slot_num ); 342 void __TBB_internal_restore_affinity( binding_handler* handler_ptr, int slot_num ); 343 344 int __TBB_internal_get_default_concurrency( int numa_id, int core_type_id, int max_threads_per_core ); 345 } 346 #endif /* __TBB_WEAK_SYMBOLS_PRESENT */ 347 348 // Stubs that will be used if TBBbind library is unavailable. 349 static void dummy_destroy_system_topology ( ) { } 350 static binding_handler* dummy_allocate_binding_handler ( int, int, int, int ) { return nullptr; } 351 static void dummy_deallocate_binding_handler ( binding_handler* ) { } 352 static void dummy_apply_affinity ( binding_handler*, int ) { } 353 static void dummy_restore_affinity ( binding_handler*, int ) { } 354 static int dummy_get_default_concurrency( int, int, int ) { return governor::default_num_threads(); } 355 356 // Handlers for communication with TBBbind 357 static void (*initialize_system_topology_ptr)( 358 size_t groups_num, 359 int& numa_nodes_count, int*& numa_indexes_list, 360 int& core_types_count, int*& core_types_indexes_list 361 ) = nullptr; 362 static void (*destroy_system_topology_ptr)( ) = dummy_destroy_system_topology; 363 364 static binding_handler* (*allocate_binding_handler_ptr)( int slot_num, int numa_id, int core_type_id, int max_threads_per_core ) 365 = dummy_allocate_binding_handler; 366 static void (*deallocate_binding_handler_ptr)( binding_handler* handler_ptr ) 367 = dummy_deallocate_binding_handler; 368 static void (*apply_affinity_ptr)( binding_handler* handler_ptr, int slot_num ) 369 = dummy_apply_affinity; 370 static void (*restore_affinity_ptr)( binding_handler* handler_ptr, int slot_num ) 371 = dummy_restore_affinity; 372 int (*get_default_concurrency_ptr)( int numa_id, int core_type_id, int max_threads_per_core ) 373 = dummy_get_default_concurrency; 374 375 #if _WIN32 || _WIN64 || __unix__ 376 // Table describing how to link the handlers. 377 static const dynamic_link_descriptor TbbBindLinkTable[] = { 378 DLD(__TBB_internal_initialize_system_topology, initialize_system_topology_ptr), 379 DLD(__TBB_internal_destroy_system_topology, destroy_system_topology_ptr), 380 DLD(__TBB_internal_allocate_binding_handler, allocate_binding_handler_ptr), 381 DLD(__TBB_internal_deallocate_binding_handler, deallocate_binding_handler_ptr), 382 DLD(__TBB_internal_apply_affinity, apply_affinity_ptr), 383 DLD(__TBB_internal_restore_affinity, restore_affinity_ptr), 384 DLD(__TBB_internal_get_default_concurrency, get_default_concurrency_ptr) 385 }; 386 387 static const unsigned LinkTableSize = sizeof(TbbBindLinkTable) / sizeof(dynamic_link_descriptor); 388 389 #if TBB_USE_DEBUG 390 #define DEBUG_SUFFIX "_debug" 391 #else 392 #define DEBUG_SUFFIX 393 #endif /* TBB_USE_DEBUG */ 394 395 #if _WIN32 || _WIN64 396 #define LIBRARY_EXTENSION ".dll" 397 #define LIBRARY_PREFIX 398 #elif __unix__ 399 #define LIBRARY_EXTENSION __TBB_STRING(.so.3) 400 #define LIBRARY_PREFIX "lib" 401 #endif /* __unix__ */ 402 403 #define TBBBIND_NAME LIBRARY_PREFIX "tbbbind" DEBUG_SUFFIX LIBRARY_EXTENSION 404 #define TBBBIND_2_0_NAME LIBRARY_PREFIX "tbbbind_2_0" DEBUG_SUFFIX LIBRARY_EXTENSION 405 406 #define TBBBIND_2_5_NAME LIBRARY_PREFIX "tbbbind_2_5" DEBUG_SUFFIX LIBRARY_EXTENSION 407 #endif /* _WIN32 || _WIN64 || __unix__ */ 408 409 // Representation of system hardware topology information on the TBB side. 410 // System topology may be initialized by third-party component (e.g. hwloc) 411 // or just filled in with default stubs. 412 namespace system_topology { 413 414 constexpr int automatic = -1; 415 416 static std::atomic<do_once_state> initialization_state; 417 418 namespace { 419 int numa_nodes_count = 0; 420 int* numa_nodes_indexes = nullptr; 421 422 int core_types_count = 0; 423 int* core_types_indexes = nullptr; 424 425 const char* load_tbbbind_shared_object() { 426 #if _WIN32 || _WIN64 || __unix__ 427 #if _WIN32 && !_WIN64 428 // For 32-bit Windows applications, process affinity masks can only support up to 32 logical CPUs. 429 SYSTEM_INFO si; 430 GetNativeSystemInfo(&si); 431 if (si.dwNumberOfProcessors > 32) return nullptr; 432 #endif /* _WIN32 && !_WIN64 */ 433 for (const auto& tbbbind_version : {TBBBIND_2_5_NAME, TBBBIND_2_0_NAME, TBBBIND_NAME}) { 434 if (dynamic_link(tbbbind_version, TbbBindLinkTable, LinkTableSize, nullptr, DYNAMIC_LINK_LOCAL_BINDING)) { 435 return tbbbind_version; 436 } 437 } 438 #endif /* _WIN32 || _WIN64 || __unix__ */ 439 return nullptr; 440 } 441 442 int processor_groups_num() { 443 #if _WIN32 444 return NumberOfProcessorGroups(); 445 #else 446 // Stub to improve code readability by reducing number of the compile-time conditions 447 return 1; 448 #endif 449 } 450 } // internal namespace 451 452 // Tries to load TBBbind library API, if success, gets NUMA topology information from it, 453 // in another case, fills NUMA topology by stubs. 454 void initialization_impl() { 455 governor::one_time_init(); 456 457 if (const char* tbbbind_name = load_tbbbind_shared_object()) { 458 initialize_system_topology_ptr( 459 processor_groups_num(), 460 numa_nodes_count, numa_nodes_indexes, 461 core_types_count, core_types_indexes 462 ); 463 464 PrintExtraVersionInfo("TBBBIND", tbbbind_name); 465 return; 466 } 467 468 static int dummy_index = automatic; 469 470 numa_nodes_count = 1; 471 numa_nodes_indexes = &dummy_index; 472 473 core_types_count = 1; 474 core_types_indexes = &dummy_index; 475 476 PrintExtraVersionInfo("TBBBIND", "UNAVAILABLE"); 477 } 478 479 void initialize() { 480 atomic_do_once(initialization_impl, initialization_state); 481 } 482 483 void destroy() { 484 destroy_system_topology_ptr(); 485 } 486 } // namespace system_topology 487 488 binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core) { 489 system_topology::initialize(); 490 return allocate_binding_handler_ptr(slot_num, numa_id, core_type_id, max_threads_per_core); 491 } 492 493 void destroy_binding_handler(binding_handler* handler_ptr) { 494 __TBB_ASSERT(deallocate_binding_handler_ptr, "tbbbind loading was not performed"); 495 deallocate_binding_handler_ptr(handler_ptr); 496 } 497 498 void apply_affinity_mask(binding_handler* handler_ptr, int slot_index) { 499 __TBB_ASSERT(slot_index >= 0, "Negative thread index"); 500 __TBB_ASSERT(apply_affinity_ptr, "tbbbind loading was not performed"); 501 apply_affinity_ptr(handler_ptr, slot_index); 502 } 503 504 void restore_affinity_mask(binding_handler* handler_ptr, int slot_index) { 505 __TBB_ASSERT(slot_index >= 0, "Negative thread index"); 506 __TBB_ASSERT(restore_affinity_ptr, "tbbbind loading was not performed"); 507 restore_affinity_ptr(handler_ptr, slot_index); 508 } 509 510 unsigned __TBB_EXPORTED_FUNC numa_node_count() { 511 system_topology::initialize(); 512 return system_topology::numa_nodes_count; 513 } 514 515 void __TBB_EXPORTED_FUNC fill_numa_indices(int* index_array) { 516 system_topology::initialize(); 517 std::memcpy(index_array, system_topology::numa_nodes_indexes, system_topology::numa_nodes_count * sizeof(int)); 518 } 519 520 int __TBB_EXPORTED_FUNC numa_default_concurrency(int node_id) { 521 if (node_id >= 0) { 522 system_topology::initialize(); 523 int result = get_default_concurrency_ptr( 524 node_id, 525 /*core_type*/system_topology::automatic, 526 /*threads_per_core*/system_topology::automatic 527 ); 528 if (result > 0) return result; 529 } 530 return governor::default_num_threads(); 531 } 532 533 unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t /*reserved*/) { 534 system_topology::initialize(); 535 return system_topology::core_types_count; 536 } 537 538 void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t /*reserved*/) { 539 system_topology::initialize(); 540 std::memcpy(index_array, system_topology::core_types_indexes, system_topology::core_types_count * sizeof(int)); 541 } 542 543 void constraints_assertion(d1::constraints c) { 544 bool is_topology_initialized = system_topology::initialization_state == do_once_state::initialized; 545 __TBB_ASSERT_RELEASE(c.max_threads_per_core == system_topology::automatic || c.max_threads_per_core > 0, 546 "Wrong max_threads_per_core constraints field value."); 547 548 auto numa_nodes_begin = system_topology::numa_nodes_indexes; 549 auto numa_nodes_end = system_topology::numa_nodes_indexes + system_topology::numa_nodes_count; 550 __TBB_ASSERT_RELEASE( 551 c.numa_id == system_topology::automatic || 552 (is_topology_initialized && std::find(numa_nodes_begin, numa_nodes_end, c.numa_id) != numa_nodes_end), 553 "The constraints::numa_id value is not known to the library. Use tbb::info::numa_nodes() to get the list of possible values."); 554 555 int* core_types_begin = system_topology::core_types_indexes; 556 int* core_types_end = system_topology::core_types_indexes + system_topology::core_types_count; 557 __TBB_ASSERT_RELEASE(c.core_type == system_topology::automatic || 558 (is_topology_initialized && std::find(core_types_begin, core_types_end, c.core_type) != core_types_end), 559 "The constraints::core_type value is not known to the library. Use tbb::info::core_types() to get the list of possible values."); 560 } 561 562 int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t /*reserved*/) { 563 constraints_assertion(c); 564 565 if (c.numa_id >= 0 || c.core_type >= 0 || c.max_threads_per_core > 0) { 566 system_topology::initialize(); 567 return get_default_concurrency_ptr(c.numa_id, c.core_type, c.max_threads_per_core); 568 } 569 return governor::default_num_threads(); 570 } 571 572 int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints&, intptr_t /*reserved*/) { 573 return system_topology::automatic; 574 } 575 #endif /* __TBB_ARENA_BINDING */ 576 577 } // namespace r1 578 } // namespace detail 579 } // namespace tbb 580