/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "governor.h"
#include "main.h"
#include "thread_data.h"
#include "market.h"
#include "arena.h"
#include "dynamic_link.h"
#include "concurrent_monitor.h"

#include "oneapi/tbb/task_group.h"
#include "oneapi/tbb/global_control.h"
#include "oneapi/tbb/tbb_allocator.h"
#include "oneapi/tbb/info.h"

#include "task_dispatcher.h"

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <atomic>
#include <algorithm>

namespace tbb {
namespace detail {
namespace r1 {

void clear_address_waiter_table();

#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
//! global_control.cpp contains definition
bool remove_and_check_if_empty(d1::global_control& gc);
bool is_present(d1::global_control& gc);
#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE

namespace rml {
tbb_server* make_private_server( tbb_client& client );
} // namespace rml

namespace system_topology {
void destroy();
}

//------------------------------------------------------------------------
// governor
//------------------------------------------------------------------------

void governor::acquire_resources () {
#if __TBB_USE_POSIX
    int status = theTLS.create(auto_terminate);
#else
    int status = theTLS.create();
#endif
    if( status )
        handle_perror(status, "TBB failed to initialize task scheduler TLS\n");
    detect_cpu_features(cpu_features);

    is_rethrow_broken = gcc_rethrow_exception_broken();
}

void governor::release_resources () {
    theRMLServerFactory.close();
    destroy_process_mask();

    __TBB_ASSERT(!(__TBB_InitOnce::initialization_done() && theTLS.get()), "TBB is unloaded while thread data still alive?");

    int status = theTLS.destroy();
    if( status )
        runtime_warning("failed to destroy task scheduler TLS: %s", std::strerror(status));
    clear_address_waiter_table();

    system_topology::destroy();
    dynamic_unlink_all();
}

rml::tbb_server* governor::create_rml_server ( rml::tbb_client& client ) {
    rml::tbb_server* server = NULL;
    if( !UsePrivateRML ) {
        ::rml::factory::status_type status = theRMLServerFactory.make_server( server, client );
        if( status != ::rml::factory::st_success ) {
            UsePrivateRML = true;
            runtime_warning( "rml::tbb_factory::make_server failed with status %x, falling back on private rml", status );
        }
    }
    if ( !server ) {
        __TBB_ASSERT( UsePrivateRML, NULL );
        server = rml::make_private_server( client );
    }
    __TBB_ASSERT( server, "Failed to create RML server" );
    return server;
}

void governor::one_time_init() {
    if ( !__TBB_InitOnce::initialization_done() ) {
        DoOneTimeInitialization();
    }
}
/*
    There is no portable way to get stack base address in Posix, however the modern
    Linux versions provide pthread_attr_np API that can be used to obtain thread's
    stack size and base address. Unfortunately even this function does not provide
    enough information for the main thread on IA-64 architecture (RSE spill area
    and memory stack are allocated as two separate discontinuous chunks of memory),
    and there is no portable way to discern the main and the secondary threads.
    Thus for macOS* and IA-64 architecture for Linux* OS we use the TBB worker stack size for
    all threads and use the current stack top as the stack base. This simplified
    approach is based on the following assumptions:
    1) If the default stack size is insufficient for the user app needs, the
    required amount will be explicitly specified by the user at the point of the
    TBB scheduler initialization (as an argument to tbb::task_scheduler_init
    constructor).
    2) When an external thread initializes the scheduler, it has enough space on its
    stack. Here "enough" means "at least as much as worker threads have".
    3) If the user app strives to conserve the memory by cutting stack size, it
    should do this for TBB workers too (as in the #1).
*/
static std::uintptr_t get_stack_base(std::size_t stack_size) {
    // Stacks are growing top-down. Highest address is called "stack base",
    // and the lowest is "stack limit".
#if __TBB_USE_WINAPI
    suppress_unused_warning(stack_size);
    NT_TIB* pteb = (NT_TIB*)NtCurrentTeb();
    __TBB_ASSERT(&pteb < pteb->StackBase && &pteb > pteb->StackLimit, "invalid stack info in TEB");
    return reinterpret_cast<std::uintptr_t>(pteb->StackBase);
#else
    // There is no portable way to get stack base address in Posix, so we use
    // non-portable method (on all modern Linux) or the simplified approach
    // based on the common sense assumptions. The most important assumption
    // is that the main thread's stack size is not less than that of other threads.

    // Points to the lowest addressable byte of a stack.
    void* stack_limit = nullptr;
#if __linux__ && !__bg__
    size_t np_stack_size = 0;
    pthread_attr_t np_attr_stack;
    if (0 == pthread_getattr_np(pthread_self(), &np_attr_stack)) {
        if (0 == pthread_attr_getstack(&np_attr_stack, &stack_limit, &np_stack_size)) {
            __TBB_ASSERT( &stack_limit > stack_limit, "stack size must be positive" );
        }
        pthread_attr_destroy(&np_attr_stack);
    }
#endif /* __linux__ */
    std::uintptr_t stack_base{};
    if (stack_limit) {
        stack_base = reinterpret_cast<std::uintptr_t>(stack_limit) + stack_size;
    } else {
        // Use an anchor as a base stack address.
        int anchor{};
        stack_base = reinterpret_cast<std::uintptr_t>(&anchor);
    }
    return stack_base;
#endif /* __TBB_USE_WINAPI */
}
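// The base address computed above is combined with the worker stack size in
// governor::init_external_thread() below: calculate_stealing_threshold(stack_base, stack_size)
// produces the boundary stored in the task dispatcher, past which task stealing is
// suppressed so that deeply nested task execution does not exhaust the thread's stack.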
#if (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED
static void register_external_thread_destructor() {
    struct thread_destructor {
        ~thread_destructor() {
            governor::terminate_external_thread();
        }
    };
    // ~thread_destructor() will be called during the calling thread's termination
    static thread_local thread_destructor thr_destructor;
}
#endif // (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED

void governor::init_external_thread() {
    one_time_init();
    // Create new scheduler instance with arena
    int num_slots = default_num_threads();
    // TODO_REVAMP: support an external thread without an implicit arena
    int num_reserved_slots = 1;
    unsigned arena_priority_level = 1; // corresponds to tbb::task_arena::priority::normal
    std::size_t stack_size = 0;
    arena& a = *market::create_arena(num_slots, num_reserved_slots, arena_priority_level, stack_size);
    // We need an internal reference to the market. TODO: is it legacy?
    market::global_market(false);
    // External thread always occupies the first slot
    thread_data& td = *new(cache_aligned_allocate(sizeof(thread_data))) thread_data(0, false);
    td.attach_arena(a, /*slot index*/ 0);
    __TBB_ASSERT(td.my_inbox.is_idle_state(false), nullptr);

    stack_size = a.my_market->worker_stack_size();
    std::uintptr_t stack_base = get_stack_base(stack_size);
    task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher();
    task_disp.set_stealing_threshold(calculate_stealing_threshold(stack_base, stack_size));
    td.attach_task_dispatcher(task_disp);

    td.my_arena_slot->occupy();
    a.my_market->add_external_thread(td);
    set_thread_data(td);
#if (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED
    // The external thread destructor is called from dllMain but it is not available with a static build.
    // Therefore, we need to register the current thread to call the destructor during thread termination.
    register_external_thread_destructor();
#endif
}
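// Note: init_external_thread() is expected to run lazily, the first time a non-worker
// thread enters the scheduler. governor::auto_terminate() below is its counterpart and is
// reached either through the TLS destructor registered in acquire_resources() (POSIX) or
// from DllMain / the thread-local destructor above (Windows).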
void governor::auto_terminate(void* tls) {
    __TBB_ASSERT(get_thread_data_if_initialized() == nullptr ||
                 get_thread_data_if_initialized() == tls, NULL);
    if (tls) {
        thread_data* td = static_cast<thread_data*>(tls);

        // Only an external thread can be inside an arena during termination.
        if (td->my_arena_slot) {
            arena* a = td->my_arena;
            market* m = a->my_market;

            a->my_observers.notify_exit_observers(td->my_last_observer, td->my_is_worker);

            td->my_task_dispatcher->m_stealing_threshold = 0;
            td->detach_task_dispatcher();
            td->my_arena_slot->release();
            // Release an arena
            a->on_thread_leaving<arena::ref_external>();

            m->remove_external_thread(*td);
            // If there was an associated arena, it added a public market reference
            m->release( /*is_public*/ true, /*blocking_terminate*/ false);
        }

        td->~thread_data();
        cache_aligned_deallocate(td);

        clear_thread_data();
    }
    __TBB_ASSERT(get_thread_data_if_initialized() == nullptr, NULL);
}

void governor::initialize_rml_factory () {
    ::rml::factory::status_type res = theRMLServerFactory.open();
    UsePrivateRML = res != ::rml::factory::st_success;
}

#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle& handle) {
    handle.m_ctl = new(allocate_memory(sizeof(global_control))) global_control(global_control::scheduler_handle, 1);
}

void release_impl(d1::task_scheduler_handle& handle) {
    if (handle.m_ctl != nullptr) {
        handle.m_ctl->~global_control();
        deallocate_memory(handle.m_ctl);
        handle.m_ctl = nullptr;
    }
}

bool finalize_impl(d1::task_scheduler_handle& handle) {
    __TBB_ASSERT_RELEASE(handle, "trying to finalize with null handle");
    market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex );
    bool ok = true; // ok if theMarket does not exist yet
    market* m = market::theMarket; // read the state of theMarket
    if (m != nullptr) {
        lock.release();
        __TBB_ASSERT(is_present(*handle.m_ctl), "finalize or release was already called on this object");
        thread_data* td = governor::get_thread_data_if_initialized();
        if (td) {
            task_dispatcher* task_disp = td->my_task_dispatcher;
            __TBB_ASSERT(task_disp, nullptr);
            if (task_disp->m_properties.outermost && !td->my_is_worker) { // is not inside a parallel region
                governor::auto_terminate(td);
            }
        }
        if (remove_and_check_if_empty(*handle.m_ctl)) {
            ok = m->release(/*is_public*/ true, /*blocking_terminate*/ true);
        } else {
            ok = false;
        }
    }
    return ok;
}

bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle& handle, std::intptr_t mode) {
    if (mode == d1::release_nothrowing) {
        release_impl(handle);
        return true;
    } else {
        bool ok = finalize_impl(handle);
        // TODO: it is unsafe if finalize is called concurrently with further library unload
        release_impl(handle);
        if (mode == d1::finalize_throwing && !ok) {
            throw_exception(exception_id::unsafe_wait);
        }
        return ok;
    }
}
#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
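// Caller-side sketch of the handle API implemented above (illustrative only; it assumes
// the public oneTBB 2021 interface from oneapi/tbb/global_control.h, whose exact spelling
// may differ between releases):
//
//     tbb::task_scheduler_handle handle = tbb::task_scheduler_handle::get();
//     // ... parallel work ...
//     // Waits for worker threads to leave; returns false (or the throwing overload
//     // throws) if waiting would be unsafe, e.g. from inside a parallel region.
//     bool ok = tbb::finalize(handle, std::nothrow);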
#if __TBB_ARENA_BINDING

#if __TBB_WEAK_SYMBOLS_PRESENT
#pragma weak __TBB_internal_initialize_system_topology
#pragma weak __TBB_internal_destroy_system_topology
#pragma weak __TBB_internal_allocate_binding_handler
#pragma weak __TBB_internal_deallocate_binding_handler
#pragma weak __TBB_internal_apply_affinity
#pragma weak __TBB_internal_restore_affinity
#pragma weak __TBB_internal_get_default_concurrency

extern "C" {
void __TBB_internal_initialize_system_topology(
    size_t groups_num,
    int& numa_nodes_count, int*& numa_indexes_list,
    int& core_types_count, int*& core_types_indexes_list
);
void __TBB_internal_destroy_system_topology( );

//TODO: consider renaming to `create_binding_handler` and `destroy_binding_handler`
binding_handler* __TBB_internal_allocate_binding_handler( int slot_num, int numa_id, int core_type_id, int max_threads_per_core );
void __TBB_internal_deallocate_binding_handler( binding_handler* handler_ptr );

void __TBB_internal_apply_affinity( binding_handler* handler_ptr, int slot_num );
void __TBB_internal_restore_affinity( binding_handler* handler_ptr, int slot_num );

int __TBB_internal_get_default_concurrency( int numa_id, int core_type_id, int max_threads_per_core );
}
#endif /* __TBB_WEAK_SYMBOLS_PRESENT */

// Stubs that will be used if the TBBbind library is unavailable.
static void dummy_destroy_system_topology ( ) { }
static binding_handler* dummy_allocate_binding_handler ( int, int, int, int ) { return nullptr; }
static void dummy_deallocate_binding_handler ( binding_handler* ) { }
static void dummy_apply_affinity ( binding_handler*, int ) { }
static void dummy_restore_affinity ( binding_handler*, int ) { }
static int dummy_get_default_concurrency( int, int, int ) { return governor::default_num_threads(); }

// Handlers for communication with TBBbind
static void (*initialize_system_topology_ptr)(
    size_t groups_num,
    int& numa_nodes_count, int*& numa_indexes_list,
    int& core_types_count, int*& core_types_indexes_list
) = nullptr;
static void (*destroy_system_topology_ptr)( ) = dummy_destroy_system_topology;

static binding_handler* (*allocate_binding_handler_ptr)( int slot_num, int numa_id, int core_type_id, int max_threads_per_core )
    = dummy_allocate_binding_handler;
static void (*deallocate_binding_handler_ptr)( binding_handler* handler_ptr )
    = dummy_deallocate_binding_handler;
static void (*apply_affinity_ptr)( binding_handler* handler_ptr, int slot_num )
    = dummy_apply_affinity;
static void (*restore_affinity_ptr)( binding_handler* handler_ptr, int slot_num )
    = dummy_restore_affinity;
int (*get_default_concurrency_ptr)( int numa_id, int core_type_id, int max_threads_per_core )
    = dummy_get_default_concurrency;
#if _WIN32 || _WIN64 || __unix__
// Table describing how to link the handlers.
static const dynamic_link_descriptor TbbBindLinkTable[] = {
    DLD(__TBB_internal_initialize_system_topology, initialize_system_topology_ptr),
    DLD(__TBB_internal_destroy_system_topology, destroy_system_topology_ptr),
    DLD(__TBB_internal_allocate_binding_handler, allocate_binding_handler_ptr),
    DLD(__TBB_internal_deallocate_binding_handler, deallocate_binding_handler_ptr),
    DLD(__TBB_internal_apply_affinity, apply_affinity_ptr),
    DLD(__TBB_internal_restore_affinity, restore_affinity_ptr),
    DLD(__TBB_internal_get_default_concurrency, get_default_concurrency_ptr)
};

static const unsigned LinkTableSize = sizeof(TbbBindLinkTable) / sizeof(dynamic_link_descriptor);

#if TBB_USE_DEBUG
#define DEBUG_SUFFIX "_debug"
#else
#define DEBUG_SUFFIX
#endif /* TBB_USE_DEBUG */

#if _WIN32 || _WIN64
#define LIBRARY_EXTENSION ".dll"
#define LIBRARY_PREFIX
#elif __unix__
#define LIBRARY_EXTENSION __TBB_STRING(.so.3)
#define LIBRARY_PREFIX "lib"
#endif /* __unix__ */

#define TBBBIND_NAME LIBRARY_PREFIX "tbbbind" DEBUG_SUFFIX LIBRARY_EXTENSION
#define TBBBIND_2_0_NAME LIBRARY_PREFIX "tbbbind_2_0" DEBUG_SUFFIX LIBRARY_EXTENSION
#define TBBBIND_2_5_NAME LIBRARY_PREFIX "tbbbind_2_5" DEBUG_SUFFIX LIBRARY_EXTENSION
#endif /* _WIN32 || _WIN64 || __unix__ */

// Representation of the system hardware topology information on the TBB side.
// The system topology may be initialized by a third-party component (e.g. hwloc)
// or just filled in with default stubs.
namespace system_topology {

constexpr int automatic = -1;

static std::atomic<do_once_state> initialization_state;

namespace {
int numa_nodes_count = 0;
int* numa_nodes_indexes = nullptr;

int core_types_count = 0;
int* core_types_indexes = nullptr;

const char* load_tbbbind_shared_object() {
#if _WIN32 || _WIN64 || __unix__
#if _WIN32 && !_WIN64
    // For 32-bit Windows applications, process affinity masks can only support up to 32 logical CPUs.
    SYSTEM_INFO si;
    GetNativeSystemInfo(&si);
    if (si.dwNumberOfProcessors > 32) return nullptr;
#endif /* _WIN32 && !_WIN64 */
    for (const auto& tbbbind_version : {TBBBIND_2_5_NAME, TBBBIND_2_0_NAME, TBBBIND_NAME}) {
        if (dynamic_link(tbbbind_version, TbbBindLinkTable, LinkTableSize, nullptr, DYNAMIC_LINK_LOCAL_BINDING)) {
            return tbbbind_version;
        }
    }
#endif /* _WIN32 || _WIN64 || __unix__ */
    return nullptr;
}

int processor_groups_num() {
#if _WIN32
    return NumberOfProcessorGroups();
#else
    // Stub to improve code readability by reducing the number of compile-time conditions.
    return 1;
#endif
}
} // internal namespace
// Tries to load the TBBbind library API; if successful, gets NUMA topology information
// from it, otherwise fills the NUMA topology with stubs.
void initialization_impl() {
    governor::one_time_init();

    if (const char* tbbbind_name = load_tbbbind_shared_object()) {
        initialize_system_topology_ptr(
            processor_groups_num(),
            numa_nodes_count, numa_nodes_indexes,
            core_types_count, core_types_indexes
        );

        PrintExtraVersionInfo("TBBBIND", tbbbind_name);
        return;
    }

    static int dummy_index = automatic;

    numa_nodes_count = 1;
    numa_nodes_indexes = &dummy_index;

    core_types_count = 1;
    core_types_indexes = &dummy_index;

    PrintExtraVersionInfo("TBBBIND", "UNAVAILABLE");
}

void initialize() {
    atomic_do_once(initialization_impl, initialization_state);
}

void destroy() {
    destroy_system_topology_ptr();
}
} // namespace system_topology

binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core) {
    system_topology::initialize();
    return allocate_binding_handler_ptr(slot_num, numa_id, core_type_id, max_threads_per_core);
}

void destroy_binding_handler(binding_handler* handler_ptr) {
    __TBB_ASSERT(deallocate_binding_handler_ptr, "tbbbind loading was not performed");
    deallocate_binding_handler_ptr(handler_ptr);
}

void apply_affinity_mask(binding_handler* handler_ptr, int slot_index) {
    __TBB_ASSERT(slot_index >= 0, "Negative thread index");
    __TBB_ASSERT(apply_affinity_ptr, "tbbbind loading was not performed");
    apply_affinity_ptr(handler_ptr, slot_index);
}

void restore_affinity_mask(binding_handler* handler_ptr, int slot_index) {
    __TBB_ASSERT(slot_index >= 0, "Negative thread index");
    __TBB_ASSERT(restore_affinity_ptr, "tbbbind loading was not performed");
    restore_affinity_ptr(handler_ptr, slot_index);
}

unsigned __TBB_EXPORTED_FUNC numa_node_count() {
    system_topology::initialize();
    return system_topology::numa_nodes_count;
}

void __TBB_EXPORTED_FUNC fill_numa_indices(int* index_array) {
    system_topology::initialize();
    std::memcpy(index_array, system_topology::numa_nodes_indexes, system_topology::numa_nodes_count * sizeof(int));
}

int __TBB_EXPORTED_FUNC numa_default_concurrency(int node_id) {
    if (node_id >= 0) {
        system_topology::initialize();
        int result = get_default_concurrency_ptr(
            node_id,
            /*core_type*/system_topology::automatic,
            /*threads_per_core*/system_topology::automatic
        );
        if (result > 0) return result;
    }
    return governor::default_num_threads();
}

unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t /*reserved*/) {
    system_topology::initialize();
    return system_topology::core_types_count;
}

void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t /*reserved*/) {
    system_topology::initialize();
    std::memcpy(index_array, system_topology::core_types_indexes, system_topology::core_types_count * sizeof(int));
}
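// Caller-side sketch of how the topology queries above are typically consumed
// (illustrative only; it assumes the public tbb::info and tbb::task_arena::constraints
// interfaces and is not part of this translation unit):
//
//     std::vector<tbb::numa_node_id> numa_nodes = tbb::info::numa_nodes();
//     tbb::task_arena numa_arena(
//         tbb::task_arena::constraints{}.set_numa_id(numa_nodes.front()));
//     numa_arena.execute([] { /* work bound to the first NUMA node */ });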
void constraints_assertion(d1::constraints c) {
    bool is_topology_initialized = system_topology::initialization_state == do_once_state::initialized;
    __TBB_ASSERT_RELEASE(c.max_threads_per_core == system_topology::automatic || c.max_threads_per_core > 0,
        "Wrong max_threads_per_core constraints field value.");

    auto numa_nodes_begin = system_topology::numa_nodes_indexes;
    auto numa_nodes_end = system_topology::numa_nodes_indexes + system_topology::numa_nodes_count;
    __TBB_ASSERT_RELEASE(
        c.numa_id == system_topology::automatic ||
        (is_topology_initialized && std::find(numa_nodes_begin, numa_nodes_end, c.numa_id) != numa_nodes_end),
        "The constraints::numa_id value is not known to the library. Use tbb::info::numa_nodes() to get the list of possible values.");

    int* core_types_begin = system_topology::core_types_indexes;
    int* core_types_end = system_topology::core_types_indexes + system_topology::core_types_count;
    __TBB_ASSERT_RELEASE(c.core_type == system_topology::automatic ||
        (is_topology_initialized && std::find(core_types_begin, core_types_end, c.core_type) != core_types_end),
        "The constraints::core_type value is not known to the library. Use tbb::info::core_types() to get the list of possible values.");
}

int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t /*reserved*/) {
    constraints_assertion(c);

    if (c.numa_id >= 0 || c.core_type >= 0 || c.max_threads_per_core > 0) {
        system_topology::initialize();
        return get_default_concurrency_ptr(c.numa_id, c.core_type, c.max_threads_per_core);
    }
    return governor::default_num_threads();
}

int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints&, intptr_t /*reserved*/) {
    return system_topology::automatic;
}
#endif /* __TBB_ARENA_BINDING */

} // namespace r1
} // namespace detail
} // namespace tbb