xref: /oneTBB/src/tbb/governor.cpp (revision b14b68a5)
/*
    Copyright (c) 2005-2022 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "governor.h"
#include "main.h"
#include "thread_data.h"
#include "market.h"
#include "arena.h"
#include "dynamic_link.h"
#include "concurrent_monitor.h"

#include "oneapi/tbb/task_group.h"
#include "oneapi/tbb/global_control.h"
#include "oneapi/tbb/tbb_allocator.h"
#include "oneapi/tbb/info.h"

#include "task_dispatcher.h"

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <atomic>
#include <algorithm>

namespace tbb {
namespace detail {
namespace r1 {

void clear_address_waiter_table();

//! Definitions are in global_control.cpp
bool remove_and_check_if_empty(d1::global_control& gc);
bool is_present(d1::global_control& gc);

namespace rml {
tbb_server* make_private_server( tbb_client& client );
} // namespace rml

namespace system_topology {
    void destroy();
}

//------------------------------------------------------------------------
// governor
//------------------------------------------------------------------------

void governor::acquire_resources () {
#if __TBB_USE_POSIX
    int status = theTLS.create(auto_terminate);
#else
    int status = theTLS.create();
#endif
    if( status )
        handle_perror(status, "TBB failed to initialize task scheduler TLS\n");
    detect_cpu_features(cpu_features);

    is_rethrow_broken = gcc_rethrow_exception_broken();
}

void governor::release_resources () {
    theRMLServerFactory.close();
    destroy_process_mask();

    __TBB_ASSERT(!(__TBB_InitOnce::initialization_done() && theTLS.get()), "TBB is unloaded while thread data still alive?");

    int status = theTLS.destroy();
    if( status )
        runtime_warning("failed to destroy task scheduler TLS: %s", std::strerror(status));
    clear_address_waiter_table();

    system_topology::destroy();
    dynamic_unlink_all();
}

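// RML is TBB's Resource Management Layer, the component that supplies worker threads.
// create_rml_server() first asks the shared RML factory for a server; if that fails
// (or a private RML was already requested), it falls back to an in-process private
// server created by rml::make_private_server().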
rml::tbb_server* governor::create_rml_server ( rml::tbb_client& client ) {
    rml::tbb_server* server = nullptr;
    if( !UsePrivateRML ) {
        ::rml::factory::status_type status = theRMLServerFactory.make_server( server, client );
        if( status != ::rml::factory::st_success ) {
            UsePrivateRML = true;
            runtime_warning( "rml::tbb_factory::make_server failed with status %x, falling back on private rml", status );
        }
    }
    if ( !server ) {
        __TBB_ASSERT( UsePrivateRML, nullptr);
        server = rml::make_private_server( client );
    }
    __TBB_ASSERT( server, "Failed to create RML server" );
    return server;
}

void governor::one_time_init() {
    if ( !__TBB_InitOnce::initialization_done() ) {
        DoOneTimeInitialization();
    }
}

/*
    There is no portable way to get the stack base address on POSIX systems. However,
    modern Linux versions provide the pthread_getattr_np API, which can be used to obtain
    a thread's stack size and base address. Unfortunately, even this function does not
    provide enough information for the main thread on the IA-64 architecture (the RSE
    spill area and the memory stack are allocated as two separate discontinuous chunks
    of memory), and there is no portable way to discern the main thread from secondary
    threads. Thus, for macOS* and for IA-64 Linux*, we use the TBB worker stack size for
    all threads and take the current stack top as the stack base. This simplified
    approach is based on the following assumptions:
    1) If the default stack size is insufficient for the user app's needs, the required
    amount will be explicitly specified by the user at the point of TBB scheduler
    initialization (as an argument to the tbb::task_scheduler_init constructor).
    2) When an external thread initializes the scheduler, it has enough space on its
    stack. Here "enough" means "at least as much as worker threads have".
    3) If the user app strives to conserve memory by cutting the stack size, it should
    do this for TBB workers too (as in assumption #1).
*/
static std::uintptr_t get_stack_base(std::size_t stack_size) {
    // Stacks grow top-down. The highest address is called the "stack base",
    // and the lowest is the "stack limit".
#if __TBB_USE_WINAPI
    suppress_unused_warning(stack_size);
    NT_TIB* pteb = (NT_TIB*)NtCurrentTeb();
    __TBB_ASSERT(&pteb < pteb->StackBase && &pteb > pteb->StackLimit, "invalid stack info in TEB");
    return reinterpret_cast<std::uintptr_t>(pteb->StackBase);
#else
    // There is no portable way to get the stack base address on POSIX, so we use
    // a non-portable method (available on all modern Linux systems) or the simplified
    // approach based on the common-sense assumptions above. The most important assumption
    // is that the main thread's stack size is not less than that of other threads.

    // Points to the lowest addressable byte of a stack.
    void* stack_limit = nullptr;
#if __linux__ && !__bg__
    size_t np_stack_size = 0;
    pthread_attr_t np_attr_stack;
    if (0 == pthread_getattr_np(pthread_self(), &np_attr_stack)) {
        if (0 == pthread_attr_getstack(&np_attr_stack, &stack_limit, &np_stack_size)) {
            __TBB_ASSERT( &stack_limit > stack_limit, "stack size must be positive" );
        }
        pthread_attr_destroy(&np_attr_stack);
    }
#endif /* __linux__ */
    std::uintptr_t stack_base{};
    if (stack_limit) {
        stack_base = reinterpret_cast<std::uintptr_t>(stack_limit) + stack_size;
    } else {
        // Use an anchor as the base stack address.
        int anchor{};
        stack_base = reinterpret_cast<std::uintptr_t>(&anchor);
    }
    return stack_base;
#endif /* __TBB_USE_WINAPI */
}
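// Note: the base returned here is combined with the worker stack size to derive the
// stealing threshold (see the calculate_stealing_threshold() usage in init_external_thread()
// below); roughly speaking, task stealing is throttled once the dispatcher gets too deep
// into the thread's stack.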

#if (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED
static void register_external_thread_destructor() {
    struct thread_destructor {
        ~thread_destructor() {
            governor::terminate_external_thread();
        }
    };
    // ~thread_destructor() will be called when the calling thread terminates
    static thread_local thread_destructor thr_destructor;
}
#endif // (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED

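// Lazily initializes TBB for an external (non-worker) thread: creates an implicit arena,
// occupies its first slot, and attaches the thread's task dispatcher. In practice this is
// reached the first time an external thread touches the scheduler (for example, through
// its first parallel algorithm call or explicit task_arena use), not by direct calls.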
void governor::init_external_thread() {
    one_time_init();
    // Create new scheduler instance with arena
    int num_slots = default_num_threads();
    // TODO_REVAMP: support an external thread without an implicit arena
    int num_reserved_slots = 1;
    unsigned arena_priority_level = 1; // corresponds to tbb::task_arena::priority::normal
    std::size_t stack_size = 0;
    arena& a = *market::create_arena(num_slots, num_reserved_slots, arena_priority_level, stack_size);
    // We need an internal reference to the market. TODO: is it legacy?
    market::global_market(false);
    // External thread always occupies the first slot
    thread_data& td = *new(cache_aligned_allocate(sizeof(thread_data))) thread_data(0, false);
    td.attach_arena(a, /*slot index*/ 0);
    __TBB_ASSERT(td.my_inbox.is_idle_state(false), nullptr);

    stack_size = a.my_market->worker_stack_size();
    std::uintptr_t stack_base = get_stack_base(stack_size);
    task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher();
    td.enter_task_dispatcher(task_disp, calculate_stealing_threshold(stack_base, stack_size));

    td.my_arena_slot->occupy();
    a.my_market->add_external_thread(td);
    set_thread_data(td);
#if (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED
    // The external thread destructor is called from DllMain, but that is not available with a static build.
    // Therefore, we need to register the current thread to call the destructor during thread termination.
    register_external_thread_destructor();
#endif
}

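// auto_terminate() is the TLS destructor registered in acquire_resources() on POSIX systems
// and is also invoked explicitly (e.g. from finalize_impl() below and from the DllMain-based
// teardown path on Windows). It destroys the calling thread's thread_data and, for external
// threads, detaches them from their arena and market.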
void governor::auto_terminate(void* tls) {
    __TBB_ASSERT(get_thread_data_if_initialized() == nullptr ||
        get_thread_data_if_initialized() == tls, nullptr);
    if (tls) {
        thread_data* td = static_cast<thread_data*>(tls);

        auto clear_tls = [td] {
            td->~thread_data();
            cache_aligned_deallocate(td);
            clear_thread_data();
        };

        // Only an external thread can be inside an arena during termination.
        if (td->my_arena_slot) {
            arena* a = td->my_arena;
            market* m = a->my_market;

            // If the TLS slot has already been cleared by the OS or the underlying concurrency
            // runtime, restore its value to properly clean up the arena.
            if (!is_thread_data_set(td)) {
                set_thread_data(*td);
            }

            a->my_observers.notify_exit_observers(td->my_last_observer, td->my_is_worker);

            td->leave_task_dispatcher();
            td->my_arena_slot->release();
            // Release the arena
            a->on_thread_leaving<arena::ref_external>();

            m->remove_external_thread(*td);

            // The TLS should be cleared before market::release because
            // the market can destroy the TLS key if we hold the last reference.
            clear_tls();

            // If there was an associated arena, it added a public market reference
            m->release( /*is_public*/ true, /*blocking_terminate*/ false);
        } else {
            clear_tls();
        }
    }
    __TBB_ASSERT(get_thread_data_if_initialized() == nullptr, nullptr);
}

void governor::initialize_rml_factory () {
    ::rml::factory::status_type res = theRMLServerFactory.open();
    UsePrivateRML = res != ::rml::factory::st_success;
}

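// Implements task_scheduler_handle attachment: the handle stores a dedicated global_control
// object of the scheduler_handle kind, which finalize()/release() below consume when the
// user later waits for (or abandons) scheduler shutdown.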
void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle& handle) {
    handle.m_ctl = new(allocate_memory(sizeof(global_control))) global_control(global_control::scheduler_handle, 1);
}

void release_impl(d1::task_scheduler_handle& handle) {
    if (handle.m_ctl != nullptr) {
        handle.m_ctl->~global_control();
        deallocate_memory(handle.m_ctl);
        handle.m_ctl = nullptr;
    }
}

bool finalize_impl(d1::task_scheduler_handle& handle) {
    __TBB_ASSERT_RELEASE(handle, "trying to finalize with null handle");
    market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex );
    bool ok = true; // ok if theMarket does not exist yet
    market* m = market::theMarket; // read the state of theMarket
    if (m != nullptr) {
        lock.release();
        __TBB_ASSERT(is_present(*handle.m_ctl), "finalize or release was already called on this object");
        thread_data* td = governor::get_thread_data_if_initialized();
        if (td) {
            task_dispatcher* task_disp = td->my_task_dispatcher;
            __TBB_ASSERT(task_disp, nullptr);
            if (task_disp->m_properties.outermost && !td->my_is_worker) { // is not inside a parallel region
                governor::auto_terminate(td);
            }
        }
        if (remove_and_check_if_empty(*handle.m_ctl)) {
            ok = m->release(/*is_public*/ true, /*blocking_terminate*/ true);
        } else {
            ok = false;
        }
    }
    return ok;
}

bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle& handle, std::intptr_t mode) {
    if (mode == d1::release_nothrowing) {
        release_impl(handle);
        return true;
    } else {
        bool ok = finalize_impl(handle);
        // TODO: it is unsafe when finalize is called concurrently and the library is unloaded afterwards
        release_impl(handle);
        if (mode == d1::finalize_throwing && !ok) {
            throw_exception(exception_id::unsafe_wait);
        }
        return ok;
    }
}
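// Illustrative use of the public API that reaches finalize() above (a sketch, not part of
// this translation unit):
//
//     tbb::task_scheduler_handle handle{ tbb::attach{} };
//     // ... run parallel work ...
//     tbb::finalize(handle);                               // finalize_throwing: may throw tbb::unsafe_wait
//     // or: bool ok = tbb::finalize(handle, std::nothrow); // finalize_nothrowing: returns false instead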

#if __TBB_ARENA_BINDING

#if __TBB_WEAK_SYMBOLS_PRESENT
#pragma weak __TBB_internal_initialize_system_topology
#pragma weak __TBB_internal_destroy_system_topology
#pragma weak __TBB_internal_allocate_binding_handler
#pragma weak __TBB_internal_deallocate_binding_handler
#pragma weak __TBB_internal_apply_affinity
#pragma weak __TBB_internal_restore_affinity
#pragma weak __TBB_internal_get_default_concurrency

extern "C" {
void __TBB_internal_initialize_system_topology(
    size_t groups_num,
    int& numa_nodes_count, int*& numa_indexes_list,
    int& core_types_count, int*& core_types_indexes_list
);
void __TBB_internal_destroy_system_topology( );

//TODO: consider renaming to `create_binding_handler` and `destroy_binding_handler`
binding_handler* __TBB_internal_allocate_binding_handler( int slot_num, int numa_id, int core_type_id, int max_threads_per_core );
void __TBB_internal_deallocate_binding_handler( binding_handler* handler_ptr );

void __TBB_internal_apply_affinity( binding_handler* handler_ptr, int slot_num );
void __TBB_internal_restore_affinity( binding_handler* handler_ptr, int slot_num );

int __TBB_internal_get_default_concurrency( int numa_id, int core_type_id, int max_threads_per_core );
}
#endif /* __TBB_WEAK_SYMBOLS_PRESENT */

// Stubs that will be used if the TBBbind library is unavailable.
static void dummy_destroy_system_topology ( ) { }
static binding_handler* dummy_allocate_binding_handler ( int, int, int, int ) { return nullptr; }
static void dummy_deallocate_binding_handler ( binding_handler* ) { }
static void dummy_apply_affinity ( binding_handler*, int ) { }
static void dummy_restore_affinity ( binding_handler*, int ) { }
static int dummy_get_default_concurrency( int, int, int ) { return governor::default_num_threads(); }

// Handlers for communication with TBBbind
static void (*initialize_system_topology_ptr)(
    size_t groups_num,
    int& numa_nodes_count, int*& numa_indexes_list,
    int& core_types_count, int*& core_types_indexes_list
) = nullptr;
static void (*destroy_system_topology_ptr)( ) = dummy_destroy_system_topology;

static binding_handler* (*allocate_binding_handler_ptr)( int slot_num, int numa_id, int core_type_id, int max_threads_per_core )
    = dummy_allocate_binding_handler;
static void (*deallocate_binding_handler_ptr)( binding_handler* handler_ptr )
    = dummy_deallocate_binding_handler;
static void (*apply_affinity_ptr)( binding_handler* handler_ptr, int slot_num )
    = dummy_apply_affinity;
static void (*restore_affinity_ptr)( binding_handler* handler_ptr, int slot_num )
    = dummy_restore_affinity;
int (*get_default_concurrency_ptr)( int numa_id, int core_type_id, int max_threads_per_core )
    = dummy_get_default_concurrency;

#if _WIN32 || _WIN64 || __unix__
// Table describing how to link the handlers.
static const dynamic_link_descriptor TbbBindLinkTable[] = {
    DLD(__TBB_internal_initialize_system_topology, initialize_system_topology_ptr),
    DLD(__TBB_internal_destroy_system_topology, destroy_system_topology_ptr),
    DLD(__TBB_internal_allocate_binding_handler, allocate_binding_handler_ptr),
    DLD(__TBB_internal_deallocate_binding_handler, deallocate_binding_handler_ptr),
    DLD(__TBB_internal_apply_affinity, apply_affinity_ptr),
    DLD(__TBB_internal_restore_affinity, restore_affinity_ptr),
    DLD(__TBB_internal_get_default_concurrency, get_default_concurrency_ptr)
};

static const unsigned LinkTableSize = sizeof(TbbBindLinkTable) / sizeof(dynamic_link_descriptor);

#if TBB_USE_DEBUG
#define DEBUG_SUFFIX "_debug"
#else
#define DEBUG_SUFFIX
#endif /* TBB_USE_DEBUG */

#if _WIN32 || _WIN64
#define LIBRARY_EXTENSION ".dll"
#define LIBRARY_PREFIX
#elif __unix__
#define LIBRARY_EXTENSION __TBB_STRING(.so.3)
#define LIBRARY_PREFIX "lib"
#endif /* __unix__ */

#define TBBBIND_NAME LIBRARY_PREFIX "tbbbind" DEBUG_SUFFIX LIBRARY_EXTENSION
#define TBBBIND_2_0_NAME LIBRARY_PREFIX "tbbbind_2_0" DEBUG_SUFFIX LIBRARY_EXTENSION

#define TBBBIND_2_5_NAME LIBRARY_PREFIX "tbbbind_2_5" DEBUG_SUFFIX LIBRARY_EXTENSION
#endif /* _WIN32 || _WIN64 || __unix__ */
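// For example, with the macros above TBBBIND_2_5_NAME expands to "libtbbbind_2_5.so.3" in a
// release build on Linux* and to "tbbbind_2_5_debug.dll" in a debug build on Windows*.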

// Representation of the system hardware topology information on the TBB side.
// The system topology may be initialized by a third-party component (e.g. hwloc)
// or just filled in with default stubs.
namespace system_topology {

constexpr int automatic = -1;

static std::atomic<do_once_state> initialization_state;

namespace {
int  numa_nodes_count = 0;
int* numa_nodes_indexes = nullptr;

int  core_types_count = 0;
int* core_types_indexes = nullptr;

const char* load_tbbbind_shared_object() {
#if _WIN32 || _WIN64 || __unix__
#if _WIN32 && !_WIN64
    // For 32-bit Windows applications, process affinity masks can only support up to 32 logical CPUs.
    SYSTEM_INFO si;
    GetNativeSystemInfo(&si);
    if (si.dwNumberOfProcessors > 32) return nullptr;
#endif /* _WIN32 && !_WIN64 */
    for (const auto& tbbbind_version : {TBBBIND_2_5_NAME, TBBBIND_2_0_NAME, TBBBIND_NAME}) {
        if (dynamic_link(tbbbind_version, TbbBindLinkTable, LinkTableSize, nullptr, DYNAMIC_LINK_LOCAL_BINDING)) {
            return tbbbind_version;
        }
    }
#endif /* _WIN32 || _WIN64 || __unix__ */
    return nullptr;
}

int processor_groups_num() {
#if _WIN32
    return NumberOfProcessorGroups();
#else
    // Stub to improve code readability by reducing the number of compile-time conditions
    return 1;
#endif
}
} // anonymous namespace

// Tries to load the TBBbind library API; if it succeeds, obtains the NUMA topology
// information from the library, otherwise fills the NUMA topology with stubs.
void initialization_impl() {
    governor::one_time_init();

    if (const char* tbbbind_name = load_tbbbind_shared_object()) {
        initialize_system_topology_ptr(
            processor_groups_num(),
            numa_nodes_count, numa_nodes_indexes,
            core_types_count, core_types_indexes
        );

        PrintExtraVersionInfo("TBBBIND", tbbbind_name);
        return;
    }

    static int dummy_index = automatic;

    numa_nodes_count = 1;
    numa_nodes_indexes = &dummy_index;

    core_types_count = 1;
    core_types_indexes = &dummy_index;

    PrintExtraVersionInfo("TBBBIND", "UNAVAILABLE");
}

void initialize() {
    atomic_do_once(initialization_impl, initialization_state);
}

void destroy() {
    destroy_system_topology_ptr();
}
} // namespace system_topology

binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core) {
    system_topology::initialize();
    return allocate_binding_handler_ptr(slot_num, numa_id, core_type_id, max_threads_per_core);
}

void destroy_binding_handler(binding_handler* handler_ptr) {
    __TBB_ASSERT(deallocate_binding_handler_ptr, "tbbbind loading was not performed");
    deallocate_binding_handler_ptr(handler_ptr);
}

void apply_affinity_mask(binding_handler* handler_ptr, int slot_index) {
    __TBB_ASSERT(slot_index >= 0, "Negative thread index");
    __TBB_ASSERT(apply_affinity_ptr, "tbbbind loading was not performed");
    apply_affinity_ptr(handler_ptr, slot_index);
}

void restore_affinity_mask(binding_handler* handler_ptr, int slot_index) {
    __TBB_ASSERT(slot_index >= 0, "Negative thread index");
    __TBB_ASSERT(restore_affinity_ptr, "tbbbind loading was not performed");
    restore_affinity_ptr(handler_ptr, slot_index);
}

unsigned __TBB_EXPORTED_FUNC numa_node_count() {
    system_topology::initialize();
    return system_topology::numa_nodes_count;
}

void __TBB_EXPORTED_FUNC fill_numa_indices(int* index_array) {
    system_topology::initialize();
    std::memcpy(index_array, system_topology::numa_nodes_indexes, system_topology::numa_nodes_count * sizeof(int));
}
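// Sketch of the intended use of the two entry points above (this is roughly how the public
// tbb::info::numa_nodes() helper in the headers consumes them; the header-side details are
// an assumption, not defined in this file):
//
//     std::vector<int> nodes(numa_node_count());
//     fill_numa_indices(nodes.data());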

int __TBB_EXPORTED_FUNC numa_default_concurrency(int node_id) {
    if (node_id >= 0) {
        system_topology::initialize();
        int result = get_default_concurrency_ptr(
            node_id,
            /*core_type*/system_topology::automatic,
            /*threads_per_core*/system_topology::automatic
        );
        if (result > 0) return result;
    }
    return governor::default_num_threads();
}

unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t /*reserved*/) {
    system_topology::initialize();
    return system_topology::core_types_count;
}

void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t /*reserved*/) {
    system_topology::initialize();
    std::memcpy(index_array, system_topology::core_types_indexes, system_topology::core_types_count * sizeof(int));
}

void constraints_assertion(d1::constraints c) {
    bool is_topology_initialized = system_topology::initialization_state == do_once_state::initialized;
    __TBB_ASSERT_RELEASE(c.max_threads_per_core == system_topology::automatic || c.max_threads_per_core > 0,
        "Wrong max_threads_per_core constraints field value.");

    auto numa_nodes_begin = system_topology::numa_nodes_indexes;
    auto numa_nodes_end = system_topology::numa_nodes_indexes + system_topology::numa_nodes_count;
    __TBB_ASSERT_RELEASE(
        c.numa_id == system_topology::automatic ||
        (is_topology_initialized && std::find(numa_nodes_begin, numa_nodes_end, c.numa_id) != numa_nodes_end),
        "The constraints::numa_id value is not known to the library. Use tbb::info::numa_nodes() to get the list of possible values.");

    int* core_types_begin = system_topology::core_types_indexes;
    int* core_types_end = system_topology::core_types_indexes + system_topology::core_types_count;
    __TBB_ASSERT_RELEASE(c.core_type == system_topology::automatic ||
        (is_topology_initialized && std::find(core_types_begin, core_types_end, c.core_type) != core_types_end),
        "The constraints::core_type value is not known to the library. Use tbb::info::core_types() to get the list of possible values.");
}

int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t /*reserved*/) {
    constraints_assertion(c);

    if (c.numa_id >= 0 || c.core_type >= 0 || c.max_threads_per_core > 0) {
        system_topology::initialize();
        return get_default_concurrency_ptr(c.numa_id, c.core_type, c.max_threads_per_core);
    }
    return governor::default_num_threads();
}
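// Illustrative caller (a sketch; the exact header-side plumbing is an assumption): a query such as
//
//     auto c = tbb::task_arena::constraints{}.set_numa_id(id).set_max_threads_per_core(1);
//     int n = tbb::info::default_concurrency(c);
//
// ends up in constraints_default_concurrency() above, which falls back to
// governor::default_num_threads() when no constraint field is set.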

int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints&, intptr_t /*reserved*/) {
    return system_topology::automatic;
}
#endif /* __TBB_ARENA_BINDING */

} // namespace r1
} // namespace detail
} // namespace tbb