xref: /oneTBB/src/tbbbind/tbb_bind.cpp (revision 478de5b1)
/*
    Copyright (c) 2019-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include <vector>
#include <mutex>

#include "../tbb/assert_impl.h" // Out-of-line TBB assertion handling routines are instantiated here.
#include "oneapi/tbb/detail/_assert.h"

#if _MSC_VER && !__INTEL_COMPILER && !__clang__
#pragma warning( push )
#pragma warning( disable : 4100 )
#elif _MSC_VER && __clang__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
#include <hwloc.h>
#if _MSC_VER && !__INTEL_COMPILER && !__clang__
#pragma warning( pop )
#elif _MSC_VER && __clang__
#pragma GCC diagnostic pop
#endif

#define __HWLOC_HYBRID_CPUS_INTERFACES_PRESENT (HWLOC_API_VERSION >= 0x20400)

// Most hwloc calls return a negative exit code on error.
// This macro checks the error codes returned from the hwloc interfaces.
#define assertion_hwloc_wrapper(command, ...) \
        __TBB_ASSERT_EX( (command(__VA_ARGS__)) >= 0, "Error occurred during call to hwloc API.");

namespace tbb {
namespace detail {
namespace r1 {

//------------------------------------------------------------------------
// Information about the hardware of the machine that TBB happens to run on
//------------------------------------------------------------------------
class platform_topology {
    friend class binding_handler;

    // Common topology members
    hwloc_topology_t topology{nullptr};
    hwloc_cpuset_t   process_cpu_affinity_mask{nullptr};
    hwloc_nodeset_t  process_node_affinity_mask{nullptr};
    std::size_t number_of_processors_groups{1};

    // NUMA API related topology members
    std::vector<hwloc_cpuset_t> numa_affinity_masks_list{};
    std::vector<int> numa_indexes_list{};
    int numa_nodes_count{0};

    // Hybrid CPUs API related topology members
    std::vector<hwloc_cpuset_t> core_types_affinity_masks_list{};
    std::vector<int> core_types_indexes_list{};

    enum init_stages { uninitialized,
                       started,
                       topology_allocated,
                       topology_loaded,
                       topology_parsed } initialization_state;

    // Binding threads that reside in other Windows processor groups is allowed
    // only if the machine topology contains several Windows processor groups
    // and the process affinity mask was not limited manually (an affinity mask
    // cannot violate processor group boundaries).
    bool intergroup_binding_allowed(std::size_t groups_num) { return groups_num > 1; }

private:
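    // Allocates and loads the hwloc topology object, then captures the process-wide
    // CPU and NUMA node affinity masks. If any hwloc call fails, initialization_state
    // stays below topology_loaded and the parsing stages below fall back to stub values.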
    void topology_initialization(std::size_t groups_num) {
        initialization_state = started;

        // Parse topology
        if ( hwloc_topology_init( &topology ) == 0 ) {
            initialization_state = topology_allocated;
            if ( hwloc_topology_load( topology ) == 0 ) {
                initialization_state = topology_loaded;
            }
        }
        if ( initialization_state != topology_loaded )
            return;

        // Getting process affinity mask
        if ( intergroup_binding_allowed(groups_num) ) {
            process_cpu_affinity_mask  = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset (topology));
            process_node_affinity_mask = hwloc_bitmap_dup(hwloc_topology_get_complete_nodeset(topology));
        } else {
            process_cpu_affinity_mask  = hwloc_bitmap_alloc();
            process_node_affinity_mask = hwloc_bitmap_alloc();

            assertion_hwloc_wrapper(hwloc_get_cpubind, topology, process_cpu_affinity_mask, 0);
            hwloc_cpuset_to_nodeset(topology, process_cpu_affinity_mask, process_node_affinity_mask);
        }

        number_of_processors_groups = groups_num;
    }

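    // Builds the list of NUMA node logical indexes visible to the process and, for each
    // node, a CPU affinity mask restricted to the process affinity mask. If the topology
    // was not loaded or contains no usable NUMA information, a single stub entry is used.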
    void numa_topology_parsing() {
        // Fill parameters with stubs if topology parsing is broken.
        if ( initialization_state != topology_loaded ) {
            numa_nodes_count = 1;
            numa_indexes_list.push_back(-1);
            return;
        }

        // If the system contains no NUMA nodes, HWLOC 1.11 returns an infinitely filled bitmap.
        // hwloc_bitmap_weight() returns a negative value for such bitmaps, so this check is used
        // to switch to an alternative way of topology initialization.
        numa_nodes_count = hwloc_bitmap_weight(process_node_affinity_mask);
        if (numa_nodes_count <= 0) {
            // numa_nodes_count may be zero if the process affinity mask is also empty (an invalid case)
            // or negative if some internal HWLOC error occurred.
            // Place -1 as the index in the former case and 0 otherwise.
            numa_indexes_list.push_back(numa_nodes_count == 0 ? -1 : 0);
            numa_nodes_count = 1;

            numa_affinity_masks_list.push_back(hwloc_bitmap_dup(process_cpu_affinity_mask));
        } else {
            // Get the list of NUMA logical indexes
            unsigned counter = 0;
            int i = 0;
            int max_numa_index = -1;
            numa_indexes_list.resize(numa_nodes_count);
            hwloc_obj_t node_buffer;
            hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
                node_buffer = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, i);
                numa_indexes_list[counter] = static_cast<int>(node_buffer->logical_index);

                if ( numa_indexes_list[counter] > max_numa_index ) {
                    max_numa_index = numa_indexes_list[counter];
                }

                counter++;
            } hwloc_bitmap_foreach_end();
            __TBB_ASSERT(max_numa_index >= 0, "Maximal NUMA index must not be negative");

            // Fill the concurrency and affinity masks lists
            numa_affinity_masks_list.resize(max_numa_index + 1);
            int index = 0;
            hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
                node_buffer = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, i);
                index = static_cast<int>(node_buffer->logical_index);

                hwloc_cpuset_t& current_mask = numa_affinity_masks_list[index];
                current_mask = hwloc_bitmap_dup(node_buffer->cpuset);

                hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
                __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask), "hwloc detected unavailable NUMA node");
            } hwloc_bitmap_foreach_end();
        }
    }

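    // Queries the hybrid CPU topology through the hwloc cpukinds interface (HWLOC >= 2.4)
    // and stores one affinity mask per core type, restricted to the process affinity mask.
    // If the interface is unavailable or parsing fails, a single stub entry with index -1
    // is used instead.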
    void core_types_topology_parsing() {
        // Fill parameters with stubs if topology parsing is broken.
        if ( initialization_state != topology_loaded ) {
            core_types_indexes_list.push_back(-1);
            return;
        }
#if __HWLOC_HYBRID_CPUS_INTERFACES_PRESENT
        __TBB_ASSERT(hwloc_get_api_version() >= 0x20400, "Hybrid CPU support interfaces require HWLOC >= 2.4");
        // Parse the hybrid CPU topology
        int core_types_number = hwloc_cpukinds_get_nr(topology, 0);
        bool core_types_parsing_broken = core_types_number <= 0;
        if (!core_types_parsing_broken) {
            core_types_affinity_masks_list.resize(core_types_number);
            int efficiency{-1};

            for (int core_type = 0; core_type < core_types_number; ++core_type) {
                hwloc_cpuset_t& current_mask = core_types_affinity_masks_list[core_type];
                current_mask = hwloc_bitmap_alloc();

                if (!hwloc_cpukinds_get_info(topology, core_type, current_mask, &efficiency, nullptr, nullptr, 0)
                    && efficiency >= 0
                ) {
                    hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);

                    if (hwloc_bitmap_weight(current_mask) > 0) {
                        core_types_indexes_list.push_back(core_type);
                    }
                    __TBB_ASSERT(hwloc_bitmap_weight(current_mask) >= 0, "Infinitely filled core type mask");
                } else {
                    core_types_parsing_broken = true;
                    break;
                }
            }
        }
#else /*!__HWLOC_HYBRID_CPUS_INTERFACES_PRESENT*/
        bool core_types_parsing_broken{true};
#endif /*__HWLOC_HYBRID_CPUS_INTERFACES_PRESENT*/

        if (core_types_parsing_broken) {
            for (auto& core_type_mask : core_types_affinity_masks_list) {
                hwloc_bitmap_free(core_type_mask);
            }
            core_types_affinity_masks_list.resize(1);
            core_types_indexes_list.resize(1);

            core_types_affinity_masks_list[0] = hwloc_bitmap_dup(process_cpu_affinity_mask);
            core_types_indexes_list[0] = -1;
        }
    }

public:
    typedef hwloc_cpuset_t             affinity_mask;
    typedef hwloc_const_cpuset_t const_affinity_mask;

    static platform_topology& instance() {
        static platform_topology topology;
        return topology;
    }

    bool is_topology_parsed() { return initialization_state == topology_parsed; }

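    // Runs all parsing stages exactly once; calls after the first attempt
    // (whether it succeeded or failed) are no-ops.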
    void initialize( std::size_t groups_num ) {
        if ( initialization_state != uninitialized )
            return;

        topology_initialization(groups_num);
        numa_topology_parsing();
        core_types_topology_parsing();

        if (initialization_state == topology_loaded)
            initialization_state = topology_parsed;
    }

    ~platform_topology() {
        if ( is_topology_parsed() ) {
            for (auto& numa_node_mask : numa_affinity_masks_list) {
                hwloc_bitmap_free(numa_node_mask);
            }

            for (auto& core_type_mask : core_types_affinity_masks_list) {
                hwloc_bitmap_free(core_type_mask);
            }

            hwloc_bitmap_free(process_node_affinity_mask);
            hwloc_bitmap_free(process_cpu_affinity_mask);
        }

        if ( initialization_state >= topology_allocated ) {
            hwloc_topology_destroy(topology);
        }

        initialization_state = uninitialized;
    }

    void fill_topology_information(
        int& _numa_nodes_count, int*& _numa_indexes_list,
        int& _core_types_count, int*& _core_types_indexes_list
    ) {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized platform_topology");
        _numa_nodes_count = numa_nodes_count;
        _numa_indexes_list = numa_indexes_list.data();

        _core_types_count = (int)core_types_indexes_list.size();
        _core_types_indexes_list = core_types_indexes_list.data();
    }

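    // Composes the affinity mask that corresponds to the given constraints: the process
    // affinity mask is intersected with the requested NUMA node and core type masks, and,
    // if max_threads_per_core is set, each core contributes at most that many hardware
    // threads to the result stored in input_mask.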
    void fill_constraints_affinity_mask(affinity_mask input_mask, int numa_node_index, int core_type_index, int max_threads_per_core) {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized platform_topology");
        __TBB_ASSERT(numa_node_index < (int)numa_affinity_masks_list.size(), "Wrong NUMA node id");
        __TBB_ASSERT(core_type_index < (int)core_types_affinity_masks_list.size(), "Wrong core type id");
        __TBB_ASSERT(max_threads_per_core == -1 || max_threads_per_core > 0, "Wrong max_threads_per_core");

        hwloc_cpuset_t constraints_mask = hwloc_bitmap_alloc();
        hwloc_cpuset_t core_mask = hwloc_bitmap_alloc();

        hwloc_bitmap_copy(constraints_mask, process_cpu_affinity_mask);
        if (numa_node_index >= 0) {
            hwloc_bitmap_and(constraints_mask, constraints_mask, numa_affinity_masks_list[numa_node_index]);
        }
        if (core_type_index >= 0) {
            hwloc_bitmap_and(constraints_mask, constraints_mask, core_types_affinity_masks_list[core_type_index]);
        }
        if (max_threads_per_core > 0) {
            // Clear the input mask
            hwloc_bitmap_zero(input_mask);

            hwloc_obj_t current_core = nullptr;
            while ((current_core = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_CORE, current_core)) != nullptr) {
                hwloc_bitmap_and(core_mask, constraints_mask, current_core->cpuset);

                // Trim the core mask to the required number of hardware threads
                int current_threads_per_core = 0;
                for (int id = hwloc_bitmap_first(core_mask); id != -1; id = hwloc_bitmap_next(core_mask, id)) {
                    if (++current_threads_per_core > max_threads_per_core) {
                        hwloc_bitmap_clr(core_mask, id);
                    }
                }

                hwloc_bitmap_or(input_mask, input_mask, core_mask);
            }
        } else {
            hwloc_bitmap_copy(input_mask, constraints_mask);
        }

        hwloc_bitmap_free(core_mask);
        hwloc_bitmap_free(constraints_mask);
    }

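    // Expands current_mask to whole cores (every core that intersects it contributes its
    // full cpuset) and restricts the result to constraints_mask. Used by the Windows-specific
    // path of apply_affinity() below.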
    void fit_num_threads_per_core(affinity_mask result_mask, affinity_mask current_mask, affinity_mask constraints_mask) {
        hwloc_bitmap_zero(result_mask);
        hwloc_obj_t current_core = nullptr;
        while ((current_core = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_CORE, current_core)) != nullptr) {
            if (hwloc_bitmap_intersects(current_mask, current_core->cpuset)) {
                hwloc_bitmap_or(result_mask, result_mask, current_core->cpuset);
            }
        }
        hwloc_bitmap_and(result_mask, result_mask, constraints_mask);
    }

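    // Returns the number of hardware threads that satisfy the given constraints,
    // i.e. the weight of the corresponding constraints affinity mask.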
    int get_default_concurrency(int numa_node_index, int core_type_index, int max_threads_per_core) {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized platform_topology");

        hwloc_cpuset_t constraints_mask = hwloc_bitmap_alloc();
        fill_constraints_affinity_mask(constraints_mask, numa_node_index, core_type_index, max_threads_per_core);

        int default_concurrency = hwloc_bitmap_weight(constraints_mask);
        hwloc_bitmap_free(constraints_mask);
        return default_concurrency;
    }

    affinity_mask allocate_process_affinity_mask() {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized platform_topology");
        return hwloc_bitmap_dup(process_cpu_affinity_mask);
    }

    void free_affinity_mask( affinity_mask mask_to_free ) {
        hwloc_bitmap_free(mask_to_free); // If the bitmap is nullptr, no operation is performed.
    }

    void store_current_affinity_mask( affinity_mask current_mask ) {
        assertion_hwloc_wrapper(hwloc_get_cpubind, topology, current_mask, HWLOC_CPUBIND_THREAD);

        hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
        __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask),
            "Current affinity mask must intersect with the process affinity mask");
    }

    void set_affinity_mask( const_affinity_mask mask ) {
        if (hwloc_bitmap_weight(mask) > 0) {
            assertion_hwloc_wrapper(hwloc_set_cpubind, topology, mask, HWLOC_CPUBIND_THREAD);
        }
    }
};

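// Applies a constraints-based affinity mask to arena threads on scheduler entry and
// restores each thread's original affinity mask on scheduler exit.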
class binding_handler {
    // The following vector saves the thread affinity mask on scheduler entry so that it
    // can be restored for this thread on scheduler exit.
    typedef std::vector<platform_topology::affinity_mask> affinity_masks_container;
    affinity_masks_container affinity_backup;
    platform_topology::affinity_mask handler_affinity_mask;

#if WIN32
    affinity_masks_container affinity_buffer;
    int my_numa_node_id;
    int my_core_type_id;
    int my_max_threads_per_core;
#endif

public:
    binding_handler( std::size_t size, int numa_node_id, int core_type_id, int max_threads_per_core )
        : affinity_backup(size)
#if WIN32
        , affinity_buffer(size)
        , my_numa_node_id(numa_node_id)
        , my_core_type_id(core_type_id)
        , my_max_threads_per_core(max_threads_per_core)
#endif
    {
        for (std::size_t i = 0; i < size; ++i) {
            affinity_backup[i] = platform_topology::instance().allocate_process_affinity_mask();
#if WIN32
            affinity_buffer[i] = platform_topology::instance().allocate_process_affinity_mask();
#endif
        }
        handler_affinity_mask = platform_topology::instance().allocate_process_affinity_mask();
        platform_topology::instance().fill_constraints_affinity_mask
            (handler_affinity_mask, numa_node_id, core_type_id, max_threads_per_core);
    }

    ~binding_handler() {
        for (std::size_t i = 0; i < affinity_backup.size(); ++i) {
            platform_topology::instance().free_affinity_mask(affinity_backup[i]);
#if WIN32
            platform_topology::instance().free_affinity_mask(affinity_buffer[i]);
#endif
        }
        platform_topology::instance().free_affinity_mask(handler_affinity_mask);
    }

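    // Saves the calling thread's current affinity mask into affinity_backup[slot_num],
    // then binds the thread to the constraints-based handler_affinity_mask.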
    void apply_affinity( unsigned slot_num ) {
        auto& topology = platform_topology::instance();
        __TBB_ASSERT(slot_num < affinity_backup.size(),
            "The slot number is greater than the number of slots in the arena");
        __TBB_ASSERT(topology.is_topology_parsed(),
            "Trying to get access to uninitialized platform_topology");

        topology.store_current_affinity_mask(affinity_backup[slot_num]);

#if WIN32
        // TBBBind supports only systems where NUMA nodes and core types do not cross the border
        // between several processor groups. So if a certain NUMA node or core type constraint
        // is specified, the constraints affinity mask will not cross the processor groups' border.

        // But if the constraint is based only on the max_threads_per_core setting, the
        // constraints affinity mask may cross the border between several processor groups
        // on machines with more than 64 hardware threads. That is why a special function is
        // needed to regulate the number of threads in the current thread's mask.
        if (topology.number_of_processors_groups > 1 && my_max_threads_per_core != -1 &&
            (my_numa_node_id == -1 || topology.numa_indexes_list.size() == 1) &&
            (my_core_type_id == -1 || topology.core_types_indexes_list.size() == 1)
        ) {
            topology.fit_num_threads_per_core(affinity_buffer[slot_num], affinity_backup[slot_num], handler_affinity_mask);
            topology.set_affinity_mask(affinity_buffer[slot_num]);
            return;
        }
#endif
        topology.set_affinity_mask(handler_affinity_mask);
    }

    void restore_previous_affinity_mask( unsigned slot_num ) {
        auto& topology = platform_topology::instance();
        __TBB_ASSERT(topology.is_topology_parsed(),
            "Trying to get access to uninitialized platform_topology");
        topology.set_affinity_mask(affinity_backup[slot_num]);
    };

};

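// Illustrative call sequence (a sketch of how the TBB runtime side might drive these
// entry points; the caller-side variable names below are hypothetical, not part of
// this file):
//
//     int numa_count = 0, core_types_count = 0;
//     int* numa_indexes = nullptr;
//     int* core_type_indexes = nullptr;
//     __TBB_internal_initialize_system_topology(/*groups_num*/ 1,
//         numa_count, numa_indexes, core_types_count, core_type_indexes);
//
//     binding_handler* handler = __TBB_internal_allocate_binding_handler(
//         /*number_of_slots*/ 4, /*numa_id*/ -1, /*core_type_id*/ -1, /*max_threads_per_core*/ -1);
//     __TBB_internal_apply_affinity(handler, /*slot_num*/ 0);   // on scheduler entry
//     __TBB_internal_restore_affinity(handler, /*slot_num*/ 0); // on scheduler exit
//     __TBB_internal_deallocate_binding_handler(handler);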
extern "C" { // exported to TBB interfaces

void __TBB_internal_initialize_system_topology(
    std::size_t groups_num,
    int& numa_nodes_count, int*& numa_indexes_list,
    int& core_types_count, int*& core_types_indexes_list
) {
    platform_topology::instance().initialize(groups_num);
    platform_topology::instance().fill_topology_information(
        numa_nodes_count, numa_indexes_list,
        core_types_count, core_types_indexes_list
    );
}

binding_handler* __TBB_internal_allocate_binding_handler(int number_of_slots, int numa_id, int core_type_id, int max_threads_per_core) {
    __TBB_ASSERT(number_of_slots > 0, "Trying to create numa handler for 0 threads.");
    return new binding_handler(number_of_slots, numa_id, core_type_id, max_threads_per_core);
}

void __TBB_internal_deallocate_binding_handler(binding_handler* handler_ptr) {
    __TBB_ASSERT(handler_ptr != nullptr, "Trying to deallocate nullptr pointer.");
    delete handler_ptr;
}

void __TBB_internal_apply_affinity(binding_handler* handler_ptr, int slot_num) {
    __TBB_ASSERT(handler_ptr != nullptr, "Trying to get access to uninitialized metadata.");
    handler_ptr->apply_affinity(slot_num);
}

void __TBB_internal_restore_affinity(binding_handler* handler_ptr, int slot_num) {
    __TBB_ASSERT(handler_ptr != nullptr, "Trying to get access to uninitialized metadata.");
    handler_ptr->restore_previous_affinity_mask(slot_num);
}

int __TBB_internal_get_default_concurrency(int numa_id, int core_type_id, int max_threads_per_core) {
    return platform_topology::instance().get_default_concurrency(numa_id, core_type_id, max_threads_per_core);
}

} // extern "C"

} // namespace r1
} // namespace detail
} // namespace tbb

#undef assertion_hwloc_wrapper