xref: /oneTBB/src/tbbbind/tbb_bind.cpp (revision b53726aa)
/*
    Copyright (c) 2019-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include <vector>
#include <mutex>

#include "../tbb/assert_impl.h" // Out-of-line TBB assertion handling routines are instantiated here.
#include "oneapi/tbb/detail/_assert.h"
#include "oneapi/tbb/detail/_config.h"

#if _MSC_VER && !__INTEL_COMPILER && !__clang__
#pragma warning( push )
#pragma warning( disable : 4100 )
#elif _MSC_VER && __clang__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
#include <hwloc.h>
#if _MSC_VER && !__INTEL_COMPILER && !__clang__
#pragma warning( pop )
#elif _MSC_VER && __clang__
#pragma GCC diagnostic pop
#endif

#define __TBBBIND_HWLOC_HYBRID_CPUS_INTERFACES_PRESENT (HWLOC_API_VERSION >= 0x20400)
#define __TBBBIND_HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING_PRESENT (HWLOC_API_VERSION >= 0x20500)

// Most hwloc calls return a negative exit code on error.
// This macro asserts that calls into the hwloc interfaces do not return an error code.
#define assertion_hwloc_wrapper(command, ...) \
        __TBB_ASSERT_EX( (command(__VA_ARGS__)) >= 0, "Error occurred during call to hwloc API.");
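// For illustration, a call such as
//     assertion_hwloc_wrapper(hwloc_set_cpubind, topology, mask, HWLOC_CPUBIND_THREAD);
// expands to
//     __TBB_ASSERT_EX( (hwloc_set_cpubind(topology, mask, HWLOC_CPUBIND_THREAD)) >= 0,
//                      "Error occurred during call to hwloc API.");
// Since __TBB_ASSERT_EX keeps evaluating its expression even when assertions are disabled,
// the hwloc call is still performed in release builds; only the check disappears.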

namespace tbb {
namespace detail {
namespace r1 {

//------------------------------------------------------------------------
// Information about the machine's hardware that TBB happens to run on
//------------------------------------------------------------------------
class system_topology {
    friend class binding_handler;

    // Common topology members
    hwloc_topology_t topology{nullptr};
    hwloc_cpuset_t   process_cpu_affinity_mask{nullptr};
    hwloc_nodeset_t  process_node_affinity_mask{nullptr};
    std::size_t number_of_processors_groups{1};

    // NUMA API related topology members
    std::vector<hwloc_cpuset_t> numa_affinity_masks_list{};
    std::vector<int> numa_indexes_list{};
    int numa_nodes_count{0};

    // Hybrid CPUs API related topology members
    std::vector<hwloc_cpuset_t> core_types_affinity_masks_list{};
    std::vector<int> core_types_indexes_list{};

    enum init_stages { uninitialized,
                       started,
                       topology_allocated,
                       topology_loaded,
                       topology_parsed } initialization_state;

    // Binding threads that reside in other Windows processor groups is allowed
    // only if the machine topology contains several Windows processor groups
    // and the process affinity mask wasn't limited manually (an affinity mask
    // cannot violate processor group boundaries).
    bool intergroup_binding_allowed(std::size_t groups_num) { return groups_num > 1; }
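    // For example, on a hypothetical Windows machine with 128 logical CPUs split into
    // two 64-CPU processor groups, groups_num == 2 and intergroup binding stays allowed;
    // once the process is restricted to a single group (groups_num == 1), it is not.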

private:
    void topology_initialization(std::size_t groups_num) {
        initialization_state = started;

        // Parse the topology
        if ( hwloc_topology_init( &topology ) == 0 ) {
            initialization_state = topology_allocated;
#if __TBBBIND_HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING_PRESENT
            if ( groups_num == 1 &&
                 hwloc_topology_set_flags(topology,
                     HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
                     HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING
                 ) != 0
            ) {
                return;
            }
#endif
            if ( hwloc_topology_load( topology ) == 0 ) {
                initialization_state = topology_loaded;
            }
        }
        if ( initialization_state != topology_loaded )
            return;

        // Get the process affinity mask
        if ( intergroup_binding_allowed(groups_num) ) {
            process_cpu_affinity_mask  = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset (topology));
            process_node_affinity_mask = hwloc_bitmap_dup(hwloc_topology_get_complete_nodeset(topology));
        } else {
            process_cpu_affinity_mask  = hwloc_bitmap_alloc();
            process_node_affinity_mask = hwloc_bitmap_alloc();

            assertion_hwloc_wrapper(hwloc_get_cpubind, topology, process_cpu_affinity_mask, 0);
            hwloc_cpuset_to_nodeset(topology, process_cpu_affinity_mask, process_node_affinity_mask);
        }

        number_of_processors_groups = groups_num;
    }

    void numa_topology_parsing() {
        // Fill parameters with stubs if topology parsing is broken.
        if ( initialization_state != topology_loaded ) {
            numa_nodes_count = 1;
            numa_indexes_list.push_back(-1);
            return;
        }

        // If the system contains no NUMA nodes, HWLOC 1.11 returns an infinitely filled bitmap.
        // hwloc_bitmap_weight() returns a negative value for such bitmaps, so we use this check
        // to switch the way the topology is initialized.
        numa_nodes_count = hwloc_bitmap_weight(process_node_affinity_mask);
        if (numa_nodes_count <= 0) {
            // numa_nodes_count is zero if the process affinity mask is empty as well (an invalid
            // case) or if some internal HWLOC error occurred; we store -1 as the index then.
            // A negative count means the infinitely filled bitmap of a system without NUMA nodes,
            // which gets index 0.
            numa_indexes_list.push_back(numa_nodes_count == 0 ? -1 : 0);
            numa_nodes_count = 1;

            numa_affinity_masks_list.push_back(hwloc_bitmap_dup(process_cpu_affinity_mask));
        } else {
            // Get the NUMA logical indexes list
            unsigned counter = 0;
            int i = 0;
            int max_numa_index = -1;
            numa_indexes_list.resize(numa_nodes_count);
            hwloc_obj_t node_buffer;
            hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
                node_buffer = hwloc_get_numanode_obj_by_os_index(topology, i);
                numa_indexes_list[counter] = static_cast<int>(node_buffer->logical_index);

                if ( numa_indexes_list[counter] > max_numa_index ) {
                    max_numa_index = numa_indexes_list[counter];
                }

                counter++;
            } hwloc_bitmap_foreach_end();
            __TBB_ASSERT(max_numa_index >= 0, "Maximal NUMA index must not be negative");

            // Fill the concurrency and affinity masks lists
            numa_affinity_masks_list.resize(max_numa_index + 1);
            int index = 0;
            hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
                node_buffer = hwloc_get_numanode_obj_by_os_index(topology, i);
                index = static_cast<int>(node_buffer->logical_index);

                hwloc_cpuset_t& current_mask = numa_affinity_masks_list[index];
                current_mask = hwloc_bitmap_dup(node_buffer->cpuset);

                hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
                __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask), "hwloc detected unavailable NUMA node");
            } hwloc_bitmap_foreach_end();
        }
    }

    void core_types_topology_parsing() {
        // Fill parameters with stubs if topology parsing is broken.
        if ( initialization_state != topology_loaded ) {
            core_types_indexes_list.push_back(-1);
            return;
        }
#if __TBBBIND_HWLOC_HYBRID_CPUS_INTERFACES_PRESENT
        __TBB_ASSERT(hwloc_get_api_version() >= 0x20400, "Hybrid CPUs support interfaces require HWLOC >= 2.4");
        // Parse the hybrid CPU topology
        int core_types_number = hwloc_cpukinds_get_nr(topology, 0);
        bool core_types_parsing_broken = core_types_number <= 0;
        if (!core_types_parsing_broken) {
            core_types_affinity_masks_list.resize(core_types_number);
            int efficiency{-1};

            for (int core_type = 0; core_type < core_types_number; ++core_type) {
                hwloc_cpuset_t& current_mask = core_types_affinity_masks_list[core_type];
                current_mask = hwloc_bitmap_alloc();

                if (!hwloc_cpukinds_get_info(topology, core_type, current_mask, &efficiency, nullptr, nullptr, 0)
                    && efficiency >= 0
                ) {
                    hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);

                    if (hwloc_bitmap_weight(current_mask) > 0) {
                        core_types_indexes_list.push_back(core_type);
                    }
                    __TBB_ASSERT(hwloc_bitmap_weight(current_mask) >= 0, "Infinitely filled core type mask");
                } else {
                    core_types_parsing_broken = true;
                    break;
                }
            }
        }
#else /*!__TBBBIND_HWLOC_HYBRID_CPUS_INTERFACES_PRESENT*/
        bool core_types_parsing_broken{true};
#endif /*__TBBBIND_HWLOC_HYBRID_CPUS_INTERFACES_PRESENT*/

        if (core_types_parsing_broken) {
            for (auto& core_type_mask : core_types_affinity_masks_list) {
                hwloc_bitmap_free(core_type_mask);
            }
            core_types_affinity_masks_list.resize(1);
            core_types_indexes_list.resize(1);

            core_types_affinity_masks_list[0] = hwloc_bitmap_dup(process_cpu_affinity_mask);
            core_types_indexes_list[0] = -1;
        }
    }

    void enforce_hwloc_2_5_runtime_linkage() {
        // Without a call to this function, HWLOC 2.4 could be successfully loaded while tbbbind_2_5
        // is being loaded, since tbbbind_2_5 doesn't use any new entry points introduced in HWLOC 2.5.
        // But tbbbind_2_5 is compiled against the HWLOC 2.5 headers, so such a situation would require
        // binary forward compatibility, which the HWLOC library does not guarantee. To enforce linking
        // tbbbind_2_5 only against HWLOC >= 2.5, this function calls an interface that is available
        // only starting with HWLOC 2.5.
#if HWLOC_API_VERSION >= 0x20500
        auto some_core = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_CORE, nullptr);
        hwloc_get_obj_with_same_locality(topology, some_core, HWLOC_OBJ_CORE, nullptr, nullptr, 0);
#endif
    }

    void initialize( std::size_t groups_num ) {
        if ( initialization_state != uninitialized )
            return;

        topology_initialization(groups_num);
        numa_topology_parsing();
        core_types_topology_parsing();

        enforce_hwloc_2_5_runtime_linkage();

        if (initialization_state == topology_loaded)
            initialization_state = topology_parsed;
    }

    static system_topology* instance_ptr;
public:
    typedef hwloc_cpuset_t             affinity_mask;
    typedef hwloc_const_cpuset_t const_affinity_mask;

    bool is_topology_parsed() { return initialization_state == topology_parsed; }

    static void construct( std::size_t groups_num ) {
        if (instance_ptr == nullptr) {
            instance_ptr = new system_topology();
            instance_ptr->initialize(groups_num);
        }
    }

    static system_topology& instance() {
        __TBB_ASSERT(instance_ptr != nullptr, "Getting instance of non-constructed topology");
        return *instance_ptr;
    }

    static void destroy() {
        __TBB_ASSERT(instance_ptr != nullptr, "Destroying non-constructed topology");
        delete instance_ptr;
    }

    ~system_topology() {
        if ( is_topology_parsed() ) {
            for (auto& numa_node_mask : numa_affinity_masks_list) {
                hwloc_bitmap_free(numa_node_mask);
            }

            for (auto& core_type_mask : core_types_affinity_masks_list) {
                hwloc_bitmap_free(core_type_mask);
            }

            hwloc_bitmap_free(process_node_affinity_mask);
            hwloc_bitmap_free(process_cpu_affinity_mask);
        }

        if ( initialization_state >= topology_allocated ) {
            hwloc_topology_destroy(topology);
        }

        initialization_state = uninitialized;
    }

    void fill_topology_information(
        int& _numa_nodes_count, int*& _numa_indexes_list,
        int& _core_types_count, int*& _core_types_indexes_list
    ) {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized system_topology");
        _numa_nodes_count = numa_nodes_count;
        _numa_indexes_list = numa_indexes_list.data();

        _core_types_count = (int)core_types_indexes_list.size();
        _core_types_indexes_list = core_types_indexes_list.data();
    }

    void fill_constraints_affinity_mask(affinity_mask input_mask, int numa_node_index, int core_type_index, int max_threads_per_core) {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized system_topology");
        __TBB_ASSERT(numa_node_index < (int)numa_affinity_masks_list.size(), "Wrong NUMA node id");
        __TBB_ASSERT(core_type_index < (int)core_types_affinity_masks_list.size(), "Wrong core type id");
        __TBB_ASSERT(max_threads_per_core == -1 || max_threads_per_core > 0, "Wrong max_threads_per_core");

        hwloc_cpuset_t constraints_mask = hwloc_bitmap_alloc();
        hwloc_cpuset_t core_mask = hwloc_bitmap_alloc();

        hwloc_bitmap_copy(constraints_mask, process_cpu_affinity_mask);
        if (numa_node_index >= 0) {
            hwloc_bitmap_and(constraints_mask, constraints_mask, numa_affinity_masks_list[numa_node_index]);
        }
        if (core_type_index >= 0) {
            hwloc_bitmap_and(constraints_mask, constraints_mask, core_types_affinity_masks_list[core_type_index]);
        }
        if (max_threads_per_core > 0) {
            // Clear the input mask
            hwloc_bitmap_zero(input_mask);

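            // Walk all cores and keep at most max_threads_per_core PUs from each one.
            // For instance, on a hypothetical 2-way SMT core whose PUs are {4, 5},
            // max_threads_per_core == 1 keeps PU 4 and clears PU 5 from core_mask.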
            hwloc_obj_t current_core = nullptr;
            while ((current_core = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_CORE, current_core)) != nullptr) {
                hwloc_bitmap_and(core_mask, constraints_mask, current_core->cpuset);

                // Trim the core mask to the required number of bits
                int current_threads_per_core = 0;
                for (int id = hwloc_bitmap_first(core_mask); id != -1; id = hwloc_bitmap_next(core_mask, id)) {
                    if (++current_threads_per_core > max_threads_per_core) {
                        hwloc_bitmap_clr(core_mask, id);
                    }
                }

                hwloc_bitmap_or(input_mask, input_mask, core_mask);
            }
        } else {
            hwloc_bitmap_copy(input_mask, constraints_mask);
        }

        hwloc_bitmap_free(core_mask);
        hwloc_bitmap_free(constraints_mask);
    }

    void fit_num_threads_per_core(affinity_mask result_mask, affinity_mask current_mask, affinity_mask constraints_mask) {
        hwloc_bitmap_zero(result_mask);
        hwloc_obj_t current_core = nullptr;
        while ((current_core = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_CORE, current_core)) != nullptr) {
            if (hwloc_bitmap_intersects(current_mask, current_core->cpuset)) {
                hwloc_bitmap_or(result_mask, result_mask, current_core->cpuset);
            }
        }
        hwloc_bitmap_and(result_mask, result_mask, constraints_mask);
    }

    int get_default_concurrency(int numa_node_index, int core_type_index, int max_threads_per_core) {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized system_topology");

        hwloc_cpuset_t constraints_mask = hwloc_bitmap_alloc();
        fill_constraints_affinity_mask(constraints_mask, numa_node_index, core_type_index, max_threads_per_core);

        int default_concurrency = hwloc_bitmap_weight(constraints_mask);
        hwloc_bitmap_free(constraints_mask);
        return default_concurrency;
    }
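    // As an illustration: get_default_concurrency(-1, -1, 1) counts one PU per core available
    // to the process, while get_default_concurrency(-1, -1, -1) degenerates to the weight of
    // the whole process CPU affinity mask (the argument values here are hypothetical inputs).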

    affinity_mask allocate_process_affinity_mask() {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized system_topology");
        return hwloc_bitmap_dup(process_cpu_affinity_mask);
    }

    void free_affinity_mask( affinity_mask mask_to_free ) {
        hwloc_bitmap_free(mask_to_free); // If the bitmap is nullptr, no operation is performed.
    }

    void store_current_affinity_mask( affinity_mask current_mask ) {
        assertion_hwloc_wrapper(hwloc_get_cpubind, topology, current_mask, HWLOC_CPUBIND_THREAD);

        hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
        __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask),
            "Current affinity mask must intersect with the process affinity mask");
    }

    void set_affinity_mask( const_affinity_mask mask ) {
        if (hwloc_bitmap_weight(mask) > 0) {
            assertion_hwloc_wrapper(hwloc_set_cpubind, topology, mask, HWLOC_CPUBIND_THREAD);
        }
    }
};

system_topology* system_topology::instance_ptr{nullptr};

class binding_handler {
    // The following vector saves a thread's affinity mask on scheduler entry so that it can be
    // restored for that thread on scheduler exit.
    typedef std::vector<system_topology::affinity_mask> affinity_masks_container;
    affinity_masks_container affinity_backup;
    system_topology::affinity_mask handler_affinity_mask;

#if WIN32
    affinity_masks_container affinity_buffer;
    int my_numa_node_id;
    int my_core_type_id;
    int my_max_threads_per_core;
#endif

public:
    binding_handler( std::size_t size, int numa_node_id, int core_type_id, int max_threads_per_core )
        : affinity_backup(size)
#if WIN32
        , affinity_buffer(size)
        , my_numa_node_id(numa_node_id)
        , my_core_type_id(core_type_id)
        , my_max_threads_per_core(max_threads_per_core)
#endif
    {
        for (std::size_t i = 0; i < size; ++i) {
            affinity_backup[i] = system_topology::instance().allocate_process_affinity_mask();
#if WIN32
            affinity_buffer[i] = system_topology::instance().allocate_process_affinity_mask();
#endif
        }
        handler_affinity_mask = system_topology::instance().allocate_process_affinity_mask();
        system_topology::instance().fill_constraints_affinity_mask
            (handler_affinity_mask, numa_node_id, core_type_id, max_threads_per_core);
    }

    ~binding_handler() {
        for (std::size_t i = 0; i < affinity_backup.size(); ++i) {
            system_topology::instance().free_affinity_mask(affinity_backup[i]);
#if WIN32
            system_topology::instance().free_affinity_mask(affinity_buffer[i]);
#endif
        }
        system_topology::instance().free_affinity_mask(handler_affinity_mask);
    }

    void apply_affinity( unsigned slot_num ) {
        auto& topology = system_topology::instance();
        __TBB_ASSERT(slot_num < affinity_backup.size(),
            "The slot number is greater than the number of slots in the arena");
        __TBB_ASSERT(topology.is_topology_parsed(),
            "Trying to get access to uninitialized system_topology");

        topology.store_current_affinity_mask(affinity_backup[slot_num]);

#if WIN32
        // TBBBind supports only systems where NUMA nodes and core types do not cross the border
        // between several processor groups. So if a certain NUMA node or core type constraint is
        // specified, the constraints affinity mask will not cross a processor group border.

        // But if the constraint is based only on the max_threads_per_core setting, then the
        // constraints affinity mask may cross the border between several processor groups
        // on machines with more than 64 hardware threads. That is why we need to use the special
        // function, which regulates the number of threads in the current thread's mask.
        if (topology.number_of_processors_groups > 1 && my_max_threads_per_core != -1 &&
            (my_numa_node_id == -1 || topology.numa_indexes_list.size() == 1) &&
            (my_core_type_id == -1 || topology.core_types_indexes_list.size() == 1)
        ) {
            topology.fit_num_threads_per_core(affinity_buffer[slot_num], affinity_backup[slot_num], handler_affinity_mask);
            topology.set_affinity_mask(affinity_buffer[slot_num]);
            return;
        }
#endif
        topology.set_affinity_mask(handler_affinity_mask);
    }

    void restore_previous_affinity_mask( unsigned slot_num ) {
        auto& topology = system_topology::instance();
        __TBB_ASSERT(topology.is_topology_parsed(),
            "Trying to get access to uninitialized system_topology");
        topology.set_affinity_mask(affinity_backup[slot_num]);
    }

};
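
// A minimal sketch of the expected call sequence through the exported C interface below,
// assuming a host runtime that has already loaded this library (slot counts and constraint
// values are hypothetical):
//
//     int numa_count, core_types_count;
//     int *numa_indexes, *core_types_indexes;
//     __TBB_internal_initialize_system_topology(/*groups_num*/ 1,
//         numa_count, numa_indexes, core_types_count, core_types_indexes);
//
//     binding_handler* handler = __TBB_internal_allocate_binding_handler(
//         /*number_of_slots*/ 4, /*numa_id*/ -1, /*core_type_id*/ -1, /*max_threads_per_core*/ -1);
//     __TBB_internal_apply_affinity(handler, /*slot_num*/ 0);   // pin on scheduler entry
//     __TBB_internal_restore_affinity(handler, /*slot_num*/ 0); // unpin on scheduler exit
//     __TBB_internal_deallocate_binding_handler(handler);
//     __TBB_internal_destroy_system_topology();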

extern "C" { // exported to TBB interfaces

TBBBIND_EXPORT void __TBB_internal_initialize_system_topology(
    std::size_t groups_num,
    int& numa_nodes_count, int*& numa_indexes_list,
    int& core_types_count, int*& core_types_indexes_list
) {
    system_topology::construct(groups_num);
    system_topology::instance().fill_topology_information(
        numa_nodes_count, numa_indexes_list,
        core_types_count, core_types_indexes_list
    );
}
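
// For example, on a hypothetical two-node NUMA machine with a uniform (non-hybrid) CPU,
// the call above would yield numa_nodes_count == 2 with numa_indexes_list pointing at {0, 1},
// and core_types_count == 1 with core_types_indexes_list pointing at {-1} (the stub index
// used when hybrid core types are unavailable or not parsed).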

TBBBIND_EXPORT binding_handler* __TBB_internal_allocate_binding_handler(int number_of_slots, int numa_id, int core_type_id, int max_threads_per_core) {
    __TBB_ASSERT(number_of_slots > 0, "Trying to create a NUMA handler for 0 threads.");
    return new binding_handler(number_of_slots, numa_id, core_type_id, max_threads_per_core);
}

TBBBIND_EXPORT void __TBB_internal_deallocate_binding_handler(binding_handler* handler_ptr) {
    __TBB_ASSERT(handler_ptr != nullptr, "Trying to deallocate a nullptr pointer.");
    delete handler_ptr;
}

TBBBIND_EXPORT void __TBB_internal_apply_affinity(binding_handler* handler_ptr, int slot_num) {
    __TBB_ASSERT(handler_ptr != nullptr, "Trying to get access to uninitialized metadata.");
    handler_ptr->apply_affinity(slot_num);
}

TBBBIND_EXPORT void __TBB_internal_restore_affinity(binding_handler* handler_ptr, int slot_num) {
    __TBB_ASSERT(handler_ptr != nullptr, "Trying to get access to uninitialized metadata.");
    handler_ptr->restore_previous_affinity_mask(slot_num);
}

TBBBIND_EXPORT int __TBB_internal_get_default_concurrency(int numa_id, int core_type_id, int max_threads_per_core) {
    return system_topology::instance().get_default_concurrency(numa_id, core_type_id, max_threads_per_core);
}

void __TBB_internal_destroy_system_topology() {
    return system_topology::destroy();
}

} // extern "C"

} // namespace r1
} // namespace detail
} // namespace tbb

#undef assertion_hwloc_wrapper