/*
    Copyright (c) 2019-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include <vector>
#include <mutex>

#include "../tbb/assert_impl.h" // Out-of-line TBB assertion handling routines are instantiated here.
#include "oneapi/tbb/detail/_assert.h"
#include "oneapi/tbb/detail/_config.h"

#if _MSC_VER && !__INTEL_COMPILER && !__clang__
#pragma warning( push )
#pragma warning( disable : 4100 )
#elif _MSC_VER && __clang__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
#include <hwloc.h>
#if _MSC_VER && !__INTEL_COMPILER && !__clang__
#pragma warning( pop )
#elif _MSC_VER && __clang__
#pragma GCC diagnostic pop
#endif

#define __TBBBIND_HWLOC_HYBRID_CPUS_INTERFACES_PRESENT (HWLOC_API_VERSION >= 0x20400)
#define __TBBBIND_HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING_PRESENT (HWLOC_API_VERSION >= 0x20500)

// Most hwloc calls return a negative exit code on error.
// This macro checks the error codes returned from the hwloc interfaces.
#define assertion_hwloc_wrapper(command, ...) \
    __TBB_ASSERT_EX( (command(__VA_ARGS__)) >= 0, "Error occurred during call to hwloc API.");
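// For example, the call below (used in set_affinity_mask() further down) performs
// the binding in all builds and verifies its result when assertions are enabled:
//     assertion_hwloc_wrapper(hwloc_set_cpubind, topology, mask, HWLOC_CPUBIND_THREAD);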

namespace tbb {
namespace detail {
namespace r1 {

//------------------------------------------------------------------------
// Information about the hardware of the machine on which TBB happens to run
//------------------------------------------------------------------------
class system_topology {
    friend class binding_handler;

    // Common topology members
    hwloc_topology_t topology{nullptr};
    hwloc_cpuset_t process_cpu_affinity_mask{nullptr};
    hwloc_nodeset_t process_node_affinity_mask{nullptr};
    std::size_t number_of_processors_groups{1};

    // NUMA API related topology members
    std::vector<hwloc_cpuset_t> numa_affinity_masks_list{};
    std::vector<int> numa_indexes_list{};
    int numa_nodes_count{0};

    // Hybrid CPUs API related topology members
    std::vector<hwloc_cpuset_t> core_types_affinity_masks_list{};
    std::vector<int> core_types_indexes_list{};

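    // Lifecycle of the topology object: the stages are reached strictly in order,
    // so each stage implies that all previous stages completed successfully
    // (uninitialized -> started -> topology_allocated -> topology_loaded -> topology_parsed).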
    enum init_stages { uninitialized,
                       started,
                       topology_allocated,
                       topology_loaded,
                       topology_parsed } initialization_state;

    // Binding threads that live in other Windows processor groups
    // is allowed only if the machine topology contains several Windows processor groups
    // and the process affinity mask wasn't limited manually (an affinity mask cannot
    // violate processor group boundaries).
    bool intergroup_binding_allowed(std::size_t groups_num) { return groups_num > 1; }

private:
    void topology_initialization(std::size_t groups_num) {
        initialization_state = started;

        // Parse topology
        if ( hwloc_topology_init( &topology ) == 0 ) {
            initialization_state = topology_allocated;
#if __TBBBIND_HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING_PRESENT
            if ( groups_num == 1 &&
                 hwloc_topology_set_flags(topology,
                     HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
                     HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING
                 ) != 0
            ) {
                return;
            }
#endif
            if ( hwloc_topology_load( topology ) == 0 ) {
                initialization_state = topology_loaded;
            }
        }
        if ( initialization_state != topology_loaded )
            return;

#if __TBB_CPUBIND_PRESENT
        // Get the process affinity mask
        if ( intergroup_binding_allowed(groups_num) ) {
            process_cpu_affinity_mask  = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset (topology));
            process_node_affinity_mask = hwloc_bitmap_dup(hwloc_topology_get_complete_nodeset(topology));
        } else {
            process_cpu_affinity_mask  = hwloc_bitmap_alloc();
            process_node_affinity_mask = hwloc_bitmap_alloc();

            assertion_hwloc_wrapper(hwloc_get_cpubind, topology, process_cpu_affinity_mask, 0);
            hwloc_cpuset_to_nodeset(topology, process_cpu_affinity_mask, process_node_affinity_mask);
        }
#else
        process_cpu_affinity_mask  = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset (topology));
        process_node_affinity_mask = hwloc_bitmap_dup(hwloc_topology_get_complete_nodeset(topology));
#endif

        number_of_processors_groups = groups_num;
    }

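    // Discovers the NUMA nodes available to the process and fills the
    // numa_indexes_list and numa_affinity_masks_list members. If parsing
    // fails, a single stub entry with index -1 is reported instead.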
    void numa_topology_parsing() {
        // Fill parameters with stubs if topology parsing is broken.
        if ( initialization_state != topology_loaded ) {
            numa_nodes_count = 1;
            numa_indexes_list.push_back(-1);
            return;
        }

        // If the system contains no NUMA nodes, HWLOC 1.11 returns an infinitely filled bitmap.
        // hwloc_bitmap_weight() returns a negative value for such bitmaps, so this check is used
        // to switch the way the topology is initialized.
        numa_nodes_count = hwloc_bitmap_weight(process_node_affinity_mask);
        if (numa_nodes_count <= 0) {
            // numa_nodes_count may be zero if the process affinity mask is empty too (invalid case)
            // or if some internal HWLOC error occurred.
            // So -1 is placed as the index in this case.
            numa_indexes_list.push_back(numa_nodes_count == 0 ? -1 : 0);
            numa_nodes_count = 1;

            numa_affinity_masks_list.push_back(hwloc_bitmap_dup(process_cpu_affinity_mask));
        } else {
            // Get the list of NUMA logical indexes
            unsigned counter = 0;
            int i = 0;
            int max_numa_index = -1;
            numa_indexes_list.resize(numa_nodes_count);
            hwloc_obj_t node_buffer;
            hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
                node_buffer = hwloc_get_numanode_obj_by_os_index(topology, i);
                numa_indexes_list[counter] = static_cast<int>(node_buffer->logical_index);

                if ( numa_indexes_list[counter] > max_numa_index ) {
                    max_numa_index = numa_indexes_list[counter];
                }

                counter++;
            } hwloc_bitmap_foreach_end();
            __TBB_ASSERT(max_numa_index >= 0, "Maximal NUMA index must not be negative");

            // Fill the affinity masks list
            numa_affinity_masks_list.resize(max_numa_index + 1);
            int index = 0;
            hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
                node_buffer = hwloc_get_numanode_obj_by_os_index(topology, i);
                index = static_cast<int>(node_buffer->logical_index);

                hwloc_cpuset_t& current_mask = numa_affinity_masks_list[index];
                current_mask = hwloc_bitmap_dup(node_buffer->cpuset);

                hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
                __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask), "hwloc detected unavailable NUMA node");
            } hwloc_bitmap_foreach_end();
        }
    }

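    // Discovers hybrid CPU core types (e.g. performance vs. efficiency cores)
    // via the hwloc_cpukinds_* interfaces when available. If the interfaces are
    // absent or parsing fails, a single stub entry with index -1 covering the
    // whole process affinity mask is used instead.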
    void core_types_topology_parsing() {
        // Fill parameters with stubs if topology parsing is broken.
        if ( initialization_state != topology_loaded ) {
            core_types_indexes_list.push_back(-1);
            return;
        }
#if __TBBBIND_HWLOC_HYBRID_CPUS_INTERFACES_PRESENT
        __TBB_ASSERT(hwloc_get_api_version() >= 0x20400, "Hybrid CPUs support interfaces require HWLOC >= 2.4");
        // Parse the hybrid CPU topology
        int core_types_number = hwloc_cpukinds_get_nr(topology, 0);
        bool core_types_parsing_broken = core_types_number <= 0;
        if (!core_types_parsing_broken) {
            core_types_affinity_masks_list.resize(core_types_number);
            int efficiency{-1};

            for (int core_type = 0; core_type < core_types_number; ++core_type) {
                hwloc_cpuset_t& current_mask = core_types_affinity_masks_list[core_type];
                current_mask = hwloc_bitmap_alloc();

                if (!hwloc_cpukinds_get_info(topology, core_type, current_mask, &efficiency, nullptr, nullptr, 0)
                    && efficiency >= 0
                ) {
                    hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);

                    if (hwloc_bitmap_weight(current_mask) > 0) {
                        core_types_indexes_list.push_back(core_type);
                    }
                    __TBB_ASSERT(hwloc_bitmap_weight(current_mask) >= 0, "Infinitely filled core type mask");
                } else {
                    core_types_parsing_broken = true;
                    break;
                }
            }
        }
#else /*!__TBBBIND_HWLOC_HYBRID_CPUS_INTERFACES_PRESENT*/
        bool core_types_parsing_broken{true};
#endif /*__TBBBIND_HWLOC_HYBRID_CPUS_INTERFACES_PRESENT*/

        if (core_types_parsing_broken) {
            for (auto& core_type_mask : core_types_affinity_masks_list) {
                hwloc_bitmap_free(core_type_mask);
            }
            core_types_affinity_masks_list.resize(1);
            core_types_indexes_list.resize(1);

            core_types_affinity_masks_list[0] = hwloc_bitmap_dup(process_cpu_affinity_mask);
            core_types_indexes_list[0] = -1;
        }
    }

    void enforce_hwloc_2_5_runtime_linkage() {
        // Without a call to this function, HWLOC 2.4 could be successfully loaded while
        // tbbbind_2_5 is being loaded, since tbbbind_2_5 doesn't use any of the new entry
        // points introduced in HWLOC 2.5. But tbbbind_2_5 is compiled against the HWLOC 2.5
        // headers, so such a situation would require binary forward compatibility, which the
        // HWLOC library does not guarantee. To enforce linking tbbbind_2_5 only with
        // HWLOC >= 2.5, this function calls an interface that is available only in HWLOC 2.5.
#if HWLOC_API_VERSION >= 0x20500
        auto some_core = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_CORE, nullptr);
        hwloc_get_obj_with_same_locality(topology, some_core, HWLOC_OBJ_CORE, nullptr, nullptr, 0);
#endif
    }

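    // One-shot initialization: runs all parsing stages and promotes the state
    // to topology_parsed only if the topology was successfully loaded.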
    void initialize( std::size_t groups_num ) {
        if ( initialization_state != uninitialized )
            return;

        topology_initialization(groups_num);
        numa_topology_parsing();
        core_types_topology_parsing();

        enforce_hwloc_2_5_runtime_linkage();

        if (initialization_state == topology_loaded)
            initialization_state = topology_parsed;
    }

    static system_topology* instance_ptr;
public:
    typedef hwloc_cpuset_t affinity_mask;
    typedef hwloc_const_cpuset_t const_affinity_mask;

    bool is_topology_parsed() { return initialization_state == topology_parsed; }

    static void construct( std::size_t groups_num ) {
        if (instance_ptr == nullptr) {
            instance_ptr = new system_topology();
            instance_ptr->initialize(groups_num);
        }
    }

    static system_topology& instance() {
        __TBB_ASSERT(instance_ptr != nullptr, "Getting instance of non-constructed topology");
        return *instance_ptr;
    }

    static void destroy() {
        __TBB_ASSERT(instance_ptr != nullptr, "Destroying non-constructed topology");
        delete instance_ptr;
    }

    ~system_topology() {
        if ( is_topology_parsed() ) {
            for (auto& numa_node_mask : numa_affinity_masks_list) {
                hwloc_bitmap_free(numa_node_mask);
            }

            for (auto& core_type_mask : core_types_affinity_masks_list) {
                hwloc_bitmap_free(core_type_mask);
            }

            hwloc_bitmap_free(process_node_affinity_mask);
            hwloc_bitmap_free(process_cpu_affinity_mask);
        }

        if ( initialization_state >= topology_allocated ) {
            hwloc_topology_destroy(topology);
        }

        initialization_state = uninitialized;
    }

    void fill_topology_information(
        int& _numa_nodes_count, int*& _numa_indexes_list,
        int& _core_types_count, int*& _core_types_indexes_list
    ) {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized system_topology");
        _numa_nodes_count = numa_nodes_count;
        _numa_indexes_list = numa_indexes_list.data();

        _core_types_count = (int)core_types_indexes_list.size();
        _core_types_indexes_list = core_types_indexes_list.data();
    }

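    // Builds into input_mask the set of logical CPUs that satisfies all the given
    // constraints: -1 for numa_node_index or core_type_index means "unconstrained",
    // and max_threads_per_core limits how many hardware threads are kept per core.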
    void fill_constraints_affinity_mask(affinity_mask input_mask, int numa_node_index, int core_type_index, int max_threads_per_core) {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized system_topology");
        __TBB_ASSERT(numa_node_index < (int)numa_affinity_masks_list.size(), "Wrong NUMA node id");
        __TBB_ASSERT(core_type_index < (int)core_types_affinity_masks_list.size(), "Wrong core type id");
        __TBB_ASSERT(max_threads_per_core == -1 || max_threads_per_core > 0, "Wrong max_threads_per_core");

        hwloc_cpuset_t constraints_mask = hwloc_bitmap_alloc();
        hwloc_cpuset_t core_mask = hwloc_bitmap_alloc();

        hwloc_bitmap_copy(constraints_mask, process_cpu_affinity_mask);
        if (numa_node_index >= 0) {
            hwloc_bitmap_and(constraints_mask, constraints_mask, numa_affinity_masks_list[numa_node_index]);
        }
        if (core_type_index >= 0) {
            hwloc_bitmap_and(constraints_mask, constraints_mask, core_types_affinity_masks_list[core_type_index]);
        }
        if (max_threads_per_core > 0) {
            // Clear the input mask
            hwloc_bitmap_zero(input_mask);

            hwloc_obj_t current_core = nullptr;
            while ((current_core = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_CORE, current_core)) != nullptr) {
                hwloc_bitmap_and(core_mask, constraints_mask, current_core->cpuset);

                // Fit the core mask to the required number of bits
                int current_threads_per_core = 0;
                for (int id = hwloc_bitmap_first(core_mask); id != -1; id = hwloc_bitmap_next(core_mask, id)) {
                    if (++current_threads_per_core > max_threads_per_core) {
                        hwloc_bitmap_clr(core_mask, id);
                    }
                }

                hwloc_bitmap_or(input_mask, input_mask, core_mask);
            }
        } else {
            hwloc_bitmap_copy(input_mask, constraints_mask);
        }

        hwloc_bitmap_free(core_mask);
        hwloc_bitmap_free(constraints_mask);
    }

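    // Expands current_mask to whole cores (any core with at least one bit set in
    // current_mask contributes its full cpuset), then clips the result by
    // constraints_mask. Used on Windows to keep the threads-per-core limit
    // consistent across processor groups (see apply_affinity below).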
    void fit_num_threads_per_core(affinity_mask result_mask, affinity_mask current_mask, affinity_mask constraints_mask) {
        hwloc_bitmap_zero(result_mask);
        hwloc_obj_t current_core = nullptr;
        while ((current_core = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_CORE, current_core)) != nullptr) {
            if (hwloc_bitmap_intersects(current_mask, current_core->cpuset)) {
                hwloc_bitmap_or(result_mask, result_mask, current_core->cpuset);
            }
        }
        hwloc_bitmap_and(result_mask, result_mask, constraints_mask);
    }

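    // Returns the number of logical CPUs that satisfy the given constraints,
    // i.e. the cardinality of the corresponding constraints affinity mask.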
    int get_default_concurrency(int numa_node_index, int core_type_index, int max_threads_per_core) {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized system_topology");

        hwloc_cpuset_t constraints_mask = hwloc_bitmap_alloc();
        fill_constraints_affinity_mask(constraints_mask, numa_node_index, core_type_index, max_threads_per_core);

        int default_concurrency = hwloc_bitmap_weight(constraints_mask);
        hwloc_bitmap_free(constraints_mask);
        return default_concurrency;
    }

    affinity_mask allocate_process_affinity_mask() {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized system_topology");
        return hwloc_bitmap_dup(process_cpu_affinity_mask);
    }

    void free_affinity_mask( affinity_mask mask_to_free ) {
        hwloc_bitmap_free(mask_to_free); // If the bitmap is nullptr, no operation is performed.
    }

    void store_current_affinity_mask( affinity_mask current_mask ) {
        assertion_hwloc_wrapper(hwloc_get_cpubind, topology, current_mask, HWLOC_CPUBIND_THREAD);

        hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
        __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask),
            "Current affinity mask must intersect with the process affinity mask");
    }

    void set_affinity_mask( const_affinity_mask mask ) {
        if (hwloc_bitmap_weight(mask) > 0) {
            assertion_hwloc_wrapper(hwloc_set_cpubind, topology, mask, HWLOC_CPUBIND_THREAD);
        }
    }
};

system_topology* system_topology::instance_ptr{nullptr};

class binding_handler {
    // The following vector saves the thread affinity mask on scheduler entry
    // so that it can be restored for the thread on scheduler exit.
    typedef std::vector<system_topology::affinity_mask> affinity_masks_container;
    affinity_masks_container affinity_backup;
    system_topology::affinity_mask handler_affinity_mask;

#ifdef _WIN32
    affinity_masks_container affinity_buffer;
    int my_numa_node_id;
    int my_core_type_id;
    int my_max_threads_per_core;
#endif

public:
    binding_handler( std::size_t size, int numa_node_id, int core_type_id, int max_threads_per_core )
        : affinity_backup(size)
#ifdef _WIN32
        , affinity_buffer(size)
        , my_numa_node_id(numa_node_id)
        , my_core_type_id(core_type_id)
        , my_max_threads_per_core(max_threads_per_core)
#endif
    {
        for (std::size_t i = 0; i < size; ++i) {
            affinity_backup[i] = system_topology::instance().allocate_process_affinity_mask();
#ifdef _WIN32
            affinity_buffer[i] = system_topology::instance().allocate_process_affinity_mask();
#endif
        }
        handler_affinity_mask = system_topology::instance().allocate_process_affinity_mask();
        system_topology::instance().fill_constraints_affinity_mask
            (handler_affinity_mask, numa_node_id, core_type_id, max_threads_per_core);
    }

    ~binding_handler() {
        for (std::size_t i = 0; i < affinity_backup.size(); ++i) {
            system_topology::instance().free_affinity_mask(affinity_backup[i]);
#ifdef _WIN32
            system_topology::instance().free_affinity_mask(affinity_buffer[i]);
#endif
        }
        system_topology::instance().free_affinity_mask(handler_affinity_mask);
    }

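    // Saves the calling thread's current affinity mask into affinity_backup[slot_num]
    // and then binds the thread according to the handler's constraints mask.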
    void apply_affinity( unsigned slot_num ) {
        auto& topology = system_topology::instance();
        __TBB_ASSERT(slot_num < affinity_backup.size(),
            "The slot number is greater than the number of slots in the arena");
        __TBB_ASSERT(topology.is_topology_parsed(),
            "Trying to get access to uninitialized system_topology");

        topology.store_current_affinity_mask(affinity_backup[slot_num]);

#ifdef _WIN32
        // TBBBind supports only systems where NUMA nodes and core types do not cross the border
        // between several processor groups. So if a certain NUMA node or core type constraint
        // is specified, the constraints affinity mask will not cross the processor groups' border.

        // But if the constraint is based only on the max_threads_per_core setting, the
        // constraints affinity mask may cross the border between several processor groups
        // on machines with more than 64 hardware threads. That is why we need to use the special
        // function, which regulates the number of threads in the current thread's mask.
        if (topology.number_of_processors_groups > 1 && my_max_threads_per_core != -1 &&
            (my_numa_node_id == -1 || topology.numa_indexes_list.size() == 1) &&
            (my_core_type_id == -1 || topology.core_types_indexes_list.size() == 1)
        ) {
            topology.fit_num_threads_per_core(affinity_buffer[slot_num], affinity_backup[slot_num], handler_affinity_mask);
            topology.set_affinity_mask(affinity_buffer[slot_num]);
            return;
        }
#endif
        topology.set_affinity_mask(handler_affinity_mask);
    }

    void restore_previous_affinity_mask( unsigned slot_num ) {
        auto& topology = system_topology::instance();
        __TBB_ASSERT(topology.is_topology_parsed(),
            "Trying to get access to uninitialized system_topology");
        topology.set_affinity_mask(affinity_backup[slot_num]);
    }
};

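// The entry points below are loaded dynamically by the TBB runtime. A hypothetical
// caller (a sketch only; the actual TBB runtime code is not shown here) would use
// them roughly as follows:
//
//     int nodes_count, core_types_count;
//     int *node_indexes, *core_type_indexes;
//     __TBB_internal_initialize_system_topology(groups_num, nodes_count, node_indexes,
//                                               core_types_count, core_type_indexes);
//     binding_handler* h = __TBB_internal_allocate_binding_handler(slots, numa_id,
//                              core_type_id, threads_per_core);
//     __TBB_internal_apply_affinity(h, slot);    // on arena slot entry
//     __TBB_internal_restore_affinity(h, slot);  // on arena slot exit
//     __TBB_internal_deallocate_binding_handler(h);
//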
extern "C" { // exported to TBB interfaces

TBBBIND_EXPORT void __TBB_internal_initialize_system_topology(
    std::size_t groups_num,
    int& numa_nodes_count, int*& numa_indexes_list,
    int& core_types_count, int*& core_types_indexes_list
) {
    system_topology::construct(groups_num);
    system_topology::instance().fill_topology_information(
        numa_nodes_count, numa_indexes_list,
        core_types_count, core_types_indexes_list
    );
}

TBBBIND_EXPORT binding_handler* __TBB_internal_allocate_binding_handler(int number_of_slots, int numa_id, int core_type_id, int max_threads_per_core) {
    __TBB_ASSERT(number_of_slots > 0, "Trying to create numa handler for 0 threads.");
    return new binding_handler(number_of_slots, numa_id, core_type_id, max_threads_per_core);
}

TBBBIND_EXPORT void __TBB_internal_deallocate_binding_handler(binding_handler* handler_ptr) {
    __TBB_ASSERT(handler_ptr != nullptr, "Trying to deallocate nullptr pointer.");
    delete handler_ptr;
}

TBBBIND_EXPORT void __TBB_internal_apply_affinity(binding_handler* handler_ptr, int slot_num) {
    __TBB_ASSERT(handler_ptr != nullptr, "Trying to get access to uninitialized metadata.");
    handler_ptr->apply_affinity(slot_num);
}

TBBBIND_EXPORT void __TBB_internal_restore_affinity(binding_handler* handler_ptr, int slot_num) {
    __TBB_ASSERT(handler_ptr != nullptr, "Trying to get access to uninitialized metadata.");
    handler_ptr->restore_previous_affinity_mask(slot_num);
}

TBBBIND_EXPORT int __TBB_internal_get_default_concurrency(int numa_id, int core_type_id, int max_threads_per_core) {
    return system_topology::instance().get_default_concurrency(numa_id, core_type_id, max_threads_per_core);
}

void __TBB_internal_destroy_system_topology() {
    return system_topology::destroy();
}

} // extern "C"

} // namespace r1
} // namespace detail
} // namespace tbb

#undef assertion_hwloc_wrapper