/*
    Copyright (c) 2019-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include <vector>
#include <mutex>

#include "../tbb/assert_impl.h" // Out-of-line TBB assertion handling routines are instantiated here.
#include "oneapi/tbb/detail/_assert.h"
#include "oneapi/tbb/detail/_config.h"

#if _MSC_VER && !__INTEL_COMPILER && !__clang__
#pragma warning( push )
#pragma warning( disable : 4100 )
#elif _MSC_VER && __clang__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
#include <hwloc.h>
#if _MSC_VER && !__INTEL_COMPILER && !__clang__
#pragma warning( pop )
#elif _MSC_VER && __clang__
#pragma GCC diagnostic pop
#endif

#define __TBBBIND_HWLOC_HYBRID_CPUS_INTERFACES_PRESENT (HWLOC_API_VERSION >= 0x20400)
#define __TBBBIND_HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING_PRESENT (HWLOC_API_VERSION >= 0x20500)

// Most hwloc calls return a negative exit code on error.
// This macro checks the error codes returned from the hwloc interfaces.
#define assertion_hwloc_wrapper(command, ...) \
    __TBB_ASSERT_EX( (command(__VA_ARGS__)) >= 0, "Error occurred during call to hwloc API.");

namespace tbb {
namespace detail {
namespace r1 {

//------------------------------------------------------------------------
// Information about the hardware of the machine that TBB happens to run on
//------------------------------------------------------------------------
class system_topology {
    friend class binding_handler;

    // Common topology members
    hwloc_topology_t topology{nullptr};
    hwloc_cpuset_t process_cpu_affinity_mask{nullptr};
    hwloc_nodeset_t process_node_affinity_mask{nullptr};
    std::size_t number_of_processors_groups{1};

    // NUMA API related topology members
    std::vector<hwloc_cpuset_t> numa_affinity_masks_list{};
    std::vector<int> numa_indexes_list{};
    int numa_nodes_count{0};

    // Hybrid CPUs API related topology members
    std::vector<hwloc_cpuset_t> core_types_affinity_masks_list{};
    std::vector<int> core_types_indexes_list{};

    enum init_stages { uninitialized,
                       started,
                       topology_allocated,
                       topology_loaded,
                       topology_parsed } initialization_state;

    // Binding threads that belong to other Windows processor groups is allowed
    // only if the machine topology contains several Windows processor groups
    // and the process affinity mask was not limited manually (an affinity mask
    // cannot violate processor group boundaries).
    bool intergroup_binding_allowed(std::size_t groups_num) { return groups_num > 1; }

private:
    void topology_initialization(std::size_t groups_num) {
        initialization_state = started;

        // Parse the topology
        if ( hwloc_topology_init( &topology ) == 0 ) {
            initialization_state = topology_allocated;
#if __TBBBIND_HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING_PRESENT
            if ( groups_num == 1 &&
                 hwloc_topology_set_flags(topology,
                     HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
                     HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING
                 ) != 0
            ) {
                return;
            }
#endif
            if ( hwloc_topology_load( topology ) == 0 ) {
                initialization_state = topology_loaded;
            }
        }
        if ( initialization_state != topology_loaded )
            return;

        // Get the process affinity mask
        if ( intergroup_binding_allowed(groups_num) ) {
            process_cpu_affinity_mask  = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset (topology));
            process_node_affinity_mask = hwloc_bitmap_dup(hwloc_topology_get_complete_nodeset(topology));
        } else {
            process_cpu_affinity_mask  = hwloc_bitmap_alloc();
            process_node_affinity_mask = hwloc_bitmap_alloc();

            assertion_hwloc_wrapper(hwloc_get_cpubind, topology, process_cpu_affinity_mask, 0);
            hwloc_cpuset_to_nodeset(topology, process_cpu_affinity_mask, process_node_affinity_mask);
        }

        number_of_processors_groups = groups_num;
    }

    void numa_topology_parsing() {
        // Fill parameters with stubs if topology parsing is broken.
        if ( initialization_state != topology_loaded ) {
            numa_nodes_count = 1;
            numa_indexes_list.push_back(-1);
            return;
        }

        // If the system contains no NUMA nodes, HWLOC 1.11 returns an infinitely filled bitmap.
        // hwloc_bitmap_weight() returns a negative value for such bitmaps, so this check is used
        // to switch to another way of topology initialization.
        numa_nodes_count = hwloc_bitmap_weight(process_node_affinity_mask);
        if (numa_nodes_count <= 0) {
            // numa_nodes_count may be zero if the process affinity mask is empty too (an invalid case)
            // or if some internal HWLOC error occurred, so -1 is placed as the index in this case.
            numa_indexes_list.push_back(numa_nodes_count == 0 ? -1 : 0);
            numa_nodes_count = 1;

            numa_affinity_masks_list.push_back(hwloc_bitmap_dup(process_cpu_affinity_mask));
        } else {
            // Get NUMA logical indexes list
            unsigned counter = 0;
            int i = 0;
            int max_numa_index = -1;
            numa_indexes_list.resize(numa_nodes_count);
            hwloc_obj_t node_buffer;
            hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
                node_buffer = hwloc_get_numanode_obj_by_os_index(topology, i);
                numa_indexes_list[counter] = static_cast<int>(node_buffer->logical_index);

                if ( numa_indexes_list[counter] > max_numa_index ) {
                    max_numa_index = numa_indexes_list[counter];
                }

                counter++;
            } hwloc_bitmap_foreach_end();
            __TBB_ASSERT(max_numa_index >= 0, "Maximal NUMA index must not be negative");

            // Fill concurrency and affinity masks lists
            numa_affinity_masks_list.resize(max_numa_index + 1);
            int index = 0;
            hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
                node_buffer = hwloc_get_numanode_obj_by_os_index(topology, i);
                index = static_cast<int>(node_buffer->logical_index);

                hwloc_cpuset_t& current_mask = numa_affinity_masks_list[index];
                current_mask = hwloc_bitmap_dup(node_buffer->cpuset);

                hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
                __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask), "hwloc detected unavailable NUMA node");
            } hwloc_bitmap_foreach_end();
        }
    }

    void core_types_topology_parsing() {
        // Fill parameters with stubs if topology parsing is broken.
        if ( initialization_state != topology_loaded ) {
            core_types_indexes_list.push_back(-1);
            return;
        }
#if __TBBBIND_HWLOC_HYBRID_CPUS_INTERFACES_PRESENT
        __TBB_ASSERT(hwloc_get_api_version() >= 0x20400, "Hybrid CPUs support interfaces require HWLOC >= 2.4");
        // Parse the hybrid CPU topology
        int core_types_number = hwloc_cpukinds_get_nr(topology, 0);
        bool core_types_parsing_broken = core_types_number <= 0;
        if (!core_types_parsing_broken) {
            core_types_affinity_masks_list.resize(core_types_number);
            int efficiency{-1};

            for (int core_type = 0; core_type < core_types_number; ++core_type) {
                hwloc_cpuset_t& current_mask = core_types_affinity_masks_list[core_type];
                current_mask = hwloc_bitmap_alloc();

                if (!hwloc_cpukinds_get_info(topology, core_type, current_mask, &efficiency, nullptr, nullptr, 0)
                    && efficiency >= 0
                ) {
                    hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);

                    if (hwloc_bitmap_weight(current_mask) > 0) {
                        core_types_indexes_list.push_back(core_type);
                    }
                    __TBB_ASSERT(hwloc_bitmap_weight(current_mask) >= 0, "Infinitely filled core type mask");
                } else {
                    core_types_parsing_broken = true;
                    break;
                }
            }
        }
#else /*!__TBBBIND_HWLOC_HYBRID_CPUS_INTERFACES_PRESENT*/
        bool core_types_parsing_broken{true};
#endif /*__TBBBIND_HWLOC_HYBRID_CPUS_INTERFACES_PRESENT*/

        if (core_types_parsing_broken) {
            for (auto& core_type_mask : core_types_affinity_masks_list) {
                hwloc_bitmap_free(core_type_mask);
            }
            core_types_affinity_masks_list.resize(1);
            core_types_indexes_list.resize(1);

            core_types_affinity_masks_list[0] = hwloc_bitmap_dup(process_cpu_affinity_mask);
            core_types_indexes_list[0] = -1;
        }
    }

    void initialize( std::size_t groups_num ) {
        if ( initialization_state != uninitialized )
            return;

        topology_initialization(groups_num);
        numa_topology_parsing();
        core_types_topology_parsing();

        if (initialization_state == topology_loaded)
            initialization_state = topology_parsed;
    }

    static system_topology* instance_ptr;
public:
    typedef hwloc_cpuset_t affinity_mask;
    typedef hwloc_const_cpuset_t const_affinity_mask;

    bool is_topology_parsed() { return initialization_state == topology_parsed; }

    static void construct( std::size_t groups_num ) {
        if (instance_ptr == nullptr) {
            instance_ptr = new system_topology();
            instance_ptr->initialize(groups_num);
        }
    }

    static system_topology& instance() {
        __TBB_ASSERT(instance_ptr != nullptr, "Getting instance of non-constructed topology");
        return *instance_ptr;
    }

    static void destroy() {
        __TBB_ASSERT(instance_ptr != nullptr, "Destroying non-constructed topology");
        delete instance_ptr;
    }

    ~system_topology() {
        if ( is_topology_parsed() ) {
            for (auto& numa_node_mask : numa_affinity_masks_list) {
                hwloc_bitmap_free(numa_node_mask);
            }

            for (auto& core_type_mask : core_types_affinity_masks_list) {
                hwloc_bitmap_free(core_type_mask);
            }

            hwloc_bitmap_free(process_node_affinity_mask);
            hwloc_bitmap_free(process_cpu_affinity_mask);
        }

        if ( initialization_state >= topology_allocated ) {
            hwloc_topology_destroy(topology);
        }

        initialization_state = uninitialized;
    }

    void fill_topology_information(
        int& _numa_nodes_count, int*& _numa_indexes_list,
        int& _core_types_count, int*& _core_types_indexes_list
    ) {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized system_topology");
        _numa_nodes_count = numa_nodes_count;
        _numa_indexes_list = numa_indexes_list.data();

        _core_types_count = (int)core_types_indexes_list.size();
        _core_types_indexes_list = core_types_indexes_list.data();
    }

    // Fill input_mask with the CPUs allowed by the process affinity mask and by the given
    // NUMA node, core type, and max_threads_per_core constraints.
    void fill_constraints_affinity_mask(affinity_mask input_mask, int numa_node_index, int core_type_index, int max_threads_per_core) {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized system_topology");
        __TBB_ASSERT(numa_node_index < (int)numa_affinity_masks_list.size(), "Wrong NUMA node id");
        __TBB_ASSERT(core_type_index < (int)core_types_affinity_masks_list.size(), "Wrong core type id");
        __TBB_ASSERT(max_threads_per_core == -1 || max_threads_per_core > 0, "Wrong max_threads_per_core");

        hwloc_cpuset_t constraints_mask = hwloc_bitmap_alloc();
        hwloc_cpuset_t core_mask = hwloc_bitmap_alloc();

        hwloc_bitmap_copy(constraints_mask, process_cpu_affinity_mask);
        if (numa_node_index >= 0) {
            hwloc_bitmap_and(constraints_mask, constraints_mask, numa_affinity_masks_list[numa_node_index]);
        }
        if (core_type_index >= 0) {
            hwloc_bitmap_and(constraints_mask, constraints_mask, core_types_affinity_masks_list[core_type_index]);
        }
        if (max_threads_per_core > 0) {
            // Clear the input mask
            hwloc_bitmap_zero(input_mask);

            hwloc_obj_t current_core = nullptr;
            while ((current_core = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_CORE, current_core)) != nullptr) {
                hwloc_bitmap_and(core_mask, constraints_mask, current_core->cpuset);

                // Fit the core mask to the required number of bits
                int current_threads_per_core = 0;
                for (int id = hwloc_bitmap_first(core_mask); id != -1; id = hwloc_bitmap_next(core_mask, id)) {
                    if (++current_threads_per_core > max_threads_per_core) {
                        hwloc_bitmap_clr(core_mask, id);
                    }
                }

                hwloc_bitmap_or(input_mask, input_mask, core_mask);
            }
        } else {
            hwloc_bitmap_copy(input_mask, constraints_mask);
        }

        hwloc_bitmap_free(core_mask);
        hwloc_bitmap_free(constraints_mask);
    }

    // Build result_mask from the whole cores that intersect current_mask, restricted to constraints_mask.
    void fit_num_threads_per_core(affinity_mask result_mask, affinity_mask current_mask, affinity_mask constraints_mask) {
        hwloc_bitmap_zero(result_mask);
        hwloc_obj_t current_core = nullptr;
        while ((current_core = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_CORE, current_core)) != nullptr) {
            if (hwloc_bitmap_intersects(current_mask, current_core->cpuset)) {
                hwloc_bitmap_or(result_mask, result_mask, current_core->cpuset);
            }
        }
        hwloc_bitmap_and(result_mask, result_mask, constraints_mask);
    }

    int get_default_concurrency(int numa_node_index, int core_type_index, int max_threads_per_core) {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized system_topology");

        hwloc_cpuset_t constraints_mask = hwloc_bitmap_alloc();
        fill_constraints_affinity_mask(constraints_mask, numa_node_index, core_type_index, max_threads_per_core);

        int default_concurrency = hwloc_bitmap_weight(constraints_mask);
        hwloc_bitmap_free(constraints_mask);
        return default_concurrency;
    }

    affinity_mask allocate_process_affinity_mask() {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized system_topology");
        return hwloc_bitmap_dup(process_cpu_affinity_mask);
    }

    void free_affinity_mask( affinity_mask mask_to_free ) {
        hwloc_bitmap_free(mask_to_free); // If the bitmap is nullptr, no operation is performed.
    }

    void store_current_affinity_mask( affinity_mask current_mask ) {
        assertion_hwloc_wrapper(hwloc_get_cpubind, topology, current_mask, HWLOC_CPUBIND_THREAD);

        hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
        __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask),
            "Current affinity mask must intersect with the process affinity mask");
    }

    void set_affinity_mask( const_affinity_mask mask ) {
        if (hwloc_bitmap_weight(mask) > 0) {
            assertion_hwloc_wrapper(hwloc_set_cpubind, topology, mask, HWLOC_CPUBIND_THREAD);
        }
    }
};

system_topology* system_topology::instance_ptr{nullptr};

class binding_handler {
    // The following vector saves a thread's affinity mask on scheduler entry so that it can be
    // restored for that thread on scheduler exit.
    typedef std::vector<system_topology::affinity_mask> affinity_masks_container;
    affinity_masks_container affinity_backup;
    system_topology::affinity_mask handler_affinity_mask;

#if WIN32
    affinity_masks_container affinity_buffer;
    int my_numa_node_id;
    int my_core_type_id;
    int my_max_threads_per_core;
#endif

public:
    binding_handler( std::size_t size, int numa_node_id, int core_type_id, int max_threads_per_core )
        : affinity_backup(size)
#if WIN32
        , affinity_buffer(size)
        , my_numa_node_id(numa_node_id)
        , my_core_type_id(core_type_id)
        , my_max_threads_per_core(max_threads_per_core)
#endif
    {
        for (std::size_t i = 0; i < size; ++i) {
            affinity_backup[i] = system_topology::instance().allocate_process_affinity_mask();
#if WIN32
            affinity_buffer[i] = system_topology::instance().allocate_process_affinity_mask();
#endif
        }
        handler_affinity_mask = system_topology::instance().allocate_process_affinity_mask();
        system_topology::instance().fill_constraints_affinity_mask(
            handler_affinity_mask, numa_node_id, core_type_id, max_threads_per_core);
    }

    ~binding_handler() {
        for (std::size_t i = 0; i < affinity_backup.size(); ++i) {
            system_topology::instance().free_affinity_mask(affinity_backup[i]);
#if WIN32
            system_topology::instance().free_affinity_mask(affinity_buffer[i]);
#endif
        }
        system_topology::instance().free_affinity_mask(handler_affinity_mask);
    }

    void apply_affinity( unsigned slot_num ) {
        auto& topology = system_topology::instance();
        __TBB_ASSERT(slot_num < affinity_backup.size(),
            "The slot number is greater than the number of slots in the arena");
        __TBB_ASSERT(topology.is_topology_parsed(),
            "Trying to get access to uninitialized system_topology");

        topology.store_current_affinity_mask(affinity_backup[slot_num]);

#if WIN32
        // TBBBind supports only systems where NUMA nodes and core types do not cross the border
        // between several processor groups. So if a certain NUMA node or core type constraint is
        // specified, the constraints affinity mask will not cross the processor groups' border.

        // But if the constraint is based only on the max_threads_per_core setting, the constraints
        // affinity mask may cross the border between several processor groups on machines with
        // more than 64 hardware threads. That is why the special function that limits the number
        // of threads in the current thread's mask is used here.
        if (topology.number_of_processors_groups > 1 && my_max_threads_per_core != -1 &&
            (my_numa_node_id == -1 || topology.numa_indexes_list.size() == 1) &&
            (my_core_type_id == -1 || topology.core_types_indexes_list.size() == 1)
        ) {
            topology.fit_num_threads_per_core(affinity_buffer[slot_num], affinity_backup[slot_num], handler_affinity_mask);
            topology.set_affinity_mask(affinity_buffer[slot_num]);
            return;
        }
#endif
        topology.set_affinity_mask(handler_affinity_mask);
    }

    void restore_previous_affinity_mask( unsigned slot_num ) {
        auto& topology = system_topology::instance();
        __TBB_ASSERT(topology.is_topology_parsed(),
            "Trying to get access to uninitialized system_topology");
        topology.set_affinity_mask(affinity_backup[slot_num]);
    }

};

extern "C" { // exported to TBB interfaces

TBBBIND_EXPORT void __TBB_internal_initialize_system_topology(
    std::size_t groups_num,
    int& numa_nodes_count, int*& numa_indexes_list,
    int& core_types_count, int*& core_types_indexes_list
) {
    system_topology::construct(groups_num);
    system_topology::instance().fill_topology_information(
        numa_nodes_count, numa_indexes_list,
        core_types_count, core_types_indexes_list
    );
}

TBBBIND_EXPORT binding_handler* __TBB_internal_allocate_binding_handler(int number_of_slots, int numa_id, int core_type_id, int max_threads_per_core) {
    __TBB_ASSERT(number_of_slots > 0, "Trying to create numa handler for 0 threads.");
    return new binding_handler(number_of_slots, numa_id, core_type_id, max_threads_per_core);
}

TBBBIND_EXPORT void __TBB_internal_deallocate_binding_handler(binding_handler* handler_ptr) {
    __TBB_ASSERT(handler_ptr != nullptr, "Trying to deallocate nullptr pointer.");
    delete handler_ptr;
}

TBBBIND_EXPORT void __TBB_internal_apply_affinity(binding_handler* handler_ptr, int slot_num) {
    __TBB_ASSERT(handler_ptr != nullptr, "Trying to get access to uninitialized metadata.");
    handler_ptr->apply_affinity(slot_num);
}

TBBBIND_EXPORT void __TBB_internal_restore_affinity(binding_handler* handler_ptr, int slot_num) {
    __TBB_ASSERT(handler_ptr != nullptr, "Trying to get access to uninitialized metadata.");
    handler_ptr->restore_previous_affinity_mask(slot_num);
}

TBBBIND_EXPORT int __TBB_internal_get_default_concurrency(int numa_id, int core_type_id, int max_threads_per_core) {
    return system_topology::instance().get_default_concurrency(numa_id, core_type_id, max_threads_per_core);
}

void __TBB_internal_destroy_system_topology() {
    return system_topology::destroy();
}

} // extern "C"

} // namespace r1
} // namespace detail
} // namespace tbb

#undef assertion_hwloc_wrapper
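
/* A minimal usage sketch (hypothetical, not part of the library): it shows one plausible call
   sequence for the entry points exported above, assuming the caller has loaded the tbbbind
   shared library and resolved these symbols. Names and signatures are taken from this file;
   the driver code itself is illustrative only.

       int numa_nodes_count = 0, core_types_count = 0;
       int *numa_indexes = nullptr, *core_type_indexes = nullptr;
       __TBB_internal_initialize_system_topology(1, numa_nodes_count, numa_indexes,
                                                 core_types_count, core_type_indexes);

       // One handler per arena: 4 slots, no NUMA or core type constraint, 1 thread per core.
       binding_handler* handler = __TBB_internal_allocate_binding_handler(4, -1, -1, 1);
       __TBB_internal_apply_affinity(handler, 0);    // bind the thread occupying slot 0
       __TBB_internal_restore_affinity(handler, 0);  // restore its previous mask on slot exit
       __TBB_internal_deallocate_binding_handler(handler);

       __TBB_internal_destroy_system_topology();
*/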