/*
    Copyright (c) 2019-2020 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "../tbb/assert_impl.h" // Out-of-line TBB assertion handling routines are instantiated here.
#include "oneapi/tbb/detail/_assert.h"

#if _MSC_VER && !__INTEL_COMPILER
#pragma warning( push )
#pragma warning( disable : 4100 )
#endif
#include <hwloc.h>
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning( pop )
#endif

#include <vector>

// Most hwloc calls return a negative exit code on error.
// This macro checks the error codes returned by the hwloc interfaces.
#define assertion_hwloc_wrapper(command, ...) \
        __TBB_ASSERT_EX( (command(__VA_ARGS__)) >= 0, "Error occurred during call to hwloc API.");

namespace tbb {
namespace detail {
namespace r1 {

//------------------------------------------------------------------------
// Information about the hardware of the machine that TBB happens to run on
//------------------------------------------------------------------------
class platform_topology {
    friend class numa_affinity_handler;

    // TODO: add the `my_` prefix to the members
    hwloc_topology_t topology;
    hwloc_cpuset_t   process_cpu_affinity_mask;
    hwloc_nodeset_t  process_node_affinity_mask;
    std::vector<hwloc_cpuset_t> affinity_masks_list;

    std::vector<int> default_concurrency_list;
    std::vector<int> numa_indexes_list;
    int numa_nodes_count;

    enum init_stages { uninitialized,
                       started,
                       topology_allocated,
                       topology_loaded,
                       topology_parsed } initialization_state;

    // Binding threads to NUMA nodes located in other Windows processor groups is allowed
    // only if the machine topology contains several Windows processor groups and the
    // process affinity mask was not limited manually (an affinity mask cannot violate
    // processor group boundaries).
    bool intergroup_binding_allowed(size_t groups_num) { return groups_num > 1; }

    platform_topology() : topology(nullptr),
                          process_cpu_affinity_mask(nullptr),
                          process_node_affinity_mask(nullptr),
                          numa_nodes_count(0),
                          initialization_state(uninitialized) {}

public:
    typedef hwloc_cpuset_t             affinity_mask;
    typedef hwloc_const_cpuset_t const_affinity_mask;

    static platform_topology& instance() {
        static platform_topology topology;
        return topology;
    }

    bool is_topology_parsed() { return initialization_state == topology_parsed; }

    void initialize( size_t groups_num ) {
        if ( initialization_state != uninitialized )
            return;
        initialization_state = started;

        // Parse topology
        if ( hwloc_topology_init( &topology ) == 0 ) {
            initialization_state = topology_allocated;
            if ( hwloc_topology_load( topology ) == 0 ) {
                initialization_state = topology_loaded;
            }
        }

        // Fill parameters with stubs if topology parsing failed.
        if ( initialization_state != topology_loaded ) {
            if ( initialization_state == topology_allocated ) {
                hwloc_topology_destroy(topology);
            }
            numa_nodes_count = 1;
            numa_indexes_list.push_back(-1);
            default_concurrency_list.push_back(-1);
            return;
        }

        // Get the process affinity mask
        if ( intergroup_binding_allowed(groups_num) ) {
            process_cpu_affinity_mask  = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset (topology));
            process_node_affinity_mask = hwloc_bitmap_dup(hwloc_topology_get_complete_nodeset(topology));
        } else {
            process_cpu_affinity_mask  = hwloc_bitmap_alloc();
            process_node_affinity_mask = hwloc_bitmap_alloc();

            assertion_hwloc_wrapper(hwloc_get_cpubind, topology, process_cpu_affinity_mask, 0);
            hwloc_cpuset_to_nodeset(topology, process_cpu_affinity_mask, process_node_affinity_mask);
        }

        // Get the number of available NUMA nodes.
        // If the system contains no NUMA nodes, hwloc 1.11 returns an infinitely filled bitmap.
        // hwloc_bitmap_weight() returns a negative value for such bitmaps, so this check is used
        // to switch to the fallback way of topology initialization.
        numa_nodes_count = hwloc_bitmap_weight(process_node_affinity_mask);
        if (numa_nodes_count <= 0) {
            // numa_nodes_count may be zero if the process affinity mask is empty as well (an invalid case)
            // or negative if some internal hwloc error occurred, so -1 is placed as the index in that case.
            numa_indexes_list.push_back(numa_nodes_count == 0 ? -1 : 0);
            numa_nodes_count = 1;
            default_concurrency_list.push_back(hwloc_bitmap_weight(process_cpu_affinity_mask));

            affinity_masks_list.push_back(hwloc_bitmap_dup(process_cpu_affinity_mask));
            initialization_state = topology_parsed;
            return;
        }

        // Get the list of NUMA logical indexes
        unsigned counter = 0;
        int i = 0;
        int max_numa_index = -1;
        numa_indexes_list.resize(numa_nodes_count);
        hwloc_obj_t node_buffer;
        hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
            node_buffer = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, i);
            numa_indexes_list[counter] = static_cast<int>(node_buffer->logical_index);

            if ( numa_indexes_list[counter] > max_numa_index ) {
                max_numa_index = numa_indexes_list[counter];
            }

            counter++;
        } hwloc_bitmap_foreach_end();
        __TBB_ASSERT(max_numa_index >= 0, "Maximal NUMA index must not be negative");

        // Fill the concurrency and affinity mask lists
        default_concurrency_list.resize(max_numa_index + 1);
        affinity_masks_list.resize(max_numa_index + 1);

        int index = 0;
        hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
            node_buffer = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, i);
            index = static_cast<int>(node_buffer->logical_index);

            hwloc_cpuset_t& current_mask = affinity_masks_list[index];
            current_mask = hwloc_bitmap_dup(node_buffer->cpuset);

            hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
            __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask), "hwloc detected an unavailable NUMA node");
            default_concurrency_list[index] = hwloc_bitmap_weight(current_mask);
        } hwloc_bitmap_foreach_end();
        initialization_state = topology_parsed;
    }

    ~platform_topology() {
        if ( is_topology_parsed() ) {
            for (int i = 0; i < numa_nodes_count; i++) {
                hwloc_bitmap_free(affinity_masks_list[numa_indexes_list[i]]);
            }
            hwloc_bitmap_free(process_node_affinity_mask);
            hwloc_bitmap_free(process_cpu_affinity_mask);
        }

        if ( initialization_state >= topology_allocated ) {
            hwloc_topology_destroy(topology);
        }

        initialization_state = uninitialized;
    }

    void fill( int& nodes_count, int*& indexes_list, int*& concurrency_list ) {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized platform_topology");
        nodes_count = numa_nodes_count;
        indexes_list = &numa_indexes_list.front();
        concurrency_list = &default_concurrency_list.front();
    }

    affinity_mask allocate_process_affinity_mask() {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized platform_topology");
        return hwloc_bitmap_dup(process_cpu_affinity_mask);
    }

    void free_affinity_mask( affinity_mask mask_to_free ) {
        hwloc_bitmap_free(mask_to_free); // If the bitmap is nullptr, no operation is performed.
    }

    void store_current_affinity_mask( affinity_mask current_mask ) {
        assertion_hwloc_wrapper(hwloc_get_cpubind, topology, current_mask, HWLOC_CPUBIND_THREAD);

        hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
        __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask),
            "Current affinity mask must intersect with the process affinity mask");
    }

    void set_new_affinity_mask( const_affinity_mask new_mask ) {
        assertion_hwloc_wrapper(hwloc_set_cpubind, topology, new_mask, HWLOC_CPUBIND_THREAD);
    }

    const_affinity_mask get_node_affinity_mask( int node_index ) {
        __TBB_ASSERT((int)affinity_masks_list.size() > node_index,
            "Trying to get affinity mask for uninitialized NUMA node");
        return affinity_masks_list[node_index];
    }
};

class binding_handler {
    // The following vector saves the thread affinity mask on scheduler entry so that it can be
    // restored for this thread on scheduler exit.
    typedef std::vector<platform_topology::affinity_mask> affinity_masks_container;
    affinity_masks_container affinity_backup;

public:
    binding_handler( size_t size ) : affinity_backup(size) {
        for (affinity_masks_container::iterator it = affinity_backup.begin();
             it != affinity_backup.end(); it++) {
            *it = platform_topology::instance().allocate_process_affinity_mask();
        }
    }

    ~binding_handler() {
        for (affinity_masks_container::iterator it = affinity_backup.begin();
             it != affinity_backup.end(); it++) {
            platform_topology::instance().free_affinity_mask(*it);
        }
    }

    void bind_thread_to_node( unsigned slot_num, unsigned numa_node_id ) {
        __TBB_ASSERT(slot_num < affinity_backup.size(),
            "The slot number is greater than the number of slots in the arena");
        __TBB_ASSERT(platform_topology::instance().is_topology_parsed(),
            "Trying to get access to uninitialized platform_topology");
        platform_topology::instance().store_current_affinity_mask(affinity_backup[slot_num]);

        platform_topology::instance().set_new_affinity_mask(
            platform_topology::instance().get_node_affinity_mask(numa_node_id));
    }

    void restore_previous_affinity_mask( unsigned slot_num ) {
        __TBB_ASSERT(platform_topology::instance().is_topology_parsed(),
            "Trying to get access to uninitialized platform_topology");
        platform_topology::instance().set_new_affinity_mask(affinity_backup[slot_num]);
    }
};

extern "C" { // exported to TBB interfaces

void __TBB_internal_initialize_numa_topology( size_t groups_num, int& nodes_count, int*& indexes_list, int*& concurrency_list ) {
    platform_topology::instance().initialize(groups_num);
    platform_topology::instance().fill(nodes_count, indexes_list, concurrency_list);
}

binding_handler* __TBB_internal_allocate_binding_handler(int slot_num) {
    __TBB_ASSERT(slot_num > 0, "Trying to create a NUMA handler for 0 threads.");
    return new binding_handler(slot_num);
}

void __TBB_internal_deallocate_binding_handler(binding_handler* handler_ptr) {
    __TBB_ASSERT(handler_ptr != nullptr, "Trying to deallocate a null pointer.");
    delete handler_ptr;
}

void __TBB_internal_bind_to_node(binding_handler* handler_ptr, int slot_num, int numa_id) {
    __TBB_ASSERT(handler_ptr != nullptr, "Trying to get access to uninitialized metadata.");
    __TBB_ASSERT(platform_topology::instance().is_topology_parsed(),
        "Trying to get access to uninitialized platform_topology.");
    handler_ptr->bind_thread_to_node(slot_num, numa_id);
}

void __TBB_internal_restore_affinity(binding_handler* handler_ptr, int slot_num) {
    __TBB_ASSERT(handler_ptr != nullptr, "Trying to get access to uninitialized metadata.");
    __TBB_ASSERT(platform_topology::instance().is_topology_parsed(),
        "Trying to get access to uninitialized platform_topology.");
    handler_ptr->restore_previous_affinity_mask(slot_num);
}

} // extern "C"

} // namespace r1
} // namespace detail
} // namespace tbb

#undef assertion_hwloc_wrapper
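
// A minimal sketch (as a comment, not compiled) of how a caller might drive the exported entry
// points above. The arena/slot context, the slot count of 4, and the use of groups_num == 1 are
// assumptions for illustration only; they are not taken from this file, and the indexes_list[0]
// access presumes topology parsing succeeded (otherwise it holds the stub value -1).
//
//   int nodes_count = 0;
//   int* indexes_list = nullptr;
//   int* concurrency_list = nullptr;
//
//   // Parse the machine topology once and obtain the NUMA index and concurrency lists.
//   __TBB_internal_initialize_numa_topology(/*groups_num*/ 1, nodes_count, indexes_list, concurrency_list);
//
//   // One affinity-backup entry per arena slot; slot 0 is used below.
//   binding_handler* handler = __TBB_internal_allocate_binding_handler(/*slot_num*/ 4);
//
//   __TBB_internal_bind_to_node(handler, /*slot_num*/ 0, /*numa_id*/ indexes_list[0]); // save the current mask, pin to the node
//   // ... run work on the bound thread ...
//   __TBB_internal_restore_affinity(handler, /*slot_num*/ 0);                          // restore the saved mask
//
//   __TBB_internal_deallocate_binding_handler(handler);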