/*
 * kmp_affinity.h -- header for affinity management
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp_os.h"
#include "kmp.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
    class Mask : public KMPAffinity::Mask {
        hwloc_cpuset_t mask;
    public:
        Mask() { mask = hwloc_bitmap_alloc(); this->zero(); }
        ~Mask() { hwloc_bitmap_free(mask); }
        void set(int i) override { hwloc_bitmap_set(mask, i); }
        bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
        void clear(int i) override { hwloc_bitmap_clr(mask, i); }
        void zero() override { hwloc_bitmap_zero(mask); }
        void copy(const KMPAffinity::Mask* src) override {
            const Mask* convert = static_cast<const Mask*>(src);
            hwloc_bitmap_copy(mask, convert->mask);
        }
        void bitwise_and(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            hwloc_bitmap_and(mask, mask, convert->mask);
        }
        void bitwise_or(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            hwloc_bitmap_or(mask, mask, convert->mask);
        }
        void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
        int begin() const override { return hwloc_bitmap_first(mask); }
        // hwloc_bitmap_next() returns -1 once the scan is exhausted, so -1
        // serves as this implementation's end sentinel.
        int end() const override { return -1; }
        int next(int previous) const override { return hwloc_bitmap_next(mask, previous); }
        int get_system_affinity(bool abort_on_error) override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal get affinity operation when not capable");
            int retval = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
        int set_system_affinity(bool abort_on_error) const override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal set affinity operation when not capable");
            int retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
        int get_proc_group() const override {
            int i;
            int group = -1;
# if KMP_OS_WINDOWS
            if (__kmp_num_proc_groups == 1) {
                return 1;
            }
            for (i = 0; i < __kmp_num_proc_groups; i++) {
                // On Windows, the long type is always 32 bits
                unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2);
                unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2+1);
                if (first_32_bits == 0 && second_32_bits == 0) {
                    continue;
                }
                if (group >= 0) {
                    return -1; // bits are set in more than one group
                }
                group = i;
            }
# endif /* KMP_OS_WINDOWS */
            return group;
        }
    };
    void determine_capable(const char* var) override {
        const hwloc_topology_support* topology_support;
        if (__kmp_hwloc_topology == NULL) {
            if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
                __kmp_hwloc_error = TRUE;
                if (__kmp_affinity_verbose)
                    KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
            }
            if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
                __kmp_hwloc_error = TRUE;
                if (__kmp_affinity_verbose)
                    KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
            }
        }
        topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
        // Is the system capable of setting/getting this thread's affinity?
        // Also, is topology discovery possible? ("pu" indicates the ability to
        // discover processing units.) And finally, were there no errors when
        // calling any hwloc_* API functions?
        if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
            topology_support->cpubind->get_thisthread_cpubind &&
            topology_support->discovery->pu &&
            !__kmp_hwloc_error)
        {
            // enables affinity according to KMP_AFFINITY_CAPABLE() macro
            KMP_AFFINITY_ENABLE(TRUE);
        } else {
            // indicate that hwloc didn't work and disable affinity
            __kmp_hwloc_error = TRUE;
            KMP_AFFINITY_DISABLE();
        }
    }
    void bind_thread(int which) override {
        KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                    "Illegal set affinity operation when not capable");
        KMPAffinity::Mask *mask;
        KMP_CPU_ALLOC_ON_STACK(mask);
        KMP_CPU_ZERO(mask);
        KMP_CPU_SET(which, mask);
        __kmp_set_system_affinity(mask, TRUE);
        KMP_CPU_FREE_FROM_STACK(mask);
    }
    KMPAffinity::Mask* allocate_mask() override { return new Mask(); }
    void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
    KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
    void deallocate_mask_array(KMPAffinity::Mask* array) override {
        Mask* hwloc_array = static_cast<Mask*>(array);
        delete[] hwloc_array;
    }
    KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
        Mask* hwloc_array = static_cast<Mask*>(array);
        return &(hwloc_array[index]);
    }
    api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
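
// Usage sketch (illustrative only, not part of the public API contract): the
// runtime drives every implementation in this file through the same abstract
// KMPAffinity interface, so callers never see the concrete mask type:
//
//   KMPAffinity *affinity = new KMPHwlocAffinity();   // or KMPNativeAffinity
//   affinity->determine_capable("KMP_AFFINITY");      // probe OS support
//   KMPAffinity::Mask *mask = affinity->allocate_mask();
//   mask->set(3);                                     // add logical CPU 3
//   mask->set_system_affinity(TRUE);                  // pin the calling thread
//   for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
//       ;                                             // visit each set bit
//   affinity->deallocate_mask(mask);
//
// The iteration idiom works for all implementations: the hwloc mask uses -1
// as its end sentinel, while the native masks use the total bit count.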

#if KMP_OS_LINUX
/*
 * On some of the older OS's that we build on, these constants aren't present
 * in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
 * all systems of the same arch where they are defined, and they cannot
 * change: they are set in stone forever.
 */
#include <sys/syscall.h>
# if KMP_ARCH_X86 || KMP_ARCH_ARM
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  241
#  elif __NR_sched_setaffinity != 241
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  242
#  elif __NR_sched_getaffinity != 242
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_AARCH64
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  122
#  elif __NR_sched_setaffinity != 122
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  123
#  elif __NR_sched_getaffinity != 123
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_X86_64
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  203
#  elif __NR_sched_setaffinity != 203
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  204
#  elif __NR_sched_getaffinity != 204
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_PPC64
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  222
#  elif __NR_sched_setaffinity != 222
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  223
#  elif __NR_sched_getaffinity != 223
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_MIPS
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  4239
#  elif __NR_sched_setaffinity != 4239
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  4240
#  elif __NR_sched_getaffinity != 4240
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_MIPS64
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  5195
#  elif __NR_sched_setaffinity != 5195
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  5196
#  elif __NR_sched_getaffinity != 5196
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# else
#  error Unknown or unsupported architecture
# endif /* KMP_ARCH_* */
class KMPNativeAffinity : public KMPAffinity {
    class Mask : public KMPAffinity::Mask {
        typedef unsigned char mask_t;
        static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    public:
        mask_t* mask;
        Mask() { mask = (mask_t*)__kmp_allocate(__kmp_affin_mask_size); }
        ~Mask() { if (mask) __kmp_free(mask); }
        void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
        bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
        void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
        void zero() override {
            for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
                mask[i] = 0;
        }
        void copy(const KMPAffinity::Mask* src) override {
            const Mask* convert = static_cast<const Mask*>(src);
            for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
                mask[i] = convert->mask[i];
        }
        void bitwise_and(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
                mask[i] &= convert->mask[i];
        }
        void bitwise_or(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
                mask[i] |= convert->mask[i];
        }
        void bitwise_not() override {
            for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
                mask[i] = ~(mask[i]);
        }
        int begin() const override {
            int retval = 0;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
        int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
        int next(int previous) const override {
            int retval = previous + 1;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
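        // Worked example (illustrative) of the bit/byte split used by set(),
        // is_set() and clear() above: with an 8-bit mask_t, logical CPU
        // i = 10 lives in byte i/8 = 1 at bit offset i%8 = 2, so set(10)
        // executes mask[1] |= (mask_t)1 << 2.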
        int get_system_affinity(bool abort_on_error) override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal get affinity operation when not capable");
            int retval = syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
        int set_system_affinity(bool abort_on_error) const override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal set affinity operation when not capable");
            int retval = syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
    };
    void determine_capable(const char* env_var) override {
        __kmp_affinity_determine_capable(env_var);
    }
    void bind_thread(int which) override {
        __kmp_affinity_bind_thread(which);
    }
    KMPAffinity::Mask* allocate_mask() override {
        KMPNativeAffinity::Mask* retval = new Mask();
        return retval;
    }
    void deallocate_mask(KMPAffinity::Mask* m) override {
        KMPNativeAffinity::Mask* native_mask = static_cast<KMPNativeAffinity::Mask*>(m);
        delete native_mask; // delete through the derived type, which owns the storage
    }
    KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
    void deallocate_mask_array(KMPAffinity::Mask* array) override {
        Mask* linux_array = static_cast<Mask*>(array);
        delete[] linux_array;
    }
    KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
        Mask* linux_array = static_cast<Mask*>(array);
        return &(linux_array[index]);
    }
    api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX */
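
// The Linux implementation above issues the affinity system calls directly
// via syscall(2) rather than through the glibc wrappers, since the wrappers
// are missing on some of the older C libraries this runtime builds against.
// A minimal standalone equivalent (illustrative sketch only) would be:
//
//   #include <sys/syscall.h>
//   #include <unistd.h>
//   unsigned char buf[128];                      // raw kernel CPU mask
//   long r = syscall(__NR_sched_getaffinity, 0,  // pid 0 = calling thread
//                    sizeof(buf), buf);
//   // r >= 0: buf now holds the mask; r < 0: consult errno for the failure.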

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
    class Mask : public KMPAffinity::Mask {
        typedef ULONG_PTR mask_t;
        static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
        mask_t* mask;
    public:
        Mask() { mask = (mask_t*)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups); }
        ~Mask() { if (mask) __kmp_free(mask); }
        void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
        bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
        void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
        void zero() override {
            for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
                mask[i] = 0;
        }
        void copy(const KMPAffinity::Mask* src) override {
            const Mask* convert = static_cast<const Mask*>(src);
            for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
                mask[i] = convert->mask[i];
        }
        void bitwise_and(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
                mask[i] &= convert->mask[i];
        }
        void bitwise_or(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
                mask[i] |= convert->mask[i];
        }
        void bitwise_not() override {
            for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
                mask[i] = ~(mask[i]);
        }
        int begin() const override {
            int retval = 0;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
        int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
        int next(int previous) const override {
            int retval = previous + 1;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
        int set_system_affinity(bool abort_on_error) const override {
            if (__kmp_num_proc_groups > 1) {
                // Check for a valid mask.
                GROUP_AFFINITY ga;
                int group = get_proc_group();
                if (group < 0) {
                    if (abort_on_error) {
                        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
                    }
                    return -1;
                }
                // Transform the bit vector into a GROUP_AFFINITY struct
                // and make the system call to set affinity.
                ga.Group = group;
                ga.Mask = mask[group];
                ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

                KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
                if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
            } else {
                if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
            }
            return 0;
        }
        int get_system_affinity(bool abort_on_error) override {
            if (__kmp_num_proc_groups > 1) {
                this->zero();
                GROUP_AFFINITY ga;
                KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
                if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
                if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || (ga.Mask == 0)) {
                    return -1;
                }
                mask[ga.Group] = ga.Mask;
            } else {
                mask_t newMask, sysMask, retval;
                if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
                retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
                if (!retval) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
                newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
                if (!newMask) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error; // restoring the original thread mask failed
                }
                *mask = retval;
            }
            return 0;
        }
        int get_proc_group() const override {
            int group = -1;
            if (__kmp_num_proc_groups == 1) {
                return 1;
            }
            for (int i = 0; i < __kmp_num_proc_groups; i++) {
                if (mask[i] == 0)
                    continue;
                if (group >= 0)
                    return -1; // bits are set in more than one group
                group = i;
            }
            return group;
        }
    };
    void determine_capable(const char* env_var) override {
        __kmp_affinity_determine_capable(env_var);
    }
    void bind_thread(int which) override {
        __kmp_affinity_bind_thread(which);
    }
    KMPAffinity::Mask* allocate_mask() override { return new Mask(); }
    void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
    KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
    void deallocate_mask_array(KMPAffinity::Mask* array) override {
        Mask* windows_array = static_cast<Mask*>(array);
        delete[] windows_array;
    }
    KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
        Mask* windows_array = static_cast<Mask*>(array);
        return &(windows_array[index]);
    }
    api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth)
        : depth(_depth), leader(FALSE) {
    }
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if ((unsigned)level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
    void print() const {
        unsigned i;
        printf("Depth: %u --- ", depth);
        for (i = 0; i < depth; i++) {
            printf("%u ", labels[i]);
        }
    }
};

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second)
        : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b)
    {
        first = b.first;
        second = b.second;
        return *this;
    }
    void print() const {
        printf("first = "); first.print();
        printf(" --- second = %u", second);
    }
    bool operator==(const AddrUnsPair &b) const {
        if (first != b.first) return false;
        if (second != b.second) return false;
        return true;
    }
    bool operator!=(const AddrUnsPair &b) const {
        return !operator==(b);
    }
};
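
// Example (illustrative only, assuming labels are stored outermost-first): on
// a machine with 2 packages, 2 cores per package and 2 hardware threads per
// core, the thread "package 1, core 0, thread 1" would carry
// labels = {1, 0, 1} with depth = 3. isClose(b, 1) then ignores the innermost
// level and compares only {package, core}, i.e. it asks whether two hardware
// threads share a core.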

static int
__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}


/** A structure for holding machine-specific hierarchy info to be computed
    once at init. This structure represents a mapping of threads to the actual
    machine hierarchy, or to our best guess at what the hierarchy might be,
    for the purpose of performing an efficient barrier. In the worst case,
    when there is no machine hierarchy information, it produces a tree
    suitable for a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
    /** Good default values for number of leaves and branching factor, given
        no affinity information. Behaves a bit like hyper barrier. */
    static const kmp_uint32 maxLeaves = 4;
    static const kmp_uint32 minBranch = 4;
    /** Number of levels in the hierarchy. Typical levels are threads/core,
        cores/package or socket, packages/node, nodes/machine, etc. We don't
        want to get specific with nomenclature. When the machine is
        oversubscribed we add levels to duplicate the hierarchy, doubling the
        thread capacity of the hierarchy each time we add a level. */
    kmp_uint32 maxLevels;

    /** This is specifically the depth of the machine configuration hierarchy,
        in terms of the number of levels along the longest path from root to
        any leaf. It corresponds to the number of entries in numPerLevel if we
        exclude all but one trailing 1. */
    kmp_uint32 depth;
    kmp_uint32 base_num_threads;
    enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
    volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress
    volatile kmp_int8 resizing;      // 0=not resizing, 1=resizing

    /** Level 0 corresponds to leaves. numPerLevel[i] is the number of
        children the parent of a node at level i has. For example, if we have
        a machine with 4 packages, 4 cores/package and 2 HT per core, then
        numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
    kmp_uint32 *numPerLevel;
    kmp_uint32 *skipPerLevel;

    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
        int hier_depth = adr2os[0].first.depth;
        int level = 0;
        for (int i = hier_depth - 1; i >= 0; --i) {
            int max = -1;
            for (int j = 0; j < num_addrs; ++j) {
                int next = adr2os[j].first.childNums[i];
                if (next > max) max = next;
            }
            numPerLevel[level] = max + 1;
            ++level;
        }
    }

    hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

    void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }
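    /* Worked example (illustrative): for the 4-package, 4-core, 2-HT machine
       described above, each Address stores childNums innermost-last, and
       deriveLevels() scans childNums from the last index back to the first:
       numPerLevel[0] = max(thread num)+1 = 2, numPerLevel[1] = 4 (cores),
       numPerLevel[2] = 4 (packages), and the remaining levels stay at 1,
       yielding numPerLevel = {2, 4, 4, 1, 1, 1, 1} with maxLevels = 7. */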
    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing);
        if (bool_result == 0) { // Wait for initialization
            while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result == 1);

        /* Added explicit initialization of the data fields here to prevent
           usage of dirty value observed when static library is re-initialized
           multiple times (e.g. when non-OpenMP thread repeatedly
           launches/joins thread that uses OpenMP). */
        depth = 1;
        resizing = 0;
        maxLevels = 7;
        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
        skipPerLevel = &(numPerLevel[maxLevels]);
        for (kmp_uint32 i = 0; i < maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Sort table by physical ID
        if (adr2os) {
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        }
        else {
            numPerLevel[0] = maxLeaves;
            numPerLevel[1] = num_addrs / maxLeaves;
            if (num_addrs % maxLeaves) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i = maxLevels - 1; i >= 0; --i) // count non-empty levels to get depth
            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
                depth++;

        kmp_uint32 branch = minBranch;
        if (numPerLevel[0] == 1) branch = num_addrs / maxLeaves;
        if (branch < minBranch) branch = minBranch;
        for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d + 1] == 1) depth++;
                numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
            }
            if (numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch < 4) branch = minBranch;
            }
        }

        for (kmp_uint32 i = 1; i < depth; ++i)
            skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
        // Fill in hierarchy in the case of oversubscription
        for (kmp_uint32 i = depth; i < maxLevels; ++i)
            skipPerLevel[i] = 2 * skipPerLevel[i - 1];

        uninitialized = initialized; // One writer
    }
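    /* Worked example (illustrative): with numPerLevel = {2, 4, 4, 1, ...} the
       loops above produce skipPerLevel = {1, 2, 8, 32, 64, 128, 256}: entry i
       is the number of leaves spanned by one node at level i
       (skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1]), and the
       oversubscription levels past the machine depth double the span each
       time. */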
    // Resize the hierarchy if nproc changes to something larger than before
    void resize(kmp_uint32 nproc)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        while (bool_result == 0) { // someone else is trying to resize
            KMP_CPU_PAUSE();
            if (nproc <= base_num_threads) // happy with other thread's resize
                return;
            else // try to resize
                bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        }
        KMP_DEBUG_ASSERT(bool_result != 0);
        if (nproc <= base_num_threads) return; // happy with other thread's resize

        // Calculate new maxLevels
        kmp_uint32 old_sz = skipPerLevel[depth - 1];
        kmp_uint32 incs = 0, old_maxLevels = maxLevels;
        // First see if old maxLevels is enough to contain new size
        for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
            skipPerLevel[i] = 2 * skipPerLevel[i - 1];
            numPerLevel[i - 1] *= 2;
            old_sz *= 2;
            depth++;
        }
        if (nproc > old_sz) { // Not enough space, need to expand hierarchy
            while (nproc > old_sz) {
                old_sz *= 2;
                incs++;
                depth++;
            }
            maxLevels += incs;

            // Resize arrays
            kmp_uint32 *old_numPerLevel = numPerLevel;
            kmp_uint32 *old_skipPerLevel = skipPerLevel;
            numPerLevel = skipPerLevel = NULL;
            numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
            skipPerLevel = &(numPerLevel[maxLevels]);

            // Copy old elements from old arrays
            for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
                numPerLevel[i] = old_numPerLevel[i];
                skipPerLevel[i] = old_skipPerLevel[i];
            }

            // Init new elements in arrays to 1
            for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
                numPerLevel[i] = 1;
                skipPerLevel[i] = 1;
            }

            // Free old arrays
            __kmp_free(old_numPerLevel);
        }

        // Fill in oversubscription levels of hierarchy
        for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
            skipPerLevel[i] = 2 * skipPerLevel[i - 1];

        base_num_threads = nproc;
        resizing = 0; // One writer
    }
};
#endif // KMP_AFFINITY_H