xref: /oneTBB/src/tbb/misc_ex.cpp (revision ddc3bd03)
151c0b2f7Stbbdev /*
2*ddc3bd03SPavel Kumbrasev     Copyright (c) 2005-2023 Intel Corporation
351c0b2f7Stbbdev 
451c0b2f7Stbbdev     Licensed under the Apache License, Version 2.0 (the "License");
551c0b2f7Stbbdev     you may not use this file except in compliance with the License.
651c0b2f7Stbbdev     You may obtain a copy of the License at
751c0b2f7Stbbdev 
851c0b2f7Stbbdev         http://www.apache.org/licenses/LICENSE-2.0
951c0b2f7Stbbdev 
1051c0b2f7Stbbdev     Unless required by applicable law or agreed to in writing, software
1151c0b2f7Stbbdev     distributed under the License is distributed on an "AS IS" BASIS,
1251c0b2f7Stbbdev     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1351c0b2f7Stbbdev     See the License for the specific language governing permissions and
1451c0b2f7Stbbdev     limitations under the License.
1551c0b2f7Stbbdev */
1651c0b2f7Stbbdev 
1751c0b2f7Stbbdev // Source file for miscellaneous entities that are infrequently referenced by
1851c0b2f7Stbbdev // an executing program, and implementation of which requires dynamic linking.
1951c0b2f7Stbbdev 
2051c0b2f7Stbbdev #include "misc.h"
2151c0b2f7Stbbdev 
2251c0b2f7Stbbdev #if !defined(__TBB_HardwareConcurrency)
2351c0b2f7Stbbdev 
2451c0b2f7Stbbdev #include "dynamic_link.h"
2551c0b2f7Stbbdev #include <stdio.h>
2651c0b2f7Stbbdev #include <limits.h>
2751c0b2f7Stbbdev 
2851c0b2f7Stbbdev #if _WIN32||_WIN64
2951c0b2f7Stbbdev #include <windows.h>
3051c0b2f7Stbbdev #if __TBB_WIN8UI_SUPPORT
3151c0b2f7Stbbdev #include <thread>
3251c0b2f7Stbbdev #endif
3351c0b2f7Stbbdev #else
3451c0b2f7Stbbdev #include <unistd.h>
35734f0bc0SPablo Romero #if __unix__
3651c0b2f7Stbbdev #if __linux__
3751c0b2f7Stbbdev #include <sys/sysinfo.h>
38734f0bc0SPablo Romero #endif
3951c0b2f7Stbbdev #include <cstring>
4051c0b2f7Stbbdev #include <sched.h>
4151c0b2f7Stbbdev #include <cerrno>
4251c0b2f7Stbbdev #elif __sun
4351c0b2f7Stbbdev #include <sys/sysinfo.h>
4451c0b2f7Stbbdev #elif __FreeBSD__
4551c0b2f7Stbbdev #include <cerrno>
4651c0b2f7Stbbdev #include <cstring>
4751c0b2f7Stbbdev #include <sys/param.h>  // Required by <sys/cpuset.h>
4851c0b2f7Stbbdev #include <sys/cpuset.h>
4951c0b2f7Stbbdev #endif
5051c0b2f7Stbbdev #endif
5151c0b2f7Stbbdev 
5251c0b2f7Stbbdev namespace tbb {
5351c0b2f7Stbbdev namespace detail {
5451c0b2f7Stbbdev namespace r1 {
5551c0b2f7Stbbdev 
5651c0b2f7Stbbdev #if __TBB_USE_OS_AFFINITY_SYSCALL
5751c0b2f7Stbbdev 
58734f0bc0SPablo Romero #if __unix__
5951c0b2f7Stbbdev // Handlers for interoperation with libiomp
6051c0b2f7Stbbdev static int (*libiomp_try_restoring_original_mask)();
6151c0b2f7Stbbdev // Table for mapping to libiomp entry points
6251c0b2f7Stbbdev static const dynamic_link_descriptor iompLinkTable[] = {
6351c0b2f7Stbbdev     DLD_NOWEAK( kmp_set_thread_affinity_mask_initial, libiomp_try_restoring_original_mask )
6451c0b2f7Stbbdev };
6551c0b2f7Stbbdev #endif
6651c0b2f7Stbbdev 
set_thread_affinity_mask(std::size_t maskSize,const basic_mask_t * threadMask)6751c0b2f7Stbbdev static void set_thread_affinity_mask( std::size_t maskSize, const basic_mask_t* threadMask ) {
68734f0bc0SPablo Romero #if __FreeBSD__ || __NetBSD__ || __OpenBSD__
6951c0b2f7Stbbdev     if( cpuset_setaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) )
70734f0bc0SPablo Romero #else /* __unix__ */
71734f0bc0SPablo Romero     if( sched_setaffinity( 0, maskSize, threadMask ) )
7251c0b2f7Stbbdev #endif
7351c0b2f7Stbbdev         // Here and below the error severity is lowered from critical level
7451c0b2f7Stbbdev         // because it may happen during TBB library unload because of not
7551c0b2f7Stbbdev         // waiting for workers to complete (current RML policy, to be fixed).
7651c0b2f7Stbbdev         // handle_perror( errno, "setaffinity syscall" );
7751c0b2f7Stbbdev         runtime_warning( "setaffinity syscall failed" );
7851c0b2f7Stbbdev }
7951c0b2f7Stbbdev 
get_thread_affinity_mask(std::size_t maskSize,basic_mask_t * threadMask)8051c0b2f7Stbbdev static void get_thread_affinity_mask( std::size_t maskSize, basic_mask_t* threadMask ) {
81734f0bc0SPablo Romero #if __FreeBSD__ || __NetBSD__ || __OpenBSD__
8251c0b2f7Stbbdev     if( cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) )
83734f0bc0SPablo Romero #else /* __unix__ */
84734f0bc0SPablo Romero     if( sched_getaffinity( 0, maskSize, threadMask ) )
8551c0b2f7Stbbdev #endif
8651c0b2f7Stbbdev     runtime_warning( "getaffinity syscall failed" );
8751c0b2f7Stbbdev }
8851c0b2f7Stbbdev 
8951c0b2f7Stbbdev static basic_mask_t* process_mask;
9051c0b2f7Stbbdev static int num_masks;
9151c0b2f7Stbbdev 
destroy_process_mask()9251c0b2f7Stbbdev void destroy_process_mask() {
9351c0b2f7Stbbdev     delete [] process_mask;
94ba947f18SIlya Isaev     process_mask = nullptr;
9551c0b2f7Stbbdev }
9651c0b2f7Stbbdev 
9751c0b2f7Stbbdev #define curMaskSize sizeof(basic_mask_t) * num_masks
~affinity_helper()9851c0b2f7Stbbdev affinity_helper::~affinity_helper() {
9951c0b2f7Stbbdev     if( threadMask ) {
10051c0b2f7Stbbdev         if( is_changed ) {
10151c0b2f7Stbbdev             set_thread_affinity_mask( curMaskSize, threadMask );
10251c0b2f7Stbbdev         }
10351c0b2f7Stbbdev         delete [] threadMask;
10451c0b2f7Stbbdev     }
10551c0b2f7Stbbdev }
protect_affinity_mask(bool restore_process_mask)10651c0b2f7Stbbdev void affinity_helper::protect_affinity_mask( bool restore_process_mask ) {
10757f524caSIlya Isaev     if( threadMask == nullptr && num_masks ) { // TODO: assert num_masks validity?
10851c0b2f7Stbbdev         threadMask = new basic_mask_t [num_masks];
10951c0b2f7Stbbdev         std::memset( threadMask, 0, curMaskSize );
11051c0b2f7Stbbdev         get_thread_affinity_mask( curMaskSize, threadMask );
11151c0b2f7Stbbdev         if( restore_process_mask ) {
11251c0b2f7Stbbdev             __TBB_ASSERT( process_mask, "A process mask is requested but not yet stored" );
11351c0b2f7Stbbdev             is_changed = memcmp( process_mask, threadMask, curMaskSize );
11451c0b2f7Stbbdev             if( is_changed )
11551c0b2f7Stbbdev                 set_thread_affinity_mask( curMaskSize, process_mask );
11651c0b2f7Stbbdev         } else {
11751c0b2f7Stbbdev             // Assume that the mask will be changed by the caller.
11851c0b2f7Stbbdev             is_changed = 1;
11951c0b2f7Stbbdev         }
12051c0b2f7Stbbdev     }
12151c0b2f7Stbbdev }
dismiss()12251c0b2f7Stbbdev void affinity_helper::dismiss() {
12351c0b2f7Stbbdev     delete [] threadMask;
12457f524caSIlya Isaev     threadMask = nullptr;
12551c0b2f7Stbbdev     is_changed = 0;
12651c0b2f7Stbbdev }
12751c0b2f7Stbbdev #undef curMaskSize
12851c0b2f7Stbbdev 
12951c0b2f7Stbbdev static std::atomic<do_once_state> hardware_concurrency_info;
13051c0b2f7Stbbdev 
13151c0b2f7Stbbdev static int theNumProcs;
13251c0b2f7Stbbdev 
// Discovers the number of processors available to this process by reading its
// affinity mask, and caches the result in theNumProcs. Side effects: sets
// num_masks (number of basic_mask_t words needed for a full mask) and stores
// a copy of the process mask in process_mask for later use by affinity_helper.
static void initialize_hardware_concurrency_info () {
    int err;
    int availableProcs = 0;
    int numMasks = 1;
    int maxProcs = sysconf(_SC_NPROCESSORS_ONLN);
    basic_mask_t* processMask;
    const std::size_t BasicMaskSize =  sizeof(basic_mask_t);
    // The kernel mask may be wider than one basic_mask_t: keep doubling the
    // buffer until the affinity syscall accepts its size, it fails for a
    // reason other than "buffer too small", or a sanity cap on the bit count
    // is reached (16K CPUs on the BSDs, 256K on Linux-like systems).
    for (;;) {
        const int curMaskSize = BasicMaskSize * numMasks;
        processMask = new basic_mask_t[numMasks];
        std::memset( processMask, 0, curMaskSize );
#if __FreeBSD__ || __NetBSD__ || __OpenBSD__
        // CPU_LEVEL_WHICH - anonymous (current) mask, CPU_LEVEL_CPUSET - assigned mask
        err = cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, curMaskSize, processMask );
        if ( !err || errno != ERANGE || curMaskSize * CHAR_BIT >= 16 * 1024 )
            break;
#else /* __unix__ */
        int pid = getpid();
        err = sched_getaffinity( pid, curMaskSize, processMask );
        if ( !err || errno != EINVAL || curMaskSize * CHAR_BIT >= 256 * 1024 )
             break;
#endif
        delete[] processMask;
        numMasks <<= 1;
    }
    if ( !err ) {
        // We have found the mask size and captured the process affinity mask into processMask.
        num_masks = numMasks; // do here because it's needed for affinity_helper to work
#if __unix__
        // For better coexistence with libiomp which might have changed the mask already,
        // check for its presence and ask it to restore the mask.
        dynamic_link_handle libhandle;
        if ( dynamic_link( "libiomp5.so", iompLinkTable, 1, &libhandle, DYNAMIC_LINK_GLOBAL ) ) {
            // We have found the symbol provided by libiomp5 for restoring original thread affinity.
            affinity_helper affhelp;
            affhelp.protect_affinity_mask( /*restore_process_mask=*/false );
            if ( libiomp_try_restoring_original_mask()==0 ) {
                // Now we have the right mask to capture, restored by libiomp.
                const int curMaskSize = BasicMaskSize * numMasks;
                std::memset( processMask, 0, curMaskSize );
                get_thread_affinity_mask( curMaskSize, processMask );
            } else
                affhelp.dismiss();  // thread mask has not changed
            dynamic_unlink( libhandle );
            // Destructor of affinity_helper restores the thread mask (unless dismissed).
        }
#endif
        // Count set bits in the mask, but never report more than the number of
        // processors currently online (maxProcs).
        for ( int m = 0; availableProcs < maxProcs && m < numMasks; ++m ) {
            for ( std::size_t i = 0; (availableProcs < maxProcs) && (i < BasicMaskSize * CHAR_BIT); ++i ) {
                if ( CPU_ISSET( i, processMask + m ) )
                    ++availableProcs;
            }
        }
        process_mask = processMask;
    }
    else {
        // Failed to get the process affinity mask; assume the whole machine can be used.
        availableProcs = (maxProcs == INT_MAX) ? sysconf(_SC_NPROCESSORS_ONLN) : maxProcs;
        delete[] processMask;
    }
    theNumProcs = availableProcs > 0 ? availableProcs : 1; // Fail safety strap
    __TBB_ASSERT( theNumProcs <= sysconf(_SC_NPROCESSORS_ONLN), nullptr);
}
19651c0b2f7Stbbdev 
// Returns the number of processors available to the process (always >= 1).
// The expensive discovery runs exactly once, guarded by atomic_do_once.
int AvailableHwConcurrency() {
    atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info );
    return theNumProcs;
}
20151c0b2f7Stbbdev 
20251c0b2f7Stbbdev /* End of __TBB_USE_OS_AFFINITY_SYSCALL implementation */
20351c0b2f7Stbbdev #elif __ANDROID__
20451c0b2f7Stbbdev 
20551c0b2f7Stbbdev // Work-around for Android that reads the correct number of available CPUs since system calls are unreliable.
20651c0b2f7Stbbdev // Format of "present" file is: ([<int>-<int>|<int>],)+
// Work-around for Android that reads the correct number of available CPUs since system calls are unreliable.
// Format of "present" file is: ([<int>-<int>|<int>],)+
// Returns at least 1; falls back to 1 when the file cannot be opened or parsed.
int AvailableHwConcurrency() {
    FILE *fp = fopen("/sys/devices/system/cpu/present", "r");
    if (fp == nullptr) return 1;
    int num_args, lower, upper, num_cpus=0;
    // %d matches the int arguments ("%u" with int* is undefined behavior).
    while ((num_args = fscanf(fp, "%d-%d", &lower, &upper)) != EOF) {
        switch(num_args) {
            case 2: num_cpus += upper - lower + 1; break;
            case 1: num_cpus += 1; break;
            default:
                // Matching failure: the stream position cannot advance past
                // unexpected content, so stop instead of spinning forever.
                fclose(fp);
                return (num_cpus > 0) ? num_cpus : 1;
        }
        fscanf(fp, ",");  // skip the separator between ranges, if any
    }
    fclose(fp);
    return (num_cpus > 0) ? num_cpus : 1;
}
22151c0b2f7Stbbdev 
22251c0b2f7Stbbdev #elif defined(_SC_NPROCESSORS_ONLN)
22351c0b2f7Stbbdev 
// Generic POSIX fallback (no affinity syscalls available): report the number
// of processors currently online, never less than one.
int AvailableHwConcurrency() {
    const long online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
    return online_cpus > 0 ? static_cast<int>(online_cpus) : 1;
}
22851c0b2f7Stbbdev 
22951c0b2f7Stbbdev #elif _WIN32||_WIN64
23051c0b2f7Stbbdev 
23151c0b2f7Stbbdev static std::atomic<do_once_state> hardware_concurrency_info;
23251c0b2f7Stbbdev 
23351c0b2f7Stbbdev static const WORD TBB_ALL_PROCESSOR_GROUPS = 0xffff;
23451c0b2f7Stbbdev 
23551c0b2f7Stbbdev // Statically allocate an array for processor group information.
23651c0b2f7Stbbdev // Windows 7 supports maximum 4 groups, but let's look ahead a little.
23751c0b2f7Stbbdev static const WORD MaxProcessorGroups = 64;
23851c0b2f7Stbbdev 
//! Per-group bookkeeping for Windows processor groups.
struct ProcessorGroupInfo {
    DWORD_PTR   mask;                   ///< Affinity mask covering the whole group
    int         numProcs;               ///< Number of processors in the group
    int         numProcsRunningTotal;   ///< Subtotal of processors in this and preceding groups

    //! Total number of processor groups in the system
    static int NumGroups;

    //! Index of the group with a slot reserved for the first external thread
    /** In the context of multiple processor groups support current implementation
        defines "the first external thread" as the first thread to invoke
        AvailableHwConcurrency().

        TODO:   Implement a dynamic scheme remapping workers depending on the pending
                external threads affinity. **/
    static int HoleIndex;
};
25651c0b2f7Stbbdev 
25751c0b2f7Stbbdev int ProcessorGroupInfo::NumGroups = 1;
25851c0b2f7Stbbdev int ProcessorGroupInfo::HoleIndex = 0;
25951c0b2f7Stbbdev 
26051c0b2f7Stbbdev ProcessorGroupInfo theProcessorGroups[MaxProcessorGroups];
2610a521127SAnuya Welling int calculate_numa[MaxProcessorGroups];  //Array needed for FindProcessorGroupIndex to calculate Processor Group when number of threads > number of cores to distribute threads evenly between processor groups
2620a521127SAnuya Welling int numaSum;
26351c0b2f7Stbbdev struct TBB_GROUP_AFFINITY {
26451c0b2f7Stbbdev     DWORD_PTR Mask;
26551c0b2f7Stbbdev     WORD   Group;
26651c0b2f7Stbbdev     WORD   Reserved[3];
26751c0b2f7Stbbdev };
26851c0b2f7Stbbdev 
26957f524caSIlya Isaev static DWORD (WINAPI *TBB_GetActiveProcessorCount)( WORD groupIndex ) = nullptr;
27057f524caSIlya Isaev static WORD (WINAPI *TBB_GetActiveProcessorGroupCount)() = nullptr;
27151c0b2f7Stbbdev static BOOL (WINAPI *TBB_SetThreadGroupAffinity)( HANDLE hThread,
27251c0b2f7Stbbdev                         const TBB_GROUP_AFFINITY* newAff, TBB_GROUP_AFFINITY *prevAff );
27351c0b2f7Stbbdev static BOOL (WINAPI *TBB_GetThreadGroupAffinity)( HANDLE hThread, TBB_GROUP_AFFINITY* );
27451c0b2f7Stbbdev 
27551c0b2f7Stbbdev static const dynamic_link_descriptor ProcessorGroupsApiLinkTable[] = {
27651c0b2f7Stbbdev       DLD(GetActiveProcessorCount, TBB_GetActiveProcessorCount)
27751c0b2f7Stbbdev     , DLD(GetActiveProcessorGroupCount, TBB_GetActiveProcessorGroupCount)
27851c0b2f7Stbbdev     , DLD(SetThreadGroupAffinity, TBB_SetThreadGroupAffinity)
27951c0b2f7Stbbdev     , DLD(GetThreadGroupAffinity, TBB_GetThreadGroupAffinity)
28051c0b2f7Stbbdev };
28151c0b2f7Stbbdev 
// Windows implementation: populates theProcessorGroups (per-group processor
// counts, masks, and running totals) and, when multiple groups exist, the
// calculate_numa thresholds used by FindProcessorGroupIndex to distribute
// oversubscribed threads across groups.
static void initialize_hardware_concurrency_info () {
    suppress_unused_warning(TBB_ALL_PROCESSOR_GROUPS);
#if __TBB_WIN8UI_SUPPORT
    // For these applications processor groups info is unavailable
    // Setting up a number of processors for one processor group
    theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = std::thread::hardware_concurrency();
#else /* __TBB_WIN8UI_SUPPORT */
    // Processor-groups entry points are resolved dynamically; they may be
    // absent, in which case the single-group path below is used.
    dynamic_link( "Kernel32.dll", ProcessorGroupsApiLinkTable,
                  sizeof(ProcessorGroupsApiLinkTable)/sizeof(dynamic_link_descriptor) );
    SYSTEM_INFO si;
    GetNativeSystemInfo(&si);
    DWORD_PTR pam, sam, m = 1;
    GetProcessAffinityMask( GetCurrentProcess(), &pam, &sam );
    // Count the bits set in the process affinity mask.
    int nproc = 0;
    for ( std::size_t i = 0; i < sizeof(DWORD_PTR) * CHAR_BIT; ++i, m <<= 1 ) {
        if ( pam & m )
            ++nproc;
    }
    __TBB_ASSERT( nproc <= (int)si.dwNumberOfProcessors, nullptr);
    // By default setting up a number of processors for one processor group
    theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = nproc;
    // Setting up processor groups in case the process does not restrict affinity mask and more than one processor group is present
    if ( nproc == (int)si.dwNumberOfProcessors && TBB_GetActiveProcessorCount ) {
        // The process does not have restricting affinity mask and multiple processor groups are possible
        ProcessorGroupInfo::NumGroups = (int)TBB_GetActiveProcessorGroupCount();
        __TBB_ASSERT( ProcessorGroupInfo::NumGroups <= MaxProcessorGroups, nullptr);
        // Fail safety bootstrap. Release versions will limit available concurrency
        // level, while debug ones would assert.
        if ( ProcessorGroupInfo::NumGroups > MaxProcessorGroups )
            ProcessorGroupInfo::NumGroups = MaxProcessorGroups;
        if ( ProcessorGroupInfo::NumGroups > 1 ) {
            TBB_GROUP_AFFINITY ga;
            // Remember which group the current (first external) thread runs in.
            if ( TBB_GetThreadGroupAffinity( GetCurrentThread(), &ga ) )
                ProcessorGroupInfo::HoleIndex = ga.Group;
            int nprocs = 0;
            int min_procs = INT_MAX;
            for ( WORD i = 0; i < ProcessorGroupInfo::NumGroups; ++i ) {
                ProcessorGroupInfo  &pgi = theProcessorGroups[i];
                pgi.numProcs = (int)TBB_GetActiveProcessorCount(i);
                if (pgi.numProcs < min_procs) min_procs = pgi.numProcs;  //Finding the minimum number of processors in the Processor Groups
                calculate_numa[i] = pgi.numProcs;
                __TBB_ASSERT( pgi.numProcs <= (int)sizeof(DWORD_PTR) * CHAR_BIT, nullptr);
                // All-ones mask for the group's processors; special-cased to
                // avoid shifting by the full width of DWORD_PTR.
                pgi.mask = pgi.numProcs == sizeof(DWORD_PTR) * CHAR_BIT ? ~(DWORD_PTR)0 : (DWORD_PTR(1) << pgi.numProcs) - 1;
                pgi.numProcsRunningTotal = nprocs += pgi.numProcs;
            }
            __TBB_ASSERT( nprocs == (int)TBB_GetActiveProcessorCount( TBB_ALL_PROCESSOR_GROUPS ), nullptr);

            // Convert per-group counts into cumulative thresholds (scaled by
            // the smallest group size) used to spread excess threads evenly.
            calculate_numa[0] = (calculate_numa[0] / min_procs)-1;
            for (WORD i = 1; i < ProcessorGroupInfo::NumGroups; ++i) {
                calculate_numa[i] = calculate_numa[i-1] + (calculate_numa[i] / min_procs);
            }

            numaSum = calculate_numa[ProcessorGroupInfo::NumGroups - 1];

        }

    }
#endif /* __TBB_WIN8UI_SUPPORT */

    PrintExtraVersionInfo("Processor groups", "%d", ProcessorGroupInfo::NumGroups);
    if (ProcessorGroupInfo::NumGroups>1)
        for (int i=0; i<ProcessorGroupInfo::NumGroups; ++i)
            PrintExtraVersionInfo( "----- Group", "%d: size %d", i, theProcessorGroups[i].numProcs);
}
34651c0b2f7Stbbdev 
// Returns the number of Windows processor groups discovered by
// initialize_hardware_concurrency_info; must not be called before
// AvailableHwConcurrency has triggered that initialization.
int NumberOfProcessorGroups() {
    __TBB_ASSERT( hardware_concurrency_info == do_once_state::initialized, "NumberOfProcessorGroups is used before AvailableHwConcurrency" );
    return ProcessorGroupInfo::NumGroups;
}
35151c0b2f7Stbbdev 
// Maps a logical processor index to the processor group a thread with that
// index should run in. Threads are assigned starting from the group holding
// the first external thread (HoleIndex), then round-robin through the
// remaining groups; indices beyond the total processor count are spread
// across groups proportionally via the calculate_numa thresholds.
int FindProcessorGroupIndex ( int procIdx ) {
    int current_grp_idx = ProcessorGroupInfo::HoleIndex;
    // Case 1: procIdx falls past the hole group but within the machine's
    // total processor count — walk groups cyclically until it is consumed.
    if (procIdx >= theProcessorGroups[current_grp_idx].numProcs  && procIdx < theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal) {
        procIdx = procIdx - theProcessorGroups[current_grp_idx].numProcs;
        do {
            current_grp_idx = (current_grp_idx + 1) % (ProcessorGroupInfo::NumGroups);
            procIdx = procIdx - theProcessorGroups[current_grp_idx].numProcs;

        } while (procIdx >= 0);
    }
    // Case 2: oversubscription — more threads than processors. Wrap the excess
    // index into [0, numaSum] and pick the group whose cumulative threshold
    // (calculate_numa, built in initialize_hardware_concurrency_info) covers it,
    // so extra threads are distributed evenly between groups.
    else if (procIdx >= theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal) {
        int temp_grp_index = 0;
        procIdx = procIdx - theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal;
        procIdx = procIdx % (numaSum+1);  //ProcIdx to stay between 0 and numaSum

        while (procIdx - calculate_numa[temp_grp_index] > 0) {
            temp_grp_index = (temp_grp_index + 1) % ProcessorGroupInfo::NumGroups;
        }
        current_grp_idx = temp_grp_index;
    }
    // Otherwise (procIdx fits in the hole group) current_grp_idx stays HoleIndex.
    __TBB_ASSERT(current_grp_idx < ProcessorGroupInfo::NumGroups, nullptr);

    return current_grp_idx;
}
37651c0b2f7Stbbdev 
37751c0b2f7Stbbdev void MoveThreadIntoProcessorGroup( void* hThread, int groupIndex ) {
37851c0b2f7Stbbdev     __TBB_ASSERT( hardware_concurrency_info == do_once_state::initialized, "MoveThreadIntoProcessorGroup is used before AvailableHwConcurrency" );
37951c0b2f7Stbbdev     if ( !TBB_SetThreadGroupAffinity )
38051c0b2f7Stbbdev         return;
38151c0b2f7Stbbdev     TBB_GROUP_AFFINITY ga = { theProcessorGroups[groupIndex].mask, (WORD)groupIndex, {0,0,0} };
38257f524caSIlya Isaev     TBB_SetThreadGroupAffinity( hThread, &ga, nullptr);
38351c0b2f7Stbbdev }
38451c0b2f7Stbbdev 
// Returns the total processor count across all groups (the running total of
// the last group). Initialization runs exactly once via atomic_do_once.
int AvailableHwConcurrency() {
    atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info );
    return theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal;
}
38951c0b2f7Stbbdev 
39051c0b2f7Stbbdev /* End of _WIN32||_WIN64 implementation */
39151c0b2f7Stbbdev #else
39251c0b2f7Stbbdev     #error AvailableHwConcurrency is not implemented for this OS
39351c0b2f7Stbbdev #endif
39451c0b2f7Stbbdev 
39551c0b2f7Stbbdev } // namespace r1
39651c0b2f7Stbbdev } // namespace detail
39751c0b2f7Stbbdev } // namespace tbb
39851c0b2f7Stbbdev 
39951c0b2f7Stbbdev #endif /* !__TBB_HardwareConcurrency */
400