1 /*
2  * kmp_affinity.cpp -- affinity management
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 //                     The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include "kmp.h"
17 #include "kmp_i18n.h"
18 #include "kmp_io.h"
19 #include "kmp_str.h"
20 #include "kmp_wrapper_getpid.h"
21 #include "kmp_affinity.h"
22 
23 // Store the real or imagined machine hierarchy here
24 static hierarchy_info machine_hierarchy;
25 
26 void __kmp_cleanup_hierarchy() {
27     machine_hierarchy.fini();
28 }
29 
30 void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
31     kmp_uint32 depth;
    // The test below is true if affinity is available, but set to "none".
    // We still need to initialize the hierarchy on first use of the hierarchical barrier.
33     if (TCR_1(machine_hierarchy.uninitialized))
34         machine_hierarchy.init(NULL, nproc);
35 
    // Adjust the hierarchy in case the number of threads exceeds the original maximum
37     if (nproc > machine_hierarchy.base_num_threads)
38         machine_hierarchy.resize(nproc);
39 
40     depth = machine_hierarchy.depth;
41     KMP_DEBUG_ASSERT(depth > 0);
42 
43     thr_bar->depth = depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
45     thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
46 }
47 
48 #if KMP_AFFINITY_SUPPORTED
49 
50 bool KMPAffinity::picked_api = false;
51 
52 void* KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
53 void* KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
54 void KMPAffinity::Mask::operator delete(void* p) { __kmp_free(p); }
55 void KMPAffinity::Mask::operator delete[](void* p) { __kmp_free(p); }
56 void* KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
57 void KMPAffinity::operator delete(void* p) { __kmp_free(p); }
58 
59 void KMPAffinity::pick_api() {
60     KMPAffinity* affinity_dispatch;
61     if (picked_api)
62         return;
63 #if KMP_USE_HWLOC
64     if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
65         affinity_dispatch = new KMPHwlocAffinity();
66     } else
67 #endif
68     {
69         affinity_dispatch = new KMPNativeAffinity();
70     }
71     __kmp_affinity_dispatch = affinity_dispatch;
72     picked_api = true;
73 }
74 
75 void KMPAffinity::destroy_api() {
76     if (__kmp_affinity_dispatch != NULL) {
77         delete __kmp_affinity_dispatch;
78         __kmp_affinity_dispatch = NULL;
79         picked_api = false;
80     }
81 }
82 
83 //
84 // Print the affinity mask to the character array in a pretty format.
85 //
86 char *
87 __kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
88 {
89     KMP_ASSERT(buf_len >= 40);
90     char *scan = buf;
91     char *end = buf + buf_len - 1;
92 
93     //
94     // Find first element / check for empty set.
95     //
96     size_t i;
97     i = mask->begin();
98     if (i == mask->end()) {
99         KMP_SNPRINTF(scan, end-scan+1, "{<empty>}");
100         while (*scan != '\0') scan++;
101         KMP_ASSERT(scan <= end);
102         return buf;
103     }
104 
105     KMP_SNPRINTF(scan, end-scan+1, "{%ld", (long)i);
106     while (*scan != '\0') scan++;
107     i++;
108     for (; i != mask->end(); i = mask->next(i)) {
109         if (! KMP_CPU_ISSET(i, mask)) {
110             continue;
111         }
112 
113         //
114         // Check for buffer overflow.  A string of the form ",<n>" will have
115         // at most 10 characters, plus we want to leave room to print ",...}"
116         // if the set is too large to print for a total of 15 characters.
117         // We already left room for '\0' in setting end.
118         //
119         if (end - scan < 15) {
120            break;
121         }
122         KMP_SNPRINTF(scan, end-scan+1, ",%-ld", (long)i);
123         while (*scan != '\0') scan++;
124     }
125     if (i != mask->end()) {
        KMP_SNPRINTF(scan, end-scan+1, ",...");
127         while (*scan != '\0') scan++;
128     }
129     KMP_SNPRINTF(scan, end-scan+1, "}");
130     while (*scan != '\0') scan++;
131     KMP_ASSERT(scan <= end);
132     return buf;
133 }
134 
135 
136 void
137 __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
138 {
139     KMP_CPU_ZERO(mask);
140 
141 # if KMP_GROUP_AFFINITY
142 
143     if (__kmp_num_proc_groups > 1) {
144         int group;
145         KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
146         for (group = 0; group < __kmp_num_proc_groups; group++) {
147             int i;
148             int num = __kmp_GetActiveProcessorCount(group);
149             for (i = 0; i < num; i++) {
150                 KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
151             }
152         }
153     }
154     else
155 
156 # endif /* KMP_GROUP_AFFINITY */
157 
158     {
159         int proc;
160         for (proc = 0; proc < __kmp_xproc; proc++) {
161             KMP_CPU_SET(proc, mask);
162         }
163     }
164 }
165 
166 //
167 // When sorting by labels, __kmp_affinity_assign_child_nums() must first be
168 // called to renumber the labels from [0..n] and place them into the child_num
169 // vector of the address object.  This is done in case the labels used for
170 // the children at one node of the hierarchy differ from those used for
171 // another node at the same level.  Example:  suppose the machine has 2 nodes
172 // with 2 packages each.  The first node contains packages 601 and 602, and
173 // second node contains packages 603 and 604.  If we try to sort the table
174 // for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
175 // because we are paying attention to the labels themselves, not the ordinal
176 // child numbers.  By using the child numbers in the sort, the result is
177 // {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
178 //
179 static void
180 __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
181   int numAddrs)
182 {
183     KMP_DEBUG_ASSERT(numAddrs > 0);
184     int depth = address2os->first.depth;
185     unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
186     unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
187       * sizeof(unsigned));
188     int labCt;
189     for (labCt = 0; labCt < depth; labCt++) {
190         address2os[0].first.childNums[labCt] = counts[labCt] = 0;
191         lastLabel[labCt] = address2os[0].first.labels[labCt];
192     }
193     int i;
194     for (i = 1; i < numAddrs; i++) {
195         for (labCt = 0; labCt < depth; labCt++) {
196             if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
197                 int labCt2;
198                 for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
199                     counts[labCt2] = 0;
200                     lastLabel[labCt2] = address2os[i].first.labels[labCt2];
201                 }
202                 counts[labCt]++;
203                 lastLabel[labCt] = address2os[i].first.labels[labCt];
204                 break;
205             }
206         }
207         for (labCt = 0; labCt < depth; labCt++) {
208             address2os[i].first.childNums[labCt] = counts[labCt];
209         }
210         for (; labCt < (int)Address::maxDepth; labCt++) {
211             address2os[i].first.childNums[labCt] = 0;
212         }
213     }
214     __kmp_free(lastLabel);
215     __kmp_free(counts);
216 }
217 
218 
219 //
220 // All of the __kmp_affinity_create_*_map() routines should set
221 // __kmp_affinity_masks to a vector of affinity mask objects of length
222 // __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
223 // return the number of levels in the machine topology tree (zero if
224 // __kmp_affinity_type == affinity_none).
225 //
226 // All of the __kmp_affinity_create_*_map() routines should set *__kmp_affin_fullMask
227 // to the affinity mask for the initialization thread.  They need to save and
228 // restore the mask, and it could be needed later, so saving it is just an
229 // optimization to avoid calling kmp_get_system_affinity() again.
230 //
231 kmp_affin_mask_t *__kmp_affin_fullMask = NULL;
232 
233 static int nCoresPerPkg, nPackages;
234 static int __kmp_nThreadsPerCore;
235 #ifndef KMP_DFLT_NTH_CORES
236 static int __kmp_ncores;
237 #endif
238 static int *__kmp_pu_os_idx = NULL;
239 
240 //
241 // __kmp_affinity_uniform_topology() doesn't work when called from
242 // places which support arbitrarily many levels in the machine topology
243 // map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
// or __kmp_affinity_create_x2apicid_map().
245 //
246 inline static bool
247 __kmp_affinity_uniform_topology()
248 {
249     return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
250 }
251 
252 
253 //
254 // Print out the detailed machine topology map, i.e. the physical locations
255 // of each OS proc.
256 //
257 static void
258 __kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
259   int pkgLevel, int coreLevel, int threadLevel)
260 {
261     int proc;
262 
263     KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
264     for (proc = 0; proc < len; proc++) {
265         int level;
266         kmp_str_buf_t buf;
267         __kmp_str_buf_init(&buf);
268         for (level = 0; level < depth; level++) {
269             if (level == threadLevel) {
270                 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
271             }
272             else if (level == coreLevel) {
273                 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
274             }
275             else if (level == pkgLevel) {
276                 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
277             }
278             else if (level > pkgLevel) {
279                 __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
280                   level - pkgLevel - 1);
281             }
282             else {
283                 __kmp_str_buf_print(&buf, "L%d ", level);
284             }
285             __kmp_str_buf_print(&buf, "%d ",
286               address2os[proc].first.labels[level]);
287         }
288         KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
289           buf.str);
290         __kmp_str_buf_free(&buf);
291     }
292 }
293 
294 #if KMP_USE_HWLOC
295 
296 // This function removes the topology levels that are radix 1 and don't offer
// further information about the topology.  The most common example is when
// there is one thread context per core: we don't want the extra thread-context
// level if it offers no unique labels, so such levels are removed.
300 // return value: the new depth of address2os
301 static int
302 __kmp_affinity_remove_radix_one_levels(AddrUnsPair *address2os, int nActiveThreads, int depth, int* pkgLevel, int* coreLevel, int* threadLevel) {
303     int level;
304     int i;
305     int radix1_detected;
306 
307     for (level = depth-1; level >= 0; --level) {
308         // Always keep the package level
309         if (level == *pkgLevel)
310             continue;
311         // Detect if this level is radix 1
312         radix1_detected = 1;
313         for (i = 1; i < nActiveThreads; ++i) {
314             if (address2os[0].first.labels[level] != address2os[i].first.labels[level]) {
315                 // There are differing label values for this level so it stays
316                 radix1_detected = 0;
317                 break;
318             }
319         }
320         if (!radix1_detected)
321             continue;
322         // Radix 1 was detected
323         if (level == *threadLevel) {
324             // If only one thread per core, then just decrement
325             // the depth which removes the threadlevel from address2os
326             for (i = 0; i < nActiveThreads; ++i) {
327                 address2os[i].first.depth--;
328             }
329             *threadLevel = -1;
330         } else if (level == *coreLevel) {
331             // For core level, we move the thread labels over if they are still
332             // valid (*threadLevel != -1), and also reduce the depth another level
333             for (i = 0; i < nActiveThreads; ++i) {
334                 if (*threadLevel != -1) {
335                     address2os[i].first.labels[*coreLevel] = address2os[i].first.labels[*threadLevel];
336                 }
337                 address2os[i].first.depth--;
338             }
339             *coreLevel = -1;
340         }
341     }
342     return address2os[0].first.depth;
343 }
344 
345 // Returns the number of objects of type 'type' below 'obj' within the topology tree structure.
346 // e.g., if obj is a HWLOC_OBJ_SOCKET object, and type is HWLOC_OBJ_PU, then
347 //  this will return the number of PU's under the SOCKET object.
348 static int
349 __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj, hwloc_obj_type_t type) {
350     int retval = 0;
351     hwloc_obj_t first;
352     for(first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type, obj->logical_index, type, 0);
353         first != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) == obj;
354         first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type, first))
355     {
356         ++retval;
357     }
358     return retval;
359 }
360 
361 static int
362 __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
363   kmp_i18n_id_t *const msg_id)
364 {
365     *address2os = NULL;
366     *msg_id = kmp_i18n_null;
367 
368     //
369     // Save the affinity mask for the current thread.
370     //
371     kmp_affin_mask_t *oldMask;
372     KMP_CPU_ALLOC(oldMask);
373     __kmp_get_system_affinity(oldMask, TRUE);
374 
375     int depth = 3;
376     int pkgLevel = 0;
377     int coreLevel = 1;
378     int threadLevel = 2;
379 
380     if (! KMP_AFFINITY_CAPABLE())
381     {
382         //
        // Hack to try and infer the machine topology using only the data
        // available from hwloc, and __kmp_xproc.
385         //
386         KMP_ASSERT(__kmp_affinity_type == affinity_none);
387 
388         nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0), HWLOC_OBJ_CORE);
389         __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
390         __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
391         nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
392         if (__kmp_affinity_verbose) {
393             KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
394             KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
395             if (__kmp_affinity_uniform_topology()) {
396                 KMP_INFORM(Uniform, "KMP_AFFINITY");
397             } else {
398                 KMP_INFORM(NonUniform, "KMP_AFFINITY");
399             }
400             KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
401               __kmp_nThreadsPerCore, __kmp_ncores);
402         }
403         KMP_CPU_FREE(oldMask);
404         return 0;
405     }
406 
407     //
408     // Allocate the data structure to be returned.
409     //
410     AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
411     __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
412 
413     //
414     // When affinity is off, this routine will still be called to set
415     // __kmp_ncores, as well as __kmp_nThreadsPerCore,
416     // nCoresPerPkg, & nPackages.  Make sure all these vars are set
417     // correctly, and return if affinity is not enabled.
418     //
419 
420     hwloc_obj_t pu;
421     hwloc_obj_t core;
422     hwloc_obj_t socket;
423     int nActiveThreads = 0;
424     int socket_identifier = 0;
425     // re-calculate globals to count only accessible resources
426     __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
427     for(socket = hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0);
428         socket != NULL;
429         socket = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, socket),
430         socket_identifier++)
431     {
432         int core_identifier = 0;
433         int num_active_cores = 0;
434         for(core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type, socket->logical_index, HWLOC_OBJ_CORE, 0);
435             core != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type, core) == socket;
436             core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, core),
437             core_identifier++)
438         {
439             int pu_identifier = 0;
440             int num_active_threads = 0;
441             for(pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type, core->logical_index, HWLOC_OBJ_PU, 0);
442                 pu != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type, pu) == core;
443                 pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU, pu),
444                 pu_identifier++)
445             {
446                 Address addr(3);
447                 if(! KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
448                     continue;         // skip inactive (inaccessible) unit
449                 KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
450                     socket->os_index, socket->logical_index, core->os_index, core->logical_index, pu->os_index,pu->logical_index));
451                 addr.labels[0] = socket_identifier; // package
452                 addr.labels[1] = core_identifier; // core
453                 addr.labels[2] = pu_identifier; // pu
454                 retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
455                 __kmp_pu_os_idx[nActiveThreads] = pu->os_index; // keep os index for each active pu
456                 nActiveThreads++;
457                 ++num_active_threads; // count active threads per core
458             }
459             if (num_active_threads) { // were there any active threads on the core?
460                 ++__kmp_ncores;       // count total active cores
461                 ++num_active_cores;   // count active cores per socket
462                 if (num_active_threads > __kmp_nThreadsPerCore)
463                     __kmp_nThreadsPerCore = num_active_threads; // calc maximum
464             }
465         }
466         if (num_active_cores) {       // were there any active cores on the socket?
467             ++nPackages;              // count total active packages
468             if (num_active_cores > nCoresPerPkg)
469                 nCoresPerPkg = num_active_cores; // calc maximum
470         }
471     }
472 
473     //
474     // If there's only one thread context to bind to, return now.
475     //
476     KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
477     KMP_ASSERT(nActiveThreads > 0);
478     if (nActiveThreads == 1) {
479         __kmp_ncores = nPackages = 1;
480         __kmp_nThreadsPerCore = nCoresPerPkg = 1;
481         if (__kmp_affinity_verbose) {
482             char buf[KMP_AFFIN_MASK_PRINT_LEN];
483             __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
484 
485             KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
486             if (__kmp_affinity_respect_mask) {
487                 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
488             } else {
489                 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
490             }
491             KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
492             KMP_INFORM(Uniform, "KMP_AFFINITY");
493             KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
494               __kmp_nThreadsPerCore, __kmp_ncores);
495         }
496 
497         if (__kmp_affinity_type == affinity_none) {
498             __kmp_free(retval);
499             KMP_CPU_FREE(oldMask);
500             return 0;
501         }
502 
503         //
504         // Form an Address object which only includes the package level.
505         //
506         Address addr(1);
507         addr.labels[0] = retval[0].first.labels[pkgLevel];
508         retval[0].first = addr;
509 
510         if (__kmp_affinity_gran_levels < 0) {
511             __kmp_affinity_gran_levels = 0;
512         }
513 
514         if (__kmp_affinity_verbose) {
515             __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
516         }
517 
518         *address2os = retval;
519         KMP_CPU_FREE(oldMask);
520         return 1;
521     }
522 
523     //
524     // Sort the table by physical Id.
525     //
526     qsort(retval, nActiveThreads, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
527 
528     //
529     // Check to see if the machine topology is uniform
530     //
531     unsigned uniform = (nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads);
532 
533     //
534     // Print the machine topology summary.
535     //
536     if (__kmp_affinity_verbose) {
537         char mask[KMP_AFFIN_MASK_PRINT_LEN];
538         __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
539 
540         KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
541         if (__kmp_affinity_respect_mask) {
542             KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
543         } else {
544             KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
545         }
546         KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
547         if (uniform) {
548             KMP_INFORM(Uniform, "KMP_AFFINITY");
549         } else {
550             KMP_INFORM(NonUniform, "KMP_AFFINITY");
551         }
552 
553         kmp_str_buf_t buf;
554         __kmp_str_buf_init(&buf);
555 
556         __kmp_str_buf_print(&buf, "%d", nPackages);
        //for (level = 1; level <= pkgLevel; level++) {
        //    __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
        //}
560         KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
561           __kmp_nThreadsPerCore, __kmp_ncores);
562 
563         __kmp_str_buf_free(&buf);
564     }
565 
566     if (__kmp_affinity_type == affinity_none) {
567         __kmp_free(retval);
568         KMP_CPU_FREE(oldMask);
569         return 0;
570     }
571 
572     //
    // Find any levels with radix 1, and remove them from the map
574     // (except for the package level).
575     //
576     depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth, &pkgLevel, &coreLevel, &threadLevel);
577 
578     if (__kmp_affinity_gran_levels < 0) {
579         //
580         // Set the granularity level based on what levels are modeled
581         // in the machine topology map.
582         //
583         __kmp_affinity_gran_levels = 0;
584         if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
585             __kmp_affinity_gran_levels++;
586         }
587         if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
588             __kmp_affinity_gran_levels++;
589         }
590         if (__kmp_affinity_gran > affinity_gran_package) {
591             __kmp_affinity_gran_levels++;
592         }
593     }
594 
595     if (__kmp_affinity_verbose) {
596         __kmp_affinity_print_topology(retval, nActiveThreads, depth, pkgLevel,
597           coreLevel, threadLevel);
598     }
599 
600     KMP_CPU_FREE(oldMask);
601     *address2os = retval;
602     return depth;
603 }
604 #endif // KMP_USE_HWLOC
605 
606 //
607 // If we don't know how to retrieve the machine's processor topology, or
608 // encounter an error in doing so, this routine is called to form a "flat"
609 // mapping of os thread id's <-> processor id's.
610 //
611 static int
612 __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
613   kmp_i18n_id_t *const msg_id)
614 {
615     *address2os = NULL;
616     *msg_id = kmp_i18n_null;
617 
618     //
619     // Even if __kmp_affinity_type == affinity_none, this routine might still
    // be called to set __kmp_ncores, as well as
621     // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
622     //
623     if (! KMP_AFFINITY_CAPABLE()) {
624         KMP_ASSERT(__kmp_affinity_type == affinity_none);
625         __kmp_ncores = nPackages = __kmp_xproc;
626         __kmp_nThreadsPerCore = nCoresPerPkg = 1;
627         if (__kmp_affinity_verbose) {
628             KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
629             KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
630             KMP_INFORM(Uniform, "KMP_AFFINITY");
631             KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
632               __kmp_nThreadsPerCore, __kmp_ncores);
633         }
634         return 0;
635     }
636 
637     //
638     // When affinity is off, this routine will still be called to set
639     // __kmp_ncores, as well as __kmp_nThreadsPerCore,
640     // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
642     //
643     __kmp_ncores = nPackages = __kmp_avail_proc;
644     __kmp_nThreadsPerCore = nCoresPerPkg = 1;
645     if (__kmp_affinity_verbose) {
646         char buf[KMP_AFFIN_MASK_PRINT_LEN];
647         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, __kmp_affin_fullMask);
648 
649         KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
650         if (__kmp_affinity_respect_mask) {
651             KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
652         } else {
653             KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
654         }
655         KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
656         KMP_INFORM(Uniform, "KMP_AFFINITY");
657         KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
658           __kmp_nThreadsPerCore, __kmp_ncores);
659     }
660     KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
661     __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
662     if (__kmp_affinity_type == affinity_none) {
663         int avail_ct = 0;
664         int i;
665         KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
666             if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask))
667                 continue;
668             __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
669         }
670         return 0;
671     }
672 
673     //
    // Construct the data structure to be returned.
675     //
676     *address2os = (AddrUnsPair*)
677       __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
678     int avail_ct = 0;
679     unsigned int i;
680     KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
681         //
682         // Skip this proc if it is not included in the machine model.
683         //
684         if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
685             continue;
686         }
687         __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
688         Address addr(1);
689         addr.labels[0] = i;
690         (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
691     }
692     if (__kmp_affinity_verbose) {
693         KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
694     }
695 
696     if (__kmp_affinity_gran_levels < 0) {
697         //
698         // Only the package level is modeled in the machine topology map,
699         // so the #levels of granularity is either 0 or 1.
700         //
701         if (__kmp_affinity_gran > affinity_gran_package) {
702             __kmp_affinity_gran_levels = 1;
703         }
704         else {
705             __kmp_affinity_gran_levels = 0;
706         }
707     }
708     return 1;
709 }
710 
711 
712 # if KMP_GROUP_AFFINITY
713 
714 //
715 // If multiple Windows* OS processor groups exist, we can create a 2-level
716 // topology map with the groups at level 0 and the individual procs at
717 // level 1.
718 //
719 // This facilitates letting the threads float among all procs in a group,
720 // if granularity=group (the default when there are multiple groups).
721 //
722 static int
723 __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
724   kmp_i18n_id_t *const msg_id)
725 {
726     *address2os = NULL;
727     *msg_id = kmp_i18n_null;
728 
729     //
730     // If we don't have multiple processor groups, return now.
731     // The flat mapping will be used.
732     //
733     if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(__kmp_affin_fullMask) >= 0)) {
734         // FIXME set *msg_id
735         return -1;
736     }
737 
738     //
    // Construct the data structure to be returned.
740     //
741     *address2os = (AddrUnsPair*)
742       __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
743     KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
744     __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
745     int avail_ct = 0;
746     int i;
747     KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
748         //
749         // Skip this proc if it is not included in the machine model.
750         //
751         if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
752             continue;
753         }
754         __kmp_pu_os_idx[avail_ct] = i;  // suppose indices are flat
755         Address addr(2);
756         addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
757         addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
758         (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
759 
760         if (__kmp_affinity_verbose) {
761             KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
762               addr.labels[1]);
763         }
764     }
765 
766     if (__kmp_affinity_gran_levels < 0) {
767         if (__kmp_affinity_gran == affinity_gran_group) {
768             __kmp_affinity_gran_levels = 1;
769         }
770         else if ((__kmp_affinity_gran == affinity_gran_fine)
771           || (__kmp_affinity_gran == affinity_gran_thread)) {
772             __kmp_affinity_gran_levels = 0;
773         }
774         else {
775             const char *gran_str = NULL;
776             if (__kmp_affinity_gran == affinity_gran_core) {
777                 gran_str = "core";
778             }
779             else if (__kmp_affinity_gran == affinity_gran_package) {
780                 gran_str = "package";
781             }
782             else if (__kmp_affinity_gran == affinity_gran_node) {
783                 gran_str = "node";
784             }
785             else {
786                 KMP_ASSERT(0);
787             }
788 
            // Warning: can't use the requested affinity granularity
            // (gran_str) with the group topology method; "thread"
            // granularity is used instead.
790             __kmp_affinity_gran_levels = 0;
791         }
792     }
793     return 2;
794 }
795 
796 # endif /* KMP_GROUP_AFFINITY */
797 
798 
799 # if KMP_ARCH_X86 || KMP_ARCH_X86_64
800 
801 static int
802 __kmp_cpuid_mask_width(int count) {
803     int r = 0;
804 
805     while((1<<r) < count)
806         ++r;
807     return r;
808 }
809 
810 
811 class apicThreadInfo {
812 public:
813     unsigned osId;              // param to __kmp_affinity_bind_thread
814     unsigned apicId;            // from cpuid after binding
815     unsigned maxCoresPerPkg;    //      ""
816     unsigned maxThreadsPerPkg;  //      ""
817     unsigned pkgId;             // inferred from above values
818     unsigned coreId;            //      ""
819     unsigned threadId;          //      ""
820 };
821 
822 
823 static int
824 __kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
825 {
826     const apicThreadInfo *aa = (const apicThreadInfo *)a;
827     const apicThreadInfo *bb = (const apicThreadInfo *)b;
828     if (aa->osId < bb->osId) return -1;
829     if (aa->osId > bb->osId) return 1;
830     return 0;
831 }
832 
833 
834 static int
835 __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
836 {
837     const apicThreadInfo *aa = (const apicThreadInfo *)a;
838     const apicThreadInfo *bb = (const apicThreadInfo *)b;
839     if (aa->pkgId < bb->pkgId) return -1;
840     if (aa->pkgId > bb->pkgId) return 1;
841     if (aa->coreId < bb->coreId) return -1;
842     if (aa->coreId > bb->coreId) return 1;
843     if (aa->threadId < bb->threadId) return -1;
844     if (aa->threadId > bb->threadId) return 1;
845     return 0;
846 }
847 
848 
849 //
850 // On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available os threads, binding
// the current thread to each of them in turn, and retrieving the Apic Id
// for each thread context using the cpuid instruction.
854 //
855 static int
856 __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
857   kmp_i18n_id_t *const msg_id)
858 {
859     kmp_cpuid buf;
860     int rc;
861     *address2os = NULL;
862     *msg_id = kmp_i18n_null;
863 
864     //
865     // Check if cpuid leaf 4 is supported.
866     //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 4) {
        *msg_id = kmp_i18n_str_NoLeaf4Support;
        return -1;
    }
872 
873     //
874     // The algorithm used starts by setting the affinity to each available
875     // thread and retrieving info from the cpuid instruction, so if we are
876     // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
878     // the defaults that we calculated from issuing cpuid without binding
879     // to each proc.
880     //
881     if (! KMP_AFFINITY_CAPABLE()) {
882         //
883         // Hack to try and infer the machine topology using only the data
884         // available from cpuid on the current thread, and __kmp_xproc.
885         //
886         KMP_ASSERT(__kmp_affinity_type == affinity_none);
887 
888         //
889         // Get an upper bound on the number of threads per package using
890         // cpuid(1).
891         //
        // On some OS/chip combinations where HT is supported by the chip
893         // but is disabled, this value will be 2 on a single core chip.
894         // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
895         //
896         __kmp_x86_cpuid(1, 0, &buf);
897         int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
898         if (maxThreadsPerPkg == 0) {
899             maxThreadsPerPkg = 1;
900         }
901 
902         //
903         // The num cores per pkg comes from cpuid(4).
904         // 1 must be added to the encoded value.
905         //
        // The author of cpu_count.cpp treated this as only an upper bound
907         // on the number of cores, but I haven't seen any cases where it
908         // was greater than the actual number of cores, so we will treat
909         // it as exact in this block of code.
910         //
911         // First, we need to check if cpuid(4) is supported on this chip.
912         // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
913         // has the value n or greater.
914         //
915         __kmp_x86_cpuid(0, 0, &buf);
916         if (buf.eax >= 4) {
917             __kmp_x86_cpuid(4, 0, &buf);
918             nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
919         }
920         else {
921             nCoresPerPkg = 1;
922         }
923 
924         //
        // There is no way to reliably tell if HT is enabled without issuing
        // the cpuid instruction from every thread, and correlating the cpuid
        // info, so if the machine is not affinity capable, we assume that HT
        // is off.  We have seen quite a few machines where maxThreadsPerPkg
        // is 2, yet the machine does not support HT.
        //
        // - Older OSes are usually found on machines with older chips, which
        //   do not support HT.
        //
        // - The performance penalty for mistakenly identifying a machine as
        //   HT when it isn't (which results in blocktime being incorrectly set
        //   to 0) is greater than the penalty for mistakenly identifying
        //   a machine as being 1 thread/core when it is really HT enabled
        //   (which results in blocktime being incorrectly set to a positive
        //   value).
940         //
941         __kmp_ncores = __kmp_xproc;
942         nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
943         __kmp_nThreadsPerCore = 1;
944         if (__kmp_affinity_verbose) {
945             KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
946             KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
947             if (__kmp_affinity_uniform_topology()) {
948                 KMP_INFORM(Uniform, "KMP_AFFINITY");
949             } else {
950                 KMP_INFORM(NonUniform, "KMP_AFFINITY");
951             }
952             KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
953               __kmp_nThreadsPerCore, __kmp_ncores);
954         }
955         return 0;
956     }
957 
958     //
959     //
960     // From here on, we can assume that it is safe to call
961     // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
962     // even if __kmp_affinity_type = affinity_none.
963     //
964 
965     //
966     // Save the affinity mask for the current thread.
967     //
968     kmp_affin_mask_t *oldMask;
969     KMP_CPU_ALLOC(oldMask);
970     KMP_ASSERT(oldMask != NULL);
971     __kmp_get_system_affinity(oldMask, TRUE);
972 
973     //
974     // Run through each of the available contexts, binding the current thread
975     // to it, and obtaining the pertinent information using the cpuid instr.
976     //
977     // The relevant information is:
978     //
    // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
    //    has a unique Apic Id, which is of the form pkg# : core# : thread#.
    //
    // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1).  The
    //    value of this field determines the width of the core# + thread#
    //    fields in the Apic Id.  It is also an upper bound on the number
    //    of threads per package, but it has been verified that situations
    //    happen where it is not exact.  In particular, on certain OS/chip
    //    combinations where Intel(R) Hyper-Threading Technology is supported
    //    by the chip but has been disabled, the value of this field will be
    //    2 (for a single core chip).  On other OS/chip combinations
    //    supporting Intel(R) Hyper-Threading Technology, the value of this
    //    field will be 1 when it is disabled and 2 when it is enabled.
    //
    // Max Cores Per Pkg:  Bits 26:31 of eax after issuing cpuid(4).  The
    //    value of this field (+1) determines the width of the core# field in
    //    the Apic Id.  The comments in "cpucount.cpp" say that this value is
    //    an upper bound, but the IA-32 architecture manual says that it is
    //    exactly the number of cores per package, and I haven't seen any
    //    case where it wasn't.
1001     //
1002     // From this information, deduce the package Id, core Id, and thread Id,
1003     // and set the corresponding fields in the apicThreadInfo struct.
1004     //
1005     unsigned i;
1006     apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
1007       __kmp_avail_proc * sizeof(apicThreadInfo));
1008     unsigned nApics = 0;
1009     KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
1010         //
1011         // Skip this proc if it is not included in the machine model.
1012         //
1013         if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
1014             continue;
1015         }
1016         KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
1017 
1018         __kmp_affinity_dispatch->bind_thread(i);
1019         threadInfo[nApics].osId = i;
1020 
1021         //
1022         // The apic id and max threads per pkg come from cpuid(1).
1023         //
1024         __kmp_x86_cpuid(1, 0, &buf);
        // Note: edx bit 9 of cpuid(1) is the APIC on-chip feature flag.
        if (!((buf.edx >> 9) & 1)) {
1026             __kmp_set_system_affinity(oldMask, TRUE);
1027             __kmp_free(threadInfo);
1028             KMP_CPU_FREE(oldMask);
1029             *msg_id = kmp_i18n_str_ApicNotPresent;
1030             return -1;
1031         }
1032         threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
1033         threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
1034         if (threadInfo[nApics].maxThreadsPerPkg == 0) {
1035             threadInfo[nApics].maxThreadsPerPkg = 1;
1036         }
1037 
1038         //
1039         // Max cores per pkg comes from cpuid(4).
1040         // 1 must be added to the encoded value.
1041         //
1042         // First, we need to check if cpuid(4) is supported on this chip.
1043         // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
1044         // has the value n or greater.
1045         //
1046         __kmp_x86_cpuid(0, 0, &buf);
1047         if (buf.eax >= 4) {
1048             __kmp_x86_cpuid(4, 0, &buf);
1049             threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
1050         }
1051         else {
1052             threadInfo[nApics].maxCoresPerPkg = 1;
1053         }
1054 
1055         //
1056         // Infer the pkgId / coreId / threadId using only the info
1057         // obtained locally.
1058         //
1059         int widthCT = __kmp_cpuid_mask_width(
1060           threadInfo[nApics].maxThreadsPerPkg);
1061         threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
1062 
1063         int widthC = __kmp_cpuid_mask_width(
1064           threadInfo[nApics].maxCoresPerPkg);
1065         int widthT = widthCT - widthC;
1066         if (widthT < 0) {
1067             //
1068             // I've never seen this one happen, but I suppose it could, if
1069             // the cpuid instruction on a chip was really screwed up.
1070             // Make sure to restore the affinity mask before the tail call.
1071             //
1072             __kmp_set_system_affinity(oldMask, TRUE);
1073             __kmp_free(threadInfo);
1074             KMP_CPU_FREE(oldMask);
1075             *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1076             return -1;
1077         }
1078 
        int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
          & maskC;

        int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;
1085 
1086         nApics++;
1087     }
1088 
1089     //
1090     // We've collected all the info we need.
1091     // Restore the old affinity mask for this thread.
1092     //
1093     __kmp_set_system_affinity(oldMask, TRUE);
1094 
1095     //
1096     // If there's only one thread context to bind to, form an Address object
1097     // with depth 1 and return immediately (or, if affinity is off, set
1098     // address2os to NULL and return).
1099     //
1100     // If it is configured to omit the package level when there is only a
1101     // single package, the logic at the end of this routine won't work if
1102     // there is only a single thread - it would try to form an Address
1103     // object with depth 0.
1104     //
1105     KMP_ASSERT(nApics > 0);
1106     if (nApics == 1) {
1107         __kmp_ncores = nPackages = 1;
1108         __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1109         if (__kmp_affinity_verbose) {
1110             char buf[KMP_AFFIN_MASK_PRINT_LEN];
1111             __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1112 
1113             KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
1114             if (__kmp_affinity_respect_mask) {
1115                 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1116             } else {
1117                 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1118             }
1119             KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1120             KMP_INFORM(Uniform, "KMP_AFFINITY");
1121             KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1122               __kmp_nThreadsPerCore, __kmp_ncores);
1123         }
1124 
1125         if (__kmp_affinity_type == affinity_none) {
1126             __kmp_free(threadInfo);
1127             KMP_CPU_FREE(oldMask);
1128             return 0;
1129         }
1130 
1131         *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
1132         Address addr(1);
1133         addr.labels[0] = threadInfo[0].pkgId;
1134         (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
1135 
1136         if (__kmp_affinity_gran_levels < 0) {
1137             __kmp_affinity_gran_levels = 0;
1138         }
1139 
1140         if (__kmp_affinity_verbose) {
1141             __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
1142         }
1143 
1144         __kmp_free(threadInfo);
1145         KMP_CPU_FREE(oldMask);
1146         return 1;
1147     }
1148 
1149     //
1150     // Sort the threadInfo table by physical Id.
1151     //
1152     qsort(threadInfo, nApics, sizeof(*threadInfo),
1153       __kmp_affinity_cmp_apicThreadInfo_phys_id);
1154 
1155     //
1156     // The table is now sorted by pkgId / coreId / threadId, but we really
1157     // don't know the radix of any of the fields.  pkgId's may be sparsely
1158     // assigned among the chips on a system.  Although coreId's are usually
1159     // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
1160     // [0..threadsPerCore-1], we don't want to make any such assumptions.
1161     //
1162     // For that matter, we don't know what coresPerPkg and threadsPerCore
1163     // (or the total # packages) are at this point - we want to determine
1164     // that now.  We only have an upper bound on the first two figures.
1165     //
1166     // We also perform a consistency check at this point: the values returned
1167     // by the cpuid instruction for any thread bound to a given package had
1168     // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
1169     //
1170     nPackages = 1;
1171     nCoresPerPkg = 1;
1172     __kmp_nThreadsPerCore = 1;
1173     unsigned nCores = 1;
1174 
1175     unsigned pkgCt = 1;                         // to determine radii
1176     unsigned lastPkgId = threadInfo[0].pkgId;
1177     unsigned coreCt = 1;
1178     unsigned lastCoreId = threadInfo[0].coreId;
1179     unsigned threadCt = 1;
1180     unsigned lastThreadId = threadInfo[0].threadId;
1181 
1182                                                 // intra-pkg consist checks
1183     unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
1184     unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
1185 
1186     for (i = 1; i < nApics; i++) {
1187         if (threadInfo[i].pkgId != lastPkgId) {
1188             nCores++;
1189             pkgCt++;
1190             lastPkgId = threadInfo[i].pkgId;
1191             if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
1192             coreCt = 1;
1193             lastCoreId = threadInfo[i].coreId;
1194             if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1195             threadCt = 1;
1196             lastThreadId = threadInfo[i].threadId;
1197 
1198             //
1199             // This is a different package, so go on to the next iteration
1200             // without doing any consistency checks.  Reset the consistency
1201             // check vars, though.
1202             //
1203             prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
1204             prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
1205             continue;
1206         }
1207 
1208         if (threadInfo[i].coreId != lastCoreId) {
1209             nCores++;
1210             coreCt++;
1211             lastCoreId = threadInfo[i].coreId;
1212             if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1213             threadCt = 1;
1214             lastThreadId = threadInfo[i].threadId;
1215         }
1216         else if (threadInfo[i].threadId != lastThreadId) {
1217             threadCt++;
1218             lastThreadId = threadInfo[i].threadId;
1219         }
1220         else {
1221             __kmp_free(threadInfo);
1222             KMP_CPU_FREE(oldMask);
1223             *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
1224             return -1;
1225         }
1226 
1227         //
1228         // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
        // fields agree between all the threads bound to a given package.
1230         //
1231         if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
1232           || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
1233             __kmp_free(threadInfo);
1234             KMP_CPU_FREE(oldMask);
1235             *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1236             return -1;
1237         }
1238     }
1239     nPackages = pkgCt;
1240     if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
1241     if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1242 
1243     //
1244     // When affinity is off, this routine will still be called to set
1245     // __kmp_ncores, as well as __kmp_nThreadsPerCore,
1246     // nCoresPerPkg, & nPackages.  Make sure all these vars are set
1247     // correctly, and return now if affinity is not enabled.
1248     //
1249     __kmp_ncores = nCores;
1250     if (__kmp_affinity_verbose) {
1251         char buf[KMP_AFFIN_MASK_PRINT_LEN];
1252         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1253 
1254         KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
1255         if (__kmp_affinity_respect_mask) {
1256             KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1257         } else {
1258             KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1259         }
1260         KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1261         if (__kmp_affinity_uniform_topology()) {
1262             KMP_INFORM(Uniform, "KMP_AFFINITY");
1263         } else {
1264             KMP_INFORM(NonUniform, "KMP_AFFINITY");
1265         }
1266         KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1267           __kmp_nThreadsPerCore, __kmp_ncores);
1268 
1269     }
1270     KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
1271     KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
1272     __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
1273     for (i = 0; i < nApics; ++i) {
1274         __kmp_pu_os_idx[i] = threadInfo[i].osId;
1275     }
1276     if (__kmp_affinity_type == affinity_none) {
1277         __kmp_free(threadInfo);
1278         KMP_CPU_FREE(oldMask);
1279         return 0;
1280     }
1281 
1282     //
1283     // Now that we've determined the number of packages, the number of cores
1284     // per package, and the number of threads per core, we can construct the
1285     // data structure that is to be returned.
1286     //
1287     int pkgLevel = 0;
1288     int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
1289     int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
1290     unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
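    // E.g. with nCoresPerPkg == 4 and __kmp_nThreadsPerCore == 2, the levels
    // are pkgLevel == 0, coreLevel == 1, threadLevel == 2, and depth == 3;
    // on a single-core chip with HT enabled, coreLevel == -1,
    // threadLevel == 1, and depth == 2.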
1291 
1292     KMP_ASSERT(depth > 0);
1293     *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
1294 
1295     for (i = 0; i < nApics; ++i) {
1296         Address addr(depth);
1297         unsigned os = threadInfo[i].osId;
1298         int d = 0;
1299 
1300         if (pkgLevel >= 0) {
1301             addr.labels[d++] = threadInfo[i].pkgId;
1302         }
1303         if (coreLevel >= 0) {
1304             addr.labels[d++] = threadInfo[i].coreId;
1305         }
1306         if (threadLevel >= 0) {
1307             addr.labels[d++] = threadInfo[i].threadId;
1308         }
1309         (*address2os)[i] = AddrUnsPair(addr, os);
1310     }
1311 
1312     if (__kmp_affinity_gran_levels < 0) {
1313         //
1314         // Set the granularity level based on what levels are modeled
1315         // in the machine topology map.
1316         //
1317         __kmp_affinity_gran_levels = 0;
1318         if ((threadLevel >= 0)
1319           && (__kmp_affinity_gran > affinity_gran_thread)) {
1320             __kmp_affinity_gran_levels++;
1321         }
1322         if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1323             __kmp_affinity_gran_levels++;
1324         }
1325         if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
1326             __kmp_affinity_gran_levels++;
1327         }
1328     }
1329 
1330     if (__kmp_affinity_verbose) {
1331         __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
1332           coreLevel, threadLevel);
1333     }
1334 
1335     __kmp_free(threadInfo);
1336     KMP_CPU_FREE(oldMask);
1337     return depth;
1338 }
1339 
1340 
1341 //
1342 // Intel(R) microarchitecture code name Nehalem, Dunnington and later
1343 // architectures support a newer interface for specifying the x2APIC Ids,
1344 // based on cpuid leaf 11.
1345 //
1346 static int
1347 __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
1348   kmp_i18n_id_t *const msg_id)
1349 {
1350     kmp_cpuid buf;
1351 
1352     *address2os = NULL;
1353     *msg_id = kmp_i18n_null;
1354 
1355     //
1356     // Check to see if cpuid leaf 11 is supported.
1357     //
1358     __kmp_x86_cpuid(0, 0, &buf);
1359     if (buf.eax < 11) {
1360         *msg_id = kmp_i18n_str_NoLeaf11Support;
1361         return -1;
1362     }
1363     __kmp_x86_cpuid(11, 0, &buf);
1364     if (buf.ebx == 0) {
1365         *msg_id = kmp_i18n_str_NoLeaf11Support;
1366         return -1;
1367     }
1368 
1369     //
1370     // Find the number of levels in the machine topology.  While we're at it,
1371     // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg.  We will
1372     // try to get more accurate values later by explicitly counting them,
1373     // but get reasonable defaults now, in case we return early.
1374     //
1375     int level;
1376     int threadLevel = -1;
1377     int coreLevel = -1;
1378     int pkgLevel = -1;
1379     __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
1380 
1381     for (level = 0;; level++) {
1382         if (level > 31) {
1383             //
1384             // FIXME: Hack for DPD200163180
1385             //
1386             // If level is big then something went wrong -> exiting
1387             //
1388             // There could actually be 32 valid levels in the machine topology,
1389             // but so far, the only machine we have seen which does not exit
1390             // this loop before iteration 32 has fubar x2APIC settings.
1391             //
1392             // For now, just reject this case based upon loop trip count.
1393             //
1394             *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1395             return -1;
1396         }
1397         __kmp_x86_cpuid(11, level, &buf);
1398         if (buf.ebx == 0) {
1399             if (pkgLevel < 0) {
1400                 //
1401                 // Will infer nPackages from __kmp_xproc
1402                 //
1403                 pkgLevel = level;
1404                 level++;
1405             }
1406             break;
1407         }
1408         int kind = (buf.ecx >> 8) & 0xff;
1409         if (kind == 1) {
1410             //
1411             // SMT level
1412             //
1413             threadLevel = level;
1414             coreLevel = -1;
1415             pkgLevel = -1;
1416             __kmp_nThreadsPerCore = buf.ebx & 0xffff;
1417             if (__kmp_nThreadsPerCore == 0) {
1418                 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1419                 return -1;
1420             }
1421         }
1422         else if (kind == 2) {
1423             //
1424             // core level
1425             //
1426             coreLevel = level;
1427             pkgLevel = -1;
1428             nCoresPerPkg = buf.ebx & 0xffff;
1429             if (nCoresPerPkg == 0) {
1430                 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1431                 return -1;
1432             }
1433         }
1434         else {
1435             if (level <= 0) {
1436                 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1437                 return -1;
1438             }
1439             if (pkgLevel >= 0) {
1440                 continue;
1441             }
1442             pkgLevel = level;
1443             nPackages = buf.ebx & 0xffff;
1444             if (nPackages == 0) {
1445                 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1446                 return -1;
1447             }
1448         }
1449     }
1450     int depth = level;
1451 
1452     //
1453     // In the above loop, "level" was counted from the finest level (usually
1454     // thread) to the coarsest.  The caller expects that we will place the
1455     // labels in (*address2os)[].first.labels[] in the inverse order, so
1456     // we need to invert the vars saying which level means what.
1457     //
1458     if (threadLevel >= 0) {
1459         threadLevel = depth - threadLevel - 1;
1460     }
1461     if (coreLevel >= 0) {
1462         coreLevel = depth - coreLevel - 1;
1463     }
1464     KMP_DEBUG_ASSERT(pkgLevel >= 0);
1465     pkgLevel = depth - pkgLevel - 1;
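
    //
    // For example (hypothetical values), if the loop above found depth == 3
    // with threadLevel == 0, coreLevel == 1, and pkgLevel == 2, then after
    // this inversion threadLevel == 2, coreLevel == 1, and pkgLevel == 0,
    // matching the coarsest-to-finest order of the labels array.
    //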
1466 
1467     //
1468     // The algorithm used starts by setting the affinity to each available
1469     // thread and retrieving info from the cpuid instruction, so if we are
1470     // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
1472     // the defaults that we calculated from issuing cpuid without binding
1473     // to each proc.
1474     //
1475     if (! KMP_AFFINITY_CAPABLE())
1476     {
1477         //
        // Hack to try to infer the machine topology using only the data
1479         // available from cpuid on the current thread, and __kmp_xproc.
1480         //
1481         KMP_ASSERT(__kmp_affinity_type == affinity_none);
1482 
1483         __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
1484         nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
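        //
        // A quick arithmetic sketch (hypothetical numbers): with
        // __kmp_xproc == 16 and __kmp_nThreadsPerCore == 2, we infer
        // __kmp_ncores == 8; the nPackages line is a ceiling division,
        // via the usual (a + b - 1) / b idiom.
        //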
1485         if (__kmp_affinity_verbose) {
1486             KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
1487             KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1488             if (__kmp_affinity_uniform_topology()) {
1489                 KMP_INFORM(Uniform, "KMP_AFFINITY");
1490             } else {
1491                 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1492             }
1493             KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1494               __kmp_nThreadsPerCore, __kmp_ncores);
1495         }
1496         return 0;
1497     }
1498 
1499     //
1500     //
1501     // From here on, we can assume that it is safe to call
1502     // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
1503     // even if __kmp_affinity_type = affinity_none.
1504     //
1505 
1506     //
1507     // Save the affinity mask for the current thread.
1508     //
1509     kmp_affin_mask_t *oldMask;
1510     KMP_CPU_ALLOC(oldMask);
1511     __kmp_get_system_affinity(oldMask, TRUE);
1512 
1513     //
1514     // Allocate the data structure to be returned.
1515     //
1516     AddrUnsPair *retval = (AddrUnsPair *)
1517       __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
1518 
1519     //
1520     // Run through each of the available contexts, binding the current thread
1521     // to it, and obtaining the pertinent information using the cpuid instr.
1522     //
1523     unsigned int proc;
1524     int nApics = 0;
1525     KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
1526         //
1527         // Skip this proc if it is not included in the machine model.
1528         //
1529         if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
1530             continue;
1531         }
1532         KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
1533 
1534         __kmp_affinity_dispatch->bind_thread(proc);
1535 
1536         //
        // Extract the labels for each level in the machine topology map
        // from the APIC ID.
1539         //
1540         Address addr(depth);
1541         int prev_shift = 0;
1542 
1543         for (level = 0; level < depth; level++) {
1544             __kmp_x86_cpuid(11, level, &buf);
1545             unsigned apicId = buf.edx;
1546             if (buf.ebx == 0) {
1547                 if (level != depth - 1) {
1548                     KMP_CPU_FREE(oldMask);
1549                     *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1550                     return -1;
1551                 }
1552                 addr.labels[depth - level - 1] = apicId >> prev_shift;
1553                 level++;
1554                 break;
1555             }
1556             int shift = buf.eax & 0x1f;
1557             int mask = (1 << shift) - 1;
1558             addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
1559             prev_shift = shift;
1560         }
1561         if (level != depth) {
1562             KMP_CPU_FREE(oldMask);
1563             *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1564             return -1;
1565         }
1566 
1567         retval[nApics] = AddrUnsPair(addr, proc);
1568         nApics++;
1569     }
1570 
1571     //
1572     // We've collected all the info we need.
1573     // Restore the old affinity mask for this thread.
1574     //
1575     __kmp_set_system_affinity(oldMask, TRUE);
1576 
1577     //
1578     // If there's only one thread context to bind to, return now.
1579     //
1580     KMP_ASSERT(nApics > 0);
1581     if (nApics == 1) {
1582         __kmp_ncores = nPackages = 1;
1583         __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1584         if (__kmp_affinity_verbose) {
1585             char buf[KMP_AFFIN_MASK_PRINT_LEN];
1586             __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1587 
1588             KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1589             if (__kmp_affinity_respect_mask) {
1590                 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1591             } else {
1592                 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1593             }
1594             KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1595             KMP_INFORM(Uniform, "KMP_AFFINITY");
1596             KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1597               __kmp_nThreadsPerCore, __kmp_ncores);
1598         }
1599 
1600         if (__kmp_affinity_type == affinity_none) {
1601             __kmp_free(retval);
1602             KMP_CPU_FREE(oldMask);
1603             return 0;
1604         }
1605 
1606         //
1607         // Form an Address object which only includes the package level.
1608         //
1609         Address addr(1);
1610         addr.labels[0] = retval[0].first.labels[pkgLevel];
1611         retval[0].first = addr;
1612 
1613         if (__kmp_affinity_gran_levels < 0) {
1614             __kmp_affinity_gran_levels = 0;
1615         }
1616 
1617         if (__kmp_affinity_verbose) {
1618             __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
1619         }
1620 
1621         *address2os = retval;
1622         KMP_CPU_FREE(oldMask);
1623         return 1;
1624     }
1625 
1626     //
1627     // Sort the table by physical Id.
1628     //
1629     qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
1630 
1631     //
1632     // Find the radix at each of the levels.
1633     //
1634     unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1635     unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1636     unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1637     unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1638     for (level = 0; level < depth; level++) {
1639         totals[level] = 1;
1640         maxCt[level] = 1;
1641         counts[level] = 1;
1642         last[level] = retval[0].first.labels[level];
1643     }
1644 
1645     //
1646     // From here on, the iteration variable "level" runs from the finest
1647     // level to the coarsest, i.e. we iterate forward through
1648     // (*address2os)[].first.labels[] - in the previous loops, we iterated
1649     // backwards.
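    //
    // To summarize the bookkeeping: totals[level] counts the distinct
    // objects seen so far at that level; counts[level] counts the children
    // of the current parent object; maxCt[level] records the largest such
    // child count seen (the radix); and last[level] holds the label of the
    // most recently seen object at that level.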
1650     //
1651     for (proc = 1; (int)proc < nApics; proc++) {
1652         int level;
1653         for (level = 0; level < depth; level++) {
1654             if (retval[proc].first.labels[level] != last[level]) {
1655                 int j;
1656                 for (j = level + 1; j < depth; j++) {
1657                     totals[j]++;
1658                     counts[j] = 1;
                    // The line below (kept commented out) would cause
                    // incorrect topology information to be printed when the
                    // maximum value for some level (maxCt[level]) is
                    // encountered before a smaller value while walking the
                    // array.  For example, if pkg0 has 4 cores and pkg1 has
                    // 2 cores, then maxCt[1] would end up as 2, whereas it
                    // must be 4.
                    // TODO!!! Check whether leaving it commented out is safe.
1665                     //maxCt[j] = 1;
1666                     last[j] = retval[proc].first.labels[j];
1667                 }
1668                 totals[level]++;
1669                 counts[level]++;
1670                 if (counts[level] > maxCt[level]) {
1671                     maxCt[level] = counts[level];
1672                 }
1673                 last[level] = retval[proc].first.labels[level];
1674                 break;
1675             }
1676             else if (level == depth - 1) {
1677                 __kmp_free(last);
1678                 __kmp_free(maxCt);
1679                 __kmp_free(counts);
1680                 __kmp_free(totals);
1681                 __kmp_free(retval);
1682                 KMP_CPU_FREE(oldMask);
1683                 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
1684                 return -1;
1685             }
1686         }
1687     }
1688 
1689     //
1690     // When affinity is off, this routine will still be called to set
1691     // __kmp_ncores, as well as __kmp_nThreadsPerCore,
1692     // nCoresPerPkg, & nPackages.  Make sure all these vars are set
1693     // correctly, and return if affinity is not enabled.
1694     //
1695     if (threadLevel >= 0) {
1696         __kmp_nThreadsPerCore = maxCt[threadLevel];
1697     }
1698     else {
1699         __kmp_nThreadsPerCore = 1;
1700     }
1701     nPackages = totals[pkgLevel];
1702 
1703     if (coreLevel >= 0) {
1704         __kmp_ncores = totals[coreLevel];
1705         nCoresPerPkg = maxCt[coreLevel];
1706     }
1707     else {
1708         __kmp_ncores = nPackages;
1709         nCoresPerPkg = 1;
1710     }
1711 
1712     //
1713     // Check to see if the machine topology is uniform
1714     //
1715     unsigned prod = maxCt[0];
1716     for (level = 1; level < depth; level++) {
1717        prod *= maxCt[level];
1718     }
1719     bool uniform = (prod == totals[level - 1]);
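
    // For instance (hypothetical machine): 2 packages x 4 cores x 2 threads
    // gives maxCt == {2, 4, 2}, so prod == 16, matching the 16 threads at
    // the finest level - uniform.  If pkg1 had only 2 cores, the thread
    // total would be 12 while prod stays 16 - non-uniform.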
1720 
1721     //
1722     // Print the machine topology summary.
1723     //
1724     if (__kmp_affinity_verbose) {
1725         char mask[KMP_AFFIN_MASK_PRINT_LEN];
1726         __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1727 
1728         KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1729         if (__kmp_affinity_respect_mask) {
1730             KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
1731         } else {
1732             KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
1733         }
1734         KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1735         if (uniform) {
1736             KMP_INFORM(Uniform, "KMP_AFFINITY");
1737         } else {
1738             KMP_INFORM(NonUniform, "KMP_AFFINITY");
1739         }
1740 
1741         kmp_str_buf_t buf;
1742         __kmp_str_buf_init(&buf);
1743 
1744         __kmp_str_buf_print(&buf, "%d", totals[0]);
1745         for (level = 1; level <= pkgLevel; level++) {
1746             __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
1747         }
1748         KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
1749           __kmp_nThreadsPerCore, __kmp_ncores);
1750 
1751         __kmp_str_buf_free(&buf);
1752     }
1753     KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
1754     KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
1755     __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
1756     for (proc = 0; (int)proc < nApics; ++proc) {
1757         __kmp_pu_os_idx[proc] = retval[proc].second;
1758     }
1759     if (__kmp_affinity_type == affinity_none) {
1760         __kmp_free(last);
1761         __kmp_free(maxCt);
1762         __kmp_free(counts);
1763         __kmp_free(totals);
1764         __kmp_free(retval);
1765         KMP_CPU_FREE(oldMask);
1766         return 0;
1767     }
1768 
1769     //
    // Find any levels with radix 1, and remove them from the map
1771     // (except for the package level).
1772     //
1773     int new_depth = 0;
1774     for (level = 0; level < depth; level++) {
1775         if ((maxCt[level] == 1) && (level != pkgLevel)) {
1776            continue;
1777         }
1778         new_depth++;
1779     }
1780 
1781     //
1782     // If we are removing any levels, allocate a new vector to return,
1783     // and copy the relevant information to it.
1784     //
1785     if (new_depth != depth) {
1786         AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
1787           sizeof(AddrUnsPair) * nApics);
1788         for (proc = 0; (int)proc < nApics; proc++) {
1789             Address addr(new_depth);
1790             new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
1791         }
1792         int new_level = 0;
1793         int newPkgLevel = -1;
1794         int newCoreLevel = -1;
1795         int newThreadLevel = -1;
1796         int i;
1797         for (level = 0; level < depth; level++) {
1798             if ((maxCt[level] == 1)
1799               && (level != pkgLevel)) {
1800                 //
1801                 // Remove this level. Never remove the package level
1802                 //
1803                 continue;
1804             }
1805             if (level == pkgLevel) {
1806                 newPkgLevel = level;
1807             }
1808             if (level == coreLevel) {
1809                 newCoreLevel = level;
1810             }
1811             if (level == threadLevel) {
1812                 newThreadLevel = level;
1813             }
1814             for (proc = 0; (int)proc < nApics; proc++) {
1815                 new_retval[proc].first.labels[new_level]
1816                   = retval[proc].first.labels[level];
1817             }
1818             new_level++;
1819         }
1820 
1821         __kmp_free(retval);
1822         retval = new_retval;
1823         depth = new_depth;
1824         pkgLevel = newPkgLevel;
1825         coreLevel = newCoreLevel;
1826         threadLevel = newThreadLevel;
1827     }
1828 
1829     if (__kmp_affinity_gran_levels < 0) {
1830         //
1831         // Set the granularity level based on what levels are modeled
1832         // in the machine topology map.
1833         //
1834         __kmp_affinity_gran_levels = 0;
1835         if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1836             __kmp_affinity_gran_levels++;
1837         }
1838         if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1839             __kmp_affinity_gran_levels++;
1840         }
1841         if (__kmp_affinity_gran > affinity_gran_package) {
1842             __kmp_affinity_gran_levels++;
1843         }
1844     }
1845 
1846     if (__kmp_affinity_verbose) {
1847         __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
1848           coreLevel, threadLevel);
1849     }
1850 
1851     __kmp_free(last);
1852     __kmp_free(maxCt);
1853     __kmp_free(counts);
1854     __kmp_free(totals);
1855     KMP_CPU_FREE(oldMask);
1856     *address2os = retval;
1857     return depth;
1858 }
1859 
1860 
1861 # endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1862 
1863 
1864 #define osIdIndex       0
1865 #define threadIdIndex   1
1866 #define coreIdIndex     2
1867 #define pkgIdIndex      3
1868 #define nodeIdIndex     4
1869 
1870 typedef unsigned *ProcCpuInfo;
1871 static unsigned maxIndex = pkgIdIndex;
1872 
1873 
1874 static int
1875 __kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
1876 {
1877     const unsigned *aa = (const unsigned *)a;
1878     const unsigned *bb = (const unsigned *)b;
1879     if (aa[osIdIndex] < bb[osIdIndex]) return -1;
1880     if (aa[osIdIndex] > bb[osIdIndex]) return 1;
1881     return 0;
1882 };
1883 
1884 
1885 static int
1886 __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
1887 {
1888     unsigned i;
1889     const unsigned *aa = *((const unsigned **)a);
1890     const unsigned *bb = *((const unsigned **)b);
1891     for (i = maxIndex; ; i--) {
1892         if (aa[i] < bb[i]) return -1;
1893         if (aa[i] > bb[i]) return 1;
1894         if (i == osIdIndex) break;
1895     }
1896     return 0;
1897 }
1898 
1899 
1900 //
1901 // Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
1902 // affinity map.
1903 //
1904 static int
1905 __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
1906   kmp_i18n_id_t *const msg_id, FILE *f)
1907 {
1908     *address2os = NULL;
1909     *msg_id = kmp_i18n_null;
1910 
1911     //
    // Scan the file once, counting the number of "processor" (osId) fields
    // and finding the highest value of <n> for a node_<n> field.
1914     //
1915     char buf[256];
1916     unsigned num_records = 0;
1917     while (! feof(f)) {
1918         buf[sizeof(buf) - 1] = 1;
1919         if (! fgets(buf, sizeof(buf), f)) {
1920             //
1921             // Read errors presumably because of EOF
1922             //
1923             break;
1924         }
1925 
1926         char s1[] = "processor";
1927         if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1928             num_records++;
1929             continue;
1930         }
1931 
1932         //
1933         // FIXME - this will match "node_<n> <garbage>"
1934         //
1935         unsigned level;
1936         if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
1937             if (nodeIdIndex + level >= maxIndex) {
1938                 maxIndex = nodeIdIndex + level;
1939             }
1940             continue;
1941         }
1942     }
1943 
1944     //
1945     // Check for empty file / no valid processor records, or too many.
1946     // The number of records can't exceed the number of valid bits in the
1947     // affinity mask.
1948     //
1949     if (num_records == 0) {
1950         *line = 0;
1951         *msg_id = kmp_i18n_str_NoProcRecords;
1952         return -1;
1953     }
1954     if (num_records > (unsigned)__kmp_xproc) {
1955         *line = 0;
1956         *msg_id = kmp_i18n_str_TooManyProcRecords;
1957         return -1;
1958     }
1959 
1960     //
    // Set the file pointer back to the beginning, so that we can scan the
    // file again, this time performing a full parse of the data.
    // Allocate a vector of ProcCpuInfo objects, where we will place the data.
1964     // Adding an extra element at the end allows us to remove a lot of extra
1965     // checks for termination conditions.
1966     //
1967     if (fseek(f, 0, SEEK_SET) != 0) {
1968         *line = 0;
1969         *msg_id = kmp_i18n_str_CantRewindCpuinfo;
1970         return -1;
1971     }
1972 
1973     //
1974     // Allocate the array of records to store the proc info in.  The dummy
1975     // element at the end makes the logic in filling them out easier to code.
1976     //
1977     unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
1978       * sizeof(unsigned *));
1979     unsigned i;
1980     for (i = 0; i <= num_records; i++) {
1981         threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
1982           * sizeof(unsigned));
1983     }
1984 
1985 #define CLEANUP_THREAD_INFO \
1986     for (i = 0; i <= num_records; i++) {                                \
1987         __kmp_free(threadInfo[i]);                                      \
1988     }                                                                   \
1989     __kmp_free(threadInfo);
1990 
1991     //
1992     // A value of UINT_MAX means that we didn't find the field
1993     //
1994     unsigned __index;
1995 
1996 #define INIT_PROC_INFO(p) \
1997     for (__index = 0; __index <= maxIndex; __index++) {                 \
1998         (p)[__index] = UINT_MAX;                                        \
1999     }
2000 
2001     for (i = 0; i <= num_records; i++) {
2002         INIT_PROC_INFO(threadInfo[i]);
2003     }
2004 
2005     unsigned num_avail = 0;
2006     *line = 0;
2007     while (! feof(f)) {
2008         //
2009         // Create an inner scoping level, so that all the goto targets at the
2010         // end of the loop appear in an outer scoping level.  This avoids
2011         // warnings about jumping past an initialization to a target in the
2012         // same block.
2013         //
2014         {
2015             buf[sizeof(buf) - 1] = 1;
2016             bool long_line = false;
2017             if (! fgets(buf, sizeof(buf), f)) {
2018                 //
2019                 // Read errors presumably because of EOF
2020                 //
                // If there is valid data in threadInfo[num_avail], then fake
                // a blank line to ensure that the last address gets parsed.
2023                 //
2024                 bool valid = false;
2025                 for (i = 0; i <= maxIndex; i++) {
2026                     if (threadInfo[num_avail][i] != UINT_MAX) {
2027                         valid = true;
2028                     }
2029                 }
2030                 if (! valid) {
2031                     break;
2032                 }
2033                 buf[0] = 0;
2034             } else if (!buf[sizeof(buf) - 1]) {
2035                 //
2036                 // The line is longer than the buffer.  Set a flag and don't
2037                 // emit an error if we were going to ignore the line, anyway.
2038                 //
2039                 long_line = true;
2040 
2041 #define CHECK_LINE \
2042     if (long_line) {                                                    \
2043         CLEANUP_THREAD_INFO;                                            \
2044         *msg_id = kmp_i18n_str_LongLineCpuinfo;                         \
2045         return -1;                                                      \
2046     }
2047             }
2048             (*line)++;
2049 
2050             char s1[] = "processor";
2051             if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
2052                 CHECK_LINE;
2053                 char *p = strchr(buf + sizeof(s1) - 1, ':');
2054                 unsigned val;
2055                 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
2056                 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
2057                 threadInfo[num_avail][osIdIndex] = val;
2058 #if KMP_OS_LINUX && USE_SYSFS_INFO
2059                 char path[256];
2060                 KMP_SNPRINTF(path, sizeof(path),
2061                     "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
2062                     threadInfo[num_avail][osIdIndex]);
2063                 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
2064 
2065                 KMP_SNPRINTF(path, sizeof(path),
2066                     "/sys/devices/system/cpu/cpu%u/topology/core_id",
2067                     threadInfo[num_avail][osIdIndex]);
2068                 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
2069                 continue;
2070 #else
2071             }
2072             char s2[] = "physical id";
2073             if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
2074                 CHECK_LINE;
2075                 char *p = strchr(buf + sizeof(s2) - 1, ':');
2076                 unsigned val;
2077                 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
2078                 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
2079                 threadInfo[num_avail][pkgIdIndex] = val;
2080                 continue;
2081             }
2082             char s3[] = "core id";
2083             if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
2084                 CHECK_LINE;
2085                 char *p = strchr(buf + sizeof(s3) - 1, ':');
2086                 unsigned val;
2087                 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
2088                 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
2089                 threadInfo[num_avail][coreIdIndex] = val;
2090                 continue;
2091 #endif // KMP_OS_LINUX && USE_SYSFS_INFO
2092             }
2093             char s4[] = "thread id";
2094             if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
2095                 CHECK_LINE;
2096                 char *p = strchr(buf + sizeof(s4) - 1, ':');
2097                 unsigned val;
2098                 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
2099                 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
2100                 threadInfo[num_avail][threadIdIndex] = val;
2101                 continue;
2102             }
2103             unsigned level;
2104             if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
2105                 CHECK_LINE;
2106                 char *p = strchr(buf + sizeof(s4) - 1, ':');
2107                 unsigned val;
2108                 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
2109                 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2110                 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
2111                 threadInfo[num_avail][nodeIdIndex + level] = val;
2112                 continue;
2113             }
2114 
2115             //
2116             // We didn't recognize the leading token on the line.
2117             // There are lots of leading tokens that we don't recognize -
2118             // if the line isn't empty, go on to the next line.
2119             //
2120             if ((*buf != 0) && (*buf != '\n')) {
2121                 //
2122                 // If the line is longer than the buffer, read characters
2123                 // until we find a newline.
2124                 //
2125                 if (long_line) {
2126                     int ch;
2127                     while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
2128                 }
2129                 continue;
2130             }
2131 
2132             //
2133             // A newline has signalled the end of the processor record.
2134             // Check that there aren't too many procs specified.
2135             //
2136             if ((int)num_avail == __kmp_xproc) {
2137                 CLEANUP_THREAD_INFO;
2138                 *msg_id = kmp_i18n_str_TooManyEntries;
2139                 return -1;
2140             }
2141 
2142             //
            // Check for missing fields.  The osId field must be there.  We
            // currently require that the physical id field is present, too.
2145             //
2146             if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2147                 CLEANUP_THREAD_INFO;
2148                 *msg_id = kmp_i18n_str_MissingProcField;
2149                 return -1;
2150             }
2151             if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2152                 CLEANUP_THREAD_INFO;
2153                 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2154                 return -1;
2155             }
2156 
2157             //
2158             // Skip this proc if it is not included in the machine model.
2159             //
2160             if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], __kmp_affin_fullMask)) {
2161                 INIT_PROC_INFO(threadInfo[num_avail]);
2162                 continue;
2163             }
2164 
2165             //
2166             // We have a successful parse of this proc's info.
2167             // Increment the counter, and prepare for the next proc.
2168             //
2169             num_avail++;
2170             KMP_ASSERT(num_avail <= num_records);
2171             INIT_PROC_INFO(threadInfo[num_avail]);
2172         }
2173         continue;
2174 
2175         no_val:
2176         CLEANUP_THREAD_INFO;
2177         *msg_id = kmp_i18n_str_MissingValCpuinfo;
2178         return -1;
2179 
2180         dup_field:
2181         CLEANUP_THREAD_INFO;
2182         *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2183         return -1;
2184     }
2185     *line = 0;
2186 
2187 # if KMP_MIC && REDUCE_TEAM_SIZE
2188     unsigned teamSize = 0;
2189 # endif // KMP_MIC && REDUCE_TEAM_SIZE
2190 
2191     // check for num_records == __kmp_xproc ???
2192 
2193     //
2194     // If there's only one thread context to bind to, form an Address object
2195     // with depth 1 and return immediately (or, if affinity is off, set
2196     // address2os to NULL and return).
2197     //
2198     // If it is configured to omit the package level when there is only a
2199     // single package, the logic at the end of this routine won't work if
2200     // there is only a single thread - it would try to form an Address
2201     // object with depth 0.
2202     //
2203     KMP_ASSERT(num_avail > 0);
2204     KMP_ASSERT(num_avail <= num_records);
2205     if (num_avail == 1) {
2206         __kmp_ncores = 1;
2207         __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
2208         if (__kmp_affinity_verbose) {
2209             if (! KMP_AFFINITY_CAPABLE()) {
2210                 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2211                 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2212                 KMP_INFORM(Uniform, "KMP_AFFINITY");
2213             }
2214             else {
2215                 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2216                 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
2217                   __kmp_affin_fullMask);
2218                 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2219                 if (__kmp_affinity_respect_mask) {
2220                     KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2221                 } else {
2222                     KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2223                 }
2224                 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2225                 KMP_INFORM(Uniform, "KMP_AFFINITY");
2226             }
2227             int index;
2228             kmp_str_buf_t buf;
2229             __kmp_str_buf_init(&buf);
2230             __kmp_str_buf_print(&buf, "1");
2231             for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2232                 __kmp_str_buf_print(&buf, " x 1");
2233             }
2234             KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
2235             __kmp_str_buf_free(&buf);
2236         }
2237 
2238         if (__kmp_affinity_type == affinity_none) {
2239             CLEANUP_THREAD_INFO;
2240             return 0;
2241         }
2242 
2243         *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
2244         Address addr(1);
2245         addr.labels[0] = threadInfo[0][pkgIdIndex];
2246         (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2247 
2248         if (__kmp_affinity_gran_levels < 0) {
2249             __kmp_affinity_gran_levels = 0;
2250         }
2251 
2252         if (__kmp_affinity_verbose) {
2253             __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2254         }
2255 
2256         CLEANUP_THREAD_INFO;
2257         return 1;
2258     }
2259 
2260     //
2261     // Sort the threadInfo table by physical Id.
2262     //
2263     qsort(threadInfo, num_avail, sizeof(*threadInfo),
2264       __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2265 
2266     //
2267     // The table is now sorted by pkgId / coreId / threadId, but we really
2268     // don't know the radix of any of the fields.  pkgId's may be sparsely
2269     // assigned among the chips on a system.  Although coreId's are usually
2270     // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
2271     // [0..threadsPerCore-1], we don't want to make any such assumptions.
2272     //
2273     // For that matter, we don't know what coresPerPkg and threadsPerCore
2274     // (or the total # packages) are at this point - we want to determine
2275     // that now.  We only have an upper bound on the first two figures.
2276     //
2277     unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
2278       * sizeof(unsigned));
2279     unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
2280       * sizeof(unsigned));
2281     unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
2282       * sizeof(unsigned));
2283     unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
2284       * sizeof(unsigned));
2285 
2286     bool assign_thread_ids = false;
2287     unsigned threadIdCt;
2288     unsigned index;
2289 
2290     restart_radix_check:
2291     threadIdCt = 0;
2292 
2293     //
2294     // Initialize the counter arrays with data from threadInfo[0].
2295     //
2296     if (assign_thread_ids) {
2297         if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2298             threadInfo[0][threadIdIndex] = threadIdCt++;
2299         }
2300         else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2301             threadIdCt = threadInfo[0][threadIdIndex] + 1;
2302         }
2303     }
2304     for (index = 0; index <= maxIndex; index++) {
2305         counts[index] = 1;
2306         maxCt[index] = 1;
2307         totals[index] = 1;
        lastId[index] = threadInfo[0][index];
2309     }
2310 
2311     //
2312     // Run through the rest of the OS procs.
2313     //
2314     for (i = 1; i < num_avail; i++) {
2315         //
2316         // Find the most significant index whose id differs
2317         // from the id for the previous OS proc.
2318         //
2319         for (index = maxIndex; index >= threadIdIndex; index--) {
2320             if (assign_thread_ids && (index == threadIdIndex)) {
2321                 //
2322                 // Auto-assign the thread id field if it wasn't specified.
2323                 //
2324                 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2325                     threadInfo[i][threadIdIndex] = threadIdCt++;
2326                 }
2327 
2328                 //
                // Apparently the thread id field was specified for some
2330                 // entries and not others.  Start the thread id counter
2331                 // off at the next higher thread id.
2332                 //
2333                 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2334                     threadIdCt = threadInfo[i][threadIdIndex] + 1;
2335                 }
2336             }
2337             if (threadInfo[i][index] != lastId[index]) {
2338                 //
2339                 // Run through all indices which are less significant,
2340                 // and reset the counts to 1.
2341                 //
2342                 // At all levels up to and including index, we need to
2343                 // increment the totals and record the last id.
2344                 //
2345                 unsigned index2;
2346                 for (index2 = threadIdIndex; index2 < index; index2++) {
2347                     totals[index2]++;
2348                     if (counts[index2] > maxCt[index2]) {
2349                         maxCt[index2] = counts[index2];
2350                     }
2351                     counts[index2] = 1;
2352                     lastId[index2] = threadInfo[i][index2];
2353                 }
2354                 counts[index]++;
2355                 totals[index]++;
2356                 lastId[index] = threadInfo[i][index];
2357 
2358                 if (assign_thread_ids && (index > threadIdIndex)) {
2359 
2360 # if KMP_MIC && REDUCE_TEAM_SIZE
2361                     //
2362                     // The default team size is the total #threads in the machine
2363                     // minus 1 thread for every core that has 3 or more threads.
2364                     //
2365                     teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
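                    // (e.g. a core with 4 threads contributes 3 to teamSize,
                    //  while a core with 1 or 2 threads contributes all of
                    //  them)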
2366 # endif // KMP_MIC && REDUCE_TEAM_SIZE
2367 
2368                     //
2369                     // Restart the thread counter, as we are on a new core.
2370                     //
2371                     threadIdCt = 0;
2372 
2373                     //
2374                     // Auto-assign the thread id field if it wasn't specified.
2375                     //
2376                     if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2377                         threadInfo[i][threadIdIndex] = threadIdCt++;
2378                     }
2379 
2380                     //
                    // Apparently the thread id field was specified for some
2382                     // entries and not others.  Start the thread id counter
2383                     // off at the next higher thread id.
2384                     //
2385                     else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2386                         threadIdCt = threadInfo[i][threadIdIndex] + 1;
2387                     }
2388                 }
2389                 break;
2390             }
2391         }
2392         if (index < threadIdIndex) {
2393             //
2394             // If thread ids were specified, it is an error if they are not
            // unique.  Also, check that we haven't already restarted the
2396             // loop (to be safe - shouldn't need to).
2397             //
2398             if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2399               || assign_thread_ids) {
2400                 __kmp_free(lastId);
2401                 __kmp_free(totals);
2402                 __kmp_free(maxCt);
2403                 __kmp_free(counts);
2404                 CLEANUP_THREAD_INFO;
2405                 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2406                 return -1;
2407             }
2408 
2409             //
            // If the thread ids were not specified and we see entries that
            // are duplicates, start the loop over and assign the thread ids
            // manually.
2413             //
2414             assign_thread_ids = true;
2415             goto restart_radix_check;
2416         }
2417     }
2418 
2419 # if KMP_MIC && REDUCE_TEAM_SIZE
2420     //
2421     // The default team size is the total #threads in the machine
2422     // minus 1 thread for every core that has 3 or more threads.
2423     //
2424     teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2425 # endif // KMP_MIC && REDUCE_TEAM_SIZE
2426 
2427     for (index = threadIdIndex; index <= maxIndex; index++) {
2428         if (counts[index] > maxCt[index]) {
2429             maxCt[index] = counts[index];
2430         }
2431     }
2432 
2433     __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2434     nCoresPerPkg = maxCt[coreIdIndex];
2435     nPackages = totals[pkgIdIndex];
2436 
2437     //
2438     // Check to see if the machine topology is uniform
2439     //
2440     unsigned prod = totals[maxIndex];
2441     for (index = threadIdIndex; index < maxIndex; index++) {
2442        prod *= maxCt[index];
2443     }
2444     bool uniform = (prod == totals[threadIdIndex]);
2445 
2446     //
2447     // When affinity is off, this routine will still be called to set
2448     // __kmp_ncores, as well as __kmp_nThreadsPerCore,
2449     // nCoresPerPkg, & nPackages.  Make sure all these vars are set
2450     // correctly, and return now if affinity is not enabled.
2451     //
2452     __kmp_ncores = totals[coreIdIndex];
2453 
2454     if (__kmp_affinity_verbose) {
        if (! KMP_AFFINITY_CAPABLE()) {
            KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (uniform) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
        }
        else {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
              __kmp_affin_fullMask);
            KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (uniform) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
        }
2480         kmp_str_buf_t buf;
2481         __kmp_str_buf_init(&buf);
2482 
2483         __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2484         for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2485             __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2486         }
2487         KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str,  maxCt[coreIdIndex],
2488           maxCt[threadIdIndex], __kmp_ncores);
2489 
2490         __kmp_str_buf_free(&buf);
2491     }
2492 
2493 # if KMP_MIC && REDUCE_TEAM_SIZE
2494     //
2495     // Set the default team size.
2496     //
2497     if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2498         __kmp_dflt_team_nth = teamSize;
2499         KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2500           __kmp_dflt_team_nth));
2501     }
2502 # endif // KMP_MIC && REDUCE_TEAM_SIZE
2503 
2504     KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
2505     KMP_DEBUG_ASSERT(num_avail == __kmp_avail_proc);
2506     __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
2507     for (i = 0; i < num_avail; ++i) { // fill the os indices
2508         __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
2509     }
2510 
2511     if (__kmp_affinity_type == affinity_none) {
2512         __kmp_free(lastId);
2513         __kmp_free(totals);
2514         __kmp_free(maxCt);
2515         __kmp_free(counts);
2516         CLEANUP_THREAD_INFO;
2517         return 0;
2518     }
2519 
2520     //
    // Count the number of levels which have more nodes at that level than
    // at the parent's level (with an implicit root node above the top
    // level).  This is equivalent to saying that there is at least one node
    // at this level which has a sibling.  These levels are in the map, and
    // the package level is always in the map.
2526     //
2527     bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2528     int level = 0;
2529     for (index = threadIdIndex; index < maxIndex; index++) {
2530         KMP_ASSERT(totals[index] >= totals[index + 1]);
2531         inMap[index] = (totals[index] > totals[index + 1]);
2532     }
2533     inMap[maxIndex] = (totals[maxIndex] > 1);
2534     inMap[pkgIdIndex] = true;
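    //
    // As a concrete (hypothetical) illustration: with totals[threadIdIndex]
    // == 16, totals[coreIdIndex] == 8, and totals[pkgIdIndex] == 2, all
    // three levels land in the map; on a machine without SMT,
    // totals[threadIdIndex] == totals[coreIdIndex], so the thread level is
    // omitted.
    //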
2535 
2536     int depth = 0;
2537     for (index = threadIdIndex; index <= maxIndex; index++) {
2538         if (inMap[index]) {
2539             depth++;
2540         }
2541     }
2542     KMP_ASSERT(depth > 0);
2543 
2544     //
2545     // Construct the data structure that is to be returned.
2546     //
2547     *address2os = (AddrUnsPair*)
2548       __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2549     int pkgLevel = -1;
2550     int coreLevel = -1;
2551     int threadLevel = -1;
2552 
2553     for (i = 0; i < num_avail; ++i) {
2554         Address addr(depth);
2555         unsigned os = threadInfo[i][osIdIndex];
2556         int src_index;
2557         int dst_index = 0;
2558 
2559         for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2560             if (! inMap[src_index]) {
2561                 continue;
2562             }
2563             addr.labels[dst_index] = threadInfo[i][src_index];
2564             if (src_index == pkgIdIndex) {
2565                 pkgLevel = dst_index;
2566             }
2567             else if (src_index == coreIdIndex) {
2568                 coreLevel = dst_index;
2569             }
2570             else if (src_index == threadIdIndex) {
2571                 threadLevel = dst_index;
2572             }
2573             dst_index++;
2574         }
2575         (*address2os)[i] = AddrUnsPair(addr, os);
2576     }
2577 
2578     if (__kmp_affinity_gran_levels < 0) {
2579         //
2580         // Set the granularity level based on what levels are modeled
2581         // in the machine topology map.
2582         //
2583         unsigned src_index;
2584         __kmp_affinity_gran_levels = 0;
2585         for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2586             if (! inMap[src_index]) {
2587                 continue;
2588             }
2589             switch (src_index) {
2590                 case threadIdIndex:
2591                 if (__kmp_affinity_gran > affinity_gran_thread) {
2592                     __kmp_affinity_gran_levels++;
2593                 }
2594 
2595                 break;
2596                 case coreIdIndex:
2597                 if (__kmp_affinity_gran > affinity_gran_core) {
2598                     __kmp_affinity_gran_levels++;
2599                 }
2600                 break;
2601 
2602                 case pkgIdIndex:
2603                 if (__kmp_affinity_gran > affinity_gran_package) {
2604                     __kmp_affinity_gran_levels++;
2605                 }
2606                 break;
2607             }
2608         }
2609     }
2610 
2611     if (__kmp_affinity_verbose) {
2612         __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2613           coreLevel, threadLevel);
2614     }
2615 
2616     __kmp_free(inMap);
2617     __kmp_free(lastId);
2618     __kmp_free(totals);
2619     __kmp_free(maxCt);
2620     __kmp_free(counts);
2621     CLEANUP_THREAD_INFO;
2622     return depth;
2623 }
2624 
2625 
2626 //
2627 // Create and return a table of affinity masks, indexed by OS thread ID.
2628 // This routine handles OR'ing together all the affinity masks of threads
2629 // that are sufficiently close, if granularity > fine.
2630 //
2631 static kmp_affin_mask_t *
2632 __kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2633   AddrUnsPair *address2os, unsigned numAddrs)
2634 {
2635     //
2636     // First form a table of affinity masks in order of OS thread id.
2637     //
2638     unsigned depth;
2639     unsigned maxOsId;
2640     unsigned i;
2641 
2642     KMP_ASSERT(numAddrs > 0);
2643     depth = address2os[0].first.depth;
2644 
2645     maxOsId = 0;
2646     for (i = 0; i < numAddrs; i++) {
2647         unsigned osId = address2os[i].second;
2648         if (osId > maxOsId) {
2649             maxOsId = osId;
2650         }
2651     }
2652     kmp_affin_mask_t *osId2Mask;
2653     KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId+1));
2654 
2655     //
2656     // Sort the address2os table according to physical order.  Doing so
2657     // will put all threads on the same core/package/node in consecutive
2658     // locations.
2659     //
2660     qsort(address2os, numAddrs, sizeof(*address2os),
2661       __kmp_affinity_cmp_Address_labels);
2662 
2663     KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2664     if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2665         KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY",  __kmp_affinity_gran_levels);
2666     }
2667     if (__kmp_affinity_gran_levels >= (int)depth) {
2668         if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2669           && (__kmp_affinity_type != affinity_none))) {
2670             KMP_WARNING(AffThreadsMayMigrate);
2671         }
2672     }
2673 
2674     //
2675     // Run through the table, forming the masks for all threads on each
2676     // core.  Threads on the same core will have identical "Address"
2677     // objects, not considering the last level, which must be the thread
2678     // id.  All threads on a core will appear consecutively.
2679     //
2680     unsigned unique = 0;
2681     unsigned j = 0;                             // index of 1st thread on core
2682     unsigned leader = 0;
2683     Address *leaderAddr = &(address2os[0].first);
2684     kmp_affin_mask_t *sum;
2685     KMP_CPU_ALLOC_ON_STACK(sum);
2686     KMP_CPU_ZERO(sum);
2687     KMP_CPU_SET(address2os[0].second, sum);
2688     for (i = 1; i < numAddrs; i++) {
2689         //
2690         // If this thread is sufficiently close to the leader (within the
2691         // granularity setting), then set the bit for this os thread in the
2692         // affinity mask for this group, and go on to the next thread.
2693         //
2694         if (leaderAddr->isClose(address2os[i].first,
2695           __kmp_affinity_gran_levels)) {
2696             KMP_CPU_SET(address2os[i].second, sum);
2697             continue;
2698         }
2699 
2700         //
2701         // For every thread in this group, copy the mask to the thread's
2702         // entry in the osId2Mask table.  Mark the first address as a
2703         // leader.
2704         //
2705         for (; j < i; j++) {
2706             unsigned osId = address2os[j].second;
2707             KMP_DEBUG_ASSERT(osId <= maxOsId);
2708             kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2709             KMP_CPU_COPY(mask, sum);
2710             address2os[j].first.leader = (j == leader);
2711         }
2712         unique++;
2713 
2714         //
2715         // Start a new mask.
2716         //
2717         leader = i;
2718         leaderAddr = &(address2os[i].first);
2719         KMP_CPU_ZERO(sum);
2720         KMP_CPU_SET(address2os[i].second, sum);
2721     }
2722 
2723     //
2724     // For every thread in last group, copy the mask to the thread's
2725     // entry in the osId2Mask table.
2726     //
2727     for (; j < i; j++) {
2728         unsigned osId = address2os[j].second;
2729         KMP_DEBUG_ASSERT(osId <= maxOsId);
2730         kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2731         KMP_CPU_COPY(mask, sum);
2732         address2os[j].first.leader = (j == leader);
2733     }
2734     unique++;
2735     KMP_CPU_FREE_FROM_STACK(sum);
2736 
2737     *maxIndex = maxOsId;
2738     *numUnique = unique;
2739     return osId2Mask;
2740 }
2741 
2742 
2743 //
2744 // Stuff for the affinity proclist parsers.  It's easier to declare these vars
// as file-static than to try to pass them through the calling sequence of
2746 // the recursive-descent OMP_PLACES parser.
2747 //
2748 static kmp_affin_mask_t *newMasks;
2749 static int numNewMasks;
2750 static int nextNewMask;
2751 
2752 #define ADD_MASK(_mask) \
2753     {                                                                   \
2754         if (nextNewMask >= numNewMasks) {                               \
2755             int i;                                                      \
2756             numNewMasks *= 2;                                           \
2757             kmp_affin_mask_t* temp;                                     \
2758             KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks);            \
2759             for(i=0;i<numNewMasks/2;i++) {                              \
2760                 kmp_affin_mask_t* src  = KMP_CPU_INDEX(newMasks, i);    \
2761                 kmp_affin_mask_t* dest = KMP_CPU_INDEX(temp, i);        \
2762                 KMP_CPU_COPY(dest, src);                                \
2763             }                                                           \
2764             KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks/2);       \
2765             newMasks = temp;                                            \
2766         }                                                               \
2767         KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));    \
2768         nextNewMask++;                                                  \
2769     }
2770 
2771 #define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2772     {                                                                   \
2773         if (((_osId) > _maxOsId) ||                                     \
2774           (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
2775             if (__kmp_affinity_verbose || (__kmp_affinity_warnings      \
2776               && (__kmp_affinity_type != affinity_none))) {             \
2777                 KMP_WARNING(AffIgnoreInvalidProcID, _osId);             \
2778             }                                                           \
2779         }                                                               \
2780         else {                                                          \
2781             ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId)));               \
2782         }                                                               \
2783     }
2784 
2785 
2786 //
2787 // Re-parse the proclist (for the explicit affinity type), and form the list
2788 // of affinity newMasks indexed by gtid.
2789 //
2790 static void
2791 __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2792   unsigned int *out_numMasks, const char *proclist,
2793   kmp_affin_mask_t *osId2Mask, int maxOsId)
2794 {
2795     int i;
2796     const char *scan = proclist;
2797     const char *next = proclist;
2798 
2799     //
    // We start the temporary mask vector small; the ADD_MASK macro (above)
    // doubles its size whenever it fills up.
2802     //
2803     numNewMasks = 2;
2804     KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
2805     nextNewMask = 0;
2806     kmp_affin_mask_t *sumMask;
2807     KMP_CPU_ALLOC(sumMask);
2808     int setSize = 0;
2809 
2810     for (;;) {
2811         int start, end, stride;
2812 
2813         SKIP_WS(scan);
2814         next = scan;
2815         if (*next == '\0') {
2816             break;
2817         }
2818 
2819         if (*next == '{') {
2820             int num;
2821             setSize = 0;
2822             next++;     // skip '{'
2823             SKIP_WS(next);
2824             scan = next;
2825 
2826             //
2827             // Read the first integer in the set.
2828             //
2829             KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2830               "bad proclist");
2831             SKIP_DIGITS(next);
2832             num = __kmp_str_to_int(scan, *next);
2833             KMP_ASSERT2(num >= 0, "bad explicit proc list");
2834 
2835             //
2836             // Copy the mask for that osId to the sum (union) mask.
2837             //
2838             if ((num > maxOsId) ||
2839               (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2840                 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2841                   && (__kmp_affinity_type != affinity_none))) {
2842                     KMP_WARNING(AffIgnoreInvalidProcID, num);
2843                 }
2844                 KMP_CPU_ZERO(sumMask);
2845             }
2846             else {
2847                 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2848                 setSize = 1;
2849             }
2850 
2851             for (;;) {
2852                 //
2853                 // Check for end of set.
2854                 //
2855                 SKIP_WS(next);
2856                 if (*next == '}') {
2857                     next++;     // skip '}'
2858                     break;
2859                 }
2860 
2861                 //
2862                 // Skip optional comma.
2863                 //
2864                 if (*next == ',') {
2865                     next++;
2866                 }
2867                 SKIP_WS(next);
2868 
2869                 //
2870                 // Read the next integer in the set.
2871                 //
2872                 scan = next;
2873                 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2874                   "bad explicit proc list");
2875 
2876                 SKIP_DIGITS(next);
2877                 num = __kmp_str_to_int(scan, *next);
2878                 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2879 
2880                 //
2881                 // Add the mask for that osId to the sum mask.
2882                 //
2883                 if ((num > maxOsId) ||
2884                   (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2885                     if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2886                       && (__kmp_affinity_type != affinity_none))) {
2887                         KMP_WARNING(AffIgnoreInvalidProcID, num);
2888                     }
2889                 }
2890                 else {
2891                     KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2892                     setSize++;
2893                 }
2894             }
2895             if (setSize > 0) {
2896                 ADD_MASK(sumMask);
2897             }
2898 
2899             SKIP_WS(next);
2900             if (*next == ',') {
2901                 next++;
2902             }
2903             scan = next;
2904             continue;
2905         }
2906 
2907         //
2908         // Read the first integer.
2909         //
2910         KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2911         SKIP_DIGITS(next);
2912         start = __kmp_str_to_int(scan, *next);
2913         KMP_ASSERT2(start >= 0, "bad explicit proc list");
2914         SKIP_WS(next);
2915 
2916         //
2917         // If this isn't a range, then add a mask to the list and go on.
2918         //
2919         if (*next != '-') {
2920             ADD_MASK_OSID(start, osId2Mask, maxOsId);
2921 
2922             //
2923             // Skip optional comma.
2924             //
2925             if (*next == ',') {
2926                 next++;
2927             }
2928             scan = next;
2929             continue;
2930         }
2931 
2932         //
2933         // This is a range.  Skip over the '-' and read in the 2nd int.
2934         //
2935         next++;         // skip '-'
2936         SKIP_WS(next);
2937         scan = next;
2938         KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2939         SKIP_DIGITS(next);
2940         end = __kmp_str_to_int(scan, *next);
2941         KMP_ASSERT2(end >= 0, "bad explicit proc list");
2942 
2943         //
2944         // Check for a stride parameter
2945         //
2946         stride = 1;
2947         SKIP_WS(next);
2948         if (*next == ':') {
2949             //
            // A stride is specified.  Skip over the ':' and read the 3rd int.
2951             //
2952             int sign = +1;
2953             next++;         // skip ':'
2954             SKIP_WS(next);
2955             scan = next;
2956             if (*next == '-') {
2957                 sign = -1;
2958                 next++;
2959                 SKIP_WS(next);
2960                 scan = next;
2961             }
2962             KMP_ASSERT2((*next >=  '0') && (*next <= '9'),
2963               "bad explicit proc list");
2964             SKIP_DIGITS(next);
2965             stride = __kmp_str_to_int(scan, *next);
2966             KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2967             stride *= sign;
2968         }
2969 
2970         //
2971         // Do some range checks.
2972         //
2973         KMP_ASSERT2(stride != 0, "bad explicit proc list");
2974         if (stride > 0) {
2975             KMP_ASSERT2(start <= end, "bad explicit proc list");
2976         }
2977         else {
2978             KMP_ASSERT2(start >= end, "bad explicit proc list");
2979         }
2980         KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2981 
2982         //
2983         // Add the mask for each OS proc # to the list.
2984         //
2985         if (stride > 0) {
2986             do {
2987                 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2988                 start += stride;
2989             } while (start <= end);
2990         }
2991         else {
2992             do {
2993                 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2994                 start += stride;
2995             } while (start >= end);
2996         }
2997 
2998         //
2999         // Skip optional comma.
3000         //
3001         SKIP_WS(next);
3002         if (*next == ',') {
3003             next++;
3004         }
3005         scan = next;
3006     }
3007 
3008     *out_numMasks = nextNewMask;
3009     if (nextNewMask == 0) {
3010         *out_masks = NULL;
        KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
        KMP_CPU_FREE(sumMask); // don't leak sumMask on the empty-list path
        return;
3013     }
3014     KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
3015     for(i = 0; i < nextNewMask; i++) {
3016         kmp_affin_mask_t* src  = KMP_CPU_INDEX(newMasks, i);
3017         kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
3018         KMP_CPU_COPY(dest, src);
3019     }
3020     KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
3021     KMP_CPU_FREE(sumMask);
3022 }
3023 
3024 
3025 # if OMP_40_ENABLED
3026 
3027 /*-----------------------------------------------------------------------------
3028 
3029 Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
places.  Again, here is the grammar:
3031 
3032 place_list := place
3033 place_list := place , place_list
3034 place := num
3035 place := place : num
3036 place := place : num : signed
place := { subplace_list }
3038 place := ! place                  // (lowest priority)
3039 subplace_list := subplace
3040 subplace_list := subplace , subplace_list
3041 subplace := num
3042 subplace := num : num
3043 subplace := num : num : signed
3044 signed := num
3045 signed := + signed
3046 signed := - signed
3047 
3048 -----------------------------------------------------------------------------*/
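//
// For illustration (hypothetical machine with OS procs 0-7 available), a
// few place lists and the places they denote:
//
//   "{0,1},{2,3}"  ->  two places: {0,1} and {2,3}
//   "{0}:4"        ->  four places: {0}, {1}, {2}, {3} (count 4, stride +1)
//   "{0,1}:2:4"    ->  two places: {0,1} and {4,5} (count 2, stride +4)
//   "!{2}"         ->  one place: the complement of {2} up to maxOsId
//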
3049 
3050 static void
3051 __kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
3052   int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3053 {
3054     const char *next;
3055 
3056     for (;;) {
3057         int start, count, stride, i;
3058 
3059         //
3060         // Read in the starting proc id
3061         //
3062         SKIP_WS(*scan);
3063         KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3064           "bad explicit places list");
3065         next = *scan;
3066         SKIP_DIGITS(next);
3067         start = __kmp_str_to_int(*scan, *next);
3068         KMP_ASSERT(start >= 0);
3069         *scan = next;
3070 
3071         //
3072         // valid follow sets are ',' ':' and '}'
3073         //
3074         SKIP_WS(*scan);
3075         if (**scan == '}' || **scan == ',') {
3076             if ((start > maxOsId) ||
3077               (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3078                 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3079                   && (__kmp_affinity_type != affinity_none))) {
3080                     KMP_WARNING(AffIgnoreInvalidProcID, start);
3081                 }
3082             }
3083             else {
3084                 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3085                 (*setSize)++;
3086             }
3087             if (**scan == '}') {
3088                 break;
3089             }
3090             (*scan)++;  // skip ','
3091             continue;
3092         }
3093         KMP_ASSERT2(**scan == ':', "bad explicit places list");
3094         (*scan)++;      // skip ':'
3095 
3096         //
3097         // Read count parameter
3098         //
3099         SKIP_WS(*scan);
3100         KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3101           "bad explicit places list");
3102         next = *scan;
3103         SKIP_DIGITS(next);
3104         count = __kmp_str_to_int(*scan, *next);
3105         KMP_ASSERT(count >= 0);
3106         *scan = next;
3107 
3108         //
3109         // valid follow sets are ',' ':' and '}'
3110         //
3111         SKIP_WS(*scan);
3112         if (**scan == '}' || **scan == ',') {
3113             for (i = 0; i < count; i++) {
3114                 if ((start > maxOsId) ||
3115                   (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3116                     if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3117                       && (__kmp_affinity_type != affinity_none))) {
3118                         KMP_WARNING(AffIgnoreInvalidProcID, start);
3119                     }
3120                     break;  // don't proliferate warnings for large count
3121                 }
3122                 else {
3123                     KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3124                     start++;
3125                     (*setSize)++;
3126                 }
3127             }
3128             if (**scan == '}') {
3129                 break;
3130             }
3131             (*scan)++;  // skip ','
3132             continue;
3133         }
3134         KMP_ASSERT2(**scan == ':', "bad explicit places list");
3135         (*scan)++;      // skip ':'
3136 
3137         //
3138         // Read stride parameter
3139         //
3140         int sign = +1;
3141         for (;;) {
3142             SKIP_WS(*scan);
3143             if (**scan == '+') {
3144                 (*scan)++; // skip '+'
3145                 continue;
3146             }
3147             if (**scan == '-') {
3148                 sign *= -1;
3149                 (*scan)++; // skip '-'
3150                 continue;
3151             }
3152             break;
3153         }
3154         SKIP_WS(*scan);
3155         KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3156           "bad explicit places list");
3157         next = *scan;
3158         SKIP_DIGITS(next);
3159         stride = __kmp_str_to_int(*scan, *next);
3160         KMP_ASSERT(stride >= 0);
3161         *scan = next;
3162         stride *= sign;
3163 
3164         //
3165         // valid follow sets are ',' and '}'
3166         //
3167         SKIP_WS(*scan);
3168         if (**scan == '}' || **scan == ',') {
3169             for (i = 0; i < count; i++) {
3170                 if ((start > maxOsId) ||
3171                   (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3172                     if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3173                       && (__kmp_affinity_type != affinity_none))) {
3174                         KMP_WARNING(AffIgnoreInvalidProcID, start);
3175                     }
3176                     break;  // don't proliferate warnings for large count
3177                 }
3178                 else {
3179                     KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3180                     start += stride;
3181                     (*setSize)++;
3182                 }
3183             }
3184             if (**scan == '}') {
3185                 break;
3186             }
3187             (*scan)++;  // skip ','
3188             continue;
3189         }
3190 
3191         KMP_ASSERT2(0, "bad explicit places list");
3192     }
3193 }
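//
// For illustration: within a place, the subplace "2:3" expands to procs
// {2,3,4} (start 2, count 3, implicit stride 1), and "0:2:4" expands to
// {0,4} (start 0, count 2, stride 4); every expanded proc is unioned into
// tempMask, so one subplace list always forms a single place.
//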
3194 
3195 
3196 static void
3197 __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
3198   int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3199 {
3200     const char *next;
3201 
3202     //
3203     // valid follow sets are '{' '!' and num
3204     //
3205     SKIP_WS(*scan);
3206     if (**scan == '{') {
3207         (*scan)++;      // skip '{'
3208         __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
3209           setSize);
3210         KMP_ASSERT2(**scan == '}', "bad explicit places list");
3211         (*scan)++;      // skip '}'
3212     }
3213     else if (**scan == '!') {
3214         (*scan)++;      // skip '!'
3215         __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
3216         KMP_CPU_COMPLEMENT(maxOsId, tempMask);
3217     }
3218     else if ((**scan >= '0') && (**scan <= '9')) {
3219         next = *scan;
3220         SKIP_DIGITS(next);
3221         int num = __kmp_str_to_int(*scan, *next);
3222         KMP_ASSERT(num >= 0);
3223         if ((num > maxOsId) ||
3224           (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3225             if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3226               && (__kmp_affinity_type != affinity_none))) {
3227                 KMP_WARNING(AffIgnoreInvalidProcID, num);
3228             }
3229         }
3230         else {
3231             KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3232             (*setSize)++;
3233         }
3234         *scan = next;  // skip num
3235     }
3236     else {
3237         KMP_ASSERT2(0, "bad explicit places list");
3238     }
3239 }
3240 
3241 
3243 void
3244 __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3245   unsigned int *out_numMasks, const char *placelist,
3246   kmp_affin_mask_t *osId2Mask, int maxOsId)
3247 {
3248     int i,j,count,stride,sign;
3249     const char *scan = placelist;
3250     const char *next = placelist;
3251 
3252     numNewMasks = 2;
3253     KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
3254     nextNewMask = 0;
3255 
3256     // tempMask is modified based on the previous or initial
3257     //   place to form the current place
3258     // previousMask contains the previous place
3259     kmp_affin_mask_t *tempMask;
3260     kmp_affin_mask_t *previousMask;
3261     KMP_CPU_ALLOC(tempMask);
3262     KMP_CPU_ZERO(tempMask);
3263     KMP_CPU_ALLOC(previousMask);
3264     KMP_CPU_ZERO(previousMask);
3265     int setSize = 0;
3266 
3267     for (;;) {
3268         __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3269 
3270         //
3271         // valid follow sets are ',' ':' and EOL
3272         //
3273         SKIP_WS(scan);
3274         if (*scan == '\0' || *scan == ',') {
3275             if (setSize > 0) {
3276                 ADD_MASK(tempMask);
3277             }
3278             KMP_CPU_ZERO(tempMask);
3279             setSize = 0;
3280             if (*scan == '\0') {
3281                 break;
3282             }
3283             scan++;     // skip ','
3284             continue;
3285         }
3286 
3287         KMP_ASSERT2(*scan == ':', "bad explicit places list");
3288         scan++;         // skip ':'
3289 
3290         //
3291         // Read count parameter
3292         //
3293         SKIP_WS(scan);
3294         KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3295           "bad explicit places list");
3296         next = scan;
3297         SKIP_DIGITS(next);
3298         count = __kmp_str_to_int(scan, *next);
3299         KMP_ASSERT(count >= 0);
3300         scan = next;
3301 
3302         //
3303         // valid follow sets are ',' ':' and EOL
3304         //
3305         SKIP_WS(scan);
3306         if (*scan == '\0' || *scan == ',') {
3307             stride = +1;
3308         }
3309         else {
3310             KMP_ASSERT2(*scan == ':', "bad explicit places list");
3311             scan++;         // skip ':'
3312 
3313             //
3314             // Read stride parameter
3315             //
3316             sign = +1;
3317             for (;;) {
3318                 SKIP_WS(scan);
3319                 if (*scan == '+') {
3320                     scan++; // skip '+'
3321                     continue;
3322                 }
3323                 if (*scan == '-') {
3324                     sign *= -1;
3325                     scan++; // skip '-'
3326                     continue;
3327                 }
3328                 break;
3329             }
3330             SKIP_WS(scan);
3331             KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3332               "bad explicit places list");
3333             next = scan;
3334             SKIP_DIGITS(next);
3335             stride = __kmp_str_to_int(scan, *next);
3336             KMP_DEBUG_ASSERT(stride >= 0);
3337             scan = next;
3338             stride *= sign;
3339         }
3340 
3341         // Add places determined by initial_place : count : stride
3342         for (i = 0; i < count; i++) {
3343             if (setSize == 0) {
3344                 break;
3345             }
3346             // Add the current place, then build the next place (tempMask) from that
3347             KMP_CPU_COPY(previousMask, tempMask);
3348             ADD_MASK(previousMask);
3349             KMP_CPU_ZERO(tempMask);
3350             setSize = 0;
3351             KMP_CPU_SET_ITERATE(j, previousMask) {
3352                 if (! KMP_CPU_ISSET(j, previousMask)) {
3353                     continue;
3354                 }
3355                 if ((j+stride > maxOsId) || (j+stride < 0) ||
3356                   (! KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
3357                   (! KMP_CPU_ISSET(j+stride, KMP_CPU_INDEX(osId2Mask, j+stride)))) {
3358                     if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
3359                       && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
3360                         KMP_WARNING(AffIgnoreInvalidProcID, j+stride);
3361                     }
3362                     continue;
3363                 }
3364                 KMP_CPU_SET(j+stride, tempMask);
3365                 setSize++;
3366             }
3367         }
3368         KMP_CPU_ZERO(tempMask);
3369         setSize = 0;
3370 
3371         //
3372         // valid follow sets are ',' and EOL
3373         //
3374         SKIP_WS(scan);
3375         if (*scan == '\0') {
3376             break;
3377         }
3378         if (*scan == ',') {
3379             scan++;     // skip ','
3380             continue;
3381         }
3382 
3383         KMP_ASSERT2(0, "bad explicit places list");
3384     }
3385 
3386     *out_numMasks = nextNewMask;
3387     if (nextNewMask == 0) {
        *out_masks = NULL;
        KMP_CPU_FREE(tempMask);     // don't leak the scratch masks on the
        KMP_CPU_FREE(previousMask); // empty-list path
        KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
        return;
3391     }
3392     KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
3393     KMP_CPU_FREE(tempMask);
3394     KMP_CPU_FREE(previousMask);
3395     for(i = 0; i < nextNewMask; i++) {
3396         kmp_affin_mask_t* src  = KMP_CPU_INDEX(newMasks, i);
3397         kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
3398         KMP_CPU_COPY(dest, src);
3399     }
3400     KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
3401 }
3402 
3403 # endif /* OMP_40_ENABLED */
3404 
3405 #undef ADD_MASK
3406 #undef ADD_MASK_OSID
3407 
3408 static void
3409 __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3410 {
3411     int i, j, k, n_old = 0, n_new = 0, proc_num = 0;
3412     if (__kmp_place_num_sockets == 0 &&
3413         __kmp_place_num_cores == 0 &&
3414         __kmp_place_num_threads_per_core == 0 )
3415         goto _exit;   // no topology limiting actions requested, exit
3416     if (__kmp_place_num_sockets == 0)
3417         __kmp_place_num_sockets = nPackages;    // use all available sockets
3418     if (__kmp_place_num_cores == 0)
3419         __kmp_place_num_cores = nCoresPerPkg;   // use all available cores
3420     if (__kmp_place_num_threads_per_core == 0 ||
3421         __kmp_place_num_threads_per_core > __kmp_nThreadsPerCore)
3422         __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
3423 
3424     if ( !__kmp_affinity_uniform_topology() ) {
3425         KMP_WARNING( AffHWSubsetNonUniform );
        goto _exit; // non-uniform topologies are not supported
3427     }
3428     if ( depth > 3 ) {
3429         KMP_WARNING( AffHWSubsetNonThreeLevel );
        goto _exit; // only three-level topologies are supported
3431     }
3432     if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
3433         KMP_WARNING(AffHWSubsetManySockets);
3434         goto _exit;
3435     }
3436     if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
3437         KMP_WARNING( AffHWSubsetManyCores );
3438         goto _exit;
3439     }
3440 
3441     AddrUnsPair *newAddr;
3442     if (pAddr) // pAddr is NULL in case of affinity_none
3443         newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
3444             __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
3445 
3446     for (i = 0; i < nPackages; ++i) {
3447         if (i < __kmp_place_socket_offset ||
3448             i >= __kmp_place_socket_offset + __kmp_place_num_sockets) {
3449             n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket
3450             if (__kmp_pu_os_idx != NULL) {
3451                 for (j = 0; j < nCoresPerPkg; ++j) { // walk through skipped socket
3452                     for (k = 0; k < __kmp_nThreadsPerCore; ++k) {
3453                         KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
3454                         ++proc_num;
3455                     }
3456                 }
3457             }
3458         } else {
3459             for (j = 0; j < nCoresPerPkg; ++j) { // walk through requested socket
3460                 if (j < __kmp_place_core_offset ||
3461                     j >= __kmp_place_core_offset + __kmp_place_num_cores) {
3462                     n_old += __kmp_nThreadsPerCore; // skip not-requested core
3463                     if (__kmp_pu_os_idx != NULL) {
3464                         for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through skipped core
3465                             KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
3466                             ++proc_num;
3467                         }
3468                     }
3469                 } else {
3470                     for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
3471                         if (k < __kmp_place_num_threads_per_core) {
3472                             if (pAddr)
3473                                 newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
3474                             n_new++;
3475                         } else {
3476                             if (__kmp_pu_os_idx != NULL)
3477                                 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
3478                         }
3479                         n_old++;
3480                         ++proc_num;
3481                     }
3482                 }
3483             }
3484         }
3485     }
3486     KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
3487     KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
3488                      __kmp_place_num_threads_per_core);
3489 
3490     nPackages = __kmp_place_num_sockets;                      // correct nPackages
3491     nCoresPerPkg = __kmp_place_num_cores;                     // correct nCoresPerPkg
3492     __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3493     __kmp_avail_proc = n_new;                                 // correct avail_proc
3494     __kmp_ncores = nPackages * __kmp_place_num_cores;         // correct ncores
3495 
3496     if (pAddr) {
3497         __kmp_free( *pAddr );
3498         *pAddr = newAddr;      // replace old topology with new one
3499     }
3500 _exit:
3501     if (__kmp_pu_os_idx != NULL) {
3502         __kmp_free(__kmp_pu_os_idx);
3503         __kmp_pu_os_idx = NULL;
3504     }
3505 }
3506 
3507 //
3508 // This function figures out the deepest level at which there is at least one cluster/core
3509 // with more than one processing unit bound to it.
3510 //
3511 static int
3512 __kmp_affinity_find_core_level(const AddrUnsPair *address2os, int nprocs, int bottom_level)
3513 {
3514     int core_level = 0;
3515 
3516     for( int i = 0; i < nprocs; i++ ) {
3517         for( int j = bottom_level; j > 0; j-- ) {
3518             if( address2os[i].first.labels[j] > 0 ) {
3519                 if( core_level < ( j - 1 ) ) {
3520                     core_level = j - 1;
3521                 }
3522             }
3523         }
3524     }
3525     return core_level;
3526 }
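//
// For illustration (hypothetical depth-3 topology, labels = [package, core,
// thread], bottom_level = 2): if any proc carries a thread label > 0 (some
// core holds more than one PU), core_level becomes 1 (the core level); if
// every core holds exactly one PU, core_level stays 0 (the package level).
//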
3527 
3528 //
// This function counts the number of clusters/cores at the given level.
3530 //
3531 static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os, int nprocs, int bottom_level, int core_level)
3532 {
3533     int ncores = 0;
3534     int i, j;
3535 
3536     j = bottom_level;
3537     for( i = 0; i < nprocs; i++ ) {
3538         for ( j = bottom_level; j > core_level; j-- ) {
3539             if( ( i + 1 ) < nprocs ) {
3540                 if( address2os[i + 1].first.labels[j] > 0 ) {
3541                     break;
3542                 }
3543             }
3544         }
3545         if( j == core_level ) {
3546             ncores++;
3547         }
3548     }
3549     if( j > core_level ) {
3550         //
        // In case of ( nprocs < __kmp_avail_proc ) we may end up too deep and miss one core.
3552         // May occur when called from __kmp_affinity_find_core().
3553         //
3554         ncores++;
3555     }
3556     return ncores;
3557 }
3558 
3559 //
// This function finds the cluster/core to which the given processing unit is bound.
3561 //
3562 static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc, int bottom_level, int core_level)
3563 {
3564     return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level, core_level) - 1;
3565 }
3566 
3567 //
// This function finds the maximal number of processing units bound to a cluster/core at the given level.
3569 //
3570 static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os, int nprocs, int bottom_level, int core_level)
3571 {
3572     int maxprocpercore = 0;
3573 
3574     if( core_level < bottom_level ) {
3575         for( int i = 0; i < nprocs; i++ ) {
3576             int percore = address2os[i].first.labels[core_level + 1] + 1;
3577 
3578             if( percore > maxprocpercore ) {
3579                 maxprocpercore = percore;
3580             }
        }
3582     } else {
3583         maxprocpercore = 1;
3584     }
3585     return maxprocpercore;
3586 }
3587 
3588 static AddrUnsPair *address2os = NULL;
3589 static int           * procarr = NULL;
3590 static int     __kmp_aff_depth = 0;
3591 
3592 #define KMP_EXIT_AFF_NONE                             \
3593     KMP_ASSERT(__kmp_affinity_type == affinity_none); \
3594     KMP_ASSERT(address2os == NULL);                   \
3595     __kmp_apply_thread_places(NULL, 0);               \
3596     return;
3597 
3598 static int
3599 __kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
3600 {
3601     const Address *aa = (const Address *)&(((AddrUnsPair *)a)
3602       ->first);
3603     const Address *bb = (const Address *)&(((AddrUnsPair *)b)
3604       ->first);
3605     unsigned depth = aa->depth;
3606     unsigned i;
3607     KMP_DEBUG_ASSERT(depth == bb->depth);
3608     KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
3609     KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
3610     for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
3611         int j = depth - i - 1;
3612         if (aa->childNums[j] < bb->childNums[j]) return -1;
3613         if (aa->childNums[j] > bb->childNums[j]) return 1;
3614     }
3615     for (; i < depth; i++) {
3616         int j = i - __kmp_affinity_compact;
3617         if (aa->childNums[j] < bb->childNums[j]) return -1;
3618         if (aa->childNums[j] > bb->childNums[j]) return 1;
3619     }
3620     return 0;
3621 }
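//
// For illustration (hypothetical depth-3 addresses with childNums per level
// [package, core, thread]): with __kmp_affinity_compact == 0 the sort key
// is (package, core, thread), i.e. plain topology order as used by
// affinity_compact; with __kmp_affinity_compact == 2 the key becomes
// (thread, core, package), so consecutive entries differ in package first,
// which is the ordering affinity_scatter relies on.
//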
3622 
3623 static void
3624 __kmp_aux_affinity_initialize(void)
3625 {
3626     if (__kmp_affinity_masks != NULL) {
3627         KMP_ASSERT(__kmp_affin_fullMask != NULL);
3628         return;
3629     }
3630 
3631     //
3632     // Create the "full" mask - this defines all of the processors that we
3633     // consider to be in the machine model.  If respect is set, then it is
3634     // the initialization thread's affinity mask.  Otherwise, it is all
3635     // processors that we know about on the machine.
3636     //
3637     if (__kmp_affin_fullMask == NULL) {
3638         KMP_CPU_ALLOC(__kmp_affin_fullMask);
3639     }
3640     if (KMP_AFFINITY_CAPABLE()) {
3641         if (__kmp_affinity_respect_mask) {
3642             __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
3643 
3644             //
3645             // Count the number of available processors.
3646             //
3647             unsigned i;
3648             __kmp_avail_proc = 0;
3649             KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
3650                 if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
3651                     continue;
3652                 }
3653                 __kmp_avail_proc++;
3654             }
3655             if (__kmp_avail_proc > __kmp_xproc) {
3656                 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3657                   && (__kmp_affinity_type != affinity_none))) {
3658                     KMP_WARNING(ErrorInitializeAffinity);
3659                 }
3660                 __kmp_affinity_type = affinity_none;
3661                 KMP_AFFINITY_DISABLE();
3662                 return;
3663             }
3664         }
3665         else {
3666             __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
3667             __kmp_avail_proc = __kmp_xproc;
3668         }
3669     }
3670 
3671     int depth = -1;
3672     kmp_i18n_id_t msg_id = kmp_i18n_null;
3673 
3674     //
3675     // For backward compatibility, setting KMP_CPUINFO_FILE =>
3676     // KMP_TOPOLOGY_METHOD=cpuinfo
3677     //
3678     if ((__kmp_cpuinfo_file != NULL) &&
3679       (__kmp_affinity_top_method == affinity_top_method_all)) {
3680         __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3681     }
3682 
3683     if (__kmp_affinity_top_method == affinity_top_method_all) {
3684         //
3685         // In the default code path, errors are not fatal - we just try using
3686         // another method.  We only emit a warning message if affinity is on,
        // or the verbose flag is set, and the nowarnings flag was not set.
3688         //
3689         const char *file_name = NULL;
3690         int line = 0;
3691 # if KMP_USE_HWLOC
3692         if (depth < 0 && __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
3693             if (__kmp_affinity_verbose) {
3694                 KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
3695             }
3696             if(!__kmp_hwloc_error) {
3697                 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
3698                 if (depth == 0) {
3699                     KMP_EXIT_AFF_NONE;
3700                 } else if(depth < 0 && __kmp_affinity_verbose) {
3701                     KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
3702                 }
3703             } else if(__kmp_affinity_verbose) {
3704                 KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
3705             }
3706         }
3707 # endif
3708 
3709 # if KMP_ARCH_X86 || KMP_ARCH_X86_64
3710 
3711         if (depth < 0) {
3712             if (__kmp_affinity_verbose) {
3713                 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
3714             }
3715 
3716             file_name = NULL;
3717             depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3718             if (depth == 0) {
3719                 KMP_EXIT_AFF_NONE;
3720             }
3721 
3722             if (depth < 0) {
3723                 if (__kmp_affinity_verbose) {
3724                     if (msg_id != kmp_i18n_null) {
3725                         KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3726                           KMP_I18N_STR(DecodingLegacyAPIC));
3727                     }
3728                     else {
3729                         KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3730                     }
3731                 }
3732 
3733                 file_name = NULL;
3734                 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3735                 if (depth == 0) {
3736                     KMP_EXIT_AFF_NONE;
3737                 }
3738             }
3739         }
3740 
3741 # endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3742 
3743 # if KMP_OS_LINUX
3744 
3745         if (depth < 0) {
3746             if (__kmp_affinity_verbose) {
3747                 if (msg_id != kmp_i18n_null) {
3748                     KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3749                 }
3750                 else {
3751                     KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3752                 }
3753             }
3754 
3755             FILE *f = fopen("/proc/cpuinfo", "r");
3756             if (f == NULL) {
3757                 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3758             }
3759             else {
3760                 file_name = "/proc/cpuinfo";
3761                 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3762                 fclose(f);
3763                 if (depth == 0) {
3764                     KMP_EXIT_AFF_NONE;
3765                 }
3766             }
3767         }
3768 
3769 # endif /* KMP_OS_LINUX */
3770 
3771 # if KMP_GROUP_AFFINITY
3772 
3773         if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3774             if (__kmp_affinity_verbose) {
3775                 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3776             }
3777 
3778             depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3779             KMP_ASSERT(depth != 0);
3780         }
3781 
3782 # endif /* KMP_GROUP_AFFINITY */
3783 
3784         if (depth < 0) {
3785             if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
3786                 if (file_name == NULL) {
3787                     KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
3788                 }
3789                 else if (line == 0) {
3790                     KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
3791                 }
3792                 else {
3793                     KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
3794                 }
3795             }
3796             // FIXME - print msg if msg_id = kmp_i18n_null ???
3797 
3798             file_name = "";
3799             depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3800             if (depth == 0) {
3801                 KMP_EXIT_AFF_NONE;
3802             }
3803             KMP_ASSERT(depth > 0);
3804             KMP_ASSERT(address2os != NULL);
3805         }
3806     }
3807 
3808     //
    // If the user has specified that a particular topology discovery method
3810     // is to be used, then we abort if that method fails.  The exception is
3811     // group affinity, which might have been implicitly set.
3812     //
3813 
3814 # if KMP_ARCH_X86 || KMP_ARCH_X86_64
3815 
3816     else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3817         if (__kmp_affinity_verbose) {
3818             KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3819               KMP_I18N_STR(Decodingx2APIC));
3820         }
3821 
3822         depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3823         if (depth == 0) {
3824             KMP_EXIT_AFF_NONE;
3825         }
3826         if (depth < 0) {
3827             KMP_ASSERT(msg_id != kmp_i18n_null);
3828             KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3829         }
3830     }
3831     else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3832         if (__kmp_affinity_verbose) {
3833             KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3834               KMP_I18N_STR(DecodingLegacyAPIC));
3835         }
3836 
3837         depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3838         if (depth == 0) {
3839             KMP_EXIT_AFF_NONE;
3840         }
3841         if (depth < 0) {
3842             KMP_ASSERT(msg_id != kmp_i18n_null);
3843             KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3844         }
3845     }
3846 
3847 # endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3848 
3849     else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3850         const char *filename;
3851         if (__kmp_cpuinfo_file != NULL) {
3852             filename = __kmp_cpuinfo_file;
3853         }
3854         else {
3855             filename = "/proc/cpuinfo";
3856         }
3857 
3858         if (__kmp_affinity_verbose) {
3859             KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3860         }
3861 
3862         FILE *f = fopen(filename, "r");
3863         if (f == NULL) {
3864             int code = errno;
3865             if (__kmp_cpuinfo_file != NULL) {
3866                 __kmp_msg(
3867                     kmp_ms_fatal,
3868                     KMP_MSG(CantOpenFileForReading, filename),
3869                     KMP_ERR(code),
3870                     KMP_HNT(NameComesFrom_CPUINFO_FILE),
3871                     __kmp_msg_null
3872                 );
3873             }
3874             else {
3875                 __kmp_msg(
3876                     kmp_ms_fatal,
3877                     KMP_MSG(CantOpenFileForReading, filename),
3878                     KMP_ERR(code),
3879                     __kmp_msg_null
3880                 );
3881             }
3882         }
3883         int line = 0;
3884         depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3885         fclose(f);
3886         if (depth < 0) {
3887             KMP_ASSERT(msg_id != kmp_i18n_null);
3888             if (line > 0) {
3889                 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3890             }
3891             else {
3892                 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3893             }
3894         }
3895         if (__kmp_affinity_type == affinity_none) {
3896             KMP_ASSERT(depth == 0);
3897             KMP_EXIT_AFF_NONE;
3898         }
3899     }
3900 
3901 # if KMP_GROUP_AFFINITY
3902 
3903     else if (__kmp_affinity_top_method == affinity_top_method_group) {
3904         if (__kmp_affinity_verbose) {
3905             KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3906         }
3907 
3908         depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3909         KMP_ASSERT(depth != 0);
3910         if (depth < 0) {
3911             KMP_ASSERT(msg_id != kmp_i18n_null);
3912             KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3913         }
3914     }
3915 
3916 # endif /* KMP_GROUP_AFFINITY */
3917 
3918     else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3919         if (__kmp_affinity_verbose) {
3920             KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3921         }
3922 
3923         depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3924         if (depth == 0) {
3925             KMP_EXIT_AFF_NONE;
3926         }
3927         // should not fail
3928         KMP_ASSERT(depth > 0);
3929         KMP_ASSERT(address2os != NULL);
3930     }
3931 
3932 # if KMP_USE_HWLOC
3933     else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
3934         KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
3935         if (__kmp_affinity_verbose) {
3936             KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
3937         }
3938         depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
3939         if (depth == 0) {
3940             KMP_EXIT_AFF_NONE;
3941         }
3942     }
3943 # endif // KMP_USE_HWLOC
3944 
3945     if (address2os == NULL) {
3946         if (KMP_AFFINITY_CAPABLE()
3947           && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3948           && (__kmp_affinity_type != affinity_none)))) {
3949             KMP_WARNING(ErrorInitializeAffinity);
3950         }
3951         __kmp_affinity_type = affinity_none;
3952         KMP_AFFINITY_DISABLE();
3953         return;
3954     }
3955 
3956     __kmp_apply_thread_places(&address2os, depth);
3957 
3958     //
3959     // Create the table of masks, indexed by thread Id.
3960     //
3961     unsigned maxIndex;
3962     unsigned numUnique;
3963     kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3964       address2os, __kmp_avail_proc);
3965     if (__kmp_affinity_gran_levels == 0) {
3966         KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
3967     }
3968 
3969     //
3970     // Set the childNums vector in all Address objects.  This must be done
3971     // before we can sort using __kmp_affinity_cmp_Address_child_num(),
3972     // which takes into account the setting of __kmp_affinity_compact.
3973     //
3974     __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
3975 
3976     switch (__kmp_affinity_type) {
3977 
3978         case affinity_explicit:
3979         KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3980 # if OMP_40_ENABLED
3981         if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3982 # endif
3983         {
3984             __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3985               &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3986               maxIndex);
3987         }
3988 # if OMP_40_ENABLED
3989         else {
3990             __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3991               &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3992               maxIndex);
3993         }
3994 # endif
3995         if (__kmp_affinity_num_masks == 0) {
3996             if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3997               && (__kmp_affinity_type != affinity_none))) {
3998                 KMP_WARNING(AffNoValidProcID);
3999             }
4000             __kmp_affinity_type = affinity_none;
4001             return;
4002         }
4003         break;
4004 
4005         //
4006         // The other affinity types rely on sorting the Addresses according
4007         // to some permutation of the machine topology tree.  Set
4008         // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
4009         // then jump to a common code fragment to do the sort and create
4010         // the array of affinity masks.
4011         //
4012 
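        // For illustration (hypothetical 2 packages x 2 cores x 2 HW
        // threads, OS procs 0-7 numbered in topology order): compact with
        // __kmp_affinity_compact == 0 assigns consecutive masks to
        // neighboring HW threads (0,1,2,3,...), while scatter reverses the
        // permutation so consecutive masks alternate packages
        // (0,4,2,6,1,5,3,7).
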
4013         case affinity_logical:
4014         __kmp_affinity_compact = 0;
4015         if (__kmp_affinity_offset) {
4016             __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
4017               % __kmp_avail_proc;
4018         }
4019         goto sortAddresses;
4020 
4021         case affinity_physical:
4022         if (__kmp_nThreadsPerCore > 1) {
4023             __kmp_affinity_compact = 1;
4024             if (__kmp_affinity_compact >= depth) {
4025                 __kmp_affinity_compact = 0;
4026             }
4027         } else {
4028             __kmp_affinity_compact = 0;
4029         }
4030         if (__kmp_affinity_offset) {
4031             __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
4032               % __kmp_avail_proc;
4033         }
4034         goto sortAddresses;
4035 
4036         case affinity_scatter:
4037         if (__kmp_affinity_compact >= depth) {
4038             __kmp_affinity_compact = 0;
4039         }
4040         else {
4041             __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
4042         }
4043         goto sortAddresses;
4044 
4045         case affinity_compact:
4046         if (__kmp_affinity_compact >= depth) {
4047             __kmp_affinity_compact = depth - 1;
4048         }
4049         goto sortAddresses;
4050 
4051         case affinity_balanced:
4052         if( depth <= 1 ) {
4053             if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
4054                 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
4055             }
4056             __kmp_affinity_type = affinity_none;
4057             return;
4058         } else if( __kmp_affinity_uniform_topology() ) {
4059             break;
4060         } else { // Non-uniform topology
4061 
4062             // Save the depth for further usage
4063             __kmp_aff_depth = depth;
4064 
4065             int core_level = __kmp_affinity_find_core_level(address2os, __kmp_avail_proc, depth - 1);
4066             int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc, depth - 1, core_level);
4067             int maxprocpercore = __kmp_affinity_max_proc_per_core(address2os, __kmp_avail_proc, depth - 1, core_level);
4068 
4069             int nproc = ncores * maxprocpercore;
4070             if( ( nproc < 2 ) || ( nproc < __kmp_avail_proc ) ) {
4071                 if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
4072                     KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
4073                 }
4074                 __kmp_affinity_type = affinity_none;
4075                 return;
4076             }
4077 
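            // procarr is an ncores x maxprocpercore matrix flattened in
            // row-major order: procarr[core * maxprocpercore + k] holds the
            // OS proc id of the k-th PU bound to that core, or -1 when the
            // core carries fewer PUs than the maximum (non-uniform case).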
4078             procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4079             for( int i = 0; i < nproc; i++ ) {
4080                 procarr[ i ] = -1;
4081             }
4082 
4083             int lastcore = -1;
4084             int inlastcore = 0;
4085             for( int i = 0; i < __kmp_avail_proc; i++ ) {
4086                 int proc = address2os[ i ].second;
4087                 int core = __kmp_affinity_find_core(address2os, i, depth - 1, core_level);
4088 
4089                 if ( core == lastcore ) {
4090                     inlastcore++;
4091                 } else {
4092                     inlastcore = 0;
4093                 }
4094                 lastcore = core;
4095 
4096                 procarr[ core * maxprocpercore + inlastcore ] = proc;
4097             }
4098 
4099             break;
4100         }
4101 
4102         sortAddresses:
4103         //
4104         // Allocate the gtid->affinity mask table.
4105         //
4106         if (__kmp_affinity_dups) {
4107             __kmp_affinity_num_masks = __kmp_avail_proc;
4108         }
4109         else {
4110             __kmp_affinity_num_masks = numUnique;
4111         }
4112 
4113 # if OMP_40_ENABLED
4114         if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
4115           && ( __kmp_affinity_num_places > 0 )
4116           && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
4117             __kmp_affinity_num_masks = __kmp_affinity_num_places;
4118         }
4119 # endif
4120 
4121         KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
4122 
4123         //
4124         // Sort the address2os table according to the current setting of
4125         // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
4126         //
4127         qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
4128           __kmp_affinity_cmp_Address_child_num);
4129         {
4130             int i;
4131             unsigned j;
4132             for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
4133                 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
4134                     continue;
4135                 }
4136                 unsigned osId = address2os[i].second;
4137                 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
4138                 kmp_affin_mask_t *dest
4139                   = KMP_CPU_INDEX(__kmp_affinity_masks, j);
4140                 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
4141                 KMP_CPU_COPY(dest, src);
4142                 if (++j >= __kmp_affinity_num_masks) {
4143                     break;
4144                 }
4145             }
4146             KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
4147         }
4148         break;
4149 
4150         default:
4151         KMP_ASSERT2(0, "Unexpected affinity setting");
4152     }
4153 
4154     KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex+1);
4155     machine_hierarchy.init(address2os, __kmp_avail_proc);
4156 }
4157 #undef KMP_EXIT_AFF_NONE
4158 
4159 
4160 void
4161 __kmp_affinity_initialize(void)
4162 {
4163     //
    // Much of the code above was written assuming that if a machine was not
4165     // affinity capable, then __kmp_affinity_type == affinity_none.  We now
4166     // explicitly represent this as __kmp_affinity_type == affinity_disabled.
4167     //
4168     // There are too many checks for __kmp_affinity_type == affinity_none
4169     // in this code.  Instead of trying to change them all, check if
4170     // __kmp_affinity_type == affinity_disabled, and if so, slam it with
4171     // affinity_none, call the real initialization routine, then restore
4172     // __kmp_affinity_type to affinity_disabled.
4173     //
4174     int disabled = (__kmp_affinity_type == affinity_disabled);
4175     if (! KMP_AFFINITY_CAPABLE()) {
4176         KMP_ASSERT(disabled);
4177     }
4178     if (disabled) {
4179         __kmp_affinity_type = affinity_none;
4180     }
4181     __kmp_aux_affinity_initialize();
4182     if (disabled) {
4183         __kmp_affinity_type = affinity_disabled;
4184     }
4185 }
4186 
4187 
4188 void
4189 __kmp_affinity_uninitialize(void)
4190 {
4191     if (__kmp_affinity_masks != NULL) {
4192         KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
4193         __kmp_affinity_masks = NULL;
4194     }
4195     if (__kmp_affin_fullMask != NULL) {
4196         KMP_CPU_FREE(__kmp_affin_fullMask);
4197         __kmp_affin_fullMask = NULL;
4198     }
4199     __kmp_affinity_num_masks = 0;
4200 # if OMP_40_ENABLED
4201     __kmp_affinity_num_places = 0;
4202 # endif
4203     if (__kmp_affinity_proclist != NULL) {
4204         __kmp_free(__kmp_affinity_proclist);
4205         __kmp_affinity_proclist = NULL;
4206     }
4207     if( address2os != NULL ) {
4208         __kmp_free( address2os );
4209         address2os = NULL;
4210     }
4211     if( procarr != NULL ) {
4212         __kmp_free( procarr );
4213         procarr = NULL;
4214     }
4215 # if KMP_USE_HWLOC
4216     if (__kmp_hwloc_topology != NULL) {
4217         hwloc_topology_destroy(__kmp_hwloc_topology);
4218         __kmp_hwloc_topology = NULL;
4219     }
4220 # endif
4221     KMPAffinity::destroy_api();
4222 }
4223 
4224 
4225 void
4226 __kmp_affinity_set_init_mask(int gtid, int isa_root)
4227 {
4228     if (! KMP_AFFINITY_CAPABLE()) {
4229         return;
4230     }
4231 
4232     kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4233     if (th->th.th_affin_mask == NULL) {
4234         KMP_CPU_ALLOC(th->th.th_affin_mask);
4235     }
4236     else {
4237         KMP_CPU_ZERO(th->th.th_affin_mask);
4238     }
4239 
4240     //
    // Copy the thread mask to the kmp_info_t structure.
4242     // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
4243     // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
4244     // is set, then the full mask is the same as the mask of the initialization
4245     // thread.
4246     //
4247     kmp_affin_mask_t *mask;
4248     int i;
4249 
4250 # if OMP_40_ENABLED
4251     if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
4252 # endif
4253     {
        if ((__kmp_affinity_type == affinity_none) ||
          (__kmp_affinity_type == affinity_balanced)) {
4256 # if KMP_GROUP_AFFINITY
4257             if (__kmp_num_proc_groups > 1) {
4258                 return;
4259             }
4260 # endif
4261             KMP_ASSERT(__kmp_affin_fullMask != NULL);
4262             i = KMP_PLACE_ALL;
4263             mask = __kmp_affin_fullMask;
4264         }
4265         else {
4266             KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4267             i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4268             mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4269         }
4270     }
4271 # if OMP_40_ENABLED
4272     else {
4273         if ((! isa_root)
4274           || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
4275 #  if KMP_GROUP_AFFINITY
4276             if (__kmp_num_proc_groups > 1) {
4277                 return;
4278             }
4279 #  endif
4280             KMP_ASSERT(__kmp_affin_fullMask != NULL);
4281             i = KMP_PLACE_ALL;
4282             mask = __kmp_affin_fullMask;
4283         }
4284         else {
4285             //
4286             // int i = some hash function or just a counter that doesn't
4287             // always start at 0.  Use gtid for now.
4288             //
4289             KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4290             i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4291             mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4292         }
4293     }
4294 # endif
4295 
4296 # if OMP_40_ENABLED
4297     th->th.th_current_place = i;
4298     if (isa_root) {
4299         th->th.th_new_place = i;
4300         th->th.th_first_place = 0;
4301         th->th.th_last_place = __kmp_affinity_num_masks - 1;
4302     }
4303 
4304     if (i == KMP_PLACE_ALL) {
4305         KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4306           gtid));
4307     }
4308     else {
4309         KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4310           gtid, i));
4311     }
4312 # else
4313     if (i == -1) {
4314         KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
4315           gtid));
4316     }
4317     else {
4318         KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4319           gtid, i));
4320     }
4321 # endif /* OMP_40_ENABLED */
4322 
4323     KMP_CPU_COPY(th->th.th_affin_mask, mask);
4324 
4325     if (__kmp_affinity_verbose) {
4326         char buf[KMP_AFFIN_MASK_PRINT_LEN];
4327         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4328           th->th.th_affin_mask);
4329         KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
4330           buf);
4331     }
4332 
4333 # if KMP_OS_WINDOWS
4334     //
4335     // On Windows* OS, the process affinity mask might have changed.
4336     // If the user didn't request affinity and this call fails,
4337     // just continue silently.  See CQ171393.
4338     //
4339     if ( __kmp_affinity_type == affinity_none ) {
4340         __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4341     }
4342     else
4343 # endif
4344     __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4345 }
4346 
4347 
4348 # if OMP_40_ENABLED
4349 
4350 void
4351 __kmp_affinity_set_place(int gtid)
4352 {
4354 
4355     if (! KMP_AFFINITY_CAPABLE()) {
4356         return;
4357     }
4358 
4359     kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4360 
4361     KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
4362       gtid, th->th.th_new_place, th->th.th_current_place));
4363 
4364     //
4365     // Check that the new place is within this thread's partition.
4366     //
4367     KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4368     KMP_ASSERT(th->th.th_new_place >= 0);
4369     KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
4370     if (th->th.th_first_place <= th->th.th_last_place) {
4371         KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
4372          && (th->th.th_new_place <= th->th.th_last_place));
4373     }
4374     else {
4375         KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
4376          || (th->th.th_new_place >= th->th.th_last_place));
4377     }
4378 
4379     //
    // Copy the thread mask to the kmp_info_t structure,
4381     // and set this thread's affinity.
4382     //
4383     kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
4384       th->th.th_new_place);
4385     KMP_CPU_COPY(th->th.th_affin_mask, mask);
4386     th->th.th_current_place = th->th.th_new_place;
4387 
4388     if (__kmp_affinity_verbose) {
4389         char buf[KMP_AFFIN_MASK_PRINT_LEN];
4390         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4391           th->th.th_affin_mask);
4392         KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
4393           gtid, buf);
4394     }
4395     __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4396 }
4397 
4398 # endif /* OMP_40_ENABLED */
4399 
4400 
4401 int
4402 __kmp_aux_set_affinity(void **mask)
4403 {
4404     int gtid;
4405     kmp_info_t *th;
4406     int retval;
4407 
4408     if (! KMP_AFFINITY_CAPABLE()) {
4409         return -1;
4410     }
4411 
4412     gtid = __kmp_entry_gtid();
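    // The ';{ ... }' below is a trace idiom: KA_TRACE(d, x) expands (roughly,
    // see kmp_debug.h) to "if (kmp_a_debug >= d) { __kmp_debug_printf x; }",
    // so the leading ';' terminates the printf expression and lets the brace
    // block run as an ordinary statement when tracing is enabled.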
4413     KA_TRACE(1000, ;{
4414         char buf[KMP_AFFIN_MASK_PRINT_LEN];
4415         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4416           (kmp_affin_mask_t *)(*mask));
4417         __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
4418           gtid, buf);
4419     });
4420 
4421     if (__kmp_env_consistency_check) {
4422         if ((mask == NULL) || (*mask == NULL)) {
4423             KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4424         }
4425         else {
4426             unsigned proc;
4427             int num_procs = 0;
4428 
4429             KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t*)(*mask))) {
4430                 if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4431                     KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4432                 }
4433                 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4434                     continue;
4435                 }
4436                 num_procs++;
4437             }
4438             if (num_procs == 0) {
4439                 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4440             }
4441 
4442 # if KMP_GROUP_AFFINITY
4443             if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4444                 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4445             }
4446 # endif /* KMP_GROUP_AFFINITY */
4447 
4448         }
4449     }
4450 
4451     th = __kmp_threads[gtid];
4452     KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4453     retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4454     if (retval == 0) {
4455         KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4456     }
4457 
4458 # if OMP_40_ENABLED
4459     th->th.th_current_place = KMP_PLACE_UNDEFINED;
4460     th->th.th_new_place = KMP_PLACE_UNDEFINED;
4461     th->th.th_first_place = 0;
4462     th->th.th_last_place = __kmp_affinity_num_masks - 1;
4463 
4464     //
    // Turn off 4.0 affinity for the current thread at this parallel level.
4466     //
4467     th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
4468 # endif
4469 
4470     return retval;
4471 }
4472 
4473 
4474 int
4475 __kmp_aux_get_affinity(void **mask)
4476 {
4477     int gtid;
4478     int retval;
4479     kmp_info_t *th;
4480 
4481     if (! KMP_AFFINITY_CAPABLE()) {
4482         return -1;
4483     }
4484 
4485     gtid = __kmp_entry_gtid();
4486     th = __kmp_threads[gtid];
4487     KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4488 
4489     KA_TRACE(1000, ;{
4490         char buf[KMP_AFFIN_MASK_PRINT_LEN];
4491         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4492           th->th.th_affin_mask);
4493         __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
4494     });
4495 
4496     if (__kmp_env_consistency_check) {
4497         if ((mask == NULL) || (*mask == NULL)) {
4498             KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
4499         }
4500     }
4501 
4502 # if !KMP_OS_WINDOWS
4503 
4504     retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4505     KA_TRACE(1000, ;{
4506         char buf[KMP_AFFIN_MASK_PRINT_LEN];
4507         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4508           (kmp_affin_mask_t *)(*mask));
4509         __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
4510     });
4511     return retval;
4512 
4513 # else
4514 
4515     KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4516     return 0;
4517 
4518 # endif /* KMP_OS_WINDOWS */
4519 
4520 }
4521 
4522 int
4523 __kmp_aux_get_affinity_max_proc() {
    if (! KMP_AFFINITY_CAPABLE()) {
4525         return 0;
4526     }
4527 #if KMP_GROUP_AFFINITY
4528     if ( __kmp_num_proc_groups > 1 ) {
4529         return (int)(__kmp_num_proc_groups*sizeof(DWORD_PTR)*CHAR_BIT);
4530     }
4531 #endif
4532     return __kmp_xproc;
4533 }
4534 
4535 int
4536 __kmp_aux_set_affinity_mask_proc(int proc, void **mask)
4537 {
4539 
4540     if (! KMP_AFFINITY_CAPABLE()) {
4541         return -1;
4542     }
4543 
4544     KA_TRACE(1000, ;{
4545         int gtid = __kmp_entry_gtid();
4546         char buf[KMP_AFFIN_MASK_PRINT_LEN];
4547         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4548           (kmp_affin_mask_t *)(*mask));
4549         __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
4550           proc, gtid, buf);
4551     });
4552 
4553     if (__kmp_env_consistency_check) {
4554         if ((mask == NULL) || (*mask == NULL)) {
4555             KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4556         }
4557     }
4558 
4559     if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
4560         return -1;
4561     }
4562     if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4563         return -2;
4564     }
4565 
4566     KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4567     return 0;
4568 }
4569 
4570 
4571 int
4572 __kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
4573 {
4575 
4576     if (! KMP_AFFINITY_CAPABLE()) {
4577         return -1;
4578     }
4579 
4580     KA_TRACE(1000, ;{
4581         int gtid = __kmp_entry_gtid();
4582         char buf[KMP_AFFIN_MASK_PRINT_LEN];
4583         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4584           (kmp_affin_mask_t *)(*mask));
4585         __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
4586           proc, gtid, buf);
4587     });
4588 
4589     if (__kmp_env_consistency_check) {
4590         if ((mask == NULL) || (*mask == NULL)) {
4591             KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4592         }
4593     }
4594 
4595     if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
4596         return -1;
4597     }
4598     if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4599         return -2;
4600     }
4601 
4602     KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4603     return 0;
4604 }
4605 
4606 
4607 int
4608 __kmp_aux_get_affinity_mask_proc(int proc, void **mask)
4609 {
4611 
4612     if (! KMP_AFFINITY_CAPABLE()) {
4613         return -1;
4614     }
4615 
4616     KA_TRACE(1000, ;{
4617         int gtid = __kmp_entry_gtid();
4618         char buf[KMP_AFFIN_MASK_PRINT_LEN];
4619         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4620           (kmp_affin_mask_t *)(*mask));
4621         __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
4622           proc, gtid, buf);
4623     });
4624 
4625     if (__kmp_env_consistency_check) {
4626         if ((mask == NULL) || (*mask == NULL)) {
4627             KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
4628         }
4629     }
4630 
4631     if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
4632         return -1;
4633     }
4634     if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4635         return 0;
4636     }
4637 
4638     return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
4639 }
4640 
4641 
4642 // Dynamic affinity settings - Affinity balanced
4643 void __kmp_balanced_affinity( int tid, int nthreads )
4644 {
4645     bool fine_gran = true;
4646 
4647     switch (__kmp_affinity_gran) {
4648         case affinity_gran_fine:
4649         case affinity_gran_thread:
4650             break;
4651         case affinity_gran_core:
4652             if( __kmp_nThreadsPerCore > 1) {
4653                 fine_gran = false;
4654             }
4655             break;
4656         case affinity_gran_package:
4657             if( nCoresPerPkg > 1) {
4658                 fine_gran = false;
4659             }
4660             break;
4661         default:
4662             fine_gran = false;
4663     }
4664 
4665     if( __kmp_affinity_uniform_topology() ) {
4666         int coreID;
4667         int threadID;
        // Number of hardware threads per core on an HT machine
4669         int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4670         // Number of cores
4671         int ncores = __kmp_ncores;
4672         if( ( nPackages > 1 ) && ( __kmp_nth_per_core <= 1 ) ) {
4673             __kmp_nth_per_core = __kmp_avail_proc / nPackages;
4674             ncores = nPackages;
4675         }
4676         // How many threads will be bound to each core
4677         int chunk = nthreads / ncores;
        // How many cores will have an additional thread bound to them - "big cores"
4679         int big_cores = nthreads % ncores;
4680         // Number of threads on the big cores
4681         int big_nth = ( chunk + 1 ) * big_cores;
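        // Worked example: nthreads = 10 on ncores = 4 gives chunk = 2,
        // big_cores = 2, big_nth = 6; tids 0-5 go three per core onto
        // cores 0-1, tids 6-9 go two per core onto cores 2-3.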
4682         if( tid < big_nth ) {
4683             coreID = tid / (chunk + 1 );
4684             threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4685         } else { //tid >= big_nth
4686             coreID = ( tid - big_cores ) / chunk;
4687             threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
4688         }
4689 
4690         KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4691           "Illegal set affinity operation when not capable");
4692 
4693         kmp_affin_mask_t *mask;
4694         KMP_CPU_ALLOC_ON_STACK(mask);
4695         KMP_CPU_ZERO(mask);
4696 
4697         if( fine_gran ) {
4698             int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
4699             KMP_CPU_SET( osID, mask);
4700         } else {
4701             for( int i = 0; i < __kmp_nth_per_core; i++ ) {
4702                 int osID;
4703                 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
4704                 KMP_CPU_SET( osID, mask);
4705             }
4706         }
4707         if (__kmp_affinity_verbose) {
4708             char buf[KMP_AFFIN_MASK_PRINT_LEN];
4709             __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
4710             KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4711               tid, buf);
4712         }
4713         __kmp_set_system_affinity( mask, TRUE );
4714         KMP_CPU_FREE_FROM_STACK(mask);
4715     } else { // Non-uniform topology
4716 
4717         kmp_affin_mask_t *mask;
4718         KMP_CPU_ALLOC_ON_STACK(mask);
4719         KMP_CPU_ZERO(mask);
4720 
4721         int core_level = __kmp_affinity_find_core_level(address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
4722         int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
4723         int nth_per_core = __kmp_affinity_max_proc_per_core(address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
4724 
        // As a performance fast path, handle the special case nthreads == __kmp_avail_proc separately
4726         if( nthreads == __kmp_avail_proc ) {
4727             if( fine_gran ) {
4728                 int osID = address2os[ tid ].second;
4729                 KMP_CPU_SET( osID, mask);
4730             } else {
4731                 int core = __kmp_affinity_find_core(address2os, tid, __kmp_aff_depth - 1, core_level);
4732                 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4733                     int osID = address2os[ i ].second;
4734                     if( __kmp_affinity_find_core(address2os, i,  __kmp_aff_depth - 1, core_level) == core ) {
4735                         KMP_CPU_SET( osID, mask);
4736                     }
4737                 }
4738             }
4739         } else if( nthreads <= ncores ) {
4740 
4741             int core = 0;
4742             for( int i = 0; i < ncores; i++ ) {
                // Check whether this core has at least one available processor in procarr[]
                int in_mask = 0;
                for( int j = 0; j < nth_per_core; j++ ) {
                    if( procarr[ i * nth_per_core + j ] != -1 ) {
4747                         in_mask = 1;
4748                         break;
4749                     }
4750                 }
4751                 if( in_mask ) {
4752                     if( tid == core ) {
4753                         for( int j = 0; j < nth_per_core; j++ ) {
4754                             int osID = procarr[ i * nth_per_core + j ];
4755                             if( osID != -1 ) {
4756                                 KMP_CPU_SET( osID, mask );
4757                                 // For fine granularity it is enough to set the first available osID for this core
4758                                 if( fine_gran) {
4759                                     break;
4760                                 }
4761                             }
4762                         }
4763                         break;
4764                     } else {
4765                         core++;
4766                     }
4767                 }
4768             }
4769 
4770         } else { // nthreads > ncores
4771 
            // Array holding the number of available processors at each core
            int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores);
            // Array holding the number of cores with exactly x available processors
            int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
            // Array holding the number of cores with x to nth_per_core available processors
            int* ncores_with_x_to_max_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
4778 
4779             for( int i = 0; i <= nth_per_core; i++ ) {
4780                 ncores_with_x_procs[ i ] = 0;
4781                 ncores_with_x_to_max_procs[ i ] = 0;
4782             }
4783 
4784             for( int i = 0; i < ncores; i++ ) {
4785                 int cnt = 0;
4786                 for( int j = 0; j < nth_per_core; j++ ) {
4787                     if( procarr[ i * nth_per_core + j ] != -1 ) {
4788                         cnt++;
4789                     }
4790                 }
4791                 nproc_at_core[ i ] = cnt;
4792                 ncores_with_x_procs[ cnt ]++;
4793             }
4794 
4795             for( int i = 0; i <= nth_per_core; i++ ) {
4796                 for( int j = i; j <= nth_per_core; j++ ) {
4797                     ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
4798                 }
4799             }
4800 
4801             // Max number of processors
4802             int nproc = nth_per_core * ncores;
            // Array holding the number of threads assigned to each context
4804             int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4805             for( int i = 0; i < nproc; i++ ) {
4806                 newarr[ i ] = 0;
4807             }
4808 
4809             int nth = nthreads;
4810             int flag = 0;
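            // Deal the threads out in rounds: the first sweep (flag == 0)
            // claims only empty contexts, one per visited core, so threads
            // spread across cores first; later sweeps (flag != 0) stack
            // additional threads onto already-used contexts.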
4811             while( nth > 0 ) {
4812                 for( int j = 1; j <= nth_per_core; j++ ) {
4813                     int cnt = ncores_with_x_to_max_procs[ j ];
4814                     for( int i = 0; i < ncores; i++ ) {
                        // Skip cores with no available processors
4816                         if( nproc_at_core[ i ] == 0 ) {
4817                             continue;
4818                         }
4819                         for( int k = 0; k < nth_per_core; k++ ) {
4820                             if( procarr[ i * nth_per_core + k ] != -1 ) {
4821                                 if( newarr[ i * nth_per_core + k ] == 0 ) {
4822                                     newarr[ i * nth_per_core + k ] = 1;
4823                                     cnt--;
4824                                     nth--;
4825                                     break;
4826                                 } else {
4827                                     if( flag != 0 ) {
4828                                         newarr[ i * nth_per_core + k ] ++;
4829                                         cnt--;
4830                                         nth--;
4831                                         break;
4832                                     }
4833                                 }
4834                             }
4835                         }
4836                         if( cnt == 0 || nth == 0 ) {
4837                             break;
4838                         }
4839                     }
4840                     if( nth == 0 ) {
4841                         break;
4842                     }
4843                 }
4844                 flag = 1;
4845             }
4846             int sum = 0;
4847             for( int i = 0; i < nproc; i++ ) {
4848                 sum += newarr[ i ];
4849                 if( sum > tid ) {
4850                     if( fine_gran) {
4851                         int osID = procarr[ i ];
4852                         KMP_CPU_SET( osID, mask);
4853                     } else {
4854                         int coreID = i / nth_per_core;
4855                         for( int ii = 0; ii < nth_per_core; ii++ ) {
4856                             int osID = procarr[ coreID * nth_per_core + ii ];
4857                             if( osID != -1 ) {
4858                                 KMP_CPU_SET( osID, mask);
4859                             }
4860                         }
4861                     }
4862                     break;
4863                 }
4864             }
4865             __kmp_free( newarr );
4866         }
4867 
4868         if (__kmp_affinity_verbose) {
4869             char buf[KMP_AFFIN_MASK_PRINT_LEN];
4870             __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
4871             KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4872               tid, buf);
4873         }
4874         __kmp_set_system_affinity( mask, TRUE );
4875         KMP_CPU_FREE_FROM_STACK(mask);
4876     }
4877 }
4878 
4879 #if KMP_OS_LINUX
// We don't need this entry for Windows because
// the GetProcessAffinityMask() API is available there.
4882 //
// The intended usage is indicated by these steps (see the sketch below):
4884 // 1) The user gets the current affinity mask
4885 // 2) Then sets the affinity by calling this function
4886 // 3) Error check the return value
4887 // 4) Use non-OpenMP parallelization
4888 // 5) Reset the affinity to what was stored in step 1)
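//
// A minimal sketch of that sequence under Linux/glibc (user code, not part
// of this runtime; run_non_openmp_work() is a hypothetical placeholder):
//
//     #define _GNU_SOURCE
//     #include <pthread.h>
//     #include <sched.h>
//
//     cpu_set_t saved;
//     pthread_getaffinity_np(pthread_self(), sizeof(saved), &saved);  // 1)
//     if (kmp_set_thread_affinity_mask_initial() != 0) {              // 2), 3)
//         /* could not widen the mask; handle the error */
//     }
//     run_non_openmp_work();                                          // 4)
//     pthread_setaffinity_np(pthread_self(), sizeof(saved), &saved);  // 5)
//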
4889 #ifdef __cplusplus
4890 extern "C"
4891 #endif
4892 int
4893 kmp_set_thread_affinity_mask_initial()
// The function returns 0 on success,
//   -1 if the thread cannot be bound,
//   >0 (errno) if an error happened during binding.
4897 {
4898     int gtid = __kmp_get_gtid();
4899     if (gtid < 0) {
4900         // Do not touch non-omp threads
4901         KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: "
4902             "non-omp thread, returning\n"));
4903         return -1;
4904     }
4905     if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
4906         KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: "
4907             "affinity not initialized, returning\n"));
4908         return -1;
4909     }
4910     KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: "
4911         "set full mask for thread %d\n", gtid));
4912     KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
4913     return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
4914 }
4915 #endif
4916 
4917 #endif // KMP_AFFINITY_SUPPORTED
4918