1 /*
2  * kmp_affinity.cpp -- affinity management
3  * $Revision: 42810 $
4  * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
5  */
6 
7 
8 //===----------------------------------------------------------------------===//
9 //
10 //                     The LLVM Compiler Infrastructure
11 //
12 // This file is dual licensed under the MIT and the University of Illinois Open
13 // Source Licenses. See LICENSE.txt for details.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 
18 #include "kmp.h"
19 #include "kmp_i18n.h"
20 #include "kmp_io.h"
21 #include "kmp_str.h"
22 
23 
24 #if KMP_AFFINITY_SUPPORTED
25 
26 //
27 // Print the affinity mask to the character array in a pretty format.
28 //
29 char *
30 __kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
31 {
32     KMP_ASSERT(buf_len >= 40);
33     char *scan = buf;
34     char *end = buf + buf_len - 1;
35 
36     //
37     // Find first element / check for empty set.
38     //
39     size_t i;
40     for (i = 0; i < KMP_CPU_SETSIZE; i++) {
41         if (KMP_CPU_ISSET(i, mask)) {
42             break;
43         }
44     }
45     if (i == KMP_CPU_SETSIZE) {
46         sprintf(scan, "{<empty>}");
47         while (*scan != '\0') scan++;
48         KMP_ASSERT(scan <= end);
49         return buf;
50     }
51 
52     sprintf(scan, "{%ld", i);
53     while (*scan != '\0') scan++;
54     i++;
55     for (; i < KMP_CPU_SETSIZE; i++) {
56         if (! KMP_CPU_ISSET(i, mask)) {
57             continue;
58         }
59 
60         //
61         // Check for buffer overflow.  A string of the form ",<n>" will have
62         // at most 10 characters, plus we want to leave room to print ",...}"
63         // if the set is too large to print for a total of 15 characters.
64         // We already left room for '\0' in setting end.
65         //
66         if (end - scan < 15) {
67            break;
68         }
69         sprintf(scan, ",%-ld", i);
70         while (*scan != '\0') scan++;
71     }
72     if (i < KMP_CPU_SETSIZE) {
73         sprintf(scan, ",...");
74         while (*scan != '\0') scan++;
75     }
76     sprintf(scan, "}");
77     while (*scan != '\0') scan++;
78     KMP_ASSERT(scan <= end);
79     return buf;
80 }
81 
82 
83 void
84 __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
85 {
86     KMP_CPU_ZERO(mask);
87 
88 # if KMP_OS_WINDOWS && KMP_ARCH_X86_64
89 
90     if (__kmp_num_proc_groups > 1) {
91         int group;
92         struct GROUP_AFFINITY ga;
93         KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
94         for (group = 0; group < __kmp_num_proc_groups; group++) {
95             int i;
96             int num = __kmp_GetActiveProcessorCount(group);
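            // Procs in group g occupy the bit range [g * 64, g * 64 + num)
            // of the mask, where 64 == CHAR_BIT * sizeof(DWORD_PTR) on
            // 64-bit Windows* OS.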
97             for (i = 0; i < num; i++) {
98                 KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
99             }
100         }
101     }
102     else
103 
104 # endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */
105 
106     {
107         int proc;
108         for (proc = 0; proc < __kmp_xproc; proc++) {
109             KMP_CPU_SET(proc, mask);
110         }
111     }
112 }
113 
114 
115 //
116 // In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
117 // functions.
118 //
119 // The icc codegen emits sections with extremely long names, of the form
120 // ".gnu.linkonce.<mangled_name>".  There seems to have been a linker bug
121 // introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
122 // some sort of memory corruption or table overflow that is triggered by
123 // these long strings.  I checked the latest version of the linker -
124 // GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is not
125 // fixed.
126 //
127 // Unfortunately, my attempts to reproduce it in a smaller example have
128 // failed - I'm not sure what the prospects are of getting it fixed
129 // properly - but we need a reproducer smaller than all of libiomp.
130 //
131 // Work around the problem by avoiding inline constructors in such builds.
132 // We do this for all platforms, not just Linux* OS - non-inline functions are
// more debuggable and provide better coverage than inline functions.
134 // Use inline functions in shipping libs, for performance.
135 //
136 
137 # if !defined(KMP_DEBUG) && !defined(COVER)
138 
139 class Address {
140 public:
141     static const unsigned maxDepth = 32;
142     unsigned labels[maxDepth];
143     unsigned childNums[maxDepth];
144     unsigned depth;
145     unsigned leader;
146     Address(unsigned _depth)
147       : depth(_depth), leader(FALSE) {
148     }
149     Address &operator=(const Address &b) {
150         depth = b.depth;
151         for (unsigned i = 0; i < depth; i++) {
152             labels[i] = b.labels[i];
153             childNums[i] = b.childNums[i];
154         }
155         leader = FALSE;
156         return *this;
157     }
158     bool operator==(const Address &b) const {
159         if (depth != b.depth)
160             return false;
161         for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
163                 return false;
164         return true;
165     }
166     bool isClose(const Address &b, int level) const {
167         if (depth != b.depth)
168             return false;
169         if ((unsigned)level >= depth)
170             return true;
171         for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
173                 return false;
174         return true;
175     }
176     bool operator!=(const Address &b) const {
177         return !operator==(b);
178     }
179 };
180 
181 class AddrUnsPair {
182 public:
183     Address first;
184     unsigned second;
185     AddrUnsPair(Address _first, unsigned _second)
186       : first(_first), second(_second) {
187     }
188     AddrUnsPair &operator=(const AddrUnsPair &b)
189     {
190         first = b.first;
191         second = b.second;
192         return *this;
193     }
194 };
195 
196 # else
197 
198 class Address {
199 public:
200     static const unsigned maxDepth = 32;
201     unsigned labels[maxDepth];
202     unsigned childNums[maxDepth];
203     unsigned depth;
204     unsigned leader;
205     Address(unsigned _depth);
206     Address &operator=(const Address &b);
207     bool operator==(const Address &b) const;
208     bool isClose(const Address &b, int level) const;
209     bool operator!=(const Address &b) const;
210 };
211 
212 Address::Address(unsigned _depth)
213 {
214     depth = _depth;
215     leader = FALSE;
216 }
217 
218 Address &Address::operator=(const Address &b) {
219     depth = b.depth;
220     for (unsigned i = 0; i < depth; i++) {
221         labels[i] = b.labels[i];
222         childNums[i] = b.childNums[i];
223     }
224     leader = FALSE;
225     return *this;
226 }
227 
228 bool Address::operator==(const Address &b) const {
229     if (depth != b.depth)
230         return false;
231     for (unsigned i = 0; i < depth; i++)
        if (labels[i] != b.labels[i])
233             return false;
234     return true;
235 }
236 
237 bool Address::isClose(const Address &b, int level) const {
238     if (depth != b.depth)
239         return false;
240     if ((unsigned)level >= depth)
241         return true;
242     for (unsigned i = 0; i < (depth - level); i++)
        if (labels[i] != b.labels[i])
244             return false;
245     return true;
246 }
247 
248 bool Address::operator!=(const Address &b) const {
249     return !operator==(b);
250 }
251 
252 class AddrUnsPair {
253 public:
254     Address first;
255     unsigned second;
256     AddrUnsPair(Address _first, unsigned _second);
257     AddrUnsPair &operator=(const AddrUnsPair &b);
258 };
259 
260 AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
261   : first(_first), second(_second)
262 {
263 }
264 
265 AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
266 {
267     first = b.first;
268     second = b.second;
269     return *this;
270 }
271 
272 # endif /* !defined(KMP_DEBUG) && !defined(COVER) */
273 
274 
275 static int
276 __kmp_affinity_cmp_Address_labels(const void *a, const void *b)
277 {
278     const Address *aa = (const Address *)&(((AddrUnsPair *)a)
279       ->first);
280     const Address *bb = (const Address *)&(((AddrUnsPair *)b)
281       ->first);
282     unsigned depth = aa->depth;
283     unsigned i;
284     KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
286         if (aa->labels[i] < bb->labels[i]) return -1;
287         if (aa->labels[i] > bb->labels[i]) return 1;
288     }
289     return 0;
290 }
291 
292 
293 static int
294 __kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
295 {
296     const Address *aa = (const Address *)&(((AddrUnsPair *)a)
297       ->first);
298     const Address *bb = (const Address *)&(((AddrUnsPair *)b)
299       ->first);
300     unsigned depth = aa->depth;
301     unsigned i;
302     KMP_DEBUG_ASSERT(depth == bb->depth);
303     KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
304     KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
305     for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
306         int j = depth - i - 1;
307         if (aa->childNums[j] < bb->childNums[j]) return -1;
308         if (aa->childNums[j] > bb->childNums[j]) return 1;
309     }
310     for (; i < depth; i++) {
311         int j = i - __kmp_affinity_compact;
312         if (aa->childNums[j] < bb->childNums[j]) return -1;
313         if (aa->childNums[j] > bb->childNums[j]) return 1;
314     }
315     return 0;
316 }
317 
318 
319 //
320 // When sorting by labels, __kmp_affinity_assign_child_nums() must first be
321 // called to renumber the labels from [0..n] and place them into the child_num
322 // vector of the address object.  This is done in case the labels used for
323 // the children at one node of the hierarchy differ from those used for
324 // another node at the same level.  Example:  suppose the machine has 2 nodes
325 // with 2 packages each.  The first node contains packages 601 and 602, and
326 // second node contains packages 603 and 604.  If we try to sort the table
327 // for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
328 // because we are paying attention to the labels themselves, not the ordinal
329 // child numbers.  By using the child numbers in the sort, the result is
330 // {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
331 //
332 static void
333 __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
334   int numAddrs)
335 {
336     KMP_DEBUG_ASSERT(numAddrs > 0);
337     int depth = address2os->first.depth;
338     unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
339     unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
340       * sizeof(unsigned));
341     int labCt;
342     for (labCt = 0; labCt < depth; labCt++) {
343         address2os[0].first.childNums[labCt] = counts[labCt] = 0;
344         lastLabel[labCt] = address2os[0].first.labels[labCt];
345     }
346     int i;
347     for (i = 1; i < numAddrs; i++) {
348         for (labCt = 0; labCt < depth; labCt++) {
349             if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
350                 int labCt2;
351                 for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
352                     counts[labCt2] = 0;
353                     lastLabel[labCt2] = address2os[i].first.labels[labCt2];
354                 }
355                 counts[labCt]++;
356                 lastLabel[labCt] = address2os[i].first.labels[labCt];
357                 break;
358             }
359         }
360         for (labCt = 0; labCt < depth; labCt++) {
361             address2os[i].first.childNums[labCt] = counts[labCt];
362         }
363         for (; labCt < (int)Address::maxDepth; labCt++) {
364             address2os[i].first.childNums[labCt] = 0;
365         }
366     }
367 }
368 
369 
370 //
371 // All of the __kmp_affinity_create_*_map() routines should set
372 // __kmp_affinity_masks to a vector of affinity mask objects of length
373 // __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
374 // return the number of levels in the machine topology tree (zero if
375 // __kmp_affinity_type == affinity_none).
376 //
// All of the __kmp_affinity_create_*_map() routines should set *fullMask
// to the affinity mask for the initialization thread.  They need to save
// and restore the mask anyway, and the mask may be needed later, so saving
// it here is just an optimization that avoids calling
// __kmp_get_system_affinity() again.
381 //
382 static kmp_affin_mask_t *fullMask = NULL;
383 
384 kmp_affin_mask_t *
385 __kmp_affinity_get_fullMask() { return fullMask; }
386 
387 
388 static int nCoresPerPkg, nPackages;
389 int __kmp_nThreadsPerCore;
390 
391 //
392 // __kmp_affinity_uniform_topology() doesn't work when called from
393 // places which support arbitrarily many levels in the machine topology
// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
// and __kmp_affinity_create_x2apicid_map().
396 //
397 inline static bool
398 __kmp_affinity_uniform_topology()
399 {
400     return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
401 }
402 
403 
404 //
405 // Print out the detailed machine topology map, i.e. the physical locations
406 // of each OS proc.
407 //
408 static void
409 __kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
410   int pkgLevel, int coreLevel, int threadLevel)
411 {
412     int proc;
413 
414     KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
415     for (proc = 0; proc < len; proc++) {
416         int level;
417         kmp_str_buf_t buf;
418         __kmp_str_buf_init(&buf);
419         for (level = 0; level < depth; level++) {
420             if (level == threadLevel) {
421                 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
422             }
423             else if (level == coreLevel) {
424                 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
425             }
426             else if (level == pkgLevel) {
427                 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
428             }
429             else if (level > pkgLevel) {
430                 __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
431                   level - pkgLevel - 1);
432             }
433             else {
434                 __kmp_str_buf_print(&buf, "L%d ", level);
435             }
436             __kmp_str_buf_print(&buf, "%d ",
437               address2os[proc].first.labels[level]);
438         }
439         KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
440           buf.str);
441         __kmp_str_buf_free(&buf);
442     }
443 }
444 
445 
446 //
447 // If we don't know how to retrieve the machine's processor topology, or
448 // encounter an error in doing so, this routine is called to form a "flat"
449 // mapping of os thread id's <-> processor id's.
450 //
451 static int
452 __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
453   kmp_i18n_id_t *const msg_id)
454 {
455     *address2os = NULL;
456     *msg_id = kmp_i18n_null;
457 
458     //
    // Even if __kmp_affinity_type == affinity_none, this routine might still
    // be called to set __kmp_ht_enabled, & __kmp_ncores, as well as
461     // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
462     //
463     if (! KMP_AFFINITY_CAPABLE()) {
464         KMP_ASSERT(__kmp_affinity_type == affinity_none);
465         __kmp_ncores = nPackages = __kmp_xproc;
466         __kmp_nThreadsPerCore = nCoresPerPkg = 1;
467         __kmp_ht_enabled = FALSE;
468         if (__kmp_affinity_verbose) {
469             KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
470             KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
471             KMP_INFORM(Uniform, "KMP_AFFINITY");
472             KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
473               __kmp_nThreadsPerCore, __kmp_ncores);
474         }
475         return 0;
476     }
477 
478     //
479     // When affinity is off, this routine will still be called to set
480     // __kmp_ht_enabled, & __kmp_ncores, as well as __kmp_nThreadsPerCore,
481     // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
483     //
484     __kmp_ncores = nPackages = __kmp_avail_proc;
485     __kmp_nThreadsPerCore = nCoresPerPkg = 1;
486     __kmp_ht_enabled = FALSE;
487     if (__kmp_affinity_verbose) {
488         char buf[KMP_AFFIN_MASK_PRINT_LEN];
489         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
490 
491         KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
492         if (__kmp_affinity_respect_mask) {
493             KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
494         } else {
495             KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
496         }
497         KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
498         KMP_INFORM(Uniform, "KMP_AFFINITY");
499         KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
500           __kmp_nThreadsPerCore, __kmp_ncores);
501     }
502     if (__kmp_affinity_type == affinity_none) {
503         return 0;
504     }
505 
506     //
    // Construct the data structure to be returned.
508     //
509     *address2os = (AddrUnsPair*)
510       __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
511     int avail_ct = 0;
512     unsigned int i;
513     for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
514         //
515         // Skip this proc if it is not included in the machine model.
516         //
517         if (! KMP_CPU_ISSET(i, fullMask)) {
518             continue;
519         }
520 
521         Address addr(1);
522         addr.labels[0] = i;
523         (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
524     }
525     if (__kmp_affinity_verbose) {
526         KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
527     }
528 
529     if (__kmp_affinity_gran_levels < 0) {
530         //
531         // Only the package level is modeled in the machine topology map,
532         // so the #levels of granularity is either 0 or 1.
533         //
534         if (__kmp_affinity_gran > affinity_gran_package) {
535             __kmp_affinity_gran_levels = 1;
536         }
537         else {
538             __kmp_affinity_gran_levels = 0;
539         }
540     }
541     return 1;
542 }
543 
544 
545 # if KMP_OS_WINDOWS && KMP_ARCH_X86_64
546 
547 //
548 // If multiple Windows* OS processor groups exist, we can create a 2-level
549 // topology map with the groups at level 0 and the individual procs at
550 // level 1.
551 //
552 // This facilitates letting the threads float among all procs in a group,
553 // if granularity=group (the default when there are multiple groups).
554 //
555 static int
556 __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
557   kmp_i18n_id_t *const msg_id)
558 {
559     *address2os = NULL;
560     *msg_id = kmp_i18n_null;
561 
562     //
563     // If we don't have multiple processor groups, return now.
564     // The flat mapping will be used.
565     //
566     if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
567         // FIXME set *msg_id
568         return -1;
569     }
570 
571     //
    // Construct the data structure to be returned.
573     //
574     *address2os = (AddrUnsPair*)
575       __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
576     int avail_ct = 0;
577     int i;
578     for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
579         //
580         // Skip this proc if it is not included in the machine model.
581         //
582         if (! KMP_CPU_ISSET(i, fullMask)) {
583             continue;
584         }
585 
586         Address addr(2);
587         addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
588         addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
589         (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
590 
591         if (__kmp_affinity_verbose) {
592             KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
593               addr.labels[1]);
594         }
595     }
596 
597     if (__kmp_affinity_gran_levels < 0) {
598         if (__kmp_affinity_gran == affinity_gran_group) {
599             __kmp_affinity_gran_levels = 1;
600         }
601         else if ((__kmp_affinity_gran == affinity_gran_fine)
602           || (__kmp_affinity_gran == affinity_gran_thread)) {
603             __kmp_affinity_gran_levels = 0;
604         }
605         else {
606             const char *gran_str = NULL;
607             if (__kmp_affinity_gran == affinity_gran_core) {
608                 gran_str = "core";
609             }
610             else if (__kmp_affinity_gran == affinity_gran_package) {
611                 gran_str = "package";
612             }
613             else if (__kmp_affinity_gran == affinity_gran_node) {
614                 gran_str = "node";
615             }
616             else {
617                 KMP_ASSERT(0);
618             }
619 
            // Warning: can't use affinity granularity "gran" with the group
            // topology method; falling back to granularity=thread.
621             __kmp_affinity_gran_levels = 0;
622         }
623     }
624     return 2;
625 }
626 
627 # endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */
628 
629 
630 # if KMP_ARCH_X86 || KMP_ARCH_X86_64
631 
632 static int
633 __kmp_cpuid_mask_width(int count) {
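    //
    // Return the number of bits needed to distinguish "count" values, i.e.
    // the ceiling of log2(count).  For example,
    // __kmp_cpuid_mask_width(6) == 3.
    //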
634     int r = 0;
635 
636     while((1<<r) < count)
637         ++r;
638     return r;
639 }
640 
641 
642 class apicThreadInfo {
643 public:
644     unsigned osId;              // param to __kmp_affinity_bind_thread
645     unsigned apicId;            // from cpuid after binding
646     unsigned maxCoresPerPkg;    //      ""
647     unsigned maxThreadsPerPkg;  //      ""
648     unsigned pkgId;             // inferred from above values
649     unsigned coreId;            //      ""
650     unsigned threadId;          //      ""
651 };
652 
653 
654 static int
655 __kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
656 {
657     const apicThreadInfo *aa = (const apicThreadInfo *)a;
658     const apicThreadInfo *bb = (const apicThreadInfo *)b;
659     if (aa->osId < bb->osId) return -1;
660     if (aa->osId > bb->osId) return 1;
661     return 0;
662 }
663 
664 
665 static int
666 __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
667 {
668     const apicThreadInfo *aa = (const apicThreadInfo *)a;
669     const apicThreadInfo *bb = (const apicThreadInfo *)b;
670     if (aa->pkgId < bb->pkgId) return -1;
671     if (aa->pkgId > bb->pkgId) return 1;
672     if (aa->coreId < bb->coreId) return -1;
673     if (aa->coreId > bb->coreId) return 1;
674     if (aa->threadId < bb->threadId) return -1;
675     if (aa->threadId > bb->threadId) return 1;
676     return 0;
677 }
678 
679 
680 //
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available OS procs, binds the
// current thread to each one in turn, and retrieves the Apic Id for each
// thread context using the cpuid instruction.
685 //
686 static int
687 __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
688   kmp_i18n_id_t *const msg_id)
689 {
690     int rc;
691     *address2os = NULL;
692     *msg_id = kmp_i18n_null;
693 
694 #  if KMP_MIC
695     {
696         // The code below will use cpuid(4).
697         // Check if cpuid(4) is supported.
698         // FIXME? - this really doesn't need to be specific to MIC.
699         kmp_cpuid buf;
700         __kmp_x86_cpuid(0, 0, &buf);
701         if (buf.eax < 4) {
702             *msg_id = kmp_i18n_str_NoLeaf4Support;
703             return -1;
704         }
705     }
706 #  endif // KMP_MIC
707 
708     //
709     // Even if __kmp_affinity_type == affinity_none, this routine is still
710     // called to set __kmp_ht_enabled, & __kmp_ncores, as well as
711     // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
712     //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are not
    // capable of calling __kmp_get_system_affinity()/__kmp_set_system_affinity(),
    // then we need to do something else.
717     //
718     if (! KMP_AFFINITY_CAPABLE()) {
719         //
720         // Hack to try and infer the machine topology using only the data
721         // available from cpuid on the current thread, and __kmp_xproc.
722         //
723         KMP_ASSERT(__kmp_affinity_type == affinity_none);
724 
725         //
726         // Get an upper bound on the number of threads per package using
727         // cpuid(1).
728         //
        // On some OS/chip combinations where HT is supported by the chip
730         // but is disabled, this value will be 2 on a single core chip.
731         // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
732         //
733         kmp_cpuid buf;
734         __kmp_x86_cpuid(1, 0, &buf);
735         int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
736         if (maxThreadsPerPkg == 0) {
737             maxThreadsPerPkg = 1;
738         }
739 
740         //
741         // The num cores per pkg comes from cpuid(4).
742         // 1 must be added to the encoded value.
743         //
        // The author of cpu_count.cpp treated this as only an upper bound
745         // on the number of cores, but I haven't seen any cases where it
746         // was greater than the actual number of cores, so we will treat
747         // it as exact in this block of code.
748         //
749         // First, we need to check if cpuid(4) is supported on this chip.
750         // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
751         // has the value n or greater.
752         //
753         __kmp_x86_cpuid(0, 0, &buf);
754         if (buf.eax >= 4) {
755             __kmp_x86_cpuid(4, 0, &buf);
756             nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
757         }
758         else {
759             nCoresPerPkg = 1;
760         }
761 
762         //
        // There is no way to reliably tell if HT is enabled without issuing
        // the cpuid instruction from every thread and correlating the cpuid
        // info, so if the machine is not affinity capable, we assume that HT
766         // is off.  We have seen quite a few machines where maxThreadsPerPkg
767         // is 2, yet the machine does not support HT.
768         //
769         // - Older OSes are usually found on machines with older chips, which
770         //   do not support HT.
771         //
        // - The performance penalty for mistakenly identifying a machine as
        //   HT when it isn't (which results in blocktime being incorrectly set
        //   to 0) is greater than the penalty for mistakenly identifying
775         //   a machine as being 1 thread/core when it is really HT enabled
776         //   (which results in blocktime being incorrectly set to a positive
777         //   value).
778         //
779         __kmp_ncores = __kmp_xproc;
780         nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
781         __kmp_nThreadsPerCore = 1;
782         __kmp_ht_enabled = FALSE;
783         if (__kmp_affinity_verbose) {
784             KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
785             KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
786             if (__kmp_affinity_uniform_topology()) {
787                 KMP_INFORM(Uniform, "KMP_AFFINITY");
788             } else {
789                 KMP_INFORM(NonUniform, "KMP_AFFINITY");
790             }
791             KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
792               __kmp_nThreadsPerCore, __kmp_ncores);
793         }
794         return 0;
795     }
796 
797     //
798     //
799     // From here on, we can assume that it is safe to call
800     // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
801     // even if __kmp_affinity_type = affinity_none.
802     //
803 
804     //
805     // Save the affinity mask for the current thread.
806     //
807     kmp_affin_mask_t *oldMask;
808     KMP_CPU_ALLOC(oldMask);
809     KMP_ASSERT(oldMask != NULL);
810     __kmp_get_system_affinity(oldMask, TRUE);
811 
812     //
813     // Run through each of the available contexts, binding the current thread
814     // to it, and obtaining the pertinent information using the cpuid instr.
815     //
816     // The relevant information is:
817     //
818     // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
    //    has a unique Apic Id, which is of the form pkg# : core# : thread#.
820     //
821     // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1).  The
822     //    value of this field determines the width of the core# + thread#
823     //    fields in the Apic Id.  It is also an upper bound on the number
    //    of threads per package, but it has been verified that situations
    //    happen where it is not exact.  In particular, on certain OS/chip
    //    combinations where Intel(R) Hyper-Threading Technology is supported
    //    by the chip but has been disabled, the value of this field will be
    //    2 (for a single core chip).  On other OS/chip combinations
    //    supporting Intel(R) Hyper-Threading Technology, the value of this
    //    field will be 1 when it is disabled and 2 when it is enabled.
833     //
834     // Max Cores Per Pkg:  Bits 26:31 of eax after issuing cpuid(4).  The
835     //    value of this field (+1) determines the width of the core# field in
836     //    the Apic Id.  The comments in "cpucount.cpp" say that this value is
837     //    an upper bound, but the IA-32 architecture manual says that it is
838     //    exactly the number of cores per package, and I haven't seen any
839     //    case where it wasn't.
840     //
841     // From this information, deduce the package Id, core Id, and thread Id,
842     // and set the corresponding fields in the apicThreadInfo struct.
843     //
844     unsigned i;
845     apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
846       __kmp_avail_proc * sizeof(apicThreadInfo));
847     unsigned nApics = 0;
848     for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
849         //
850         // Skip this proc if it is not included in the machine model.
851         //
852         if (! KMP_CPU_ISSET(i, fullMask)) {
853             continue;
854         }
855         KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
856 
857         __kmp_affinity_bind_thread(i);
858         threadInfo[nApics].osId = i;
859 
860         //
861         // The apic id and max threads per pkg come from cpuid(1).
862         //
863         kmp_cpuid buf;
864         __kmp_x86_cpuid(1, 0, &buf);
        if (! ((buf.edx >> 9) & 1)) {   // cpuid(1) edx bit 9 == APIC on chip
866             __kmp_set_system_affinity(oldMask, TRUE);
867             __kmp_free(threadInfo);
868             KMP_CPU_FREE(oldMask);
869             *msg_id = kmp_i18n_str_ApicNotPresent;
870             return -1;
871         }
872         threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
873         threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
874         if (threadInfo[nApics].maxThreadsPerPkg == 0) {
875             threadInfo[nApics].maxThreadsPerPkg = 1;
876         }
877 
878         //
879         // Max cores per pkg comes from cpuid(4).
880         // 1 must be added to the encoded value.
881         //
882         // First, we need to check if cpuid(4) is supported on this chip.
883         // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
884         // has the value n or greater.
885         //
886         __kmp_x86_cpuid(0, 0, &buf);
887         if (buf.eax >= 4) {
888             __kmp_x86_cpuid(4, 0, &buf);
889             threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
890         }
891         else {
892             threadInfo[nApics].maxCoresPerPkg = 1;
893         }
894 
895         //
896         // Infer the pkgId / coreId / threadId using only the info
897         // obtained locally.
898         //
899         int widthCT = __kmp_cpuid_mask_width(
900           threadInfo[nApics].maxThreadsPerPkg);
901         threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
902 
903         int widthC = __kmp_cpuid_mask_width(
904           threadInfo[nApics].maxCoresPerPkg);
905         int widthT = widthCT - widthC;
906         if (widthT < 0) {
907             //
908             // I've never seen this one happen, but I suppose it could, if
909             // the cpuid instruction on a chip was really screwed up.
910             // Make sure to restore the affinity mask before the tail call.
911             //
912             __kmp_set_system_affinity(oldMask, TRUE);
913             __kmp_free(threadInfo);
914             KMP_CPU_FREE(oldMask);
915             *msg_id = kmp_i18n_str_InvalidCpuidInfo;
916             return -1;
917         }
918 
919         int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId =
          (threadInfo[nApics].apicId >> widthT) & maskC;
922 
923         int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;
925 
926         nApics++;
927     }
928 
929     //
930     // We've collected all the info we need.
931     // Restore the old affinity mask for this thread.
932     //
933     __kmp_set_system_affinity(oldMask, TRUE);
934 
935     //
936     // If there's only one thread context to bind to, form an Address object
937     // with depth 1 and return immediately (or, if affinity is off, set
938     // address2os to NULL and return).
939     //
940     // If it is configured to omit the package level when there is only a
941     // single package, the logic at the end of this routine won't work if
942     // there is only a single thread - it would try to form an Address
943     // object with depth 0.
944     //
945     KMP_ASSERT(nApics > 0);
946     if (nApics == 1) {
947         __kmp_ncores = nPackages = 1;
948         __kmp_nThreadsPerCore = nCoresPerPkg = 1;
949         __kmp_ht_enabled = FALSE;
950         if (__kmp_affinity_verbose) {
951             char buf[KMP_AFFIN_MASK_PRINT_LEN];
952             __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
953 
954             KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
955             if (__kmp_affinity_respect_mask) {
956                 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
957             } else {
958                 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
959             }
960             KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
961             KMP_INFORM(Uniform, "KMP_AFFINITY");
962             KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
963               __kmp_nThreadsPerCore, __kmp_ncores);
964         }
965 
966         if (__kmp_affinity_type == affinity_none) {
967             __kmp_free(threadInfo);
968             KMP_CPU_FREE(oldMask);
969             return 0;
970         }
971 
972         *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
973         Address addr(1);
974         addr.labels[0] = threadInfo[0].pkgId;
975         (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
976 
977         if (__kmp_affinity_gran_levels < 0) {
978             __kmp_affinity_gran_levels = 0;
979         }
980 
981         if (__kmp_affinity_verbose) {
982             __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
983         }
984 
985         __kmp_free(threadInfo);
986         KMP_CPU_FREE(oldMask);
987         return 1;
988     }
989 
990     //
991     // Sort the threadInfo table by physical Id.
992     //
993     qsort(threadInfo, nApics, sizeof(*threadInfo),
994       __kmp_affinity_cmp_apicThreadInfo_phys_id);
995 
996     //
997     // The table is now sorted by pkgId / coreId / threadId, but we really
998     // don't know the radix of any of the fields.  pkgId's may be sparsely
999     // assigned among the chips on a system.  Although coreId's are usually
1000     // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
1001     // [0..threadsPerCore-1], we don't want to make any such assumptions.
1002     //
1003     // For that matter, we don't know what coresPerPkg and threadsPerCore
1004     // (or the total # packages) are at this point - we want to determine
1005     // that now.  We only have an upper bound on the first two figures.
1006     //
1007     // We also perform a consistency check at this point: the values returned
1008     // by the cpuid instruction for any thread bound to a given package had
1009     // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
1010     //
1011     nPackages = 1;
1012     nCoresPerPkg = 1;
1013     __kmp_nThreadsPerCore = 1;
1014     unsigned nCores = 1;
1015 
    unsigned pkgCt = 1;                         // to determine radices
1017     unsigned lastPkgId = threadInfo[0].pkgId;
1018     unsigned coreCt = 1;
1019     unsigned lastCoreId = threadInfo[0].coreId;
1020     unsigned threadCt = 1;
1021     unsigned lastThreadId = threadInfo[0].threadId;
1022 
                                                // intra-pkg consistency checks
1024     unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
1025     unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
1026 
1027     for (i = 1; i < nApics; i++) {
1028         if (threadInfo[i].pkgId != lastPkgId) {
1029             nCores++;
1030             pkgCt++;
1031             lastPkgId = threadInfo[i].pkgId;
1032             if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
1033             coreCt = 1;
1034             lastCoreId = threadInfo[i].coreId;
1035             if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1036             threadCt = 1;
1037             lastThreadId = threadInfo[i].threadId;
1038 
1039             //
1040             // This is a different package, so go on to the next iteration
1041             // without doing any consistency checks.  Reset the consistency
1042             // check vars, though.
1043             //
1044             prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
1045             prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
1046             continue;
1047         }
1048 
1049         if (threadInfo[i].coreId != lastCoreId) {
1050             nCores++;
1051             coreCt++;
1052             lastCoreId = threadInfo[i].coreId;
1053             if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1054             threadCt = 1;
1055             lastThreadId = threadInfo[i].threadId;
1056         }
1057         else if (threadInfo[i].threadId != lastThreadId) {
1058             threadCt++;
1059             lastThreadId = threadInfo[i].threadId;
1060         }
1061         else {
1062             __kmp_free(threadInfo);
1063             KMP_CPU_FREE(oldMask);
1064             *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
1065             return -1;
1066         }
1067 
1068         //
        // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
        // fields agree for all the threads bound to a given package.
1071         //
1072         if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
1073           || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
1074             __kmp_free(threadInfo);
1075             KMP_CPU_FREE(oldMask);
1076             *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1077             return -1;
1078         }
1079     }
1080     nPackages = pkgCt;
1081     if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
1082     if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1083 
1084     //
1085     // When affinity is off, this routine will still be called to set
1086     // __kmp_ht_enabled, & __kmp_ncores, as well as __kmp_nThreadsPerCore,
1087     // nCoresPerPkg, & nPackages.  Make sure all these vars are set
1088     // correctly, and return now if affinity is not enabled.
1089     //
1090     __kmp_ht_enabled = (__kmp_nThreadsPerCore > 1);
1091     __kmp_ncores = nCores;
1092     if (__kmp_affinity_verbose) {
1093         char buf[KMP_AFFIN_MASK_PRINT_LEN];
1094         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1095 
1096         KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
1097         if (__kmp_affinity_respect_mask) {
1098             KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1099         } else {
1100             KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1101         }
1102         KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1103         if (__kmp_affinity_uniform_topology()) {
1104             KMP_INFORM(Uniform, "KMP_AFFINITY");
1105         } else {
1106             KMP_INFORM(NonUniform, "KMP_AFFINITY");
1107         }
1108         KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1109           __kmp_nThreadsPerCore, __kmp_ncores);
1110 
1111     }
1112 
1113     if (__kmp_affinity_type == affinity_none) {
1114         __kmp_free(threadInfo);
1115         KMP_CPU_FREE(oldMask);
1116         return 0;
1117     }
1118 
1119     //
1120     // Now that we've determined the number of packages, the number of cores
1121     // per package, and the number of threads per core, we can construct the
1122     // data structure that is to be returned.
1123     //
1124     int pkgLevel = 0;
1125     int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
1126     int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
1127     unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
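    // For example, on a machine with single-core packages and HT enabled,
    // coreLevel == -1, threadLevel == 1, and depth == 2.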
1128 
1129     KMP_ASSERT(depth > 0);
1130     *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
1131 
1132     for (i = 0; i < nApics; ++i) {
1133         Address addr(depth);
1134         unsigned os = threadInfo[i].osId;
1135         int d = 0;
1136 
1137         if (pkgLevel >= 0) {
1138             addr.labels[d++] = threadInfo[i].pkgId;
1139         }
1140         if (coreLevel >= 0) {
1141             addr.labels[d++] = threadInfo[i].coreId;
1142         }
1143         if (threadLevel >= 0) {
1144             addr.labels[d++] = threadInfo[i].threadId;
1145         }
1146         (*address2os)[i] = AddrUnsPair(addr, os);
1147     }
1148 
1149     if (__kmp_affinity_gran_levels < 0) {
1150         //
1151         // Set the granularity level based on what levels are modeled
1152         // in the machine topology map.
1153         //
1154         __kmp_affinity_gran_levels = 0;
1155         if ((threadLevel >= 0)
1156           && (__kmp_affinity_gran > affinity_gran_thread)) {
1157             __kmp_affinity_gran_levels++;
1158         }
1159         if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1160             __kmp_affinity_gran_levels++;
1161         }
1162         if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
1163             __kmp_affinity_gran_levels++;
1164         }
1165     }
1166 
1167     if (__kmp_affinity_verbose) {
1168         __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
1169           coreLevel, threadLevel);
1170     }
1171 
1172     __kmp_free(threadInfo);
1173     KMP_CPU_FREE(oldMask);
1174     return depth;
1175 }
1176 
1177 
1178 //
1179 // Intel(R) microarchitecture code name Nehalem, Dunnington and later
1180 // architectures support a newer interface for specifying the x2APIC Ids,
1181 // based on cpuid leaf 11.
1182 //
1183 static int
1184 __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
1185   kmp_i18n_id_t *const msg_id)
1186 {
1187     kmp_cpuid buf;
1188 
1189     *address2os = NULL;
1190     *msg_id = kmp_i18n_null;
1191 
1192     //
1193     // Check to see if cpuid leaf 11 is supported.
1194     //
1195     __kmp_x86_cpuid(0, 0, &buf);
1196     if (buf.eax < 11) {
1197         *msg_id = kmp_i18n_str_NoLeaf11Support;
1198         return -1;
1199     }
1200     __kmp_x86_cpuid(11, 0, &buf);
1201     if (buf.ebx == 0) {
1202         *msg_id = kmp_i18n_str_NoLeaf11Support;
1203         return -1;
1204     }
1205 
1206     //
1207     // Find the number of levels in the machine topology.  While we're at it,
1208     // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg.  We will
1209     // try to get more accurate values later by explicitly counting them,
1210     // but get reasonable defaults now, in case we return early.
1211     //
1212     int level;
1213     int threadLevel = -1;
1214     int coreLevel = -1;
1215     int pkgLevel = -1;
1216     __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
1217 
1218     for (level = 0;; level++) {
1219         if (level > 31) {
1220             //
1221             // FIXME: Hack for DPD200163180
1222             //
1223             // If level is big then something went wrong -> exiting
1224             //
1225             // There could actually be 32 valid levels in the machine topology,
1226             // but so far, the only machine we have seen which does not exit
1227             // this loop before iteration 32 has fubar x2APIC settings.
1228             //
1229             // For now, just reject this case based upon loop trip count.
1230             //
1231             *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1232             return -1;
1233         }
1234         __kmp_x86_cpuid(11, level, &buf);
1235         if (buf.ebx == 0) {
1236             if (pkgLevel < 0) {
1237                 //
1238                 // Will infer nPackages from __kmp_xproc
1239                 //
1240                 pkgLevel = level;
1241                 level++;
1242             }
1243             break;
1244         }
1245         int kind = (buf.ecx >> 8) & 0xff;
1246         if (kind == 1) {
1247             //
1248             // SMT level
1249             //
1250             threadLevel = level;
1251             coreLevel = -1;
1252             pkgLevel = -1;
1253             __kmp_nThreadsPerCore = buf.ebx & 0xff;
1254             if (__kmp_nThreadsPerCore == 0) {
1255                 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1256                 return -1;
1257             }
1258         }
1259         else if (kind == 2) {
1260             //
1261             // core level
1262             //
1263             coreLevel = level;
1264             pkgLevel = -1;
1265             nCoresPerPkg = buf.ebx & 0xff;
1266             if (nCoresPerPkg == 0) {
1267                 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1268                 return -1;
1269             }
1270         }
1271         else {
1272             if (level <= 0) {
1273                 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1274                 return -1;
1275             }
1276             if (pkgLevel >= 0) {
1277                 continue;
1278             }
1279             pkgLevel = level;
1280             nPackages = buf.ebx & 0xff;
1281             if (nPackages == 0) {
1282                 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1283                 return -1;
1284             }
1285         }
1286     }
1287     int depth = level;
1288 
1289     //
1290     // In the above loop, "level" was counted from the finest level (usually
1291     // thread) to the coarsest.  The caller expects that we will place the
1292     // labels in (*address2os)[].first.labels[] in the inverse order, so
1293     // we need to invert the vars saying which level means what.
1294     //
1295     if (threadLevel >= 0) {
1296         threadLevel = depth - threadLevel - 1;
1297     }
1298     if (coreLevel >= 0) {
1299         coreLevel = depth - coreLevel - 1;
1300     }
1301     KMP_DEBUG_ASSERT(pkgLevel >= 0);
1302     pkgLevel = depth - pkgLevel - 1;
1303 
1304     //
1305     // The algorithm used starts by setting the affinity to each available
1306     // thread and retrieving info from the cpuid instruction, so if we are not
    // capable of calling __kmp_get_system_affinity()/__kmp_set_system_affinity(),
1308     // then we need to do something else - use the defaults that we calculated
1309     // from issuing cpuid without binding to each proc.
1310     //
1311     if (! KMP_AFFINITY_CAPABLE())
1312     {
1313         //
1314         // Hack to try and infer the machine topology using only the data
1315         // available from cpuid on the current thread, and __kmp_xproc.
1316         //
1317         KMP_ASSERT(__kmp_affinity_type == affinity_none);
1318 
1319         __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
1320         nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
1321         __kmp_ht_enabled = (__kmp_nThreadsPerCore > 1);
1322         if (__kmp_affinity_verbose) {
1323             KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
1324             KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1325             if (__kmp_affinity_uniform_topology()) {
1326                 KMP_INFORM(Uniform, "KMP_AFFINITY");
1327             } else {
1328                 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1329             }
1330             KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1331               __kmp_nThreadsPerCore, __kmp_ncores);
1332         }
1333         return 0;
1334     }
1335 
1336     //
1337     //
1338     // From here on, we can assume that it is safe to call
1339     // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
1340     // even if __kmp_affinity_type = affinity_none.
1341     //
1342 
1343     //
1344     // Save the affinity mask for the current thread.
1345     //
1346     kmp_affin_mask_t *oldMask;
1347     KMP_CPU_ALLOC(oldMask);
1348     __kmp_get_system_affinity(oldMask, TRUE);
1349 
1350     //
1351     // Allocate the data structure to be returned.
1352     //
1353     AddrUnsPair *retval = (AddrUnsPair *)
1354       __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
1355 
1356     //
1357     // Run through each of the available contexts, binding the current thread
1358     // to it, and obtaining the pertinent information using the cpuid instr.
1359     //
1360     unsigned int proc;
1361     int nApics = 0;
1362     for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
1363         //
1364         // Skip this proc if it is not included in the machine model.
1365         //
1366         if (! KMP_CPU_ISSET(proc, fullMask)) {
1367             continue;
1368         }
1369         KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
1370 
1371         __kmp_affinity_bind_thread(proc);
1372 
1373         //
        // Extract the labels for each level in the machine topology map
1375         // from the Apic ID.
1376         //
1377         Address addr(depth);
1378         int prev_shift = 0;
1379 
1380         for (level = 0; level < depth; level++) {
1381             __kmp_x86_cpuid(11, level, &buf);
1382             unsigned apicId = buf.edx;
1383             if (buf.ebx == 0) {
1384                 if (level != depth - 1) {
1385                     KMP_CPU_FREE(oldMask);
1386                     *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1387                     return -1;
1388                 }
1389                 addr.labels[depth - level - 1] = apicId >> prev_shift;
1390                 level++;
1391                 break;
1392             }
1393             int shift = buf.eax & 0x1f;
1394             int mask = (1 << shift) - 1;
1395             addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
1396             prev_shift = shift;
1397         }
1398         if (level != depth) {
1399             KMP_CPU_FREE(oldMask);
1400             *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1401             return -1;
1402         }
1403 
1404         retval[nApics] = AddrUnsPair(addr, proc);
1405         nApics++;
1406     }
1407 
1408     //
1409     // We've collected all the info we need.
1410     // Restore the old affinity mask for this thread.
1411     //
1412     __kmp_set_system_affinity(oldMask, TRUE);
1413 
1414     //
1415     // If there's only one thread context to bind to, return now.
1416     //
1417     KMP_ASSERT(nApics > 0);
1418     if (nApics == 1) {
1419         __kmp_ncores = nPackages = 1;
1420         __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1421         __kmp_ht_enabled = FALSE;
1422         if (__kmp_affinity_verbose) {
1423             char buf[KMP_AFFIN_MASK_PRINT_LEN];
1424             __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1425 
1426             KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1427             if (__kmp_affinity_respect_mask) {
1428                 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1429             } else {
1430                 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1431             }
1432             KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1433             KMP_INFORM(Uniform, "KMP_AFFINITY");
1434             KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1435               __kmp_nThreadsPerCore, __kmp_ncores);
1436         }
1437 
1438         if (__kmp_affinity_type == affinity_none) {
1439             __kmp_free(retval);
1440             KMP_CPU_FREE(oldMask);
1441             return 0;
1442         }
1443 
1444         //
1445         // Form an Address object which only includes the package level.
1446         //
1447         Address addr(1);
1448         addr.labels[0] = retval[0].first.labels[pkgLevel];
1449         retval[0].first = addr;
1450 
1451         if (__kmp_affinity_gran_levels < 0) {
1452             __kmp_affinity_gran_levels = 0;
1453         }
1454 
1455         if (__kmp_affinity_verbose) {
1456             __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
1457         }
1458 
1459         *address2os = retval;
1460         KMP_CPU_FREE(oldMask);
1461         return 1;
1462     }
1463 
1464     //
1465     // Sort the table by physical Id.
1466     //
1467     qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
1468 
1469     //
1470     // Find the radix at each of the levels.
1471     //
1472     unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1473     unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1474     unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1475     unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1476     for (level = 0; level < depth; level++) {
1477         totals[level] = 1;
1478         maxCt[level] = 1;
1479         counts[level] = 1;
1480         last[level] = retval[0].first.labels[level];
1481     }
1482 
1483     //
1484     // From here on, the iteration variable "level" runs from the finest
1485     // level to the coarsest, i.e. we iterate forward through
1486     // (*address2os)[].first.labels[] - in the previous loops, we iterated
1487     // backwards.
1488     //
1489     for (proc = 1; (int)proc < nApics; proc++) {
1490         int level;
1491         for (level = 0; level < depth; level++) {
1492             if (retval[proc].first.labels[level] != last[level]) {
1493                 int j;
1494                 for (j = level + 1; j < depth; j++) {
1495                     totals[j]++;
1496                     counts[j] = 1;
                    // The line below (now commented out) caused incorrect
                    // topology information to be printed when the maximum
                    // count for a level (maxCt[level]) was encountered
                    // before a smaller count while walking the array.
                    // For example, if pkg0 has 4 cores and pkg1 has 2 cores,
                    // resetting here would leave maxCt[1] == 2 when it must
                    // be 4.
                    // TODO!!! Check if it can be commented safely
                    //maxCt[j] = 1;
                    last[j] = retval[proc].first.labels[j];
                }
                totals[level]++;
                counts[level]++;
                if (counts[level] > maxCt[level]) {
                    maxCt[level] = counts[level];
                }
                last[level] = retval[proc].first.labels[level];
                break;
            }
            else if (level == depth - 1) {
                __kmp_free(last);
                __kmp_free(maxCt);
                __kmp_free(counts);
                __kmp_free(totals);
                __kmp_free(retval);
                KMP_CPU_FREE(oldMask);
                *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
                return -1;
            }
        }
    }
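
    //
    // A worked example of the radix count above (hypothetical machine, not
    // taken from any real system): with depth == 3 and sorted labels
    // (pkg, core, thread) for 8 thread contexts
    //   (0,0,0) (0,0,1) (0,1,0) (0,1,1) (1,0,0) (1,0,1) (1,1,0) (1,1,1)
    // the loop ends with totals = { 2, 4, 8 } and maxCt = { 2, 2, 2 }:
    // 2 packages, 4 cores, 8 thread contexts, at most 2 cores per package
    // and 2 threads per core.
    //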

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ht_enabled, & __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return if affinity is not enabled.
    //
    if (threadLevel >= 0) {
        __kmp_nThreadsPerCore = maxCt[threadLevel];
    }
    else {
        __kmp_nThreadsPerCore = 1;
    }
    __kmp_ht_enabled = (__kmp_nThreadsPerCore > 1);

    nPackages = totals[pkgLevel];

    if (coreLevel >= 0) {
        __kmp_ncores = totals[coreLevel];
        nCoresPerPkg = maxCt[coreLevel];
    }
    else {
        __kmp_ncores = nPackages;
        nCoresPerPkg = 1;
    }

    //
    // Check to see if the machine topology is uniform
    //
    unsigned prod = maxCt[0];
    for (level = 1; level < depth; level++) {
       prod *= maxCt[level];
    }
    bool uniform = (prod == totals[level - 1]);
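
    //
    // For instance (hypothetical, continuing the example above): if pkg0 had
    // 4 cores and pkg1 had 2, with 1 thread per core, then prod would be
    // 2 x 4 x 1 == 8 while totals[depth - 1] == 6, so the topology would be
    // reported as non-uniform.
    //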

    //
    // Print the machine topology summary.
    //
    if (__kmp_affinity_verbose) {
        char mask[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (uniform) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }

        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", totals[0]);
        for (level = 1; level <= pkgLevel; level++) {
            __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
        }
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(last);
        __kmp_free(maxCt);
        __kmp_free(counts);
        __kmp_free(totals);
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Find any levels with radix 1, and remove them from the map
    // (except for the package level).
    //
    int new_depth = 0;
    for (level = 0; level < depth; level++) {
        if ((maxCt[level] == 1) && (level != pkgLevel)) {
           continue;
        }
        new_depth++;
    }

    //
    // If we are removing any levels, allocate a new vector to return,
    // and copy the relevant information to it.
    //
    if (new_depth != depth) {
        AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
          sizeof(AddrUnsPair) * nApics);
        for (proc = 0; (int)proc < nApics; proc++) {
            Address addr(new_depth);
            new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
        }
        int new_level = 0;
        for (level = 0; level < depth; level++) {
            if ((maxCt[level] == 1) && (level != pkgLevel)) {
               if (level == threadLevel) {
                   threadLevel = -1;
               }
               else if ((threadLevel >= 0) && (level < threadLevel)) {
                   threadLevel--;
               }
               if (level == coreLevel) {
                   coreLevel = -1;
               }
               else if ((coreLevel >= 0) && (level < coreLevel)) {
                   coreLevel--;
               }
               if (level < pkgLevel) {
                   pkgLevel--;
               }
               continue;
            }
            for (proc = 0; (int)proc < nApics; proc++) {
                new_retval[proc].first.labels[new_level]
                  = retval[proc].first.labels[level];
            }
            new_level++;
        }

        __kmp_free(retval);
        retval = new_retval;
        depth = new_depth;
    }
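
    //
    // Example of the removal step (hypothetical): on a machine with one
    // thread per core, maxCt at the thread level is 1, so that level is
    // dropped; depth shrinks from 3 to 2 and threadLevel becomes -1.
    // pkgLevel and coreLevel keep their indices, because the removed level
    // lies at a higher (finer) index than both.
    //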

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels++;
        }
    }
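
    //
    // For example (hypothetical): with granularity=core on a machine that
    // models a thread level, only the first test fires, so
    // __kmp_affinity_gran_levels ends up as 1 - the single finest (thread)
    // level is ignored when threads are grouped into masks by
    // __kmp_create_masks().
    //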

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(last);
    __kmp_free(maxCt);
    __kmp_free(counts);
    __kmp_free(totals);
    KMP_CPU_FREE(oldMask);
    *address2os = retval;
    return depth;
}


# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */


#define osIdIndex       0
#define threadIdIndex   1
#define coreIdIndex     2
#define pkgIdIndex      3
#define nodeIdIndex     4

typedef unsigned *ProcCpuInfo;
static unsigned maxIndex = pkgIdIndex;
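
//
// Each record parsed from the file is a small array of unsigned values,
// indexed by the constants above.  A hypothetical record for OS proc 5 on
// package 1, core 2, thread context 0 would be:
//   { [osIdIndex] = 5, [threadIdIndex] = 0, [coreIdIndex] = 2,
//     [pkgIdIndex] = 1 }
// with any node_<n> ids stored at nodeIdIndex and above.  Significance
// increases with the index, which the comparison routines below rely on.
//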


static int
__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
{
    const unsigned *aa = (const unsigned *)a;
    const unsigned *bb = (const unsigned *)b;
    if (aa[osIdIndex] < bb[osIdIndex]) return -1;
    if (aa[osIdIndex] > bb[osIdIndex]) return 1;
    return 0;
}


static int
__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
{
    unsigned i;
    const unsigned *aa = *((const unsigned **)a);
    const unsigned *bb = *((const unsigned **)b);
    for (i = maxIndex; ; i--) {
        if (aa[i] < bb[i]) return -1;
        if (aa[i] > bb[i]) return 1;
        if (i == osIdIndex) break;
    }
    return 0;
}
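
//
// Illustration (hypothetical records): the physical-id comparison starts at
// the most significant field, so { os=5, thr=1, core=0, pkg=0 } sorts before
// { os=0, thr=0, core=0, pkg=1 } - package ids are compared first, and the
// os id only breaks ties when all topology fields match.
//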


//
// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
// affinity map.
//
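// For reference, the fields recognized below look like this in a typical
// x86 Linux /proc/cpuinfo (values are illustrative only):
//
//   processor       : 0
//   physical id     : 0
//   core id         : 0
//
// A blank line terminates each processor record.  Unrecognized fields are
// skipped; "thread id" and "node_<n> id" are accepted for alternate files
// that use the same layout.
//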
static int
__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
  kmp_i18n_id_t *const msg_id, FILE *f)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Scan the file once, counting the number of "processor" (osId) fields,
    // and finding the highest value of <n> for any node_<n> field.
    //
    char buf[256];
    unsigned num_records = 0;
    while (! feof(f)) {
        buf[sizeof(buf) - 1] = 1;
        if (! fgets(buf, sizeof(buf), f)) {
            //
            // Read error, presumably because of EOF
            //
            break;
        }

        char s1[] = "processor";
        if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
            num_records++;
            continue;
        }

        //
        // FIXME - this will match "node_<n> <garbage>"
        //
        unsigned level;
        if (sscanf(buf, "node_%u id", &level) == 1) {
            if (nodeIdIndex + level >= maxIndex) {
                maxIndex = nodeIdIndex + level;
            }
            continue;
        }
    }

    //
    // Check for empty file / no valid processor records, or too many.
    // The number of records can't exceed the number of valid bits in the
    // affinity mask.
    //
    if (num_records == 0) {
        *line = 0;
        *msg_id = kmp_i18n_str_NoProcRecords;
        return -1;
    }
    if (num_records > (unsigned)__kmp_xproc) {
        *line = 0;
        *msg_id = kmp_i18n_str_TooManyProcRecords;
        return -1;
    }

    //
    // Set the file pointer back to the beginning, so that we can scan the
    // file again, this time performing a full parse of the data.
    // Allocate a vector of ProcCpuInfo objects, where we will place the data.
    // Adding an extra element at the end allows us to remove a lot of extra
    // checks for termination conditions.
    //
    if (fseek(f, 0, SEEK_SET) != 0) {
        *line = 0;
        *msg_id = kmp_i18n_str_CantRewindCpuinfo;
        return -1;
    }

    //
    // Allocate the array of records to store the proc info in.  The dummy
    // element at the end makes the logic in filling them out easier to code.
    //
    unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
      * sizeof(unsigned *));
    unsigned i;
    for (i = 0; i <= num_records; i++) {
        threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
          * sizeof(unsigned));
    }

#define CLEANUP_THREAD_INFO \
    for (i = 0; i <= num_records; i++) {                                \
        __kmp_free(threadInfo[i]);                                      \
    }                                                                   \
    __kmp_free(threadInfo);

    //
    // A value of UINT_MAX means that we didn't find the field
    //
    unsigned __index;

#define INIT_PROC_INFO(p) \
    for (__index = 0; __index <= maxIndex; __index++) {                 \
        (p)[__index] = UINT_MAX;                                        \
    }

    for (i = 0; i <= num_records; i++) {
        INIT_PROC_INFO(threadInfo[i]);
    }

    unsigned num_avail = 0;
    *line = 0;
    while (! feof(f)) {
        //
        // Create an inner scoping level, so that all the goto targets at the
        // end of the loop appear in an outer scoping level.  This avoids
        // warnings about jumping past an initialization to a target in the
        // same block.
        //
        {
            buf[sizeof(buf) - 1] = 1;
            bool long_line = false;
            if (! fgets(buf, sizeof(buf), f)) {
                //
                // Read error, presumably because of EOF
                //
                // If there is valid data in threadInfo[num_avail], then fake
                // a blank line to ensure that the last address gets parsed.
                //
                bool valid = false;
                for (i = 0; i <= maxIndex; i++) {
                    if (threadInfo[num_avail][i] != UINT_MAX) {
                        valid = true;
                    }
                }
                if (! valid) {
                    break;
                }
                buf[0] = 0;
            } else if (!buf[sizeof(buf) - 1]) {
                //
                // The line is longer than the buffer.  Set a flag and don't
                // emit an error if we were going to ignore the line, anyway.
                //
                long_line = true;

#define CHECK_LINE \
    if (long_line) {                                                    \
        CLEANUP_THREAD_INFO;                                            \
        *msg_id = kmp_i18n_str_LongLineCpuinfo;                         \
        return -1;                                                      \
    }
            }
            (*line)++;

            char s1[] = "processor";
            if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s1) - 1, ':');
                unsigned val;
                if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][osIdIndex] = val;
#if KMP_OS_LINUX && USE_SYSFS_INFO
                char path[256];
                snprintf(path, sizeof(path),
                    "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
                    threadInfo[num_avail][osIdIndex]);
                __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);

                snprintf(path, sizeof(path),
                    "/sys/devices/system/cpu/cpu%u/topology/core_id",
                    threadInfo[num_avail][osIdIndex]);
                __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
                continue;
#else
            }
            char s2[] = "physical id";
            if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s2) - 1, ':');
                unsigned val;
                if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][pkgIdIndex] = val;
                continue;
            }
            char s3[] = "core id";
            if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s3) - 1, ':');
                unsigned val;
                if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][coreIdIndex] = val;
                continue;
#endif // KMP_OS_LINUX && USE_SYSFS_INFO
            }
            char s4[] = "thread id";
            if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s4) - 1, ':');
                unsigned val;
                if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][threadIdIndex] = val;
                continue;
            }
            unsigned level;
            if (sscanf(buf, "node_%u id", &level) == 1) {
                CHECK_LINE;
                // Searching from offset sizeof(s4) - 1 is a harmless
                // heuristic here: strchr scans forward, and the ':' always
                // lies at or past that offset in a "node_<n> id" line.
                char *p = strchr(buf + sizeof(s4) - 1, ':');
                unsigned val;
                if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
                KMP_ASSERT(nodeIdIndex + level <= maxIndex);
                if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][nodeIdIndex + level] = val;
                continue;
            }

            //
            // We didn't recognize the leading token on the line.
            // There are lots of leading tokens that we don't recognize -
            // if the line isn't empty, go on to the next line.
            //
            if ((*buf != 0) && (*buf != '\n')) {
                //
                // If the line is longer than the buffer, read characters
                // until we find a newline.
                //
                if (long_line) {
                    int ch;
                    while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
                }
                continue;
            }

            //
            // A newline has signalled the end of the processor record.
            // Check that there aren't too many procs specified.
            //
            if (num_avail == (unsigned)__kmp_xproc) {
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_TooManyEntries;
                return -1;
            }

            //
            // Check for missing fields.  The osId field must be there, and we
            // currently require that the physical id field is specified, also.
            //
            if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_MissingProcField;
                return -1;
            }
            if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_MissingPhysicalIDField;
                return -1;
            }

            //
            // Skip this proc if it is not included in the machine model.
            //
            if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
                INIT_PROC_INFO(threadInfo[num_avail]);
                continue;
            }

            //
            // We have a successful parse of this proc's info.
            // Increment the counter, and prepare for the next proc.
            //
            num_avail++;
            KMP_ASSERT(num_avail <= num_records);
            INIT_PROC_INFO(threadInfo[num_avail]);
        }
        continue;

        no_val:
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingValCpuinfo;
        return -1;

        dup_field:
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
        return -1;
    }
    *line = 0;

# if KMP_MIC && REDUCE_TEAM_SIZE
    unsigned teamSize = 0;
# endif // KMP_MIC && REDUCE_TEAM_SIZE

    // TODO: check for num_records == __kmp_xproc?

    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    // If it is configured to omit the package level when there is only a
    // single package, the logic at the end of this routine won't work if
    // there is only a single thread - it would try to form an Address
    // object with depth 0.
    //
    KMP_ASSERT(num_avail > 0);
    KMP_ASSERT(num_avail <= num_records);
    if (num_avail == 1) {
        __kmp_ncores = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
        __kmp_ht_enabled = FALSE;
        if (__kmp_affinity_verbose) {
            if (! KMP_AFFINITY_CAPABLE()) {
                KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
                KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                char buf[KMP_AFFIN_MASK_PRINT_LEN];
                __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                  fullMask);
                KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
                if (__kmp_affinity_respect_mask) {
                    KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
                } else {
                    KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
                }
                KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            int index;
            kmp_str_buf_t buf;
            __kmp_str_buf_init(&buf);
            __kmp_str_buf_print(&buf, "1");
            for (index = maxIndex - 1; index > pkgIdIndex; index--) {
                __kmp_str_buf_print(&buf, " x 1");
            }
            KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
            __kmp_str_buf_free(&buf);
        }

        if (__kmp_affinity_type == affinity_none) {
            CLEANUP_THREAD_INFO;
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0][pkgIdIndex];
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        CLEANUP_THREAD_INFO;
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, num_avail, sizeof(*threadInfo),
      __kmp_affinity_cmp_ProcCpuInfo_phys_id);

    //
    // The table is now sorted by pkgId / coreId / threadId, but we really
    // don't know the radix of any of the fields.  pkgId's may be sparsely
    // assigned among the chips on a system.  Although coreId's are usually
    // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
    // [0..threadsPerCore-1], we don't want to make any such assumptions.
    //
    // For that matter, we don't know what coresPerPkg and threadsPerCore
    // (or the total # packages) are at this point - we want to determine
    // that now.  We only have an upper bound on the first two figures.
    //
    unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));
    unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));
    unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));
    unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));

    bool assign_thread_ids = false;
    unsigned threadIdCt;
    unsigned index;

    restart_radix_check:
    threadIdCt = 0;

    //
    // Initialize the counter arrays with data from threadInfo[0].
    //
    if (assign_thread_ids) {
        if (threadInfo[0][threadIdIndex] == UINT_MAX) {
            threadInfo[0][threadIdIndex] = threadIdCt++;
        }
        else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
            threadIdCt = threadInfo[0][threadIdIndex] + 1;
        }
    }
    for (index = 0; index <= maxIndex; index++) {
        counts[index] = 1;
        maxCt[index] = 1;
        totals[index] = 1;
        lastId[index] = threadInfo[0][index];
    }

    //
    // Run through the rest of the OS procs.
    //
    for (i = 1; i < num_avail; i++) {
        //
        // Find the most significant index whose id differs
        // from the id for the previous OS proc.
        //
        for (index = maxIndex; index >= threadIdIndex; index--) {
            if (assign_thread_ids && (index == threadIdIndex)) {
                //
                // Auto-assign the thread id field if it wasn't specified.
                //
                if (threadInfo[i][threadIdIndex] == UINT_MAX) {
                    threadInfo[i][threadIdIndex] = threadIdCt++;
                }

                //
                // Apparently the thread id field was specified for some
                // entries and not others.  Start the thread id counter
                // off at the next higher thread id.
                //
                else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
                    threadIdCt = threadInfo[i][threadIdIndex] + 1;
                }
            }
            if (threadInfo[i][index] != lastId[index]) {
                //
                // Run through all indices which are less significant,
                // and reset the counts to 1.
                //
                // At all levels up to and including index, we need to
                // increment the totals and record the last id.
                //
                unsigned index2;
                for (index2 = threadIdIndex; index2 < index; index2++) {
                    totals[index2]++;
                    if (counts[index2] > maxCt[index2]) {
                        maxCt[index2] = counts[index2];
                    }
                    counts[index2] = 1;
                    lastId[index2] = threadInfo[i][index2];
                }
                counts[index]++;
                totals[index]++;
                lastId[index] = threadInfo[i][index];

                if (assign_thread_ids && (index > threadIdIndex)) {

# if KMP_MIC && REDUCE_TEAM_SIZE
                    //
                    // The default team size is the total #threads in the machine
                    // minus 1 thread for every core that has 3 or more threads.
                    //
                    teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
# endif // KMP_MIC && REDUCE_TEAM_SIZE

                    //
                    // Restart the thread counter, as we are on a new core.
                    //
                    threadIdCt = 0;

                    //
                    // Auto-assign the thread id field if it wasn't specified.
                    //
                    if (threadInfo[i][threadIdIndex] == UINT_MAX) {
                        threadInfo[i][threadIdIndex] = threadIdCt++;
                    }

                    //
                    // Apparently the thread id field was specified for some
                    // entries and not others.  Start the thread id counter
                    // off at the next higher thread id.
                    //
                    else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
                        threadIdCt = threadInfo[i][threadIdIndex] + 1;
                    }
                }
                break;
            }
        }
        if (index < threadIdIndex) {
            //
            // If thread ids were specified, it is an error if they are not
            // unique.  Also, check that we haven't already restarted the
            // loop (to be safe - shouldn't need to).
            //
            if ((threadInfo[i][threadIdIndex] != UINT_MAX)
              || assign_thread_ids) {
                __kmp_free(lastId);
                __kmp_free(totals);
                __kmp_free(maxCt);
                __kmp_free(counts);
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
                return -1;
            }

            //
            // If the thread ids were not specified and we see entries
            // that are duplicates, start the loop over and assign the
            // thread ids manually.
            //
            assign_thread_ids = true;
            goto restart_radix_check;
        }
    }
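
    //
    // A sketch of the auto-assignment pass (hypothetical input): if two
    // records share pkgId == 0 and coreId == 0 but neither specifies a
    // "thread id", the first pass lands in the block above with
    // index < threadIdIndex, restarts at restart_radix_check with
    // assign_thread_ids == true, and the second pass numbers them
    // threadId 0 and 1, resetting the counter to 0 each time a coarser
    // field changes (i.e. on every new core).
    //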

# if KMP_MIC && REDUCE_TEAM_SIZE
    //
    // The default team size is the total #threads in the machine
    // minus 1 thread for every core that has 3 or more threads.
    //
    teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
# endif // KMP_MIC && REDUCE_TEAM_SIZE

    for (index = threadIdIndex; index <= maxIndex; index++) {
        if (counts[index] > maxCt[index]) {
            maxCt[index] = counts[index];
        }
    }

    __kmp_nThreadsPerCore = maxCt[threadIdIndex];
    nCoresPerPkg = maxCt[coreIdIndex];
    nPackages = totals[pkgIdIndex];

    //
    // Check to see if the machine topology is uniform
    //
    unsigned prod = totals[maxIndex];
    for (index = threadIdIndex; index < maxIndex; index++) {
       prod *= maxCt[index];
    }
    bool uniform = (prod == totals[threadIdIndex]);

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ht_enabled, & __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ht_enabled = (maxCt[threadIdIndex] > 1); // threads per core > 1
    __kmp_ncores = totals[coreIdIndex];

    if (__kmp_affinity_verbose) {
        if (! KMP_AFFINITY_CAPABLE()) {
            KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (uniform) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
        }
        else {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
            KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (uniform) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
        }
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
        for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
            __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
        }
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
          maxCt[threadIdIndex], __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

# if KMP_MIC && REDUCE_TEAM_SIZE
    //
    // Set the default team size.
    //
    if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
        __kmp_dflt_team_nth = teamSize;
        KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
          __kmp_dflt_team_nth));
    }
# endif // KMP_MIC && REDUCE_TEAM_SIZE

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(lastId);
        __kmp_free(totals);
        __kmp_free(maxCt);
        __kmp_free(counts);
        CLEANUP_THREAD_INFO;
        return 0;
    }

    //
    // Count the number of levels which have more nodes at that level than
    // at the parent's level (with there being an implicit root node of
    // the top level).  This is equivalent to saying that there is at least
    // one node at this level which has a sibling.  These levels are in the
    // map, and the package level is always in the map.
    //
    bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
    for (index = threadIdIndex; index < maxIndex; index++) {
        KMP_ASSERT(totals[index] >= totals[index + 1]);
        inMap[index] = (totals[index] > totals[index + 1]);
    }
    inMap[maxIndex] = (totals[maxIndex] > 1);
    inMap[pkgIdIndex] = true;

    int depth = 0;
    for (index = threadIdIndex; index <= maxIndex; index++) {
        if (inMap[index]) {
            depth++;
        }
    }
    KMP_ASSERT(depth > 0);
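
    //
    // Example of the level-collapsing rule (hypothetical): with 1 package,
    // 4 cores, and 1 thread per core, totals[threadIdIndex] == 4,
    // totals[coreIdIndex] == 4, and totals[pkgIdIndex] == 1.  The thread
    // level has no more nodes than the core level, so inMap[threadIdIndex]
    // is false and depth becomes 2 (package and core only).
    //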

    //
    // Construct the data structure that is to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
    int pkgLevel = -1;
    int coreLevel = -1;
    int threadLevel = -1;

    for (i = 0; i < num_avail; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i][osIdIndex];
        int src_index;
        int dst_index = 0;

        for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
            if (! inMap[src_index]) {
                continue;
            }
            addr.labels[dst_index] = threadInfo[i][src_index];
            if (src_index == pkgIdIndex) {
                pkgLevel = dst_index;
            }
            else if (src_index == coreIdIndex) {
                coreLevel = dst_index;
            }
            else if (src_index == threadIdIndex) {
                threadLevel = dst_index;
            }
            dst_index++;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        unsigned src_index;
        __kmp_affinity_gran_levels = 0;
        for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
            if (! inMap[src_index]) {
                continue;
            }
            switch (src_index) {
                case threadIdIndex:
                    if (__kmp_affinity_gran > affinity_gran_thread) {
                        __kmp_affinity_gran_levels++;
                    }
                    break;

                case coreIdIndex:
                    if (__kmp_affinity_gran > affinity_gran_core) {
                        __kmp_affinity_gran_levels++;
                    }
                    break;

                case pkgIdIndex:
                    if (__kmp_affinity_gran > affinity_gran_package) {
                        __kmp_affinity_gran_levels++;
                    }
                    break;
            }
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(inMap);
    __kmp_free(lastId);
    __kmp_free(totals);
    __kmp_free(maxCt);
    __kmp_free(counts);
    CLEANUP_THREAD_INFO;
    return depth;
}


//
// Create and return a table of affinity masks, indexed by OS thread ID.
// This routine handles OR'ing together all the affinity masks of threads
// that are sufficiently close, if granularity > fine.
//
static kmp_affin_mask_t *
__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
  AddrUnsPair *address2os, unsigned numAddrs)
{
    //
    // First form a table of affinity masks in order of OS thread id.
    //
    unsigned depth;
    unsigned maxOsId;
    unsigned i;

    KMP_ASSERT(numAddrs > 0);
    depth = address2os[0].first.depth;

    maxOsId = 0;
    for (i = 0; i < numAddrs; i++) {
        unsigned osId = address2os[i].second;
        if (osId > maxOsId) {
            maxOsId = osId;
        }
    }
    kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
      (maxOsId + 1) * __kmp_affin_mask_size);

    //
    // Sort the address2os table according to physical order.  Doing so
    // will put all threads on the same core/package/node in consecutive
    // locations.
    //
    qsort(address2os, numAddrs, sizeof(*address2os),
      __kmp_affinity_cmp_Address_labels);

    KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
    if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
        KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
    }
    if (__kmp_affinity_gran_levels >= (int)depth) {
        if (__kmp_affinity_verbose || (__kmp_affinity_warnings
          && (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffThreadsMayMigrate);
        }
    }

    //
    // Run through the table, forming the masks for all threads on each
    // core.  Threads on the same core will have identical "Address"
    // objects, not considering the last level, which must be the thread
    // id.  All threads on a core will appear consecutively.
    //
    unsigned unique = 0;
    unsigned j = 0;                             // index of 1st thread on core
    unsigned leader = 0;
    Address *leaderAddr = &(address2os[0].first);
    kmp_affin_mask_t *sum
      = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
    KMP_CPU_ZERO(sum);
    KMP_CPU_SET(address2os[0].second, sum);
    for (i = 1; i < numAddrs; i++) {
        //
        // If this thread is sufficiently close to the leader (within the
        // granularity setting), then set the bit for this os thread in the
        // affinity mask for this group, and go on to the next thread.
        //
        if (leaderAddr->isClose(address2os[i].first,
          __kmp_affinity_gran_levels)) {
            KMP_CPU_SET(address2os[i].second, sum);
            continue;
        }

        //
        // For every thread in this group, copy the mask to the thread's
        // entry in the osId2Mask table.  Mark the first address as a
        // leader.
        //
        for (; j < i; j++) {
            unsigned osId = address2os[j].second;
            KMP_DEBUG_ASSERT(osId <= maxOsId);
            kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
            KMP_CPU_COPY(mask, sum);
            address2os[j].first.leader = (j == leader);
        }
        unique++;

        //
        // Start a new mask.
        //
        leader = i;
        leaderAddr = &(address2os[i].first);
        KMP_CPU_ZERO(sum);
        KMP_CPU_SET(address2os[i].second, sum);
    }

    //
    // For every thread in the last group, copy the mask to the thread's
    // entry in the osId2Mask table.
    //
    for (; j < i; j++) {
        unsigned osId = address2os[j].second;
        KMP_DEBUG_ASSERT(osId <= maxOsId);
        kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
        KMP_CPU_COPY(mask, sum);
        address2os[j].first.leader = (j == leader);
    }
    unique++;

    *maxIndex = maxOsId;
    *numUnique = unique;
    return osId2Mask;
}
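

//
// For example (hypothetical mapping): with __kmp_affinity_gran_levels == 1
// and two thread contexts per core, OS procs 0 and 1 on the same core have
// addresses that differ only in the ignored thread level, so isClose()
// returns true and both of their entries in osId2Mask end up as the same
// two-bit mask {0,1}; numUnique counts one mask per core.
//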


//
// Stuff for the affinity proclist parsers.  It's easier to declare these vars
// as file-static than to try and pass them through the calling sequence of
// the recursive-descent OMP_PLACES parser.
//
static kmp_affin_mask_t *newMasks;
static int numNewMasks;
static int nextNewMask;

#define ADD_MASK(_mask) \
    {                                                                   \
        if (nextNewMask >= numNewMasks) {                               \
            numNewMasks *= 2;                                           \
            newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
              numNewMasks * __kmp_affin_mask_size);                     \
        }                                                               \
        KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));    \
        nextNewMask++;                                                  \
    }

#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
    {                                                                   \
        if (((_osId) > _maxOsId) ||                                     \
          (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX(_osId2Mask, (_osId))))) {\
            if (__kmp_affinity_verbose || (__kmp_affinity_warnings      \
              && (__kmp_affinity_type != affinity_none))) {             \
                KMP_WARNING(AffIgnoreInvalidProcID, _osId);             \
            }                                                           \
        }                                                               \
        else {                                                          \
            ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId)));               \
        }                                                               \
    }


//
// Re-parse the proclist (for the explicit affinity type), and form the list
// of affinity newMasks indexed by gtid.
//
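// A sketch of the accepted syntax (hypothetical setting, per the parser
// below): KMP_AFFINITY="explicit,proclist=[0,3,{5,6},8-11:2]" yields one
// mask per comma-separated item - OS proc 0, proc 3, the union {5,6}, and
// the strided range 8,10 - with invalid or unavailable proc ids dropped
// with a warning.
//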
static void
__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
  unsigned int *out_numMasks, const char *proclist,
  kmp_affin_mask_t *osId2Mask, int maxOsId)
{
    const char *scan = proclist;
    const char *next = proclist;

    //
    // We use malloc() for the temporary mask vector,
    // so that we can use realloc() to extend it.
    //
    numNewMasks = 2;
    newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
      * __kmp_affin_mask_size);
    nextNewMask = 0;
    kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
      __kmp_affin_mask_size);
    int setSize = 0;

    for (;;) {
        int start, end, stride;

        SKIP_WS(scan);
        next = scan;
        if (*next == '\0') {
            break;
        }

        if (*next == '{') {
            int num;
            setSize = 0;
            next++;     // skip '{'
            SKIP_WS(next);
            scan = next;

            //
            // Read the first integer in the set.
            //
            KMP_ASSERT2((*next >= '0') && (*next <= '9'),
              "bad explicit proc list");
            SKIP_DIGITS(next);
            num = __kmp_str_to_int(scan, *next);
            KMP_ASSERT2(num >= 0, "bad explicit proc list");

            //
            // Copy the mask for that osId to the sum (union) mask.
            //
            if ((num > maxOsId) ||
              (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
                if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                  && (__kmp_affinity_type != affinity_none))) {
                    KMP_WARNING(AffIgnoreInvalidProcID, num);
                }
                KMP_CPU_ZERO(sumMask);
            }
            else {
                KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
                setSize = 1;
            }

            for (;;) {
                //
                // Check for end of set.
                //
                SKIP_WS(next);
                if (*next == '}') {
                    next++;     // skip '}'
                    break;
                }

                //
                // Skip optional comma.
                //
                if (*next == ',') {
                    next++;
                }
                SKIP_WS(next);

                //
                // Read the next integer in the set.
                //
                scan = next;
                KMP_ASSERT2((*next >= '0') && (*next <= '9'),
                  "bad explicit proc list");

                SKIP_DIGITS(next);
                num = __kmp_str_to_int(scan, *next);
                KMP_ASSERT2(num >= 0, "bad explicit proc list");

                //
                // Add the mask for that osId to the sum mask.
                //
                if ((num > maxOsId) ||
                  (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
                    if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                      && (__kmp_affinity_type != affinity_none))) {
                        KMP_WARNING(AffIgnoreInvalidProcID, num);
                    }
                }
                else {
                    KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
                    setSize++;
                }
            }
            if (setSize > 0) {
                ADD_MASK(sumMask);
            }

            SKIP_WS(next);
            if (*next == ',') {
                next++;
            }
            scan = next;
            continue;
        }

        //
        // Read the first integer.
        //
        KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
        SKIP_DIGITS(next);
        start = __kmp_str_to_int(scan, *next);
        KMP_ASSERT2(start >= 0, "bad explicit proc list");
        SKIP_WS(next);

        //
        // If this isn't a range, then add a mask to the list and go on.
        //
        if (*next != '-') {
            ADD_MASK_OSID(start, osId2Mask, maxOsId);

            //
            // Skip optional comma.
            //
            if (*next == ',') {
                next++;
            }
            scan = next;
            continue;
        }

        //
        // This is a range.  Skip over the '-' and read in the 2nd int.
        //
        next++;         // skip '-'
        SKIP_WS(next);
        scan = next;
        KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
        SKIP_DIGITS(next);
        end = __kmp_str_to_int(scan, *next);
        KMP_ASSERT2(end >= 0, "bad explicit proc list");

        //
        // Check for a stride parameter
        //
        stride = 1;
        SKIP_WS(next);
        if (*next == ':') {
            //
            // A stride is specified.  Skip over the ':' and read the 3rd int.
            //
            int sign = +1;
            next++;         // skip ':'
            SKIP_WS(next);
            scan = next;
            if (*next == '-') {
                sign = -1;
                next++;
                SKIP_WS(next);
                scan = next;
            }
            KMP_ASSERT2((*next >= '0') && (*next <= '9'),
              "bad explicit proc list");
            SKIP_DIGITS(next);
            stride = __kmp_str_to_int(scan, *next);
            KMP_ASSERT2(stride >= 0, "bad explicit proc list");
            stride *= sign;
        }

        //
        // Do some range checks.
        //
        KMP_ASSERT2(stride != 0, "bad explicit proc list");
        if (stride > 0) {
            KMP_ASSERT2(start <= end, "bad explicit proc list");
        }
        else {
            KMP_ASSERT2(start >= end, "bad explicit proc list");
        }
        KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");

        //
        // Add the mask for each OS proc # to the list.
        //
        if (stride > 0) {
            do {
                ADD_MASK_OSID(start, osId2Mask, maxOsId);
                start += stride;
            } while (start <= end);
        }
        else {
            do {
                ADD_MASK_OSID(start, osId2Mask, maxOsId);
                start += stride;
            } while (start >= end);
        }

        //
        // Skip optional comma.
        //
        SKIP_WS(next);
        if (*next == ',') {
            next++;
        }
        scan = next;
    }

    *out_numMasks = nextNewMask;
    if (nextNewMask == 0) {
        *out_masks = NULL;
        __kmp_free(sumMask);
        KMP_INTERNAL_FREE(newMasks);
        return;
    }
    *out_masks
      = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
    memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
    __kmp_free(sumMask);
    KMP_INTERNAL_FREE(newMasks);
}


# if OMP_40_ENABLED

/*-----------------------------------------------------------------------------

Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
places.  Again, here is the grammar:

place_list := place
place_list := place , place_list
place := num
place := place : num
place := place : num : signed
place := { subplace_list }
place := ! place                  // (lowest priority)
subplace_list := subplace
subplace_list := subplace , subplace_list
subplace := num
subplace := num : num
subplace := num : num : signed
signed := num
signed := + signed
signed := - signed

-----------------------------------------------------------------------------*/
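
/*
A few worked examples of the grammar (hypothetical OMP_PLACES settings):

    OMP_PLACES="{0,1},{2,3}"   two places, one per pair of OS procs
    OMP_PLACES="{0:4}"         one place containing procs 0,1,2,3
                               (subplace num : num is start : count)
    OMP_PLACES="{0:2}:4:2"     per the place : num : signed form, place
                               {0,1} replicated 4 times with stride 2:
                               {0,1},{2,3},{4,5},{6,7}

The subplace forms are handled by __kmp_process_subplace_list() below; the
place : num : signed form is handled by the placelist parser further down.
*/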
2862 
2863 static void
2864 __kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
2865   int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
2866 {
2867     const char *next;
2868 
2869     for (;;) {
2870         int start, count, stride, i;
2871 
2872         //
2873         // Read in the starting proc id
2874         //
2875         SKIP_WS(*scan);
2876         KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2877           "bad explicit places list");
2878         next = *scan;
2879         SKIP_DIGITS(next);
2880         start = __kmp_str_to_int(*scan, *next);
2881         KMP_ASSERT(start >= 0);
2882         *scan = next;
2883 
2884         //
2885         // valid follow sets are ',' ':' and '}'
2886         //
2887         SKIP_WS(*scan);
2888         if (**scan == '}' || **scan == ',') {
2889             if ((start > maxOsId) ||
2890               (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2891                 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2892                   && (__kmp_affinity_type != affinity_none))) {
2893                     KMP_WARNING(AffIgnoreInvalidProcID, start);
2894                 }
2895             }
2896             else {
2897                 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2898                 (*setSize)++;
2899             }
2900             if (**scan == '}') {
2901                 break;
2902             }
2903             (*scan)++;  // skip ','
2904             continue;
2905         }
2906         KMP_ASSERT2(**scan == ':', "bad explicit places list");
2907         (*scan)++;      // skip ':'
2908 
2909         //
2910         // Read count parameter
2911         //
2912         SKIP_WS(*scan);
2913         KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2914           "bad explicit places list");
2915         next = *scan;
2916         SKIP_DIGITS(next);
2917         count = __kmp_str_to_int(*scan, *next);
2918         KMP_ASSERT(count >= 0);
2919         *scan = next;
2920 
2921         //
2922         // valid follow sets are ',' ':' and '}'
2923         //
2924         SKIP_WS(*scan);
2925         if (**scan == '}' || **scan == ',') {
2926             for (i = 0; i < count; i++) {
2927                 if ((start > maxOsId) ||
2928                   (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2929                     if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2930                       && (__kmp_affinity_type != affinity_none))) {
2931                         KMP_WARNING(AffIgnoreInvalidProcID, start);
2932                     }
2933                     break;  // don't proliferate warnings for large count
2934                 }
2935                 else {
2936                     KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2937                     start++;
2938                     (*setSize)++;
2939                 }
2940             }
2941             if (**scan == '}') {
2942                 break;
2943             }
2944             (*scan)++;  // skip ','
2945             continue;
2946         }
2947         KMP_ASSERT2(**scan == ':', "bad explicit places list");
2948         (*scan)++;      // skip ':'
2949 
2950         //
2951         // Read stride parameter
2952         //
2953         int sign = +1;
2954         for (;;) {
2955             SKIP_WS(*scan);
2956             if (**scan == '+') {
2957                 (*scan)++; // skip '+'
2958                 continue;
2959             }
2960             if (**scan == '-') {
2961                 sign *= -1;
2962                 (*scan)++; // skip '-'
2963                 continue;
2964             }
2965             break;
2966         }
2967         SKIP_WS(*scan);
2968         KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2969           "bad explicit places list");
2970         next = *scan;
2971         SKIP_DIGITS(next);
2972         stride = __kmp_str_to_int(*scan, *next);
2973         KMP_ASSERT(stride >= 0);
2974         *scan = next;
2975         stride *= sign;
2976 
2977         //
2978         // valid follow sets are ',' and '}'
2979         //
2980         SKIP_WS(*scan);
2981         if (**scan == '}' || **scan == ',') {
2982             for (i = 0; i < count; i++) {
2983                 if ((start > maxOsId) ||
2984                   (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2985                     if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2986                       && (__kmp_affinity_type != affinity_none))) {
2987                         KMP_WARNING(AffIgnoreInvalidProcID, start);
2988                     }
2989                     break;  // don't proliferate warnings for large count
2990                 }
2991                 else {
2992                     KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2993                     start += stride;
2994                     (*setSize)++;
2995                 }
2996             }
2997             if (**scan == '}') {
2998                 break;
2999             }
3000             (*scan)++;  // skip ','
3001             continue;
3002         }
3003 
3004         KMP_ASSERT2(0, "bad explicit places list");
3005     }
3006 }
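
//
// For illustration (assuming every OS proc id mentioned is valid on the
// machine): each subplace is <lower-bound>[:<count>[:<stride>]], so the
// subplace list "0,5,9:2" selects OS procs {0,5,9,10}, and "0:4:2"
// selects {0,2,4,6}.
//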
3007 
3008 
3009 static void
3010 __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
3011   int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3012 {
3013     const char *next;
3014 
3015     //
3016     // valid follow sets are '{' '!' and num
3017     //
3018     SKIP_WS(*scan);
3019     if (**scan == '{') {
3020         (*scan)++;      // skip '{'
3021         __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
3022           setSize);
3023         KMP_ASSERT2(**scan == '}', "bad explicit places list");
3024         (*scan)++;      // skip '}'
3025     }
    else if (**scan == '!') {
        (*scan)++;      // skip '!' before recursing, or we'd recurse forever
        __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
        KMP_CPU_COMPLEMENT(tempMask);
    }
3031     else if ((**scan >= '0') && (**scan <= '9')) {
3032         next = *scan;
3033         SKIP_DIGITS(next);
3034         int num = __kmp_str_to_int(*scan, *next);
3035         KMP_ASSERT(num >= 0);
3036         if ((num > maxOsId) ||
3037           (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3038             if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3039               && (__kmp_affinity_type != affinity_none))) {
3040                 KMP_WARNING(AffIgnoreInvalidProcID, num);
3041             }
3042         }
3043         else {
3044             KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3045             (*setSize)++;
3046         }
        *scan = next;  // skip num
    }
3049     else {
3050         KMP_ASSERT2(0, "bad explicit places list");
3051     }
3052 }
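
//
// For illustration: a place is "{<subplace-list>}", "!<place>" (the
// complement of a place), or a single OS proc id, so (assuming procs 0
// and 1 exist) "!{0,1}" selects every proc in the mask table except
// 0 and 1.
//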
3053 
3054 
3055 static void
3056 __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3057   unsigned int *out_numMasks, const char *placelist,
3058   kmp_affin_mask_t *osId2Mask, int maxOsId)
3059 {
3060     const char *scan = placelist;
3061     const char *next = placelist;
3062 
3063     numNewMasks = 2;
3064     newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
3065       * __kmp_affin_mask_size);
3066     nextNewMask = 0;
3067 
3068     kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
3069       __kmp_affin_mask_size);
3070     KMP_CPU_ZERO(tempMask);
3071     int setSize = 0;
3072 
3073     for (;;) {
3074         __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3075 
3076         //
3077         // valid follow sets are ',' ':' and EOL
3078         //
3079         SKIP_WS(scan);
3080         if (*scan == '\0' || *scan == ',') {
3081             if (setSize > 0) {
3082                 ADD_MASK(tempMask);
3083             }
3084             KMP_CPU_ZERO(tempMask);
3085             setSize = 0;
3086             if (*scan == '\0') {
3087                 break;
3088             }
3089             scan++;     // skip ','
3090             continue;
3091         }
3092 
3093         KMP_ASSERT2(*scan == ':', "bad explicit places list");
3094         scan++;         // skip ':'
3095 
3096         //
3097         // Read count parameter
3098         //
3099         SKIP_WS(scan);
3100         KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3101           "bad explicit places list");
3102         next = scan;
3103         SKIP_DIGITS(next);
3104         int count = __kmp_str_to_int(scan, *next);
3105         KMP_ASSERT(count >= 0);
3106         scan = next;
3107 
3108         //
3109         // valid follow sets are ',' ':' and EOL
3110         //
3111         SKIP_WS(scan);
3112         if (*scan == '\0' || *scan == ',') {
3113             int i;
3114             for (i = 0; i < count; i++) {
3115                 int j;
3116                 if (setSize == 0) {
3117                     break;
3118                 }
3119                 ADD_MASK(tempMask);
3120                 setSize = 0;
3121                 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j > 0; j--) {
                    //
                    // Shift the mask up one bit: copy bit j-1 into bit j,
                    // scanning from the high end down so each source bit
                    // is read before it is overwritten.
                    //
3126                     if (KMP_CPU_ISSET(j - 1, tempMask)) {
3127                         KMP_CPU_SET(j, tempMask);
3128                         setSize++;
3129                     }
3130                     else {
3131                         KMP_CPU_CLR(j, tempMask);
3132                     }
3133                 }
3134                 for (; j >= 0; j--) {
3135                     KMP_CPU_CLR(j, tempMask);
3136                 }
3137             }
3138             KMP_CPU_ZERO(tempMask);
3139             setSize = 0;
3140 
3141             if (*scan == '\0') {
3142                 break;
3143             }
3144             scan++;     // skip ','
3145             continue;
3146         }
3147 
3148         KMP_ASSERT2(*scan == ':', "bad explicit places list");
3149         scan++;         // skip ':'
3150 
3151         //
3152         // Read stride parameter
3153         //
3154         int sign = +1;
3155         for (;;) {
3156             SKIP_WS(scan);
3157             if (*scan == '+') {
3158                 scan++; // skip '+'
3159                 continue;
3160             }
3161             if (*scan == '-') {
3162                 sign *= -1;
3163                 scan++; // skip '-'
3164                 continue;
3165             }
3166             break;
3167         }
3168         SKIP_WS(scan);
3169         KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3170           "bad explicit places list");
3171         next = scan;
3172         SKIP_DIGITS(next);
3173         int stride = __kmp_str_to_int(scan, *next);
3174         KMP_DEBUG_ASSERT(stride >= 0);
3175         scan = next;
3176         stride *= sign;
3177 
3178         if (stride > 0) {
3179             int i;
3180             for (i = 0; i < count; i++) {
3181                 int j;
3182                 if (setSize == 0) {
3183                     break;
3184                 }
3185                 ADD_MASK(tempMask);
3186                 setSize = 0;
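                //
                // Shift the whole mask up by stride bits, scanning from
                // the high end down so each source bit is read before it
                // is overwritten.
                //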
3187                 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
3188                     if (KMP_CPU_ISSET(j - stride, tempMask)) {
3189                         KMP_CPU_SET(j, tempMask);
3190                         setSize++;
3191                     }
3192                     else {
3193                         KMP_CPU_CLR(j, tempMask);
3194                     }
3195                 }
3196                 for (; j >= 0; j--) {
3197                     KMP_CPU_CLR(j, tempMask);
3198                 }
3199             }
3200         }
3201         else {
3202             int i;
3203             for (i = 0; i < count; i++) {
3204                 unsigned j;
3205                 if (setSize == 0) {
3206                     break;
3207                 }
3208                 ADD_MASK(tempMask);
3209                 setSize = 0;
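                //
                // stride is negative here, so this shifts the whole mask
                // down by |stride| bits: j - stride (== j + |stride|)
                // reads the source bit above j, and the loop bound
                // shrinks by |stride| accordingly.
                //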
3210                 for (j = 0; j < (__kmp_affin_mask_size * CHAR_BIT) + stride;
3211                   j++) {
3212                     if (KMP_CPU_ISSET(j - stride, tempMask)) {
3213                         KMP_CPU_SET(j, tempMask);
3214                         setSize++;
3215                     }
3216                     else {
3217                         KMP_CPU_CLR(j, tempMask);
3218                     }
3219                 }
3220                 for (; j < __kmp_affin_mask_size * CHAR_BIT; j++) {
3221                     KMP_CPU_CLR(j, tempMask);
3222                 }
3223             }
3224         }
3225         KMP_CPU_ZERO(tempMask);
3226         setSize = 0;
3227 
3228         //
3229         // valid follow sets are ',' and EOL
3230         //
3231         SKIP_WS(scan);
3232         if (*scan == '\0') {
3233             break;
3234         }
3235         if (*scan == ',') {
3236             scan++;     // skip ','
3237             continue;
3238         }
3239 
3240         KMP_ASSERT2(0, "bad explicit places list");
3241     }
3242 
3243     *out_numMasks = nextNewMask;
    if (nextNewMask == 0) {
        *out_masks = NULL;
        __kmp_free(tempMask);   // don't leak tempMask on the early return
        KMP_INTERNAL_FREE(newMasks);
        return;
    }
3249     *out_masks
3250       = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
3251     memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
3252     __kmp_free(tempMask);
3253     KMP_INTERNAL_FREE(newMasks);
3254 }
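
//
// For illustration (assuming the OS proc ids involved are valid): at the
// top level each place may be followed by :<count>[:<stride>], which adds
// the place's mask <count> times, shifting it by <stride> OS proc
// positions between copies.  So "{2,3}:2:4" yields the two places {2,3}
// and {6,7}; bits shifted past the end of the mask are dropped, and the
// expansion stops early if the mask becomes empty.
//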
3255 
3256 # endif /* OMP_40_ENABLED */
3257 
3258 #undef ADD_MASK
3259 #undef ADD_MASK_OSID
3260 
3261 
3262 # if KMP_MIC
3263 
3264 static void
3265 __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3266 {
3267     if ( __kmp_place_num_cores == 0 ) {
3268         if ( __kmp_place_num_threads_per_core == 0 ) {
            return;   // no core-limiting action was requested; nothing to do
3270         }
3271         __kmp_place_num_cores = nCoresPerPkg;   // use all available cores
3272     }
3273     if ( !__kmp_affinity_uniform_topology() || depth != 3 ) {
3274         KMP_WARNING( AffThrPlaceUnsupported );
        return; // non-uniform and non-3-level topologies are not supported
3276     }
3277     if ( __kmp_place_num_threads_per_core == 0 ) {
3278         __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore;  // use all HW contexts
3279     }
3280     if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
3281         KMP_WARNING( AffThrPlaceManyCores );
3282         return;
3283     }
3284 
3285     AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
3286                             nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
3287     int i, j, k, n_old = 0, n_new = 0;
3288     for ( i = 0; i < nPackages; ++i ) {
3289         for ( j = 0; j < nCoresPerPkg; ++j ) {
3290             if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
3291                 n_old += __kmp_nThreadsPerCore;   // skip not-requested core
3292             } else {
3293                 for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
3294                     if ( k < __kmp_place_num_threads_per_core ) {
                        newAddr[n_new] = (*pAddr)[n_old];   // copy requested core's data to its new location
3296                         n_new++;
3297                     }
3298                     n_old++;
3299                 }
3300             }
3301         }
3302     }
3303     nCoresPerPkg = __kmp_place_num_cores;                     // correct nCoresPerPkg
3304     __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3305     __kmp_avail_proc = n_new;                                 // correct avail_proc
3306     __kmp_ncores = nPackages * __kmp_place_num_cores;         // correct ncores
3307 
3308     __kmp_free( *pAddr );
3309     *pAddr = newAddr;      // replace old topology with new one
3310 }
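
//
// Worked example (hypothetical machine): with nPackages == 1,
// nCoresPerPkg == 4 and __kmp_nThreadsPerCore == 4, the settings
// __kmp_place_core_offset == 1, __kmp_place_num_cores == 2 and
// __kmp_place_num_threads_per_core == 2 keep only cores 1 and 2, and
// only the first two HW contexts on each, so __kmp_avail_proc drops
// from 16 to 4 and __kmp_ncores becomes 2.
//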
3311 
3312 # endif /* KMP_MIC */
3313 
3314 
3315 static AddrUnsPair *address2os = NULL;
3316 static int           * procarr = NULL;
3317 static int     __kmp_aff_depth = 0;
3318 
3319 static void
3320 __kmp_aux_affinity_initialize(void)
3321 {
3322     if (__kmp_affinity_masks != NULL) {
3323         KMP_ASSERT(fullMask != NULL);
3324         return;
3325     }
3326 
3327     //
3328     // Create the "full" mask - this defines all of the processors that we
3329     // consider to be in the machine model.  If respect is set, then it is
3330     // the initialization thread's affinity mask.  Otherwise, it is all
3331     // processors that we know about on the machine.
3332     //
3333     if (fullMask == NULL) {
3334         fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
3335     }
3336     if (KMP_AFFINITY_CAPABLE()) {
3337         if (__kmp_affinity_respect_mask) {
3338             __kmp_get_system_affinity(fullMask, TRUE);
3339 
3340             //
3341             // Count the number of available processors.
3342             //
3343             unsigned i;
3344             __kmp_avail_proc = 0;
3345             for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
3346                 if (! KMP_CPU_ISSET(i, fullMask)) {
3347                     continue;
3348                 }
3349                 __kmp_avail_proc++;
3350             }
3351             if (__kmp_avail_proc > __kmp_xproc) {
3352                 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3353                   && (__kmp_affinity_type != affinity_none))) {
3354                     KMP_WARNING(ErrorInitializeAffinity);
3355                 }
3356                 __kmp_affinity_type = affinity_none;
3357                 __kmp_affin_mask_size = 0;
3358                 return;
3359             }
3360         }
3361         else {
3362             __kmp_affinity_entire_machine_mask(fullMask);
3363             __kmp_avail_proc = __kmp_xproc;
3364         }
3365     }
3366 
3367     int depth = -1;
3368     kmp_i18n_id_t msg_id = kmp_i18n_null;
3369 
3370     //
3371     // For backward compatibility, setting KMP_CPUINFO_FILE =>
3372     // KMP_TOPOLOGY_METHOD=cpuinfo
3373     //
3374     if ((__kmp_cpuinfo_file != NULL) &&
3375       (__kmp_affinity_top_method == affinity_top_method_all)) {
3376         __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3377     }
3378 
3379     if (__kmp_affinity_top_method == affinity_top_method_all) {
3380         //
3381         // In the default code path, errors are not fatal - we just try using
        // another method.  We only emit a warning message if affinity is on
        // or the verbose flag is set, and the nowarnings flag was not set.
3384         //
3385         const char *file_name = NULL;
3386         int line = 0;
3387 
3388 # if KMP_ARCH_X86 || KMP_ARCH_X86_64
3389 
3390         if (__kmp_affinity_verbose) {
3391             KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
3392         }
3393 
3394         file_name = NULL;
3395         depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3396         if (depth == 0) {
3397             KMP_ASSERT(__kmp_affinity_type == affinity_none);
3398             KMP_ASSERT(address2os == NULL);
3399             return;
3400         }
3401 
3402         if (depth < 0) {
3403             if ((msg_id != kmp_i18n_null)
3404               && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3405               && (__kmp_affinity_type != affinity_none)))) {
3406 #  if KMP_MIC
3407                 if (__kmp_affinity_verbose) {
3408                     KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3409                       KMP_I18N_STR(DecodingLegacyAPIC));
3410                 }
3411 #  else
3412                 KMP_WARNING(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3413                   KMP_I18N_STR(DecodingLegacyAPIC));
3414 #  endif
3415             }
3416 
3417             file_name = NULL;
3418             depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3419             if (depth == 0) {
3420                 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3421                 KMP_ASSERT(address2os == NULL);
3422                 return;
3423             }
3424         }
3425 
3426 # endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3427 
3428 # if KMP_OS_LINUX
3429 
3430         if (depth < 0) {
3431             if ((msg_id != kmp_i18n_null)
3432               && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3433               && (__kmp_affinity_type != affinity_none)))) {
3434 #  if KMP_MIC
3435                 if (__kmp_affinity_verbose) {
3436                     KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3437                 }
3438 #  else
3439                 KMP_WARNING(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3440 #  endif
3441             }
3442             else if (__kmp_affinity_verbose) {
3443                 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3444             }
3445 
3446             FILE *f = fopen("/proc/cpuinfo", "r");
3447             if (f == NULL) {
3448                 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3449             }
3450             else {
3451                 file_name = "/proc/cpuinfo";
3452                 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3453                 fclose(f);
3454                 if (depth == 0) {
3455                     KMP_ASSERT(__kmp_affinity_type == affinity_none);
3456                     KMP_ASSERT(address2os == NULL);
3457                     return;
3458                 }
3459             }
3460         }
3461 
3462 # endif /* KMP_OS_LINUX */
3463 
3464         if (depth < 0) {
3465             if (msg_id != kmp_i18n_null
3466               && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3467               && (__kmp_affinity_type != affinity_none)))) {
3468                 if (file_name == NULL) {
3469                     KMP_WARNING(UsingFlatOS, __kmp_i18n_catgets(msg_id));
3470                 }
3471                 else if (line == 0) {
3472                     KMP_WARNING(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
3473                 }
3474                 else {
3475                     KMP_WARNING(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
3476                 }
3477             }
3478 
3479             file_name = "";
3480             depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3481             if (depth == 0) {
3482                 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3483                 KMP_ASSERT(address2os == NULL);
3484                 return;
3485             }
3486             KMP_ASSERT(depth > 0);
3487             KMP_ASSERT(address2os != NULL);
3488         }
3489     }
3490 
3491     //
    // If the user has specified that a particular topology discovery method
3493     // is to be used, then we abort if that method fails.  The exception is
3494     // group affinity, which might have been implicitly set.
3495     //
3496 
3497 # if KMP_ARCH_X86 || KMP_ARCH_X86_64
3498 
3499     else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3500         if (__kmp_affinity_verbose) {
3501             KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3502               KMP_I18N_STR(Decodingx2APIC));
3503         }
3504 
3505         depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3506         if (depth == 0) {
3507             KMP_ASSERT(__kmp_affinity_type == affinity_none);
3508             KMP_ASSERT(address2os == NULL);
3509             return;
3510         }
3511 
3512         if (depth < 0) {
3513             KMP_ASSERT(msg_id != kmp_i18n_null);
3514             KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3515         }
3516     }
3517     else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3518         if (__kmp_affinity_verbose) {
3519             KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3520               KMP_I18N_STR(DecodingLegacyAPIC));
3521         }
3522 
3523         depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3524         if (depth == 0) {
3525             KMP_ASSERT(__kmp_affinity_type == affinity_none);
3526             KMP_ASSERT(address2os == NULL);
3527             return;
3528         }
3529 
3530         if (depth < 0) {
3531             KMP_ASSERT(msg_id != kmp_i18n_null);
3532             KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3533         }
3534     }
3535 
3536 # endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3537 
3538     else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3539         const char *filename;
3540         if (__kmp_cpuinfo_file != NULL) {
3541             filename = __kmp_cpuinfo_file;
3542         }
3543         else {
3544             filename = "/proc/cpuinfo";
3545         }
3546 
3547         if (__kmp_affinity_verbose) {
3548             KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3549         }
3550 
3551         FILE *f = fopen(filename, "r");
3552         if (f == NULL) {
3553             int code = errno;
3554             if (__kmp_cpuinfo_file != NULL) {
3555                 __kmp_msg(
3556                     kmp_ms_fatal,
3557                     KMP_MSG(CantOpenFileForReading, filename),
3558                     KMP_ERR(code),
3559                     KMP_HNT(NameComesFrom_CPUINFO_FILE),
3560                     __kmp_msg_null
3561                 );
3562             }
3563             else {
3564                 __kmp_msg(
3565                     kmp_ms_fatal,
3566                     KMP_MSG(CantOpenFileForReading, filename),
3567                     KMP_ERR(code),
3568                     __kmp_msg_null
3569                 );
3570             }
3571         }
3572         int line = 0;
3573         depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3574         fclose(f);
3575         if (depth < 0) {
3576             KMP_ASSERT(msg_id != kmp_i18n_null);
3577             if (line > 0) {
3578                 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3579             }
3580             else {
3581                 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3582             }
3583         }
3584         if (__kmp_affinity_type == affinity_none) {
3585             KMP_ASSERT(depth == 0);
3586             KMP_ASSERT(address2os == NULL);
3587             return;
3588         }
3589     }
3590 
3591 # if KMP_OS_WINDOWS && KMP_ARCH_X86_64
3592 
3593     else if (__kmp_affinity_top_method == affinity_top_method_group) {
3594         if (__kmp_affinity_verbose) {
3595             KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3596         }
3597 
3598         depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3599         KMP_ASSERT(depth != 0);
3600 
3601         if (depth < 0) {
3602             if ((msg_id != kmp_i18n_null)
3603               && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3604               && (__kmp_affinity_type != affinity_none)))) {
3605                 KMP_WARNING(UsingFlatOS, __kmp_i18n_catgets(msg_id));
3606             }
3607 
3608             depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3609             if (depth == 0) {
3610                 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3611                 KMP_ASSERT(address2os == NULL);
3612                 return;
3613             }
3614             // should not fail
3615             KMP_ASSERT(depth > 0);
3616             KMP_ASSERT(address2os != NULL);
3617         }
3618     }
3619 
3620 # endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */
3621 
3622     else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3623         if (__kmp_affinity_verbose) {
3624             KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3625         }
3626 
3627         depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3628         if (depth == 0) {
3629             KMP_ASSERT(__kmp_affinity_type == affinity_none);
3630             KMP_ASSERT(address2os == NULL);
3631             return;
3632         }
3633         // should not fail
3634         KMP_ASSERT(depth > 0);
3635         KMP_ASSERT(address2os != NULL);
3636     }
3637 
3638     if (address2os == NULL) {
3639         if (KMP_AFFINITY_CAPABLE()
3640           && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3641           && (__kmp_affinity_type != affinity_none)))) {
3642             KMP_WARNING(ErrorInitializeAffinity);
3643         }
3644         __kmp_affinity_type = affinity_none;
3645         __kmp_affin_mask_size = 0;
3646         return;
3647     }
3648 
3649 # if KMP_MIC
3650     __kmp_apply_thread_places(&address2os, depth);
3651 # endif
3652 
3653     //
3654     // Create the table of masks, indexed by thread Id.
3655     //
3656     unsigned maxIndex;
3657     unsigned numUnique;
3658     kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3659       address2os, __kmp_avail_proc);
3660     if (__kmp_affinity_gran_levels == 0) {
3661         KMP_DEBUG_ASSERT(numUnique == __kmp_avail_proc);
3662     }
3663 
3664     //
3665     // Set the childNums vector in all Address objects.  This must be done
3666     // before we can sort using __kmp_affinity_cmp_Address_child_num(),
3667     // which takes into account the setting of __kmp_affinity_compact.
3668     //
3669     __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
3670 
3671     switch (__kmp_affinity_type) {
3672 
3673         case affinity_explicit:
3674         KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3675 # if OMP_40_ENABLED
3676         if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3677 # endif
3678         {
3679             __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3680               &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3681               maxIndex);
3682         }
3683 # if OMP_40_ENABLED
3684         else {
3685             __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3686               &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3687               maxIndex);
3688         }
3689 # endif
3690         if (__kmp_affinity_num_masks == 0) {
3691             if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3692               && (__kmp_affinity_type != affinity_none))) {
3693                 KMP_WARNING(AffNoValidProcID);
3694             }
3695             __kmp_affinity_type = affinity_none;
3696             return;
3697         }
3698         break;
3699 
3700         //
3701         // The other affinity types rely on sorting the Addresses according
3702         // to some permutation of the machine topology tree.  Set
3703         // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
3704         // then jump to a common code fragment to do the sort and create
3705         // the array of affinity masks.
3706         //
3707 
3708         case affinity_logical:
3709         __kmp_affinity_compact = 0;
3710         if (__kmp_affinity_offset) {
3711             __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3712               % __kmp_avail_proc;
3713         }
3714         goto sortAddresses;
3715 
3716         case affinity_physical:
3717         if (__kmp_nThreadsPerCore > 1) {
3718             __kmp_affinity_compact = 1;
3719             if (__kmp_affinity_compact >= depth) {
3720                 __kmp_affinity_compact = 0;
3721             }
3722         } else {
3723             __kmp_affinity_compact = 0;
3724         }
3725         if (__kmp_affinity_offset) {
3726             __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3727               % __kmp_avail_proc;
3728         }
3729         goto sortAddresses;
3730 
3731         case affinity_scatter:
3732         if (__kmp_affinity_compact >= depth) {
3733             __kmp_affinity_compact = 0;
3734         }
3735         else {
3736             __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
3737         }
3738         goto sortAddresses;
3739 
3740         case affinity_compact:
3741         if (__kmp_affinity_compact >= depth) {
3742             __kmp_affinity_compact = depth - 1;
3743         }
3744         goto sortAddresses;
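
        //
        // Note: affinity_scatter is implemented as the mirror image of
        // affinity_compact - the requested level is re-expressed as
        // (depth - 1 - __kmp_affinity_compact) above, and both cases
        // then share the sortAddresses code below.
        //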
3745 
3746 # if KMP_MIC
3747         case affinity_balanced:
        // Balanced affinity is supported only on single-package machines
3749         if( nPackages > 1 ) {
3750             if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
3751                 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
3752             }
3753             __kmp_affinity_type = affinity_none;
3754             return;
3755         } else if( __kmp_affinity_uniform_topology() ) {
3756             break;
3757         } else { // Non-uniform topology
3758 
3759             // Save the depth for further usage
3760             __kmp_aff_depth = depth;
3761 
3762             // Number of hyper threads per core in HT machine
3763             int nth_per_core = __kmp_nThreadsPerCore;
3764 
3765             int core_level;
3766             if( nth_per_core > 1 ) {
3767                 core_level = depth - 2;
3768             } else {
3769                 core_level = depth - 1;
3770             }
3771             int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
3772             int nproc = nth_per_core * ncores;
3773 
3774             procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
3775             for( int i = 0; i < nproc; i++ ) {
3776                 procarr[ i ] = -1;
3777             }
3778 
3779             for( int i = 0; i < __kmp_avail_proc; i++ ) {
3780                 int proc = address2os[ i ].second;
3781                 // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
3782                 // If there is only one thread per core then depth == 2: level 0 - package,
3783                 // level 1 - core.
3784                 int level = depth - 1;
3785 
                // Defaults for the one-thread-per-core case (nth_per_core == 1):
3787                 int thread = 0;
3788                 int core = address2os[ i ].first.labels[ level ];
                // If the thread level exists, i.e. there is more than one thread context per core
3790                 if( nth_per_core > 1 ) {
3791                     thread = address2os[ i ].first.labels[ level ] % nth_per_core;
3792                     core = address2os[ i ].first.labels[ level - 1 ];
3793                 }
3794                 procarr[ core * nth_per_core + thread ] = proc;
3795             }
3796 
3797             break;
3798         }
3799 # endif
3800 
3801         sortAddresses:
3802         //
3803         // Allocate the gtid->affinity mask table.
3804         //
3805         if (__kmp_affinity_dups) {
3806             __kmp_affinity_num_masks = __kmp_avail_proc;
3807         }
3808         else {
3809             __kmp_affinity_num_masks = numUnique;
3810         }
3811 
3812 # if OMP_40_ENABLED
3813         if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
3814           && ( __kmp_affinity_num_places > 0 )
3815           && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
3816             __kmp_affinity_num_masks = __kmp_affinity_num_places;
3817         }
3818 # endif
3819 
3820         __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
3821           __kmp_affinity_num_masks * __kmp_affin_mask_size);
3822 
3823         //
3824         // Sort the address2os table according to the current setting of
3825         // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
3826         //
3827         qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
3828           __kmp_affinity_cmp_Address_child_num);
3829         {
3830             int i;
3831             unsigned j;
3832             for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
3833                 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
3834                     continue;
3835                 }
3836                 unsigned osId = address2os[i].second;
3837                 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
3838                 kmp_affin_mask_t *dest
3839                   = KMP_CPU_INDEX(__kmp_affinity_masks, j);
3840                 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
3841                 KMP_CPU_COPY(dest, src);
3842                 if (++j >= __kmp_affinity_num_masks) {
3843                     break;
3844                 }
3845             }
3846             KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
3847         }
3848         break;
3849 
3850         default:
3851         KMP_ASSERT2(0, "Unexpected affinity setting");
3852     }
3853 
3854     __kmp_free(osId2Mask);
3855 }
3856 
3857 
3858 void
3859 __kmp_affinity_initialize(void)
3860 {
3861     //
    // Much of the code above was written assuming that if a machine was not
3863     // affinity capable, then __kmp_affinity_type == affinity_none.  We now
3864     // explicitly represent this as __kmp_affinity_type == affinity_disabled.
3865     //
3866     // There are too many checks for __kmp_affinity_type == affinity_none
3867     // in this code.  Instead of trying to change them all, check if
3868     // __kmp_affinity_type == affinity_disabled, and if so, slam it with
3869     // affinity_none, call the real initialization routine, then restore
3870     // __kmp_affinity_type to affinity_disabled.
3871     //
3872     int disabled = (__kmp_affinity_type == affinity_disabled);
3873     if (! KMP_AFFINITY_CAPABLE()) {
3874         KMP_ASSERT(disabled);
3875     }
3876     if (disabled) {
3877         __kmp_affinity_type = affinity_none;
3878     }
3879     __kmp_aux_affinity_initialize();
3880     if (disabled) {
3881         __kmp_affinity_type = affinity_disabled;
3882     }
3883 }
3884 
3885 
3886 void
3887 __kmp_affinity_uninitialize(void)
3888 {
3889     if (__kmp_affinity_masks != NULL) {
3890         __kmp_free(__kmp_affinity_masks);
3891         __kmp_affinity_masks = NULL;
3892     }
3893     if (fullMask != NULL) {
3894         KMP_CPU_FREE(fullMask);
3895         fullMask = NULL;
3896     }
3897     __kmp_affinity_num_masks = 0;
3898 # if OMP_40_ENABLED
3899     __kmp_affinity_num_places = 0;
3900 # endif
3901     if (__kmp_affinity_proclist != NULL) {
3902         __kmp_free(__kmp_affinity_proclist);
3903         __kmp_affinity_proclist = NULL;
3904     }
3905     if( address2os != NULL ) {
3906         __kmp_free( address2os );
3907         address2os = NULL;
3908     }
3909     if( procarr != NULL ) {
3910         __kmp_free( procarr );
3911         procarr = NULL;
3912     }
3913 }
3914 
3915 
3916 void
3917 __kmp_affinity_set_init_mask(int gtid, int isa_root)
3918 {
3919     if (! KMP_AFFINITY_CAPABLE()) {
3920         return;
3921     }
3922 
3923     kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
3924     if (th->th.th_affin_mask == NULL) {
3925         KMP_CPU_ALLOC(th->th.th_affin_mask);
3926     }
3927     else {
3928         KMP_CPU_ZERO(th->th.th_affin_mask);
3929     }
3930 
3931     //
    // Copy the thread mask to the kmp_info_t structure.
    // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
    // that has all of the OS proc ids set; if __kmp_affinity_respect_mask
    // is set, the full mask is the same as the mask of the initialization
    // thread.
3937     //
3938     kmp_affin_mask_t *mask;
3939     int i;
3940 
3941 # if OMP_40_ENABLED
3942     if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3943 # endif
3944     {
3945         if ((__kmp_affinity_type == affinity_none)
3946 # if KMP_MIC
3947           || (__kmp_affinity_type == affinity_balanced)
3948 # endif
3949           ) {
3950 # if KMP_OS_WINDOWS && KMP_ARCH_X86_64
3951             if (__kmp_num_proc_groups > 1) {
3952                 return;
3953             }
3954 # endif
3955             KMP_ASSERT(fullMask != NULL);
3956             i = -1;
3957             mask = fullMask;
3958         }
3959         else {
3960             KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
3961             i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
3962             mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
3963         }
3964     }
3965 # if OMP_40_ENABLED
3966     else {
3967         if ((! isa_root)
3968           || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
3969 #  if KMP_OS_WINDOWS && KMP_ARCH_X86_64
3970             if (__kmp_num_proc_groups > 1) {
3971                 return;
3972             }
3973 #  endif
3974             KMP_ASSERT(fullMask != NULL);
3975             i = KMP_PLACE_ALL;
3976             mask = fullMask;
3977         }
3978         else {
3979             //
3980             // int i = some hash function or just a counter that doesn't
3981             // always start at 0.  Use gtid for now.
3982             //
3983             KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
3984             i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
3985             mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
3986         }
3987     }
3988 # endif
3989 
3990 # if OMP_40_ENABLED
3991     th->th.th_current_place = i;
3992     if (isa_root) {
3993         th->th.th_new_place = i;
3994         th->th.th_first_place = 0;
3995         th->th.th_last_place = __kmp_affinity_num_masks - 1;
3996     }
3997 
3998     if (i == KMP_PLACE_ALL) {
3999         KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4000           gtid));
4001     }
4002     else {
4003         KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4004           gtid, i));
4005     }
4006 # else
4007     if (i == -1) {
4008         KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
4009           gtid));
4010     }
4011     else {
4012         KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4013           gtid, i));
4014     }
4015 # endif /* OMP_40_ENABLED */
4016 
4017     KMP_CPU_COPY(th->th.th_affin_mask, mask);
4018 
4019     if (__kmp_affinity_verbose) {
4020         char buf[KMP_AFFIN_MASK_PRINT_LEN];
4021         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4022           th->th.th_affin_mask);
4023         KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", gtid, buf);
4024     }
4025 
4026 # if KMP_OS_WINDOWS
4027     //
4028     // On Windows* OS, the process affinity mask might have changed.
4029     // If the user didn't request affinity and this call fails,
4030     // just continue silently.  See CQ171393.
4031     //
4032     if ( __kmp_affinity_type == affinity_none ) {
4033         __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4034     }
4035     else
4036 # endif
4037     __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4038 }
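
//
// Illustrative example: with __kmp_affinity_num_masks == 4 and
// __kmp_affinity_offset == 1, threads gtid 0,1,2,3,4,... are bound to
// mask indices 1,2,3,0,1,... - i.e. i = (gtid + offset) % num_masks.
//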
4039 
4040 
4041 # if OMP_40_ENABLED
4042 
4043 void
4044 __kmp_affinity_set_place(int gtid)
4045 {
4048     if (! KMP_AFFINITY_CAPABLE()) {
4049         return;
4050     }
4051 
4052     kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4053 
4054     KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
4055       gtid, th->th.th_new_place, th->th.th_current_place));
4056 
4057     //
4058     // Check that the new place is within this thread's partition.
4059     //
4060     KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4061     KMP_DEBUG_ASSERT(th->th.th_new_place >= 0);
    KMP_DEBUG_ASSERT((unsigned)th->th.th_new_place < __kmp_affinity_num_masks);
4063     if (th->th.th_first_place <= th->th.th_last_place) {
4064         KMP_DEBUG_ASSERT((th->th.th_new_place >= th->th.th_first_place)
4065          && (th->th.th_new_place <= th->th.th_last_place));
4066     }
4067     else {
4068         KMP_DEBUG_ASSERT((th->th.th_new_place <= th->th.th_first_place)
4069          || (th->th.th_new_place >= th->th.th_last_place));
4070     }
4071 
4072     //
    // Copy the thread mask to the kmp_info_t structure,
4074     // and set this thread's affinity.
4075     //
4076     kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
4077       th->th.th_new_place);
4078     KMP_CPU_COPY(th->th.th_affin_mask, mask);
4079     th->th.th_current_place = th->th.th_new_place;
4080 
4081     if (__kmp_affinity_verbose) {
4082         char buf[KMP_AFFIN_MASK_PRINT_LEN];
4083         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4084           th->th.th_affin_mask);
4085         KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", gtid, buf);
4086     }
4087     __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4088 }
4089 
4090 # endif /* OMP_40_ENABLED */
4091 
4092 
4093 int
4094 __kmp_aux_set_affinity(void **mask)
4095 {
4096     int gtid;
4097     kmp_info_t *th;
4098     int retval;
4099 
4100     if (! KMP_AFFINITY_CAPABLE()) {
4101         return -1;
4102     }
4103 
4104     gtid = __kmp_entry_gtid();
4105     KA_TRACE(1000, ;{
4106         char buf[KMP_AFFIN_MASK_PRINT_LEN];
4107         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4108           (kmp_affin_mask_t *)(*mask));
4109         __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
4110           gtid, buf);
4111     });
4112 
4113     if (__kmp_env_consistency_check) {
4114         if ((mask == NULL) || (*mask == NULL)) {
4115             KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4116         }
4117         else {
4118             unsigned proc;
4119             int num_procs = 0;
4120 
4121             for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
4122                 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4123                     continue;
4124                 }
4125                 num_procs++;
4126                 if (! KMP_CPU_ISSET(proc, fullMask)) {
4127                     KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4128                     break;
4129                 }
4130             }
4131             if (num_procs == 0) {
4132                 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4133             }
4134 
4135 # if KMP_OS_WINDOWS && KMP_ARCH_X86_64
4136             if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4137                 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4138             }
4139 # endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */
4140 
4141         }
4142     }
4143 
4144     th = __kmp_threads[gtid];
4145     KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4146     retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4147     if (retval == 0) {
4148         KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4149     }
4150 
4151 # if OMP_40_ENABLED
4152     th->th.th_current_place = KMP_PLACE_UNDEFINED;
4153     th->th.th_new_place = KMP_PLACE_UNDEFINED;
4154     th->th.th_first_place = 0;
4155     th->th.th_last_place = __kmp_affinity_num_masks - 1;
4156 # endif
4157 
4158     return retval;
4159 }
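
//
// A minimal usage sketch, assuming the kmp_* affinity extensions declared
// in omp.h (kmp_create_affinity_mask() et al.) route to the __kmp_aux_*
// routines in this file:
//
//     kmp_affinity_mask_t mask;
//     kmp_create_affinity_mask(&mask);
//     kmp_set_affinity_mask_proc(3, &mask);   // 0 on success
//     kmp_set_affinity(&mask);                // 0 on success
//     kmp_destroy_affinity_mask(&mask);
//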
4160 
4161 
4162 int
4163 __kmp_aux_get_affinity(void **mask)
4164 {
4165     int gtid;
4166     int retval;
4167     kmp_info_t *th;
4168 
4169     if (! KMP_AFFINITY_CAPABLE()) {
4170         return -1;
4171     }
4172 
4173     gtid = __kmp_entry_gtid();
4174     th = __kmp_threads[gtid];
4175     KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4176 
4177     KA_TRACE(1000, ;{
4178         char buf[KMP_AFFIN_MASK_PRINT_LEN];
4179         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4180           th->th.th_affin_mask);
4181         __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
4182     });
4183 
4184     if (__kmp_env_consistency_check) {
4185         if ((mask == NULL) || (*mask == NULL)) {
4186             KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
4187         }
4188     }
4189 
4190 # if !KMP_OS_WINDOWS
4191 
4192     retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4193     KA_TRACE(1000, ;{
4194         char buf[KMP_AFFIN_MASK_PRINT_LEN];
4195         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4196           (kmp_affin_mask_t *)(*mask));
4197         __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
4198     });
4199     return retval;
4200 
4201 # else
4202 
4203     KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4204     return 0;
4205 
4206 # endif /* KMP_OS_WINDOWS */
4207 
4208 }
4209 
4210 
4211 int
4212 __kmp_aux_set_affinity_mask_proc(int proc, void **mask)
4213 {
4216     if (! KMP_AFFINITY_CAPABLE()) {
4217         return -1;
4218     }
4219 
4220     KA_TRACE(1000, ;{
4221         int gtid = __kmp_entry_gtid();
4222         char buf[KMP_AFFIN_MASK_PRINT_LEN];
4223         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4224           (kmp_affin_mask_t *)(*mask));
4225         __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
4226           proc, gtid, buf);
4227     });
4228 
4229     if (__kmp_env_consistency_check) {
4230         if ((mask == NULL) || (*mask == NULL)) {
4231             KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4232         }
4233     }
4234 
4235     if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4236         return -1;
4237     }
4238     if (! KMP_CPU_ISSET(proc, fullMask)) {
4239         return -2;
4240     }
4241 
4242     KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4243     return 0;
4244 }
4245 
4246 
4247 int
4248 __kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
4249 {
4252     if (! KMP_AFFINITY_CAPABLE()) {
4253         return -1;
4254     }
4255 
4256     KA_TRACE(1000, ;{
4257         int gtid = __kmp_entry_gtid();
4258         char buf[KMP_AFFIN_MASK_PRINT_LEN];
4259         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4260           (kmp_affin_mask_t *)(*mask));
4261         __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
4262           proc, gtid, buf);
4263     });
4264 
4265     if (__kmp_env_consistency_check) {
4266         if ((mask == NULL) || (*mask == NULL)) {
4267             KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4268         }
4269     }
4270 
4271     if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4272         return -1;
4273     }
4274     if (! KMP_CPU_ISSET(proc, fullMask)) {
4275         return -2;
4276     }
4277 
4278     KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4279     return 0;
4280 }
4281 
4282 
4283 int
4284 __kmp_aux_get_affinity_mask_proc(int proc, void **mask)
4285 {
4288     if (! KMP_AFFINITY_CAPABLE()) {
4289         return -1;
4290     }
4291 
4292     KA_TRACE(1000, ;{
4293         int gtid = __kmp_entry_gtid();
4294         char buf[KMP_AFFIN_MASK_PRINT_LEN];
4295         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4296           (kmp_affin_mask_t *)(*mask));
4297         __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
4298           proc, gtid, buf);
4299     });
4300 
4301     if (__kmp_env_consistency_check) {
4302         if ((mask == NULL) || (*mask == NULL)) {
4303             KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4304         }
4305     }
4306 
4307     if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4308         return 0;
4309     }
4310     if (! KMP_CPU_ISSET(proc, fullMask)) {
4311         return 0;
4312     }
4313 
4314     return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
4315 }
4316 
4317 # if KMP_MIC
4318 
4319 // Dynamic affinity settings - Affinity balanced
4320 void __kmp_balanced_affinity( int tid, int nthreads )
4321 {
4322     if( __kmp_affinity_uniform_topology() ) {
4323         int coreID;
4324         int threadID;
4325         // Number of hyper threads per core in HT machine
4326         int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4327         // Number of cores
4328         int ncores = __kmp_ncores;
4329         // How many threads will be bound to each core
4330         int chunk = nthreads / ncores;
        // How many cores will have an additional thread bound to them - the "big" cores
4332         int big_cores = nthreads % ncores;
4333         // Number of threads on the big cores
4334         int big_nth = ( chunk + 1 ) * big_cores;
4335         if( tid < big_nth ) {
4336             coreID = tid / (chunk + 1 );
4337             threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4338         } else { //tid >= big_nth
4339             coreID = ( tid - big_cores ) / chunk;
4340             threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
4341         }
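
        //
        // Worked example: nthreads == 10 on ncores == 4 gives chunk == 2,
        // big_cores == 2, big_nth == 6.  tids 0-5 fill cores 0-1 with
        // three threads each, and tids 6-9 fill cores 2-3 with two each;
        // e.g. tid 7 gets coreID (7 - 2) / 2 == 2.
        //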
4342 
4343         KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4344           "Illegal set affinity operation when not capable");
4345 
4346         kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
4347         KMP_CPU_ZERO(mask);
4348 
4349         // Granularity == thread
4350         if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4351             int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
4352             KMP_CPU_SET( osID, mask);
4353         } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4354             for( int i = 0; i < __kmp_nth_per_core; i++ ) {
4355                 int osID;
4356                 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
4357                 KMP_CPU_SET( osID, mask);
4358             }
4359         }
4360         if (__kmp_affinity_verbose) {
4361             char buf[KMP_AFFIN_MASK_PRINT_LEN];
4362             __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
4363             KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", tid, buf);
4364         }
4365         __kmp_set_system_affinity( mask, TRUE );
4366     } else { // Non-uniform topology
4367 
4368         kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
4369         KMP_CPU_ZERO(mask);
4370 
4371         // Number of hyper threads per core in HT machine
4372         int nth_per_core = __kmp_nThreadsPerCore;
4373         int core_level;
4374         if( nth_per_core > 1 ) {
4375             core_level = __kmp_aff_depth - 2;
4376         } else {
4377             core_level = __kmp_aff_depth - 1;
4378         }
4379 
        // Number of cores - maximum value; trailing cores with 0 processors are not counted
4381         int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
4382 
        // For a performance gain, handle the special case nthreads == __kmp_avail_proc separately
4384         if( nthreads == __kmp_avail_proc ) {
4385             if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4386                 int osID = address2os[ tid ].second;
4387                 KMP_CPU_SET( osID, mask);
4388             } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4389                 int coreID = address2os[ tid ].first.labels[ core_level ];
                // Count the osIDs found for the current core; there can be at
                // most nth_per_core of them.  Since address2os is sorted, we
                // can break out of the loop once cnt == nth_per_core.
4392                 int cnt = 0;
4393                 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4394                     int osID = address2os[ i ].second;
4395                     int core = address2os[ i ].first.labels[ core_level ];
4396                     if( core == coreID ) {
4397                         KMP_CPU_SET( osID, mask);
4398                         cnt++;
4399                         if( cnt == nth_per_core ) {
4400                             break;
4401                         }
4402                     }
4403                 }
4404             }
4405         } else if( nthreads <= __kmp_ncores ) {
4406 
4407             int core = 0;
4408             for( int i = 0; i < ncores; i++ ) {
                // Check whether this core has any available procs in procarr[]
4410                 int in_mask = 0;
4411                 for( int j = 0; j < nth_per_core; j++ ) {
4412                     if( procarr[ i * nth_per_core + j ] != - 1 ) {
4413                         in_mask = 1;
4414                         break;
4415                     }
4416                 }
4417                 if( in_mask ) {
4418                     if( tid == core ) {
4419                         for( int j = 0; j < nth_per_core; j++ ) {
4420                             int osID = procarr[ i * nth_per_core + j ];
4421                             if( osID != -1 ) {
4422                                 KMP_CPU_SET( osID, mask );
4423                                 // For granularity=thread it is enough to set the first available osID for this core
4424                                 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4425                                     break;
4426                                 }
4427                             }
4428                         }
4429                         break;
4430                     } else {
4431                         core++;
4432                     }
4433                 }
4434             }
4435 
4436         } else { // nthreads > __kmp_ncores
4437 
4438             // Array to save the number of processors at each core
4439             int nproc_at_core[ ncores ];
            // Array to save the number of cores with "x" available processors
4441             int ncores_with_x_procs[ nth_per_core + 1 ];
4442             // Array to save the number of cores with # procs from x to nth_per_core
4443             int ncores_with_x_to_max_procs[ nth_per_core + 1 ];
4444 
4445             for( int i = 0; i <= nth_per_core; i++ ) {
4446                 ncores_with_x_procs[ i ] = 0;
4447                 ncores_with_x_to_max_procs[ i ] = 0;
4448             }
4449 
4450             for( int i = 0; i < ncores; i++ ) {
4451                 int cnt = 0;
4452                 for( int j = 0; j < nth_per_core; j++ ) {
4453                     if( procarr[ i * nth_per_core + j ] != -1 ) {
4454                         cnt++;
4455                     }
4456                 }
4457                 nproc_at_core[ i ] = cnt;
4458                 ncores_with_x_procs[ cnt ]++;
4459             }
4460 
4461             for( int i = 0; i <= nth_per_core; i++ ) {
4462                 for( int j = i; j <= nth_per_core; j++ ) {
4463                     ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
4464                 }
4465             }
4466 
4467             // Max number of processors
4468             int nproc = nth_per_core * ncores;
4469             // An array to keep number of threads per each context
4470             int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4471             for( int i = 0; i < nproc; i++ ) {
4472                 newarr[ i ] = 0;
4473             }
4474 
4475             int nth = nthreads;
4476             int flag = 0;
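            //
            // Distribute the threads over the available HW contexts
            // round-robin: the first sweep (flag == 0) places at most one
            // thread on each context, and subsequent sweeps (flag != 0)
            // stack additional threads onto already-used contexts.
            //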
4477             while( nth > 0 ) {
4478                 for( int j = 1; j <= nth_per_core; j++ ) {
4479                     int cnt = ncores_with_x_to_max_procs[ j ];
4480                     for( int i = 0; i < ncores; i++ ) {
                        // Skip cores with 0 processors
4482                         if( nproc_at_core[ i ] == 0 ) {
4483                             continue;
4484                         }
4485                         for( int k = 0; k < nth_per_core; k++ ) {
4486                             if( procarr[ i * nth_per_core + k ] != -1 ) {
4487                                 if( newarr[ i * nth_per_core + k ] == 0 ) {
4488                                     newarr[ i * nth_per_core + k ] = 1;
4489                                     cnt--;
4490                                     nth--;
4491                                     break;
4492                                 } else {
4493                                     if( flag != 0 ) {
4494                                         newarr[ i * nth_per_core + k ] ++;
4495                                         cnt--;
4496                                         nth--;
4497                                         break;
4498                                     }
4499                                 }
4500                             }
4501                         }
4502                         if( cnt == 0 || nth == 0 ) {
4503                             break;
4504                         }
4505                     }
4506                     if( nth == 0 ) {
4507                         break;
4508                     }
4509                 }
4510                 flag = 1;
4511             }
4512             int sum = 0;
4513             for( int i = 0; i < nproc; i++ ) {
4514                 sum += newarr[ i ];
4515                 if( sum > tid ) {
4516                     // Granularity == thread
4517                     if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4518                         int osID = procarr[ i ];
4519                         KMP_CPU_SET( osID, mask);
4520                     } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4521                         int coreID = i / nth_per_core;
4522                         for( int ii = 0; ii < nth_per_core; ii++ ) {
4523                             int osID = procarr[ coreID * nth_per_core + ii ];
4524                             if( osID != -1 ) {
4525                                 KMP_CPU_SET( osID, mask);
4526                             }
4527                         }
4528                     }
4529                     break;
4530                 }
4531             }
4532             __kmp_free( newarr );
4533         }
4534 
4535         if (__kmp_affinity_verbose) {
4536             char buf[KMP_AFFIN_MASK_PRINT_LEN];
4537             __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
4538             KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", tid, buf);
4539         }
4540         __kmp_set_system_affinity( mask, TRUE );
4541     }
4542 }
4543 
4544 # endif /* KMP_MIC */
4545 
4546 #endif // KMP_AFFINITY_SUPPORTED
4547