1 //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file implements the operating system Host concept.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/Host.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/ADT/Triple.h"
19 #include "llvm/Config/llvm-config.h"
20 #include "llvm/Support/MemoryBuffer.h"
21 #include "llvm/Support/X86TargetParser.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include <string.h>
24 
25 // Include the platform-specific parts of this class.
26 #ifdef LLVM_ON_UNIX
27 #include "Unix/Host.inc"
28 #include <sched.h>
29 #endif
30 #ifdef _WIN32
31 #include "Windows/Host.inc"
32 #endif
33 #ifdef _MSC_VER
34 #include <intrin.h>
35 #endif
36 #ifdef __MVS__
37 #include "llvm/Support/BCD.h"
38 #endif
39 #if defined(__APPLE__)
40 #include <mach/host_info.h>
41 #include <mach/mach.h>
42 #include <mach/mach_host.h>
43 #include <mach/machine.h>
44 #include <sys/param.h>
45 #include <sys/sysctl.h>
46 #endif
47 #ifdef _AIX
48 #include <sys/systemcfg.h>
49 #endif
50 
51 #define DEBUG_TYPE "host-detection"
52 
53 //===----------------------------------------------------------------------===//
54 //
55 //  Implementations of the CPU detection routines
56 //
57 //===----------------------------------------------------------------------===//
58 
59 using namespace llvm;
60 
61 static std::unique_ptr<llvm::MemoryBuffer>
62     LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
63   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
64       llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
65   if (std::error_code EC = Text.getError()) {
66     llvm::errs() << "Can't read "
67                  << "/proc/cpuinfo: " << EC.message() << "\n";
68     return nullptr;
69   }
70   return std::move(*Text);
71 }
72 
73 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
74   // Access to the Processor Version Register (PVR) on PowerPC is privileged,
75   // and so we must use an operating-system interface to determine the current
76   // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
77   const char *generic = "generic";
78 
79   // The cpu line is second (after the 'processor: 0' line), so if this
80   // buffer is too small then something has changed (or is wrong).
81   StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
82   StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();
83 
84   StringRef::const_iterator CIP = CPUInfoStart;
85 
86   StringRef::const_iterator CPUStart = nullptr;
87   size_t CPULen = 0;
88 
89   // We need to find the first line which starts with cpu, spaces, and a colon.
90   // After the colon, there may be some additional spaces and then the cpu type.
91   while (CIP < CPUInfoEnd && CPUStart == nullptr) {
92     if (CIP < CPUInfoEnd && *CIP == '\n')
93       ++CIP;
94 
95     if (CIP < CPUInfoEnd && *CIP == 'c') {
96       ++CIP;
97       if (CIP < CPUInfoEnd && *CIP == 'p') {
98         ++CIP;
99         if (CIP < CPUInfoEnd && *CIP == 'u') {
100           ++CIP;
101           while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
102             ++CIP;
103 
104           if (CIP < CPUInfoEnd && *CIP == ':') {
105             ++CIP;
106             while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
107               ++CIP;
108 
109             if (CIP < CPUInfoEnd) {
110               CPUStart = CIP;
111               while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
112                                           *CIP != ',' && *CIP != '\n'))
113                 ++CIP;
114               CPULen = CIP - CPUStart;
115             }
116           }
117         }
118       }
119     }
120 
121     if (CPUStart == nullptr)
122       while (CIP < CPUInfoEnd && *CIP != '\n')
123         ++CIP;
124   }
125 
126   if (CPUStart == nullptr)
127     return generic;
128 
129   return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
130       .Case("604e", "604e")
131       .Case("604", "604")
132       .Case("7400", "7400")
133       .Case("7410", "7400")
134       .Case("7447", "7400")
135       .Case("7455", "7450")
136       .Case("G4", "g4")
137       .Case("POWER4", "970")
138       .Case("PPC970FX", "970")
139       .Case("PPC970MP", "970")
140       .Case("G5", "g5")
141       .Case("POWER5", "g5")
142       .Case("A2", "a2")
143       .Case("POWER6", "pwr6")
144       .Case("POWER7", "pwr7")
145       .Case("POWER8", "pwr8")
146       .Case("POWER8E", "pwr8")
147       .Case("POWER8NVL", "pwr8")
148       .Case("POWER9", "pwr9")
149       .Case("POWER10", "pwr10")
150       // FIXME: If we get a simulator or machine with the capabilities of
151       // mcpu=future, we should revisit this and add the name reported by the
152       // simulator/machine.
153       .Default(generic);
154 }
155 
156 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
157   // The cpuid register on arm is not accessible from user space. On Linux,
158   // it is exposed through the /proc/cpuinfo file.
159 
160   // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
161   // in all cases.
162   SmallVector<StringRef, 32> Lines;
163   ProcCpuinfoContent.split(Lines, "\n");
164 
165   // Look for the CPU implementer line.
166   StringRef Implementer;
167   StringRef Hardware;
168   StringRef Part;
169   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
170     if (Lines[I].startswith("CPU implementer"))
171       Implementer = Lines[I].substr(15).ltrim("\t :");
172     if (Lines[I].startswith("Hardware"))
173       Hardware = Lines[I].substr(8).ltrim("\t :");
174     if (Lines[I].startswith("CPU part"))
175       Part = Lines[I].substr(8).ltrim("\t :");
176   }
177 
178   if (Implementer == "0x41") { // ARM Ltd.
179     // MSM8992/8994 may give cpu part for the core that the kernel is running on,
180     // which is undeterministic and wrong. Always return cortex-a53 for these SoC.
181     if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996"))
182       return "cortex-a53";
183 
184 
185     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
186     // values correspond to the "Part number" in the CP15/c0 register. The
187     // contents are specified in the various processor manuals.
188     // This corresponds to the Main ID Register in Technical Reference Manuals.
189     // and is used in programs like sys-utils
190     return StringSwitch<const char *>(Part)
191         .Case("0x926", "arm926ej-s")
192         .Case("0xb02", "mpcore")
193         .Case("0xb36", "arm1136j-s")
194         .Case("0xb56", "arm1156t2-s")
195         .Case("0xb76", "arm1176jz-s")
196         .Case("0xc08", "cortex-a8")
197         .Case("0xc09", "cortex-a9")
198         .Case("0xc0f", "cortex-a15")
199         .Case("0xc20", "cortex-m0")
200         .Case("0xc23", "cortex-m3")
201         .Case("0xc24", "cortex-m4")
202         .Case("0xd22", "cortex-m55")
203         .Case("0xd02", "cortex-a34")
204         .Case("0xd04", "cortex-a35")
205         .Case("0xd03", "cortex-a53")
206         .Case("0xd07", "cortex-a57")
207         .Case("0xd08", "cortex-a72")
208         .Case("0xd09", "cortex-a73")
209         .Case("0xd0a", "cortex-a75")
210         .Case("0xd0b", "cortex-a76")
211         .Case("0xd0d", "cortex-a77")
212         .Case("0xd41", "cortex-a78")
213         .Case("0xd44", "cortex-x1")
214         .Case("0xd4c", "cortex-x1c")
215         .Case("0xd0c", "neoverse-n1")
216         .Case("0xd49", "neoverse-n2")
217         .Case("0xd40", "neoverse-v1")
218         .Default("generic");
219   }
220 
221   if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
222     return StringSwitch<const char *>(Part)
223       .Case("0x516", "thunderx2t99")
224       .Case("0x0516", "thunderx2t99")
225       .Case("0xaf", "thunderx2t99")
226       .Case("0x0af", "thunderx2t99")
227       .Case("0xa1", "thunderxt88")
228       .Case("0x0a1", "thunderxt88")
229       .Default("generic");
230   }
231 
232   if (Implementer == "0x46") { // Fujitsu Ltd.
233     return StringSwitch<const char *>(Part)
234       .Case("0x001", "a64fx")
235       .Default("generic");
236   }
237 
238   if (Implementer == "0x4e") { // NVIDIA Corporation
239     return StringSwitch<const char *>(Part)
240         .Case("0x004", "carmel")
241         .Default("generic");
242   }
243 
244   if (Implementer == "0x48") // HiSilicon Technologies, Inc.
245     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
246     // values correspond to the "Part number" in the CP15/c0 register. The
247     // contents are specified in the various processor manuals.
248     return StringSwitch<const char *>(Part)
249       .Case("0xd01", "tsv110")
250       .Default("generic");
251 
252   if (Implementer == "0x51") // Qualcomm Technologies, Inc.
253     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
254     // values correspond to the "Part number" in the CP15/c0 register. The
255     // contents are specified in the various processor manuals.
256     return StringSwitch<const char *>(Part)
257         .Case("0x06f", "krait") // APQ8064
258         .Case("0x201", "kryo")
259         .Case("0x205", "kryo")
260         .Case("0x211", "kryo")
261         .Case("0x800", "cortex-a73") // Kryo 2xx Gold
262         .Case("0x801", "cortex-a73") // Kryo 2xx Silver
263         .Case("0x802", "cortex-a75") // Kryo 3xx Gold
264         .Case("0x803", "cortex-a75") // Kryo 3xx Silver
265         .Case("0x804", "cortex-a76") // Kryo 4xx Gold
266         .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
267         .Case("0xc00", "falkor")
268         .Case("0xc01", "saphira")
269         .Default("generic");
270   if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
271     // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
272     // any predictive pattern across variants and parts.
273     unsigned Variant = 0, Part = 0;
274 
275     // Look for the CPU variant line, whose value is a 1 digit hexadecimal
276     // number, corresponding to the Variant bits in the CP15/C0 register.
277     for (auto I : Lines)
278       if (I.consume_front("CPU variant"))
279         I.ltrim("\t :").getAsInteger(0, Variant);
280 
281     // Look for the CPU part line, whose value is a 3 digit hexadecimal
282     // number, corresponding to the PartNum bits in the CP15/C0 register.
283     for (auto I : Lines)
284       if (I.consume_front("CPU part"))
285         I.ltrim("\t :").getAsInteger(0, Part);
286 
287     unsigned Exynos = (Variant << 12) | Part;
288     switch (Exynos) {
289     default:
290       // Default by falling through to Exynos M3.
291       LLVM_FALLTHROUGH;
292     case 0x1002:
293       return "exynos-m3";
294     case 0x1003:
295       return "exynos-m4";
296     }
297   }
298 
299   if (Implementer == "0xc0") { // Ampere Computing
300     return StringSwitch<const char *>(Part)
301         .Case("0xac3", "ampere1")
302         .Default("generic");
303   }
304 
305   return "generic";
306 }
307 
308 namespace {
309 StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
310   switch (Id) {
311     case 2064:  // z900 not supported by LLVM
312     case 2066:
313     case 2084:  // z990 not supported by LLVM
314     case 2086:
315     case 2094:  // z9-109 not supported by LLVM
316     case 2096:
317       return "generic";
318     case 2097:
319     case 2098:
320       return "z10";
321     case 2817:
322     case 2818:
323       return "z196";
324     case 2827:
325     case 2828:
326       return "zEC12";
327     case 2964:
328     case 2965:
329       return HaveVectorSupport? "z13" : "zEC12";
330     case 3906:
331     case 3907:
332       return HaveVectorSupport? "z14" : "zEC12";
333     case 8561:
334     case 8562:
335       return HaveVectorSupport? "z15" : "zEC12";
336     case 3931:
337     case 3932:
338     default:
339       return HaveVectorSupport? "z16" : "zEC12";
340   }
341 }
342 } // end anonymous namespace
343 
344 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
345   // STIDP is a privileged operation, so use /proc/cpuinfo instead.
346 
347   // The "processor 0:" line comes after a fair amount of other information,
348   // including a cache breakdown, but this should be plenty.
349   SmallVector<StringRef, 32> Lines;
350   ProcCpuinfoContent.split(Lines, "\n");
351 
352   // Look for the CPU features.
353   SmallVector<StringRef, 32> CPUFeatures;
354   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
355     if (Lines[I].startswith("features")) {
356       size_t Pos = Lines[I].find(':');
357       if (Pos != StringRef::npos) {
358         Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
359         break;
360       }
361     }
362 
363   // We need to check for the presence of vector support independently of
364   // the machine type, since we may only use the vector register set when
365   // supported by the kernel (and hypervisor).
366   bool HaveVectorSupport = false;
367   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
368     if (CPUFeatures[I] == "vx")
369       HaveVectorSupport = true;
370   }
371 
372   // Now check the processor machine type.
373   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
374     if (Lines[I].startswith("processor ")) {
375       size_t Pos = Lines[I].find("machine = ");
376       if (Pos != StringRef::npos) {
377         Pos += sizeof("machine = ") - 1;
378         unsigned int Id;
379         if (!Lines[I].drop_front(Pos).getAsInteger(10, Id))
380           return getCPUNameFromS390Model(Id, HaveVectorSupport);
381       }
382       break;
383     }
384   }
385 
386   return "generic";
387 }
388 
389 StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) {
390   // There are 24 lines in /proc/cpuinfo
391   SmallVector<StringRef> Lines;
392   ProcCpuinfoContent.split(Lines, "\n");
393 
394   // Look for uarch line to determine cpu name
395   StringRef UArch;
396   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
397     if (Lines[I].startswith("uarch")) {
398       UArch = Lines[I].substr(5).ltrim("\t :");
399       break;
400     }
401   }
402 
403   return StringSwitch<const char *>(UArch)
404       .Case("sifive,u74-mc", "sifive-u74")
405       .Case("sifive,bullet0", "sifive-u74")
406       .Default("generic");
407 }
408 
409 StringRef sys::detail::getHostCPUNameForBPF() {
410 #if !defined(__linux__) || !defined(__x86_64__)
411   return "generic";
412 #else
413   uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
414       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
415     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
416       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
417       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
418       /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
419       0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
420       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
421       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
422       /* BPF_EXIT_INSN() */
423       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
424 
425   uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
426       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
427     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
428       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
429       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
430       /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
431       0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
432       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
433       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
434       /* BPF_EXIT_INSN() */
435       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
436 
437   struct bpf_prog_load_attr {
438     uint32_t prog_type;
439     uint32_t insn_cnt;
440     uint64_t insns;
441     uint64_t license;
442     uint32_t log_level;
443     uint32_t log_size;
444     uint64_t log_buf;
445     uint32_t kern_version;
446     uint32_t prog_flags;
447   } attr = {};
448   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
449   attr.insn_cnt = 5;
450   attr.insns = (uint64_t)v3_insns;
451   attr.license = (uint64_t)"DUMMY";
452 
453   int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
454                    sizeof(attr));
455   if (fd >= 0) {
456     close(fd);
457     return "v3";
458   }
459 
460   /* Clear the whole attr in case its content changed by syscall. */
461   memset(&attr, 0, sizeof(attr));
462   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
463   attr.insn_cnt = 5;
464   attr.insns = (uint64_t)v2_insns;
465   attr.license = (uint64_t)"DUMMY";
466   fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
467   if (fd >= 0) {
468     close(fd);
469     return "v2";
470   }
471   return "v1";
472 #endif
473 }
474 
475 #if defined(__i386__) || defined(_M_IX86) || \
476     defined(__x86_64__) || defined(_M_X64)
477 
// Report whether the CPUID instruction may be executed.
//
// The i386 probe was copied from clang's cpuid.h (__get_cpuid_max) and was
// motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support: it tries to flip bit 0x00200000 of EFLAGS and reports support only
// if the change sticks. Every other configuration simply reports true.
// Removal of the cpuid.h header was motivated by PR30384.
// Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp
// or test-suite, but are used in external projects e.g. libstdcxx
static bool isCpuIdSupported() {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int CpuidSupported;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(CpuidSupported)
          :
          : "eax", "ecx");
  return CpuidSupported != 0;
#else
  return true;
#endif
}
512 
/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments.  If we can't run cpuid on the host, return true.
///
/// \param value the leaf number, passed to cpuid in EAX.
/// \param rEAX,rEBX,rECX,rEDX receive the EAX/EBX/ECX/EDX results. They are
///        not written on the paths that return true.
/// \returns false when cpuid was executed, true when no implementation is
///          available for the compiler/architecture being built.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  // rbx is copied to rsi before cpuid; the exchange afterwards leaves cpuid's
  // EBX result in rsi (the "=S" output) and the saved value back in rbx.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  // Same save/exchange sequence as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}
550 
551 namespace llvm {
552 namespace sys {
553 namespace detail {
554 namespace x86 {
555 
556 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
557   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
558   if (MaxLeaf == nullptr)
559     MaxLeaf = &EAX;
560   else
561     *MaxLeaf = 0;
562 
563   if (!isCpuIdSupported())
564     return VendorSignatures::UNKNOWN;
565 
566   if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
567     return VendorSignatures::UNKNOWN;
568 
569   // "Genu ineI ntel"
570   if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
571     return VendorSignatures::GENUINE_INTEL;
572 
573   // "Auth enti cAMD"
574   if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
575     return VendorSignatures::AUTHENTIC_AMD;
576 
577   return VendorSignatures::UNKNOWN;
578 }
579 
580 } // namespace x86
581 } // namespace detail
582 } // namespace sys
583 } // namespace llvm
584 
585 using namespace llvm::sys::detail::x86;
586 
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments.  If we can't run cpuid on the host,
/// return true.
///
/// \param value the leaf number, passed to cpuid in EAX.
/// \param subleaf the sub-leaf number, passed to cpuid in ECX.
/// \param rEAX,rEBX,rECX,rEDX receive the EAX/EBX/ECX/EDX results. They are
///        not written on the paths that return true.
/// \returns false when cpuid was executed, true when no implementation is
///          available for the compiler/architecture being built.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  // rbx is copied to rsi before cpuid; the exchange afterwards leaves cpuid's
  // EBX result in rsi (the "=S" output) and the saved value back in rbx.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__i386__)
  // Same save/exchange sequence as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // __cpuidex takes the sub-leaf as an explicit third argument.
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}
625 
// Read control register 0 (XCR0). Used to detect features such as AVX.
//
// rEAX receives the low 32 bits of the register and rEDX the high 32 bits.
// Returns false when the register was read, true when no implementation is
// available for the compiler being used (the outputs are then not written).
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  // ECX is loaded with 0 to select which extended control register is read.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  // The intrinsic returns the full 64-bit value; split it into two halves.
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}
643 
/// Decode the family and model numbers from the EAX value of CPUID leaf 1.
///
/// \param EAX    the raw EAX register contents.
/// \param Family receives the base family (bits 8-11), plus the extended
///               family (bits 20-27) when the base family is 0xF.
/// \param Model  receives the base model (bits 4-7), with the extended model
///               (bits 16-19) as the high nibble when the base family is 6
///               or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;  // Bits 8 - 11
  const unsigned BaseModel = (EAX >> 4) & 0xf;   // Bits 4 - 7
  const unsigned ExtFamily = (EAX >> 20) & 0xff; // Bits 20 - 27
  const unsigned ExtModel = (EAX >> 16) & 0xf;   // Bits 16 - 19

  const bool UsesExtFamily = BaseFamily == 0xf;
  const bool UsesExtModel = BaseFamily == 6 || BaseFamily == 0xf;

  *Family = UsesExtFamily ? BaseFamily + ExtFamily : BaseFamily;
  *Model = UsesExtModel ? BaseModel + (ExtModel << 4) : BaseModel;
}
656 
/// Map an Intel family/model pair (plus feature bits) to an LLVM CPU name.
///
/// \param Family   the CPU family; families 3, 4, 5, 6 and 15 are handled.
/// \param Model    the CPU model; only consulted for family 6.
/// \param Features array of 32-bit words indexed by X86::FEATURE_* numbers
///                 (word F / 32, bit F % 32).
/// \param Type     set to an X86::INTEL_* value for the family-6 models
///                 listed explicitly below; not written otherwise.
/// \param Subtype  set to an X86::INTEL_COREI7_* value for the Core i7 class
///                 models listed below; not written otherwise.
/// \returns the CPU name, or an empty StringRef when the family is unknown.
static StringRef
getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                                const unsigned *Features,
                                unsigned *Type, unsigned *Subtype) {
  // Test a single feature bit in the Features word array.
  auto testFeature = [&](unsigned F) {
    return (Features[F / 32] & (1U << (F % 32))) != 0;
  };

  StringRef CPU;

  switch (Family) {
  case 3:
    CPU = "i386";
    break;
  case 4:
    CPU = "i486";
    break;
  case 5:
    if (testFeature(X86::FEATURE_MMX)) {
      CPU = "pentium-mmx";
      break;
    }
    CPU = "pentium";
    break;
  case 6:
    switch (Model) {
    case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
               // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
               // mobile processor, Intel Core 2 Extreme processor, Intel
               // Pentium Dual-Core processor, Intel Xeon processor, model
               // 0Fh. All processors are manufactured using the 65 nm process.
    case 0x16: // Intel Celeron processor model 16h. All processors are
               // manufactured using the 65 nm process
      CPU = "core2";
      *Type = X86::INTEL_CORE2;
      break;
    case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
               // 17h. All processors are manufactured using the 45 nm process.
               //
               // 45nm: Penryn , Wolfdale, Yorkfield (XE)
    case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
               // the 45 nm process.
      CPU = "penryn";
      *Type = X86::INTEL_CORE2;
      break;
    case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
               // processors are manufactured using the 45 nm process.
    case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
               // As found in a Summer 2010 model iMac.
    case 0x1f:
    case 0x2e:              // Nehalem EX
      CPU = "nehalem";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_NEHALEM;
      break;
    case 0x25: // Intel Core i7, laptop version.
    case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
               // processors are manufactured using the 32 nm process.
    case 0x2f: // Westmere EX
      CPU = "westmere";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_WESTMERE;
      break;
    case 0x2a: // Intel Core i7 processor. All processors are manufactured
               // using the 32 nm process.
    case 0x2d:
      CPU = "sandybridge";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
      break;
    case 0x3a:
    case 0x3e:              // Ivy Bridge EP
      CPU = "ivybridge";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_IVYBRIDGE;
      break;

    // Haswell:
    case 0x3c:
    case 0x3f:
    case 0x45:
    case 0x46:
      CPU = "haswell";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_HASWELL;
      break;

    // Broadwell:
    case 0x3d:
    case 0x47:
    case 0x4f:
    case 0x56:
      CPU = "broadwell";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_BROADWELL;
      break;

    // Skylake:
    case 0x4e:              // Skylake mobile
    case 0x5e:              // Skylake desktop
    case 0x8e:              // Kaby Lake mobile
    case 0x9e:              // Kaby Lake desktop
    case 0xa5:              // Comet Lake-H/S
    case 0xa6:              // Comet Lake-U
      CPU = "skylake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_SKYLAKE;
      break;

    // Rocketlake:
    case 0xa7:
      CPU = "rocketlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ROCKETLAKE;
      break;

    // Skylake Xeon:
    // Model 0x55 is shared by three CPU names; the feature bits tell them
    // apart, checking the newest extension first.
    case 0x55:
      *Type = X86::INTEL_COREI7;
      if (testFeature(X86::FEATURE_AVX512BF16)) {
        CPU = "cooperlake";
        *Subtype = X86::INTEL_COREI7_COOPERLAKE;
      } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
        CPU = "cascadelake";
        *Subtype = X86::INTEL_COREI7_CASCADELAKE;
      } else {
        CPU = "skylake-avx512";
        *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
      }
      break;

    // Cannonlake:
    case 0x66:
      CPU = "cannonlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_CANNONLAKE;
      break;

    // Icelake:
    case 0x7d:
    case 0x7e:
      CPU = "icelake-client";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
      break;

    // Tigerlake:
    case 0x8c:
    case 0x8d:
      CPU = "tigerlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_TIGERLAKE;
      break;

    // Alderlake:
    case 0x97:
    case 0x9a:
      CPU = "alderlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ALDERLAKE;
      break;

    // Icelake Xeon:
    case 0x6a:
    case 0x6c:
      CPU = "icelake-server";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
      break;

    // Sapphire Rapids:
    case 0x8f:
      CPU = "sapphirerapids";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
      break;

    case 0x1c: // Most 45 nm Intel Atom processors
    case 0x26: // 45 nm Atom Lincroft
    case 0x27: // 32 nm Atom Medfield
    case 0x35: // 32 nm Atom Midview
    case 0x36: // 32 nm Atom Midview
      CPU = "bonnell";
      *Type = X86::INTEL_BONNELL;
      break;

    // Atom Silvermont codes from the Intel software optimization guide.
    case 0x37:
    case 0x4a:
    case 0x4d:
    case 0x5a:
    case 0x5d:
    case 0x4c: // really airmont
      CPU = "silvermont";
      *Type = X86::INTEL_SILVERMONT;
      break;
    // Goldmont:
    case 0x5c: // Apollo Lake
    case 0x5f: // Denverton
      CPU = "goldmont";
      *Type = X86::INTEL_GOLDMONT;
      break;
    case 0x7a:
      CPU = "goldmont-plus";
      *Type = X86::INTEL_GOLDMONT_PLUS;
      break;
    case 0x86:
      CPU = "tremont";
      *Type = X86::INTEL_TREMONT;
      break;

    // Xeon Phi (Knights Landing + Knights Mill):
    case 0x57:
      CPU = "knl";
      *Type = X86::INTEL_KNL;
      break;
    case 0x85:
      CPU = "knm";
      *Type = X86::INTEL_KNM;
      break;

    default: // Unknown family 6 CPU, try to guess.
      // Don't bother with Type/Subtype here, they aren't used by the caller.
      // They're used above to keep the code in sync with compiler-rt.
      // The chain below picks the first CPU name whose distinguishing feature
      // bit is set, so the order of the tests matters.
      // TODO detect tigerlake host from model
      if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
        CPU = "tigerlake";
      } else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
        CPU = "icelake-client";
      } else if (testFeature(X86::FEATURE_AVX512VBMI)) {
        CPU = "cannonlake";
      } else if (testFeature(X86::FEATURE_AVX512BF16)) {
        CPU = "cooperlake";
      } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
        CPU = "cascadelake";
      } else if (testFeature(X86::FEATURE_AVX512VL)) {
        CPU = "skylake-avx512";
      } else if (testFeature(X86::FEATURE_AVX512ER)) {
        CPU = "knl";
      } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
        if (testFeature(X86::FEATURE_SHA))
          CPU = "goldmont";
        else
          CPU = "skylake";
      } else if (testFeature(X86::FEATURE_ADX)) {
        CPU = "broadwell";
      } else if (testFeature(X86::FEATURE_AVX2)) {
        CPU = "haswell";
      } else if (testFeature(X86::FEATURE_AVX)) {
        CPU = "sandybridge";
      } else if (testFeature(X86::FEATURE_SSE4_2)) {
        if (testFeature(X86::FEATURE_MOVBE))
          CPU = "silvermont";
        else
          CPU = "nehalem";
      } else if (testFeature(X86::FEATURE_SSE4_1)) {
        CPU = "penryn";
      } else if (testFeature(X86::FEATURE_SSSE3)) {
        if (testFeature(X86::FEATURE_MOVBE))
          CPU = "bonnell";
        else
          CPU = "core2";
      } else if (testFeature(X86::FEATURE_64BIT)) {
        CPU = "core2";
      } else if (testFeature(X86::FEATURE_SSE3)) {
        CPU = "yonah";
      } else if (testFeature(X86::FEATURE_SSE2)) {
        CPU = "pentium-m";
      } else if (testFeature(X86::FEATURE_SSE)) {
        CPU = "pentium3";
      } else if (testFeature(X86::FEATURE_MMX)) {
        CPU = "pentium2";
      } else {
        CPU = "pentiumpro";
      }
      break;
    }
    break;
  case 15: {
    // Family 15 is classified by feature bits only; Model is not consulted.
    if (testFeature(X86::FEATURE_64BIT)) {
      CPU = "nocona";
      break;
    }
    if (testFeature(X86::FEATURE_SSE3)) {
      CPU = "prescott";
      break;
    }
    CPU = "pentium4";
    break;
  }
  default:
    break; // Unknown.
  }

  return CPU;
}
953 
954 static StringRef
955 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
956                               const unsigned *Features,
957                               unsigned *Type, unsigned *Subtype) {
958   auto testFeature = [&](unsigned F) {
959     return (Features[F / 32] & (1U << (F % 32))) != 0;
960   };
961 
962   StringRef CPU;
963 
964   switch (Family) {
965   case 4:
966     CPU = "i486";
967     break;
968   case 5:
969     CPU = "pentium";
970     switch (Model) {
971     case 6:
972     case 7:
973       CPU = "k6";
974       break;
975     case 8:
976       CPU = "k6-2";
977       break;
978     case 9:
979     case 13:
980       CPU = "k6-3";
981       break;
982     case 10:
983       CPU = "geode";
984       break;
985     }
986     break;
987   case 6:
988     if (testFeature(X86::FEATURE_SSE)) {
989       CPU = "athlon-xp";
990       break;
991     }
992     CPU = "athlon";
993     break;
994   case 15:
995     if (testFeature(X86::FEATURE_SSE3)) {
996       CPU = "k8-sse3";
997       break;
998     }
999     CPU = "k8";
1000     break;
1001   case 16:
1002     CPU = "amdfam10";
1003     *Type = X86::AMDFAM10H; // "amdfam10"
1004     switch (Model) {
1005     case 2:
1006       *Subtype = X86::AMDFAM10H_BARCELONA;
1007       break;
1008     case 4:
1009       *Subtype = X86::AMDFAM10H_SHANGHAI;
1010       break;
1011     case 8:
1012       *Subtype = X86::AMDFAM10H_ISTANBUL;
1013       break;
1014     }
1015     break;
1016   case 20:
1017     CPU = "btver1";
1018     *Type = X86::AMD_BTVER1;
1019     break;
1020   case 21:
1021     CPU = "bdver1";
1022     *Type = X86::AMDFAM15H;
1023     if (Model >= 0x60 && Model <= 0x7f) {
1024       CPU = "bdver4";
1025       *Subtype = X86::AMDFAM15H_BDVER4;
1026       break; // 60h-7Fh: Excavator
1027     }
1028     if (Model >= 0x30 && Model <= 0x3f) {
1029       CPU = "bdver3";
1030       *Subtype = X86::AMDFAM15H_BDVER3;
1031       break; // 30h-3Fh: Steamroller
1032     }
1033     if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
1034       CPU = "bdver2";
1035       *Subtype = X86::AMDFAM15H_BDVER2;
1036       break; // 02h, 10h-1Fh: Piledriver
1037     }
1038     if (Model <= 0x0f) {
1039       *Subtype = X86::AMDFAM15H_BDVER1;
1040       break; // 00h-0Fh: Bulldozer
1041     }
1042     break;
1043   case 22:
1044     CPU = "btver2";
1045     *Type = X86::AMD_BTVER2;
1046     break;
1047   case 23:
1048     CPU = "znver1";
1049     *Type = X86::AMDFAM17H;
1050     if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
1051       CPU = "znver2";
1052       *Subtype = X86::AMDFAM17H_ZNVER2;
1053       break; // 30h-3fh, 71h: Zen2
1054     }
1055     if (Model <= 0x0f) {
1056       *Subtype = X86::AMDFAM17H_ZNVER1;
1057       break; // 00h-0Fh: Zen1
1058     }
1059     break;
1060   case 25:
1061     CPU = "znver3";
1062     *Type = X86::AMDFAM19H;
1063     if (Model <= 0x0f || Model == 0x21) {
1064       *Subtype = X86::AMDFAM19H_ZNVER3;
1065       break; // 00h-0Fh, 21h: Zen3
1066     }
1067     break;
1068   default:
1069     break; // Unknown AMD CPU.
1070   }
1071 
1072   return CPU;
1073 }
1074 
/// Record the X86::FEATURE_* bits detected on the host in \p Features.
///
/// \param ECX      ECX value supplied by the caller (the caller in this file
///                 passes the result of a CPUID leaf 1 query). Its bits are
///                 tested first; the local copy is then reused as an output
///                 register for later queries.
/// \param EDX      EDX value from the same query; the same reuse applies.
/// \param MaxLeaf  Highest basic CPUID leaf; leaf 7 is only queried when this
///                 is at least 7.
/// \param Features Packed bitset, 32 feature bits per element. Bits are only
///                 ever OR-ed in, never cleared, so the array should arrive
///                 zero-initialised.
///
/// NOTE(review): the query helpers are used negated (`!helper(...)`), so they
/// presumably return false on success -- confirm against their definitions.
static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
                                 unsigned *Features) {
  unsigned EAX, EBX;

  // Set bit F in the packed feature array.
  auto setFeature = [&](unsigned F) {
    Features[F / 32] |= 1U << (F % 32);
  };

  // Flags taken from the caller-supplied EDX.
  if ((EDX >> 15) & 1)
    setFeature(X86::FEATURE_CMOV);
  if ((EDX >> 23) & 1)
    setFeature(X86::FEATURE_MMX);
  if ((EDX >> 25) & 1)
    setFeature(X86::FEATURE_SSE);
  if ((EDX >> 26) & 1)
    setFeature(X86::FEATURE_SSE2);

  // Flags taken from the caller-supplied ECX.
  if ((ECX >> 0) & 1)
    setFeature(X86::FEATURE_SSE3);
  if ((ECX >> 1) & 1)
    setFeature(X86::FEATURE_PCLMUL);
  if ((ECX >> 9) & 1)
    setFeature(X86::FEATURE_SSSE3);
  if ((ECX >> 12) & 1)
    setFeature(X86::FEATURE_FMA);
  if ((ECX >> 19) & 1)
    setFeature(X86::FEATURE_SSE4_1);
  if ((ECX >> 20) & 1) {
    // The same bit enables both SSE4.2 and CRC32.
    setFeature(X86::FEATURE_SSE4_2);
    setFeature(X86::FEATURE_CRC32);
  }
  if ((ECX >> 23) & 1)
    setFeature(X86::FEATURE_POPCNT);
  if ((ECX >> 25) & 1)
    setFeature(X86::FEATURE_AES);

  if ((ECX >> 22) & 1)
    setFeature(X86::FEATURE_MOVBE);

  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
  // indicates that the AVX registers will be saved and restored on context
  // switch, then we have full AVX support.
  // getX86XCR0 is handed EAX and EDX as outputs, so the caller-supplied EDX
  // must not be consulted after this point.
  const unsigned AVXBits = (1 << 27) | (1 << 28);
  bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
                ((EAX & 0x6) == 0x6);
#if defined(__APPLE__)
  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
  // save the AVX512 context if we use AVX512 instructions, even if the bit is
  // not set right now.
  bool HasAVX512Save = true;
#else
  // AVX512 requires additional context to be saved by the OS.
  bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
#endif

  if (HasAVX)
    setFeature(X86::FEATURE_AVX);

  // Leaf 7, subleaf 0. All four register variables are passed as outputs, so
  // from here on EBX/ECX/EDX refer to this query rather than to the
  // caller-supplied values.
  bool HasLeaf7 =
      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);

  // Leaf 7 EBX flags. AVX2 additionally needs HasAVX; the AVX512 family
  // additionally needs HasAVX512Save.
  if (HasLeaf7 && ((EBX >> 3) & 1))
    setFeature(X86::FEATURE_BMI);
  if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
    setFeature(X86::FEATURE_AVX2);
  if (HasLeaf7 && ((EBX >> 8) & 1))
    setFeature(X86::FEATURE_BMI2);
  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512F);
  if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512DQ);
  if (HasLeaf7 && ((EBX >> 19) & 1))
    setFeature(X86::FEATURE_ADX);
  if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512IFMA);
  if (HasLeaf7 && ((EBX >> 23) & 1))
    setFeature(X86::FEATURE_CLFLUSHOPT);
  if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512PF);
  if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512ER);
  if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512CD);
  if (HasLeaf7 && ((EBX >> 29) & 1))
    setFeature(X86::FEATURE_SHA);
  if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512BW);
  if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VL);

  // Leaf 7 ECX flags.
  if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VBMI);
  if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VBMI2);
  if (HasLeaf7 && ((ECX >> 8) & 1))
    setFeature(X86::FEATURE_GFNI);
  if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
    setFeature(X86::FEATURE_VPCLMULQDQ);
  if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VNNI);
  if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512BITALG);
  if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VPOPCNTDQ);

  // Leaf 7 EDX flags.
  if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX5124VNNIW);
  if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX5124FMAPS);
  if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VP2INTERSECT);

  // Leaf 7, subleaf 1; the registers are overwritten once more.
  bool HasLeaf7Subleaf1 =
      MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
  if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512BF16);

  // Extended leaves: 0x80000000 yields the highest extended leaf in its first
  // output; 0x80000001 is only queried when that maximum allows it.
  unsigned MaxExtLevel;
  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);

  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
  if (HasExtLeaf1 && ((ECX >> 6) & 1))
    setFeature(X86::FEATURE_SSE4_A);
  if (HasExtLeaf1 && ((ECX >> 11) & 1))
    setFeature(X86::FEATURE_XOP);
  if (HasExtLeaf1 && ((ECX >> 16) & 1))
    setFeature(X86::FEATURE_FMA4);

  if (HasExtLeaf1 && ((EDX >> 29) & 1))
    setFeature(X86::FEATURE_64BIT);
}
1207 
1208 StringRef sys::getHostCPUName() {
1209   unsigned MaxLeaf = 0;
1210   const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
1211   if (Vendor == VendorSignatures::UNKNOWN)
1212     return "generic";
1213 
1214   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1215   getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
1216 
1217   unsigned Family = 0, Model = 0;
1218   unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0};
1219   detectX86FamilyModel(EAX, &Family, &Model);
1220   getAvailableFeatures(ECX, EDX, MaxLeaf, Features);
1221 
1222   // These aren't consumed in this file, but we try to keep some source code the
1223   // same or similar to compiler-rt.
1224   unsigned Type = 0;
1225   unsigned Subtype = 0;
1226 
1227   StringRef CPU;
1228 
1229   if (Vendor == VendorSignatures::GENUINE_INTEL) {
1230     CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type,
1231                                           &Subtype);
1232   } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) {
1233     CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type,
1234                                         &Subtype);
1235   }
1236 
1237   if (!CPU.empty())
1238     return CPU;
1239 
1240   return "generic";
1241 }
1242 
1243 #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
1244 StringRef sys::getHostCPUName() {
1245   host_basic_info_data_t hostInfo;
1246   mach_msg_type_number_t infoCount;
1247 
1248   infoCount = HOST_BASIC_INFO_COUNT;
1249   mach_port_t hostPort = mach_host_self();
1250   host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
1251             &infoCount);
1252   mach_port_deallocate(mach_task_self(), hostPort);
1253 
1254   if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
1255     return "generic";
1256 
1257   switch (hostInfo.cpu_subtype) {
1258   case CPU_SUBTYPE_POWERPC_601:
1259     return "601";
1260   case CPU_SUBTYPE_POWERPC_602:
1261     return "602";
1262   case CPU_SUBTYPE_POWERPC_603:
1263     return "603";
1264   case CPU_SUBTYPE_POWERPC_603e:
1265     return "603e";
1266   case CPU_SUBTYPE_POWERPC_603ev:
1267     return "603ev";
1268   case CPU_SUBTYPE_POWERPC_604:
1269     return "604";
1270   case CPU_SUBTYPE_POWERPC_604e:
1271     return "604e";
1272   case CPU_SUBTYPE_POWERPC_620:
1273     return "620";
1274   case CPU_SUBTYPE_POWERPC_750:
1275     return "750";
1276   case CPU_SUBTYPE_POWERPC_7400:
1277     return "7400";
1278   case CPU_SUBTYPE_POWERPC_7450:
1279     return "7450";
1280   case CPU_SUBTYPE_POWERPC_970:
1281     return "970";
1282   default:;
1283   }
1284 
1285   return "generic";
1286 }
1287 #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__))
1288 StringRef sys::getHostCPUName() {
1289   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1290   StringRef Content = P ? P->getBuffer() : "";
1291   return detail::getHostCPUNameForPowerPC(Content);
1292 }
1293 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
1294 StringRef sys::getHostCPUName() {
1295   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1296   StringRef Content = P ? P->getBuffer() : "";
1297   return detail::getHostCPUNameForARM(Content);
1298 }
1299 #elif defined(__linux__) && defined(__s390x__)
1300 StringRef sys::getHostCPUName() {
1301   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1302   StringRef Content = P ? P->getBuffer() : "";
1303   return detail::getHostCPUNameForS390x(Content);
1304 }
1305 #elif defined(__MVS__)
/// z/OS implementation: read the machine model number and the vector-support
/// flag out of the Communications Vector Table (CVT) and map them to a CPU
/// name via getCPUNameFromS390Model.
StringRef sys::getHostCPUName() {
  // Get pointer to Communications Vector Table (CVT).
  // The pointer is located at offset 16 of the Prefixed Save Area (PSA).
  // It is stored as a 31-bit pointer and will be zero-extended to 64 bit.
  int *StartToCVTOffset = reinterpret_cast<int *>(0x10);
  // Since it is stored as a 31-bit pointer, get the 4 bytes from the start
  // of the address.
  int ReadValue = *StartToCVTOffset;
  // Explicitly clear the high order bit.
  ReadValue = (ReadValue & 0x7FFFFFFF);
  char *CVT = reinterpret_cast<char *>(ReadValue);
  // The model number is located in the CVT prefix at offset -6 and stored as
  // signless packed decimal.
  uint16_t Id = *(uint16_t *)&CVT[-6];
  // Convert number to integer.
  Id = decodePackedBCD<uint16_t>(Id, false);
  // Check for vector support. It's stored in field CVTFLAG5 (offset 244),
  // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector
  // extension can only be used if bit CVTVEF is on.
  bool HaveVectorSupport = CVT[244] & 0x80;
  return getCPUNameFromS390Model(Id, HaveVectorSupport);
}
1328 #elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__))
1329 #define CPUFAMILY_ARM_SWIFT 0x1e2d6381
1330 #define CPUFAMILY_ARM_CYCLONE 0x37a09642
1331 #define CPUFAMILY_ARM_TYPHOON 0x2c91a47e
1332 #define CPUFAMILY_ARM_TWISTER 0x92fb37c8
1333 #define CPUFAMILY_ARM_HURRICANE 0x67ceee93
1334 #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6
1335 #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f
1336 #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2
1337 #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3
1338 
1339 StringRef sys::getHostCPUName() {
1340   uint32_t Family;
1341   size_t Length = sizeof(Family);
1342   sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0);
1343 
1344   switch (Family) {
1345   case CPUFAMILY_ARM_SWIFT:
1346     return "swift";
1347   case CPUFAMILY_ARM_CYCLONE:
1348     return "apple-a7";
1349   case CPUFAMILY_ARM_TYPHOON:
1350     return "apple-a8";
1351   case CPUFAMILY_ARM_TWISTER:
1352     return "apple-a9";
1353   case CPUFAMILY_ARM_HURRICANE:
1354     return "apple-a10";
1355   case CPUFAMILY_ARM_MONSOON_MISTRAL:
1356     return "apple-a11";
1357   case CPUFAMILY_ARM_VORTEX_TEMPEST:
1358     return "apple-a12";
1359   case CPUFAMILY_ARM_LIGHTNING_THUNDER:
1360     return "apple-a13";
1361   case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
1362     return "apple-m1";
1363   default:
1364     // Default to the newest CPU we know about.
1365     return "apple-m1";
1366   }
1367 }
1368 #elif defined(_AIX)
1369 StringRef sys::getHostCPUName() {
1370   switch (_system_configuration.implementation) {
1371   case POWER_4:
1372     if (_system_configuration.version == PV_4_3)
1373       return "970";
1374     return "pwr4";
1375   case POWER_5:
1376     if (_system_configuration.version == PV_5)
1377       return "pwr5";
1378     return "pwr5x";
1379   case POWER_6:
1380     if (_system_configuration.version == PV_6_Compat)
1381       return "pwr6";
1382     return "pwr6x";
1383   case POWER_7:
1384     return "pwr7";
1385   case POWER_8:
1386     return "pwr8";
1387   case POWER_9:
1388     return "pwr9";
1389 // TODO: simplify this once the macro is available in all OS levels.
1390 #ifdef POWER_10
1391   case POWER_10:
1392 #else
1393   case 0x40000:
1394 #endif
1395     return "pwr10";
1396   default:
1397     return "generic";
1398   }
1399 }
1400 #elif defined(__riscv)
1401 StringRef sys::getHostCPUName() {
1402 #if defined(__linux__)
1403   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1404   StringRef Content = P ? P->getBuffer() : "";
1405   return detail::getHostCPUNameForRISCV(Content);
1406 #else
1407 #if __riscv_xlen == 64
1408   return "generic-rv64";
1409 #elif __riscv_xlen == 32
1410   return "generic-rv32";
1411 #else
1412 #error "Unhandled value of __riscv_xlen"
1413 #endif
1414 #endif
1415 }
1416 #else
1417 StringRef sys::getHostCPUName() { return "generic"; }
1418 namespace llvm {
1419 namespace sys {
1420 namespace detail {
1421 namespace x86 {
1422 
1423 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
1424   return VendorSignatures::UNKNOWN;
1425 }
1426 
1427 } // namespace x86
1428 } // namespace detail
1429 } // namespace sys
1430 } // namespace llvm
1431 #endif
1432 
1433 #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
1434 // On Linux, the number of physical cores can be computed from /proc/cpuinfo,
1435 // using the number of unique physical/core id pairs. The following
1436 // implementation reads the /proc/cpuinfo format on an x86_64 system.
1437 int computeHostNumPhysicalCores() {
1438   // Enabled represents the number of physical id/core id pairs with at least
1439   // one processor id enabled by the CPU affinity mask.
1440   cpu_set_t Affinity, Enabled;
1441   if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
1442     return -1;
1443   CPU_ZERO(&Enabled);
1444 
1445   // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
1446   // mmapped because it appears to have 0 size.
1447   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
1448       llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
1449   if (std::error_code EC = Text.getError()) {
1450     llvm::errs() << "Can't read "
1451                  << "/proc/cpuinfo: " << EC.message() << "\n";
1452     return -1;
1453   }
1454   SmallVector<StringRef, 8> strs;
1455   (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
1456                              /*KeepEmpty=*/false);
1457   int CurProcessor = -1;
1458   int CurPhysicalId = -1;
1459   int CurSiblings = -1;
1460   int CurCoreId = -1;
1461   for (StringRef Line : strs) {
1462     std::pair<StringRef, StringRef> Data = Line.split(':');
1463     auto Name = Data.first.trim();
1464     auto Val = Data.second.trim();
1465     // These fields are available if the kernel is configured with CONFIG_SMP.
1466     if (Name == "processor")
1467       Val.getAsInteger(10, CurProcessor);
1468     else if (Name == "physical id")
1469       Val.getAsInteger(10, CurPhysicalId);
1470     else if (Name == "siblings")
1471       Val.getAsInteger(10, CurSiblings);
1472     else if (Name == "core id") {
1473       Val.getAsInteger(10, CurCoreId);
1474       // The processor id corresponds to an index into cpu_set_t.
1475       if (CPU_ISSET(CurProcessor, &Affinity))
1476         CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
1477     }
1478   }
1479   return CPU_COUNT(&Enabled);
1480 }
1481 #elif defined(__linux__) && defined(__powerpc__)
/// Count the CPUs in this process's affinity mask on Linux/PowerPC.
///
/// A fixed-size cpu_set_t is tried first. If that query fails, it is retried
/// with a dynamically allocated mask sized for 2048 CPUs.
///
/// \returns the number of CPUs set in the affinity mask, or -1 if the mask
/// cannot be allocated or both queries fail.
int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPU's on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
  const int MaxDynCPUs = 2048;
  cpu_set_t *DynAffinity = CPU_ALLOC(MaxDynCPUs);
  if (!DynAffinity)
    return -1;

  const size_t DynAffinitySize = CPU_ALLOC_SIZE(MaxDynCPUs);
  int NumCPUs = -1;
  // A dynamically sized set must be counted with the _S variant; plain
  // CPU_COUNT only looks at sizeof(cpu_set_t) bytes.
  if (sched_getaffinity(0, DynAffinitySize, DynAffinity) == 0)
    NumCPUs = CPU_COUNT_S(DynAffinitySize, DynAffinity);

  // Release the mask on the failure path as well.
  CPU_FREE(DynAffinity);
  return NumCPUs;
}
1500 #elif defined(__linux__) && defined(__s390x__)
/// Linux/s390x implementation: report the value of
/// sysconf(_SC_NPROCESSORS_ONLN), converted to int.
int computeHostNumPhysicalCores() {
  const long OnlineProcessors = sysconf(_SC_NPROCESSORS_ONLN);
  return static_cast<int>(OnlineProcessors);
}
1502 #elif defined(__APPLE__)
1503 // Gets the number of *physical cores* on the machine.
1504 int computeHostNumPhysicalCores() {
1505   uint32_t count;
1506   size_t len = sizeof(count);
1507   sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0);
1508   if (count < 1) {
1509     int nm[2];
1510     nm[0] = CTL_HW;
1511     nm[1] = HW_AVAILCPU;
1512     sysctl(nm, 2, &count, &len, NULL, 0);
1513     if (count < 1)
1514       return -1;
1515   }
1516   return count;
1517 }
1518 #elif defined(__MVS__)
/// z/OS implementation: follow the PSA -> CVT -> CSD chain of control-block
/// pointers and return the 32-bit count stored at offset
/// CSD_NUMBER_ONLINE_STANDARD_CPS of the CSD.
int computeHostNumPhysicalCores() {
  enum {
    // Byte offset of the pointer to the Communications Vector Table (CVT) in
    // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
    // will be zero-extended to uintptr_t.
    FLCCVT = 16,
    // Byte offset of the pointer to the Common System Data Area (CSD) in the
    // CVT. The table entry is a 31-bit pointer and will be zero-extended to
    // uintptr_t.
    CVTCSD = 660,
    // Byte offset to the number of live CPs in the LPAR, stored as a signed
    // 32-bit value in the table.
    CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
  };
  // The PSA is addressed from 0, so PSA[FLCCVT] reads absolute address 16.
  char *PSA = 0;
  // Each hop reads an unsigned 32-bit entry and widens it to a pointer.
  char *CVT = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT])));
  char *CSD = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD])));
  return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
}
1540 #elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0
1541 // Defined in llvm/lib/Support/Windows/Threading.inc
1542 int computeHostNumPhysicalCores();
1543 #else
1544 // On other systems, return -1 to indicate unknown.
/// Fallback for systems without a dedicated implementation: the core count
/// is unknown, which is signalled by -1.
static int computeHostNumPhysicalCores() {
  const int UnknownCoreCount = -1;
  return UnknownCoreCount;
}
1546 #endif
1547 
1548 int sys::getHostNumPhysicalCores() {
1549   static int NumCores = computeHostNumPhysicalCores();
1550   return NumCores;
1551 }
1552 
1553 #if defined(__i386__) || defined(_M_IX86) || \
1554     defined(__x86_64__) || defined(_M_X64)
/// x86 implementation: probe the host with CPUID/XGETBV and store one
/// name -> enabled entry in \p Features for every feature handled below.
///
/// The register variables EAX/EBX/ECX/EDX are reused as outputs for each
/// successive query, so every group of assignments must stay directly after
/// the query it depends on.
///
/// NOTE(review): the query helpers are used negated (`!helper(...)`), so they
/// presumably return false on success -- confirm against their definitions.
///
/// \returns false, without adding any entry, if the leaf 0 query returns true
/// or reports a maximum leaf below 1; true otherwise.
bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
  unsigned MaxLevel;

  if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1)
    return false;

  // Leaf 1: basic feature flags in EDX and ECX.
  getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);

  Features["cx8"]    = (EDX >>  8) & 1;
  Features["cmov"]   = (EDX >> 15) & 1;
  Features["mmx"]    = (EDX >> 23) & 1;
  Features["fxsr"]   = (EDX >> 24) & 1;
  Features["sse"]    = (EDX >> 25) & 1;
  Features["sse2"]   = (EDX >> 26) & 1;

  Features["sse3"]   = (ECX >>  0) & 1;
  Features["pclmul"] = (ECX >>  1) & 1;
  Features["ssse3"]  = (ECX >>  9) & 1;
  Features["cx16"]   = (ECX >> 13) & 1;
  Features["sse4.1"] = (ECX >> 19) & 1;
  Features["sse4.2"] = (ECX >> 20) & 1;
  // crc32 simply mirrors sse4.2.
  Features["crc32"]  = Features["sse4.2"];
  Features["movbe"]  = (ECX >> 22) & 1;
  Features["popcnt"] = (ECX >> 23) & 1;
  Features["aes"]    = (ECX >> 25) & 1;
  Features["rdrnd"]  = (ECX >> 30) & 1;

  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
  // indicates that the AVX registers will be saved and restored on context
  // switch, then we have full AVX support.
  // getX86XCR0 is handed EAX and EDX as outputs; ECX still holds the leaf 1
  // value and keeps being used below.
  bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
  bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6);
#if defined(__APPLE__)
  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
  // save the AVX512 context if we use AVX512 instructions, even if the bit is
  // not set right now.
  bool HasAVX512Save = true;
#else
  // AVX512 requires additional context to be saved by the OS.
  bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
#endif
  // AMX requires additional context to be saved by the OS.
  const unsigned AMXBits = (1 << 17) | (1 << 18);
  bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);

  Features["avx"]   = HasAVXSave;
  Features["fma"]   = ((ECX >> 12) & 1) && HasAVXSave;
  // Only enable XSAVE if OS has enabled support for saving YMM state.
  Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave;
  Features["f16c"]  = ((ECX >> 29) & 1) && HasAVXSave;

  // Extended leaves: 0x80000000 yields the highest extended leaf in its first
  // output; each further extended leaf is only queried when that allows it.
  unsigned MaxExtLevel;
  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);

  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
  Features["sahf"]   = HasExtLeaf1 && ((ECX >>  0) & 1);
  Features["lzcnt"]  = HasExtLeaf1 && ((ECX >>  5) & 1);
  Features["sse4a"]  = HasExtLeaf1 && ((ECX >>  6) & 1);
  Features["prfchw"] = HasExtLeaf1 && ((ECX >>  8) & 1);
  Features["xop"]    = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
  Features["lwp"]    = HasExtLeaf1 && ((ECX >> 15) & 1);
  Features["fma4"]   = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
  Features["tbm"]    = HasExtLeaf1 && ((ECX >> 21) & 1);
  Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);

  Features["64bit"]  = HasExtLeaf1 && ((EDX >> 29) & 1);

  // Miscellaneous memory related features, detected by
  // using the 0x80000008 leaf of the CPUID instruction
  bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
                     !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
  Features["clzero"]   = HasExtLeaf8 && ((EBX >> 0) & 1);
  Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);

  // Leaf 7, subleaf 0: structured extended feature flags in EBX, ECX and EDX.
  bool HasLeaf7 =
      MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);

  Features["fsgsbase"]   = HasLeaf7 && ((EBX >>  0) & 1);
  Features["sgx"]        = HasLeaf7 && ((EBX >>  2) & 1);
  Features["bmi"]        = HasLeaf7 && ((EBX >>  3) & 1);
  // AVX2 is only supported if we have the OS save support from AVX.
  Features["avx2"]       = HasLeaf7 && ((EBX >>  5) & 1) && HasAVXSave;
  Features["bmi2"]       = HasLeaf7 && ((EBX >>  8) & 1);
  Features["invpcid"]    = HasLeaf7 && ((EBX >> 10) & 1);
  Features["rtm"]        = HasLeaf7 && ((EBX >> 11) & 1);
  // AVX512 is only supported if the OS supports the context save for it.
  Features["avx512f"]    = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
  Features["avx512dq"]   = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
  Features["rdseed"]     = HasLeaf7 && ((EBX >> 18) & 1);
  Features["adx"]        = HasLeaf7 && ((EBX >> 19) & 1);
  Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
  Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
  Features["clwb"]       = HasLeaf7 && ((EBX >> 24) & 1);
  Features["avx512pf"]   = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
  Features["avx512er"]   = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
  Features["avx512cd"]   = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
  Features["sha"]        = HasLeaf7 && ((EBX >> 29) & 1);
  Features["avx512bw"]   = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
  Features["avx512vl"]   = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;

  Features["prefetchwt1"]     = HasLeaf7 && ((ECX >>  0) & 1);
  Features["avx512vbmi"]      = HasLeaf7 && ((ECX >>  1) & 1) && HasAVX512Save;
  Features["pku"]             = HasLeaf7 && ((ECX >>  4) & 1);
  Features["waitpkg"]         = HasLeaf7 && ((ECX >>  5) & 1);
  Features["avx512vbmi2"]     = HasLeaf7 && ((ECX >>  6) & 1) && HasAVX512Save;
  Features["shstk"]           = HasLeaf7 && ((ECX >>  7) & 1);
  Features["gfni"]            = HasLeaf7 && ((ECX >>  8) & 1);
  Features["vaes"]            = HasLeaf7 && ((ECX >>  9) & 1) && HasAVXSave;
  Features["vpclmulqdq"]      = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
  Features["avx512vnni"]      = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
  Features["avx512bitalg"]    = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
  Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
  Features["rdpid"]           = HasLeaf7 && ((ECX >> 22) & 1);
  Features["kl"]              = HasLeaf7 && ((ECX >> 23) & 1); // key locker
  Features["cldemote"]        = HasLeaf7 && ((ECX >> 25) & 1);
  Features["movdiri"]         = HasLeaf7 && ((ECX >> 27) & 1);
  Features["movdir64b"]       = HasLeaf7 && ((ECX >> 28) & 1);
  Features["enqcmd"]          = HasLeaf7 && ((ECX >> 29) & 1);

  Features["uintr"]           = HasLeaf7 && ((EDX >> 5) & 1);
  Features["avx512vp2intersect"] =
      HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
  Features["serialize"]       = HasLeaf7 && ((EDX >> 14) & 1);
  Features["tsxldtrk"]        = HasLeaf7 && ((EDX >> 16) & 1);
  // There are two CPUID leaves that carry information associated with the
  // pconfig instruction:
  // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via bit 18
  // of EDX), while the EAX=0x1b leaf returns information on the
  // availability of specific pconfig leaves.
  // The target feature here only refers to the first of these two.
  // Users might need to check for the availability of specific pconfig
  // leaves using cpuid, since that information is ignored while
  // detecting features using the "-march=native" flag.
  // For more info, see X86 ISA docs.
  Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
  Features["amx-bf16"]   = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
  Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save;
  Features["amx-tile"]   = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
  Features["amx-int8"]   = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
  // Leaf 7, subleaf 1: flags are read from EAX.
  bool HasLeaf7Subleaf1 =
      MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
  Features["avxvnni"]    = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
  Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
  Features["hreset"]     = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);

  // Leaf 0xd, subleaf 1: XSAVE extensions, read from EAX.
  bool HasLeafD = MaxLevel >= 0xd &&
                  !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);

  // Only enable XSAVE if OS has enabled support for saving YMM state.
  Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
  Features["xsavec"]   = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
  Features["xsaves"]   = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;

  // Leaf 0x14, subleaf 0: ptwrite is read from EBX bit 4.
  bool HasLeaf14 = MaxLevel >= 0x14 &&
                  !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);

  Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);

  // Leaf 0x19: widekl is read from EBX bit 2 and additionally requires that
  // the leaf 7 query succeeded.
  bool HasLeaf19 =
      MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
  Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);

  return true;
}
1721 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
1722 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1723   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1724   if (!P)
1725     return false;
1726 
1727   SmallVector<StringRef, 32> Lines;
1728   P->getBuffer().split(Lines, "\n");
1729 
1730   SmallVector<StringRef, 32> CPUFeatures;
1731 
1732   // Look for the CPU features.
1733   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1734     if (Lines[I].startswith("Features")) {
1735       Lines[I].split(CPUFeatures, ' ');
1736       break;
1737     }
1738 
1739 #if defined(__aarch64__)
1740   // Keep track of which crypto features we have seen
1741   enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
1742   uint32_t crypto = 0;
1743 #endif
1744 
1745   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
1746     StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
1747 #if defined(__aarch64__)
1748                                    .Case("asimd", "neon")
1749                                    .Case("fp", "fp-armv8")
1750                                    .Case("crc32", "crc")
1751                                    .Case("atomics", "lse")
1752                                    .Case("sve", "sve")
1753                                    .Case("sve2", "sve2")
1754 #else
1755                                    .Case("half", "fp16")
1756                                    .Case("neon", "neon")
1757                                    .Case("vfpv3", "vfp3")
1758                                    .Case("vfpv3d16", "d16")
1759                                    .Case("vfpv4", "vfp4")
1760                                    .Case("idiva", "hwdiv-arm")
1761                                    .Case("idivt", "hwdiv")
1762 #endif
1763                                    .Default("");
1764 
1765 #if defined(__aarch64__)
1766     // We need to check crypto separately since we need all of the crypto
1767     // extensions to enable the subtarget feature
1768     if (CPUFeatures[I] == "aes")
1769       crypto |= CAP_AES;
1770     else if (CPUFeatures[I] == "pmull")
1771       crypto |= CAP_PMULL;
1772     else if (CPUFeatures[I] == "sha1")
1773       crypto |= CAP_SHA1;
1774     else if (CPUFeatures[I] == "sha2")
1775       crypto |= CAP_SHA2;
1776 #endif
1777 
1778     if (LLVMFeatureStr != "")
1779       Features[LLVMFeatureStr] = true;
1780   }
1781 
1782 #if defined(__aarch64__)
1783   // If we have all crypto bits we can add the feature
1784   if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
1785     Features["crypto"] = true;
1786 #endif
1787 
1788   return true;
1789 }
1790 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
1791 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1792   if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
1793     Features["neon"] = true;
1794   if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
1795     Features["crc"] = true;
1796   if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
1797     Features["crypto"] = true;
1798 
1799   return true;
1800 }
1801 #else
1802 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
1803 #endif
1804 
1805 std::string sys::getProcessTriple() {
1806   std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
1807   Triple PT(Triple::normalize(TargetTripleString));
1808 
1809   if (sizeof(void *) == 8 && PT.isArch32Bit())
1810     PT = PT.get64BitArchVariant();
1811   if (sizeof(void *) == 4 && PT.isArch64Bit())
1812     PT = PT.get32BitArchVariant();
1813 
1814   return PT.str();
1815 }
1816