1 //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file implements the operating system Host concept.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/Host.h"
14 #include "llvm/ADT/SmallSet.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/StringMap.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Triple.h"
20 #include "llvm/Config/llvm-config.h"
21 #include "llvm/Support/BCD.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/FileSystem.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/X86TargetParser.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <assert.h>
28 #include <string.h>
29 
30 // Include the platform-specific parts of this class.
31 #ifdef LLVM_ON_UNIX
32 #include "Unix/Host.inc"
33 #include <sched.h>
34 #endif
35 #ifdef _WIN32
36 #include "Windows/Host.inc"
37 #endif
38 #ifdef _MSC_VER
39 #include <intrin.h>
40 #endif
41 #if defined(__APPLE__) && (!defined(__x86_64__))
42 #include <mach/host_info.h>
43 #include <mach/mach.h>
44 #include <mach/mach_host.h>
45 #include <mach/machine.h>
46 #endif
47 #ifdef _AIX
48 #include <sys/systemcfg.h>
49 #endif
50 
51 #define DEBUG_TYPE "host-detection"
52 
53 //===----------------------------------------------------------------------===//
54 //
55 //  Implementations of the CPU detection routines
56 //
57 //===----------------------------------------------------------------------===//
58 
59 using namespace llvm;
60 
61 static std::unique_ptr<llvm::MemoryBuffer>
62     LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
63   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
64       llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
65   if (std::error_code EC = Text.getError()) {
66     llvm::errs() << "Can't read "
67                  << "/proc/cpuinfo: " << EC.message() << "\n";
68     return nullptr;
69   }
70   return std::move(*Text);
71 }
72 
73 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
74   // Access to the Processor Version Register (PVR) on PowerPC is privileged,
75   // and so we must use an operating-system interface to determine the current
76   // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
77   const char *generic = "generic";
78 
79   // The cpu line is second (after the 'processor: 0' line), so if this
80   // buffer is too small then something has changed (or is wrong).
81   StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
82   StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();
83 
84   StringRef::const_iterator CIP = CPUInfoStart;
85 
86   StringRef::const_iterator CPUStart = 0;
87   size_t CPULen = 0;
88 
89   // We need to find the first line which starts with cpu, spaces, and a colon.
90   // After the colon, there may be some additional spaces and then the cpu type.
91   while (CIP < CPUInfoEnd && CPUStart == 0) {
92     if (CIP < CPUInfoEnd && *CIP == '\n')
93       ++CIP;
94 
95     if (CIP < CPUInfoEnd && *CIP == 'c') {
96       ++CIP;
97       if (CIP < CPUInfoEnd && *CIP == 'p') {
98         ++CIP;
99         if (CIP < CPUInfoEnd && *CIP == 'u') {
100           ++CIP;
101           while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
102             ++CIP;
103 
104           if (CIP < CPUInfoEnd && *CIP == ':') {
105             ++CIP;
106             while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
107               ++CIP;
108 
109             if (CIP < CPUInfoEnd) {
110               CPUStart = CIP;
111               while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
112                                           *CIP != ',' && *CIP != '\n'))
113                 ++CIP;
114               CPULen = CIP - CPUStart;
115             }
116           }
117         }
118       }
119     }
120 
121     if (CPUStart == 0)
122       while (CIP < CPUInfoEnd && *CIP != '\n')
123         ++CIP;
124   }
125 
126   if (CPUStart == 0)
127     return generic;
128 
129   return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
130       .Case("604e", "604e")
131       .Case("604", "604")
132       .Case("7400", "7400")
133       .Case("7410", "7400")
134       .Case("7447", "7400")
135       .Case("7455", "7450")
136       .Case("G4", "g4")
137       .Case("POWER4", "970")
138       .Case("PPC970FX", "970")
139       .Case("PPC970MP", "970")
140       .Case("G5", "g5")
141       .Case("POWER5", "g5")
142       .Case("A2", "a2")
143       .Case("POWER6", "pwr6")
144       .Case("POWER7", "pwr7")
145       .Case("POWER8", "pwr8")
146       .Case("POWER8E", "pwr8")
147       .Case("POWER8NVL", "pwr8")
148       .Case("POWER9", "pwr9")
149       .Case("POWER10", "pwr10")
150       // FIXME: If we get a simulator or machine with the capabilities of
151       // mcpu=future, we should revisit this and add the name reported by the
152       // simulator/machine.
153       .Default(generic);
154 }
155 
156 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
157   // The cpuid register on arm is not accessible from user space. On Linux,
158   // it is exposed through the /proc/cpuinfo file.
159 
160   // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
161   // in all cases.
162   SmallVector<StringRef, 32> Lines;
163   ProcCpuinfoContent.split(Lines, "\n");
164 
165   // Look for the CPU implementer line.
166   StringRef Implementer;
167   StringRef Hardware;
168   StringRef Part;
169   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
170     if (Lines[I].startswith("CPU implementer"))
171       Implementer = Lines[I].substr(15).ltrim("\t :");
172     if (Lines[I].startswith("Hardware"))
173       Hardware = Lines[I].substr(8).ltrim("\t :");
174     if (Lines[I].startswith("CPU part"))
175       Part = Lines[I].substr(8).ltrim("\t :");
176   }
177 
178   if (Implementer == "0x41") { // ARM Ltd.
179     // MSM8992/8994 may give cpu part for the core that the kernel is running on,
180     // which is undeterministic and wrong. Always return cortex-a53 for these SoC.
181     if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996"))
182       return "cortex-a53";
183 
184 
185     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
186     // values correspond to the "Part number" in the CP15/c0 register. The
187     // contents are specified in the various processor manuals.
188     // This corresponds to the Main ID Register in Technical Reference Manuals.
189     // and is used in programs like sys-utils
190     return StringSwitch<const char *>(Part)
191         .Case("0x926", "arm926ej-s")
192         .Case("0xb02", "mpcore")
193         .Case("0xb36", "arm1136j-s")
194         .Case("0xb56", "arm1156t2-s")
195         .Case("0xb76", "arm1176jz-s")
196         .Case("0xc08", "cortex-a8")
197         .Case("0xc09", "cortex-a9")
198         .Case("0xc0f", "cortex-a15")
199         .Case("0xc20", "cortex-m0")
200         .Case("0xc23", "cortex-m3")
201         .Case("0xc24", "cortex-m4")
202         .Case("0xd22", "cortex-m55")
203         .Case("0xd02", "cortex-a34")
204         .Case("0xd04", "cortex-a35")
205         .Case("0xd03", "cortex-a53")
206         .Case("0xd07", "cortex-a57")
207         .Case("0xd08", "cortex-a72")
208         .Case("0xd09", "cortex-a73")
209         .Case("0xd0a", "cortex-a75")
210         .Case("0xd0b", "cortex-a76")
211         .Case("0xd0d", "cortex-a77")
212         .Case("0xd41", "cortex-a78")
213         .Case("0xd44", "cortex-x1")
214         .Case("0xd0c", "neoverse-n1")
215         .Case("0xd49", "neoverse-n2")
216         .Default("generic");
217   }
218 
219   if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
220     return StringSwitch<const char *>(Part)
221       .Case("0x516", "thunderx2t99")
222       .Case("0x0516", "thunderx2t99")
223       .Case("0xaf", "thunderx2t99")
224       .Case("0x0af", "thunderx2t99")
225       .Case("0xa1", "thunderxt88")
226       .Case("0x0a1", "thunderxt88")
227       .Default("generic");
228   }
229 
230   if (Implementer == "0x46") { // Fujitsu Ltd.
231     return StringSwitch<const char *>(Part)
232       .Case("0x001", "a64fx")
233       .Default("generic");
234   }
235 
236   if (Implementer == "0x4e") { // NVIDIA Corporation
237     return StringSwitch<const char *>(Part)
238         .Case("0x004", "carmel")
239         .Default("generic");
240   }
241 
242   if (Implementer == "0x48") // HiSilicon Technologies, Inc.
243     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
244     // values correspond to the "Part number" in the CP15/c0 register. The
245     // contents are specified in the various processor manuals.
246     return StringSwitch<const char *>(Part)
247       .Case("0xd01", "tsv110")
248       .Default("generic");
249 
250   if (Implementer == "0x51") // Qualcomm Technologies, Inc.
251     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
252     // values correspond to the "Part number" in the CP15/c0 register. The
253     // contents are specified in the various processor manuals.
254     return StringSwitch<const char *>(Part)
255         .Case("0x06f", "krait") // APQ8064
256         .Case("0x201", "kryo")
257         .Case("0x205", "kryo")
258         .Case("0x211", "kryo")
259         .Case("0x800", "cortex-a73") // Kryo 2xx Gold
260         .Case("0x801", "cortex-a73") // Kryo 2xx Silver
261         .Case("0x802", "cortex-a75") // Kryo 3xx Gold
262         .Case("0x803", "cortex-a75") // Kryo 3xx Silver
263         .Case("0x804", "cortex-a76") // Kryo 4xx Gold
264         .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
265         .Case("0xc00", "falkor")
266         .Case("0xc01", "saphira")
267         .Default("generic");
268   if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
269     // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
270     // any predictive pattern across variants and parts.
271     unsigned Variant = 0, Part = 0;
272 
273     // Look for the CPU variant line, whose value is a 1 digit hexadecimal
274     // number, corresponding to the Variant bits in the CP15/C0 register.
275     for (auto I : Lines)
276       if (I.consume_front("CPU variant"))
277         I.ltrim("\t :").getAsInteger(0, Variant);
278 
279     // Look for the CPU part line, whose value is a 3 digit hexadecimal
280     // number, corresponding to the PartNum bits in the CP15/C0 register.
281     for (auto I : Lines)
282       if (I.consume_front("CPU part"))
283         I.ltrim("\t :").getAsInteger(0, Part);
284 
285     unsigned Exynos = (Variant << 12) | Part;
286     switch (Exynos) {
287     default:
288       // Default by falling through to Exynos M3.
289       LLVM_FALLTHROUGH;
290     case 0x1002:
291       return "exynos-m3";
292     case 0x1003:
293       return "exynos-m4";
294     }
295   }
296 
297   return "generic";
298 }
299 
300 namespace {
301 StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
302   switch (Id) {
303     case 2064:  // z900 not supported by LLVM
304     case 2066:
305     case 2084:  // z990 not supported by LLVM
306     case 2086:
307     case 2094:  // z9-109 not supported by LLVM
308     case 2096:
309       return "generic";
310     case 2097:
311     case 2098:
312       return "z10";
313     case 2817:
314     case 2818:
315       return "z196";
316     case 2827:
317     case 2828:
318       return "zEC12";
319     case 2964:
320     case 2965:
321       return HaveVectorSupport? "z13" : "zEC12";
322     case 3906:
323     case 3907:
324       return HaveVectorSupport? "z14" : "zEC12";
325     case 8561:
326     case 8562:
327       return HaveVectorSupport? "z15" : "zEC12";
328     case 3931:
329     case 3932:
330     default:
331       return HaveVectorSupport? "arch14" : "zEC12";
332   }
333 }
334 } // end anonymous namespace
335 
336 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
337   // STIDP is a privileged operation, so use /proc/cpuinfo instead.
338 
339   // The "processor 0:" line comes after a fair amount of other information,
340   // including a cache breakdown, but this should be plenty.
341   SmallVector<StringRef, 32> Lines;
342   ProcCpuinfoContent.split(Lines, "\n");
343 
344   // Look for the CPU features.
345   SmallVector<StringRef, 32> CPUFeatures;
346   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
347     if (Lines[I].startswith("features")) {
348       size_t Pos = Lines[I].find(':');
349       if (Pos != StringRef::npos) {
350         Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
351         break;
352       }
353     }
354 
355   // We need to check for the presence of vector support independently of
356   // the machine type, since we may only use the vector register set when
357   // supported by the kernel (and hypervisor).
358   bool HaveVectorSupport = false;
359   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
360     if (CPUFeatures[I] == "vx")
361       HaveVectorSupport = true;
362   }
363 
364   // Now check the processor machine type.
365   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
366     if (Lines[I].startswith("processor ")) {
367       size_t Pos = Lines[I].find("machine = ");
368       if (Pos != StringRef::npos) {
369         Pos += sizeof("machine = ") - 1;
370         unsigned int Id;
371         if (!Lines[I].drop_front(Pos).getAsInteger(10, Id))
372           return getCPUNameFromS390Model(Id, HaveVectorSupport);
373       }
374       break;
375     }
376   }
377 
378   return "generic";
379 }
380 
381 StringRef sys::detail::getHostCPUNameForBPF() {
382 #if !defined(__linux__) || !defined(__x86_64__)
383   return "generic";
384 #else
385   uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
386       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
387     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
388       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
389       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
390       /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
391       0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
392       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
393       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
394       /* BPF_EXIT_INSN() */
395       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
396 
397   uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
398       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
399     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
400       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
401       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
402       /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
403       0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
404       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
405       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
406       /* BPF_EXIT_INSN() */
407       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
408 
409   struct bpf_prog_load_attr {
410     uint32_t prog_type;
411     uint32_t insn_cnt;
412     uint64_t insns;
413     uint64_t license;
414     uint32_t log_level;
415     uint32_t log_size;
416     uint64_t log_buf;
417     uint32_t kern_version;
418     uint32_t prog_flags;
419   } attr = {};
420   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
421   attr.insn_cnt = 5;
422   attr.insns = (uint64_t)v3_insns;
423   attr.license = (uint64_t)"DUMMY";
424 
425   int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
426                    sizeof(attr));
427   if (fd >= 0) {
428     close(fd);
429     return "v3";
430   }
431 
432   /* Clear the whole attr in case its content changed by syscall. */
433   memset(&attr, 0, sizeof(attr));
434   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
435   attr.insn_cnt = 5;
436   attr.insns = (uint64_t)v2_insns;
437   attr.license = (uint64_t)"DUMMY";
438   fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
439   if (fd >= 0) {
440     close(fd);
441     return "v2";
442   }
443   return "v1";
444 #endif
445 }
446 
447 #if defined(__i386__) || defined(_M_IX86) || \
448     defined(__x86_64__) || defined(_M_X64)
449 
450 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
451 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
452 // support. Consequently, for i386, the presence of CPUID is checked first
453 // via the corresponding eflags bit.
454 // Removal of cpuid.h header motivated by PR30384
455 // Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp
456 // or test-suite, but are used in external projects e.g. libstdcxx
/// Report whether the CPUID instruction may be executed.
///
/// Only 32-bit x86 builds with a GNU-compatible compiler perform a real
/// check, by testing whether bit 0x00200000 of EFLAGS can be toggled. Every
/// other configuration simply answers true.
static bool isCpuIdSupported() {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int CpuIdSupported;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(CpuIdSupported)
          :
          : "eax", "ecx");
  return CpuIdSupported != 0;
#else
  return true;
#endif
}
484 
/// Execute CPUID for leaf \p value and store the resulting EAX, EBX, ECX and
/// EDX register contents through \p rEAX, \p rEBX, \p rECX and \p rEDX.
///
/// \returns false when the instruction was executed, true when this build has
/// no way of issuing CPUID (the outputs are then not written).
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same sequence as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(_MSC_VER) && !defined(__GNUC__) && !defined(__clang__)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  // Either an unknown compiler, or a GNU-compatible compiler targeting
  // something other than x86.
  return true;
#endif
}
522 
523 namespace llvm {
524 namespace sys {
525 namespace detail {
526 namespace x86 {
527 
528 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
529   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
530   if (MaxLeaf == nullptr)
531     MaxLeaf = &EAX;
532   else
533     *MaxLeaf = 0;
534 
535   if (!isCpuIdSupported())
536     return VendorSignatures::UNKNOWN;
537 
538   if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
539     return VendorSignatures::UNKNOWN;
540 
541   // "Genu ineI ntel"
542   if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
543     return VendorSignatures::GENUINE_INTEL;
544 
545   // "Auth enti cAMD"
546   if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
547     return VendorSignatures::AUTHENTIC_AMD;
548 
549   return VendorSignatures::UNKNOWN;
550 }
551 
552 } // namespace x86
553 } // namespace detail
554 } // namespace sys
555 } // namespace llvm
556 
557 using namespace llvm::sys::detail::x86;
558 
/// Execute CPUID for leaf \p value with sub-leaf \p subleaf and store the
/// resulting EAX, EBX, ECX and EDX register contents through \p rEAX,
/// \p rEBX, \p rECX and \p rEDX.
///
/// \returns false when the instruction was executed, true when this build has
/// no way of issuing CPUID (the outputs are then not written).
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same sequence as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(_MSC_VER) && !defined(__GNUC__) && !defined(__clang__)
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  // Either an unknown compiler, or a GNU-compatible compiler targeting
  // something other than x86.
  return true;
#endif
}
597 
/// Read extended control register 0 (XCR0), which is used to detect features
/// such as AVX. The low half is stored through \p rEAX and the high half
/// through \p rEDX.
///
/// \returns false when the register was read, true when this build has no
/// way of reading it (the outputs are then not written).
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // xgetbv is emitted as raw bytes rather than by mnemonic: older assemblers
  // do not know the instruction, and there is no easy way to compile
  // conditionally on the assembler in use.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  const unsigned long long XCR0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = static_cast<unsigned>(XCR0);
  *rEDX = static_cast<unsigned>(XCR0 >> 32);
  return false;
#else
  return true;
#endif
}
615 
/// Decode the family and model numbers from the EAX value of CPUID leaf 1.
///
/// \param EAX    the raw register value.
/// \param Family receives the family; the extended family field is added
///               when the base family is 0xF.
/// \param Model  receives the model; the extended model field supplies the
///               upper four bits when the base family is 6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseModel = (EAX >> 4) & 0xf;   // Bits 4 - 7
  const unsigned BaseFamily = (EAX >> 8) & 0xf;  // Bits 8 - 11
  const unsigned ExtModel = (EAX >> 16) & 0xf;   // Bits 16 - 19
  const unsigned ExtFamily = (EAX >> 20) & 0xff; // Bits 20 - 27

  const bool FamilyIsF = BaseFamily == 0xf;
  const bool UsesExtModel = BaseFamily == 6 || FamilyIsF;

  // Examine extended family ID if family ID is F.
  *Family = FamilyIsF ? BaseFamily + ExtFamily : BaseFamily;
  // Examine extended model ID if family ID is 6 or F.
  *Model = UsesExtModel ? BaseModel + (ExtModel << 4) : BaseModel;
}
628 
629 static StringRef
630 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
631                                 const unsigned *Features,
632                                 unsigned *Type, unsigned *Subtype) {
633   auto testFeature = [&](unsigned F) {
634     return (Features[F / 32] & (1U << (F % 32))) != 0;
635   };
636 
637   StringRef CPU;
638 
639   switch (Family) {
640   case 3:
641     CPU = "i386";
642     break;
643   case 4:
644     CPU = "i486";
645     break;
646   case 5:
647     if (testFeature(X86::FEATURE_MMX)) {
648       CPU = "pentium-mmx";
649       break;
650     }
651     CPU = "pentium";
652     break;
653   case 6:
654     switch (Model) {
655     case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
656                // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
657                // mobile processor, Intel Core 2 Extreme processor, Intel
658                // Pentium Dual-Core processor, Intel Xeon processor, model
659                // 0Fh. All processors are manufactured using the 65 nm process.
660     case 0x16: // Intel Celeron processor model 16h. All processors are
661                // manufactured using the 65 nm process
662       CPU = "core2";
663       *Type = X86::INTEL_CORE2;
664       break;
665     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
666                // 17h. All processors are manufactured using the 45 nm process.
667                //
668                // 45nm: Penryn , Wolfdale, Yorkfield (XE)
669     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
670                // the 45 nm process.
671       CPU = "penryn";
672       *Type = X86::INTEL_CORE2;
673       break;
674     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
675                // processors are manufactured using the 45 nm process.
676     case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
677                // As found in a Summer 2010 model iMac.
678     case 0x1f:
679     case 0x2e:              // Nehalem EX
680       CPU = "nehalem";
681       *Type = X86::INTEL_COREI7;
682       *Subtype = X86::INTEL_COREI7_NEHALEM;
683       break;
684     case 0x25: // Intel Core i7, laptop version.
685     case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
686                // processors are manufactured using the 32 nm process.
687     case 0x2f: // Westmere EX
688       CPU = "westmere";
689       *Type = X86::INTEL_COREI7;
690       *Subtype = X86::INTEL_COREI7_WESTMERE;
691       break;
692     case 0x2a: // Intel Core i7 processor. All processors are manufactured
693                // using the 32 nm process.
694     case 0x2d:
695       CPU = "sandybridge";
696       *Type = X86::INTEL_COREI7;
697       *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
698       break;
699     case 0x3a:
700     case 0x3e:              // Ivy Bridge EP
701       CPU = "ivybridge";
702       *Type = X86::INTEL_COREI7;
703       *Subtype = X86::INTEL_COREI7_IVYBRIDGE;
704       break;
705 
706     // Haswell:
707     case 0x3c:
708     case 0x3f:
709     case 0x45:
710     case 0x46:
711       CPU = "haswell";
712       *Type = X86::INTEL_COREI7;
713       *Subtype = X86::INTEL_COREI7_HASWELL;
714       break;
715 
716     // Broadwell:
717     case 0x3d:
718     case 0x47:
719     case 0x4f:
720     case 0x56:
721       CPU = "broadwell";
722       *Type = X86::INTEL_COREI7;
723       *Subtype = X86::INTEL_COREI7_BROADWELL;
724       break;
725 
726     // Skylake:
727     case 0x4e:              // Skylake mobile
728     case 0x5e:              // Skylake desktop
729     case 0x8e:              // Kaby Lake mobile
730     case 0x9e:              // Kaby Lake desktop
731     case 0xa5:              // Comet Lake-H/S
732     case 0xa6:              // Comet Lake-U
733       CPU = "skylake";
734       *Type = X86::INTEL_COREI7;
735       *Subtype = X86::INTEL_COREI7_SKYLAKE;
736       break;
737 
738     // Rocketlake:
739     case 0xa7:
740       CPU = "rocketlake";
741       *Type = X86::INTEL_COREI7;
742       *Subtype = X86::INTEL_COREI7_ROCKETLAKE;
743       break;
744 
745     // Skylake Xeon:
746     case 0x55:
747       *Type = X86::INTEL_COREI7;
748       if (testFeature(X86::FEATURE_AVX512BF16)) {
749         CPU = "cooperlake";
750         *Subtype = X86::INTEL_COREI7_COOPERLAKE;
751       } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
752         CPU = "cascadelake";
753         *Subtype = X86::INTEL_COREI7_CASCADELAKE;
754       } else {
755         CPU = "skylake-avx512";
756         *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
757       }
758       break;
759 
760     // Cannonlake:
761     case 0x66:
762       CPU = "cannonlake";
763       *Type = X86::INTEL_COREI7;
764       *Subtype = X86::INTEL_COREI7_CANNONLAKE;
765       break;
766 
767     // Icelake:
768     case 0x7d:
769     case 0x7e:
770       CPU = "icelake-client";
771       *Type = X86::INTEL_COREI7;
772       *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
773       break;
774 
775     // Tigerlake:
776     case 0x8c:
777     case 0x8d:
778       CPU = "tigerlake";
779       *Type = X86::INTEL_COREI7;
780       *Subtype = X86::INTEL_COREI7_TIGERLAKE;
781       break;
782 
783     // Alderlake:
784     case 0x97:
785     case 0x9a:
786       CPU = "alderlake";
787       *Type = X86::INTEL_COREI7;
788       *Subtype = X86::INTEL_COREI7_ALDERLAKE;
789       break;
790 
791     // Icelake Xeon:
792     case 0x6a:
793     case 0x6c:
794       CPU = "icelake-server";
795       *Type = X86::INTEL_COREI7;
796       *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
797       break;
798 
799     // Sapphire Rapids:
800     case 0x8f:
801       CPU = "sapphirerapids";
802       *Type = X86::INTEL_COREI7;
803       *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
804       break;
805 
806     case 0x1c: // Most 45 nm Intel Atom processors
807     case 0x26: // 45 nm Atom Lincroft
808     case 0x27: // 32 nm Atom Medfield
809     case 0x35: // 32 nm Atom Midview
810     case 0x36: // 32 nm Atom Midview
811       CPU = "bonnell";
812       *Type = X86::INTEL_BONNELL;
813       break;
814 
815     // Atom Silvermont codes from the Intel software optimization guide.
816     case 0x37:
817     case 0x4a:
818     case 0x4d:
819     case 0x5a:
820     case 0x5d:
821     case 0x4c: // really airmont
822       CPU = "silvermont";
823       *Type = X86::INTEL_SILVERMONT;
824       break;
825     // Goldmont:
826     case 0x5c: // Apollo Lake
827     case 0x5f: // Denverton
828       CPU = "goldmont";
829       *Type = X86::INTEL_GOLDMONT;
830       break;
831     case 0x7a:
832       CPU = "goldmont-plus";
833       *Type = X86::INTEL_GOLDMONT_PLUS;
834       break;
835     case 0x86:
836       CPU = "tremont";
837       *Type = X86::INTEL_TREMONT;
838       break;
839 
840     // Xeon Phi (Knights Landing + Knights Mill):
841     case 0x57:
842       CPU = "knl";
843       *Type = X86::INTEL_KNL;
844       break;
845     case 0x85:
846       CPU = "knm";
847       *Type = X86::INTEL_KNM;
848       break;
849 
850     default: // Unknown family 6 CPU, try to guess.
851       // Don't both with Type/Subtype here, they aren't used by the caller.
852       // They're used above to keep the code in sync with compiler-rt.
853       // TODO detect tigerlake host from model
854       if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
855         CPU = "tigerlake";
856       } else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
857         CPU = "icelake-client";
858       } else if (testFeature(X86::FEATURE_AVX512VBMI)) {
859         CPU = "cannonlake";
860       } else if (testFeature(X86::FEATURE_AVX512BF16)) {
861         CPU = "cooperlake";
862       } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
863         CPU = "cascadelake";
864       } else if (testFeature(X86::FEATURE_AVX512VL)) {
865         CPU = "skylake-avx512";
866       } else if (testFeature(X86::FEATURE_AVX512ER)) {
867         CPU = "knl";
868       } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
869         if (testFeature(X86::FEATURE_SHA))
870           CPU = "goldmont";
871         else
872           CPU = "skylake";
873       } else if (testFeature(X86::FEATURE_ADX)) {
874         CPU = "broadwell";
875       } else if (testFeature(X86::FEATURE_AVX2)) {
876         CPU = "haswell";
877       } else if (testFeature(X86::FEATURE_AVX)) {
878         CPU = "sandybridge";
879       } else if (testFeature(X86::FEATURE_SSE4_2)) {
880         if (testFeature(X86::FEATURE_MOVBE))
881           CPU = "silvermont";
882         else
883           CPU = "nehalem";
884       } else if (testFeature(X86::FEATURE_SSE4_1)) {
885         CPU = "penryn";
886       } else if (testFeature(X86::FEATURE_SSSE3)) {
887         if (testFeature(X86::FEATURE_MOVBE))
888           CPU = "bonnell";
889         else
890           CPU = "core2";
891       } else if (testFeature(X86::FEATURE_64BIT)) {
892         CPU = "core2";
893       } else if (testFeature(X86::FEATURE_SSE3)) {
894         CPU = "yonah";
895       } else if (testFeature(X86::FEATURE_SSE2)) {
896         CPU = "pentium-m";
897       } else if (testFeature(X86::FEATURE_SSE)) {
898         CPU = "pentium3";
899       } else if (testFeature(X86::FEATURE_MMX)) {
900         CPU = "pentium2";
901       } else {
902         CPU = "pentiumpro";
903       }
904       break;
905     }
906     break;
907   case 15: {
908     if (testFeature(X86::FEATURE_64BIT)) {
909       CPU = "nocona";
910       break;
911     }
912     if (testFeature(X86::FEATURE_SSE3)) {
913       CPU = "prescott";
914       break;
915     }
916     CPU = "pentium4";
917     break;
918   }
919   default:
920     break; // Unknown.
921   }
922 
923   return CPU;
924 }
925 
926 static StringRef
927 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
928                               const unsigned *Features,
929                               unsigned *Type, unsigned *Subtype) {
930   auto testFeature = [&](unsigned F) {
931     return (Features[F / 32] & (1U << (F % 32))) != 0;
932   };
933 
934   StringRef CPU;
935 
936   switch (Family) {
937   case 4:
938     CPU = "i486";
939     break;
940   case 5:
941     CPU = "pentium";
942     switch (Model) {
943     case 6:
944     case 7:
945       CPU = "k6";
946       break;
947     case 8:
948       CPU = "k6-2";
949       break;
950     case 9:
951     case 13:
952       CPU = "k6-3";
953       break;
954     case 10:
955       CPU = "geode";
956       break;
957     }
958     break;
959   case 6:
960     if (testFeature(X86::FEATURE_SSE)) {
961       CPU = "athlon-xp";
962       break;
963     }
964     CPU = "athlon";
965     break;
966   case 15:
967     if (testFeature(X86::FEATURE_SSE3)) {
968       CPU = "k8-sse3";
969       break;
970     }
971     CPU = "k8";
972     break;
973   case 16:
974     CPU = "amdfam10";
975     *Type = X86::AMDFAM10H; // "amdfam10"
976     switch (Model) {
977     case 2:
978       *Subtype = X86::AMDFAM10H_BARCELONA;
979       break;
980     case 4:
981       *Subtype = X86::AMDFAM10H_SHANGHAI;
982       break;
983     case 8:
984       *Subtype = X86::AMDFAM10H_ISTANBUL;
985       break;
986     }
987     break;
988   case 20:
989     CPU = "btver1";
990     *Type = X86::AMD_BTVER1;
991     break;
992   case 21:
993     CPU = "bdver1";
994     *Type = X86::AMDFAM15H;
995     if (Model >= 0x60 && Model <= 0x7f) {
996       CPU = "bdver4";
997       *Subtype = X86::AMDFAM15H_BDVER4;
998       break; // 60h-7Fh: Excavator
999     }
1000     if (Model >= 0x30 && Model <= 0x3f) {
1001       CPU = "bdver3";
1002       *Subtype = X86::AMDFAM15H_BDVER3;
1003       break; // 30h-3Fh: Steamroller
1004     }
1005     if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
1006       CPU = "bdver2";
1007       *Subtype = X86::AMDFAM15H_BDVER2;
1008       break; // 02h, 10h-1Fh: Piledriver
1009     }
1010     if (Model <= 0x0f) {
1011       *Subtype = X86::AMDFAM15H_BDVER1;
1012       break; // 00h-0Fh: Bulldozer
1013     }
1014     break;
1015   case 22:
1016     CPU = "btver2";
1017     *Type = X86::AMD_BTVER2;
1018     break;
1019   case 23:
1020     CPU = "znver1";
1021     *Type = X86::AMDFAM17H;
1022     if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
1023       CPU = "znver2";
1024       *Subtype = X86::AMDFAM17H_ZNVER2;
1025       break; // 30h-3fh, 71h: Zen2
1026     }
1027     if (Model <= 0x0f) {
1028       *Subtype = X86::AMDFAM17H_ZNVER1;
1029       break; // 00h-0Fh: Zen1
1030     }
1031     break;
1032   case 25:
1033     CPU = "znver3";
1034     *Type = X86::AMDFAM19H;
1035     if (Model <= 0x0f) {
1036       *Subtype = X86::AMDFAM19H_ZNVER3;
1037       break; // 00h-0Fh: Zen3
1038     }
1039     break;
1040   default:
1041     break; // Unknown AMD CPU.
1042   }
1043 
1044   return CPU;
1045 }
1046 
/// Fill the \p Features bitset from CPUID/XGETBV information.
///
/// \param ECX      Feature word; decoded below as the ECX output of CPUID
///                 leaf 1 (the caller in this file passes exactly that).
/// \param EDX      Feature word; decoded below as the EDX output of CPUID
///                 leaf 1.
/// \param MaxLeaf  Highest basic CPUID leaf; leaf 7 is only queried when this
///                 is at least 7.
/// \param Features Bitset indexed by X86::FEATURE_* (word F / 32, bit F % 32).
///                 Bits are only ever OR-ed in, never cleared, so the caller
///                 is expected to pass a zero-initialized array.
///
/// Note that the by-value ECX/EDX parameters and the local EAX/EBX are reused
/// as scratch registers for every later query, so the order of the statements
/// below matters.
static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
                                 unsigned *Features) {
  unsigned EAX, EBX;

  // Set bit F in the caller's feature bitset.
  auto setFeature = [&](unsigned F) {
    Features[F / 32] |= 1U << (F % 32);
  };

  // Leaf 1, EDX feature bits.
  if ((EDX >> 15) & 1)
    setFeature(X86::FEATURE_CMOV);
  if ((EDX >> 23) & 1)
    setFeature(X86::FEATURE_MMX);
  if ((EDX >> 25) & 1)
    setFeature(X86::FEATURE_SSE);
  if ((EDX >> 26) & 1)
    setFeature(X86::FEATURE_SSE2);

  // Leaf 1, ECX feature bits.
  if ((ECX >> 0) & 1)
    setFeature(X86::FEATURE_SSE3);
  if ((ECX >> 1) & 1)
    setFeature(X86::FEATURE_PCLMUL);
  if ((ECX >> 9) & 1)
    setFeature(X86::FEATURE_SSSE3);
  if ((ECX >> 12) & 1)
    setFeature(X86::FEATURE_FMA);
  if ((ECX >> 19) & 1)
    setFeature(X86::FEATURE_SSE4_1);
  if ((ECX >> 20) & 1) {
    // The same bit enables both SSE4.2 and CRC32.
    setFeature(X86::FEATURE_SSE4_2);
    setFeature(X86::FEATURE_CRC32);
  }
  if ((ECX >> 23) & 1)
    setFeature(X86::FEATURE_POPCNT);
  if ((ECX >> 25) & 1)
    setFeature(X86::FEATURE_AES);

  if ((ECX >> 22) & 1)
    setFeature(X86::FEATURE_MOVBE);

  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
  // indicates that the AVX registers will be saved and restored on context
  // switch, then we have full AVX support.
  // getX86XCR0 receives pointers to EAX and EDX; from here on the leaf-1 EDX
  // value must not be relied upon.
  const unsigned AVXBits = (1 << 27) | (1 << 28);
  bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
                ((EAX & 0x6) == 0x6);
#if defined(__APPLE__)
  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
  // save the AVX512 context if we use AVX512 instructions, even if the bit is
  // not set right now.
  bool HasAVX512Save = true;
#else
  // AVX512 requires additional context to be saved by the OS.
  bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
#endif

  if (HasAVX)
    setFeature(X86::FEATURE_AVX);

  // Leaf 7, subleaf 0. The query helper's result is negated, i.e. a zero/false
  // return is treated as success. After this call EAX..EDX hold leaf-7 data.
  bool HasLeaf7 =
      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);

  // Leaf 7 EBX bits. AVX2 is additionally gated on HasAVX and every AVX512
  // feature on HasAVX512Save.
  if (HasLeaf7 && ((EBX >> 3) & 1))
    setFeature(X86::FEATURE_BMI);
  if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
    setFeature(X86::FEATURE_AVX2);
  if (HasLeaf7 && ((EBX >> 8) & 1))
    setFeature(X86::FEATURE_BMI2);
  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512F);
  if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512DQ);
  if (HasLeaf7 && ((EBX >> 19) & 1))
    setFeature(X86::FEATURE_ADX);
  if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512IFMA);
  if (HasLeaf7 && ((EBX >> 23) & 1))
    setFeature(X86::FEATURE_CLFLUSHOPT);
  if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512PF);
  if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512ER);
  if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512CD);
  if (HasLeaf7 && ((EBX >> 29) & 1))
    setFeature(X86::FEATURE_SHA);
  if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512BW);
  if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VL);

  // Leaf 7 ECX bits.
  if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VBMI);
  if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VBMI2);
  if (HasLeaf7 && ((ECX >> 8) & 1))
    setFeature(X86::FEATURE_GFNI);
  if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
    setFeature(X86::FEATURE_VPCLMULQDQ);
  if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VNNI);
  if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512BITALG);
  if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VPOPCNTDQ);

  // Leaf 7 EDX bits.
  if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX5124VNNIW);
  if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX5124FMAPS);
  if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VP2INTERSECT);

  // Leaf 7, subleaf 1 (overwrites the subleaf-0 register values).
  bool HasLeaf7Subleaf1 =
      MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
  if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512BF16);

  // Extended leaves: 0x80000000 reports the highest extended leaf in its
  // first output register, which is captured in MaxExtLevel here.
  unsigned MaxExtLevel;
  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);

  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
  if (HasExtLeaf1 && ((ECX >> 6) & 1))
    setFeature(X86::FEATURE_SSE4_A);
  if (HasExtLeaf1 && ((ECX >> 11) & 1))
    setFeature(X86::FEATURE_XOP);
  if (HasExtLeaf1 && ((ECX >> 16) & 1))
    setFeature(X86::FEATURE_FMA4);

  if (HasExtLeaf1 && ((EDX >> 29) & 1))
    setFeature(X86::FEATURE_64BIT);
}
1179 
1180 StringRef sys::getHostCPUName() {
1181   unsigned MaxLeaf = 0;
1182   const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
1183   if (Vendor == VendorSignatures::UNKNOWN)
1184     return "generic";
1185 
1186   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1187   getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
1188 
1189   unsigned Family = 0, Model = 0;
1190   unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0};
1191   detectX86FamilyModel(EAX, &Family, &Model);
1192   getAvailableFeatures(ECX, EDX, MaxLeaf, Features);
1193 
1194   // These aren't consumed in this file, but we try to keep some source code the
1195   // same or similar to compiler-rt.
1196   unsigned Type = 0;
1197   unsigned Subtype = 0;
1198 
1199   StringRef CPU;
1200 
1201   if (Vendor == VendorSignatures::GENUINE_INTEL) {
1202     CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type,
1203                                           &Subtype);
1204   } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) {
1205     CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type,
1206                                         &Subtype);
1207   }
1208 
1209   if (!CPU.empty())
1210     return CPU;
1211 
1212   return "generic";
1213 }
1214 
1215 #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
/// Identify the host CPU on Darwin/PowerPC by asking the Mach host for its
/// basic info and mapping the reported CPU subtype to an LLVM CPU name.
/// Returns "generic" if the query fails, the CPU type is not PowerPC, or the
/// subtype is not one of the known values.
StringRef sys::getHostCPUName() {
  host_basic_info_data_t hostInfo;
  mach_msg_type_number_t infoCount = HOST_BASIC_INFO_COUNT;

  mach_port_t hostPort = mach_host_self();
  const kern_return_t Status =
      host_info(hostPort, HOST_BASIC_INFO,
                reinterpret_cast<host_info_t>(&hostInfo), &infoCount);
  // The port reference is released regardless of whether the query worked.
  mach_port_deallocate(mach_task_self(), hostPort);

  // hostInfo is only meaningful when host_info() succeeded; do not read the
  // uninitialized structure otherwise.
  if (Status != KERN_SUCCESS)
    return "generic";

  if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
    return "generic";

  switch (hostInfo.cpu_subtype) {
  case CPU_SUBTYPE_POWERPC_601:
    return "601";
  case CPU_SUBTYPE_POWERPC_602:
    return "602";
  case CPU_SUBTYPE_POWERPC_603:
    return "603";
  case CPU_SUBTYPE_POWERPC_603e:
    return "603e";
  case CPU_SUBTYPE_POWERPC_603ev:
    return "603ev";
  case CPU_SUBTYPE_POWERPC_604:
    return "604";
  case CPU_SUBTYPE_POWERPC_604e:
    return "604e";
  case CPU_SUBTYPE_POWERPC_620:
    return "620";
  case CPU_SUBTYPE_POWERPC_750:
    return "750";
  case CPU_SUBTYPE_POWERPC_7400:
    return "7400";
  case CPU_SUBTYPE_POWERPC_7450:
    return "7450";
  case CPU_SUBTYPE_POWERPC_970:
    return "970";
  default:;
  }

  return "generic";
}
1259 #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__))
1260 StringRef sys::getHostCPUName() {
1261   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1262   StringRef Content = P ? P->getBuffer() : "";
1263   return detail::getHostCPUNameForPowerPC(Content);
1264 }
1265 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
1266 StringRef sys::getHostCPUName() {
1267   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1268   StringRef Content = P ? P->getBuffer() : "";
1269   return detail::getHostCPUNameForARM(Content);
1270 }
1271 #elif defined(__linux__) && defined(__s390x__)
1272 StringRef sys::getHostCPUName() {
1273   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1274   StringRef Content = P ? P->getBuffer() : "";
1275   return detail::getHostCPUNameForS390x(Content);
1276 }
1277 #elif defined(__MVS__)
/// z/OS: determine the CPU name from the machine model number and the vector
/// facility flag, both read directly from fixed low-memory control blocks
/// (PSA -> CVT). The decoded values are passed to getCPUNameFromS390Model().
StringRef sys::getHostCPUName() {
  // Get pointer to Communications Vector Table (CVT).
  // The pointer is located at offset 16 of the Prefixed Save Area (PSA).
  // It is stored as 31 bit pointer and will be zero-extended to 64 bit.
  int *StartToCVTOffset = reinterpret_cast<int *>(0x10);
  // Since it's stored as a 31-bit pointer, get the 4 bytes from the start
  // of address.
  int ReadValue = *StartToCVTOffset;
  // Explicitly clear the high order bit.
  ReadValue = (ReadValue & 0x7FFFFFFF);
  char *CVT = reinterpret_cast<char *>(ReadValue);
  // The model number is located in the CVT prefix at offset -6 and stored as
  // signless packed decimal.
  uint16_t Id = *(uint16_t *)&CVT[-6];
  // Convert number to integer.
  Id = decodePackedBCD<uint16_t>(Id, false);
  // Check for vector support. It's stored in field CVTFLAG5 (offset 244),
  // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector
  // extension can only be used if bit CVTVEF is on.
  bool HaveVectorSupport = CVT[244] & 0x80;
  return getCPUNameFromS390Model(Id, HaveVectorSupport);
}
1300 #elif defined(__APPLE__) && defined(__aarch64__)
1301 StringRef sys::getHostCPUName() {
1302   return "cyclone";
1303 }
1304 #elif defined(__APPLE__) && defined(__arm__)
/// Identify the host CPU on Darwin/ARM by asking the Mach host for its basic
/// info and mapping the reported CPU subtype to an LLVM CPU name.
/// Returns "generic" if the query fails or the subtype is not a known value.
StringRef sys::getHostCPUName() {
  host_basic_info_data_t hostInfo;
  mach_msg_type_number_t infoCount = HOST_BASIC_INFO_COUNT;

  mach_port_t hostPort = mach_host_self();
  const kern_return_t Status =
      host_info(hostPort, HOST_BASIC_INFO,
                reinterpret_cast<host_info_t>(&hostInfo), &infoCount);
  // The port reference is released regardless of whether the query worked.
  mach_port_deallocate(mach_task_self(), hostPort);

  // hostInfo is only meaningful when host_info() succeeded; do not read the
  // uninitialized structure otherwise.
  if (Status != KERN_SUCCESS)
    return "generic";

  if (hostInfo.cpu_type != CPU_TYPE_ARM) {
    assert(false && "CPUType not equal to ARM should not be possible on ARM");
    return "generic";
  }

  switch (hostInfo.cpu_subtype) {
  case CPU_SUBTYPE_ARM_V7S:
    return "swift";
  default:;
  }

  return "generic";
}
1327 #elif defined(_AIX)
1328 StringRef sys::getHostCPUName() {
1329   switch (_system_configuration.implementation) {
1330   case POWER_4:
1331     if (_system_configuration.version == PV_4_3)
1332       return "970";
1333     return "pwr4";
1334   case POWER_5:
1335     if (_system_configuration.version == PV_5)
1336       return "pwr5";
1337     return "pwr5x";
1338   case POWER_6:
1339     if (_system_configuration.version == PV_6_Compat)
1340       return "pwr6";
1341     return "pwr6x";
1342   case POWER_7:
1343     return "pwr7";
1344   case POWER_8:
1345     return "pwr8";
1346   case POWER_9:
1347     return "pwr9";
1348 // TODO: simplify this once the macro is available in all OS levels.
1349 #ifdef POWER_10
1350   case POWER_10:
1351 #else
1352   case 0x40000:
1353 #endif
1354     return "pwr10";
1355   default:
1356     return "generic";
1357   }
1358 }
1359 #else
1360 StringRef sys::getHostCPUName() { return "generic"; }
1361 namespace llvm {
1362 namespace sys {
1363 namespace detail {
1364 namespace x86 {
1365 
1366 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
1367   return VendorSignatures::UNKNOWN;
1368 }
1369 
1370 } // namespace x86
1371 } // namespace detail
1372 } // namespace sys
1373 } // namespace llvm
1374 #endif
1375 
1376 #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
1377 // On Linux, the number of physical cores can be computed from /proc/cpuinfo,
1378 // using the number of unique physical/core id pairs. The following
1379 // implementation reads the /proc/cpuinfo format on an x86_64 system.
1380 int computeHostNumPhysicalCores() {
1381   // Enabled represents the number of physical id/core id pairs with at least
1382   // one processor id enabled by the CPU affinity mask.
1383   cpu_set_t Affinity, Enabled;
1384   if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
1385     return -1;
1386   CPU_ZERO(&Enabled);
1387 
1388   // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
1389   // mmapped because it appears to have 0 size.
1390   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
1391       llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
1392   if (std::error_code EC = Text.getError()) {
1393     llvm::errs() << "Can't read "
1394                  << "/proc/cpuinfo: " << EC.message() << "\n";
1395     return -1;
1396   }
1397   SmallVector<StringRef, 8> strs;
1398   (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
1399                              /*KeepEmpty=*/false);
1400   int CurProcessor = -1;
1401   int CurPhysicalId = -1;
1402   int CurSiblings = -1;
1403   int CurCoreId = -1;
1404   for (StringRef Line : strs) {
1405     std::pair<StringRef, StringRef> Data = Line.split(':');
1406     auto Name = Data.first.trim();
1407     auto Val = Data.second.trim();
1408     // These fields are available if the kernel is configured with CONFIG_SMP.
1409     if (Name == "processor")
1410       Val.getAsInteger(10, CurProcessor);
1411     else if (Name == "physical id")
1412       Val.getAsInteger(10, CurPhysicalId);
1413     else if (Name == "siblings")
1414       Val.getAsInteger(10, CurSiblings);
1415     else if (Name == "core id") {
1416       Val.getAsInteger(10, CurCoreId);
1417       // The processor id corresponds to an index into cpu_set_t.
1418       if (CPU_ISSET(CurProcessor, &Affinity))
1419         CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
1420     }
1421   }
1422   return CPU_COUNT(&Enabled);
1423 }
1424 #elif defined(__linux__) && defined(__powerpc__)
// Linux/PowerPC: report the number of CPUs in the calling process's affinity
// mask, or -1 if the mask cannot be obtained.
int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPU's on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
  constexpr int MaxCPUs = 2048;
  cpu_set_t *DynAffinity = CPU_ALLOC(MaxCPUs);
  if (!DynAffinity)
    return -1;

  // A dynamically sized set must be counted with the _S variant; plain
  // CPU_COUNT only looks at the first sizeof(cpu_set_t) bytes.
  const size_t DynSize = CPU_ALLOC_SIZE(MaxCPUs);
  int NumCPUs = -1;
  if (sched_getaffinity(0, DynSize, DynAffinity) == 0)
    NumCPUs = CPU_COUNT_S(DynSize, DynAffinity);

  // Release the mask on both the success and the failure path.
  CPU_FREE(DynAffinity);
  return NumCPUs;
}
1443 #elif defined(__linux__) && defined(__s390x__)
// Linux/s390x: report the value of sysconf(_SC_NPROCESSORS_ONLN).
int computeHostNumPhysicalCores() {
  const long Online = sysconf(_SC_NPROCESSORS_ONLN);
  return static_cast<int>(Online);
}
1445 #elif defined(__APPLE__)
1446 #include <sys/param.h>
1447 #include <sys/sysctl.h>
1448 
// Gets the number of *physical cores* on the machine.
// "hw.physicalcpu" is queried first; if that fails or reports nothing, the
// CTL_HW/HW_AVAILCPU sysctl is tried instead. Returns -1 when neither query
// produces a positive count.
int computeHostNumPhysicalCores() {
  // Start from 0 so a failed query can never leave an indeterminate value.
  uint32_t count = 0;
  size_t len = sizeof(count);
  if (sysctlbyname("hw.physicalcpu", &count, &len, nullptr, 0) != 0 ||
      count < 1) {
    int nm[2] = {CTL_HW, HW_AVAILCPU};
    // The first call may have modified both outputs; reset them before
    // retrying with the fallback name.
    count = 0;
    len = sizeof(count);
    if (sysctl(nm, 2, &count, &len, nullptr, 0) != 0 || count < 1)
      return -1;
  }
  return count;
}
1464 #elif defined(__MVS__)
// z/OS: read the number of online standard CPs by following fixed-offset
// pointers through low-memory control blocks: PSA (address 0) -> CVT -> CSD.
// Each hop reads a 32-bit table entry and widens it to a pointer.
int computeHostNumPhysicalCores() {
  enum {
    // Byte offset of the pointer to the Communications Vector Table (CVT) in
    // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
    // will be zero-extended to uintptr_t.
    FLCCVT = 16,
    // Byte offset of the pointer to the Common System Data Area (CSD) in the
    // CVT. The table entry is a 31-bit pointer and will be zero-extended to
    // uintptr_t.
    CVTCSD = 660,
    // Byte offset to the number of live CPs in the LPAR, stored as a signed
    // 32-bit value in the table.
    CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
  };
  // The PSA is addressed from absolute address 0 here.
  char *PSA = 0;
  char *CVT = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT])));
  char *CSD = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD])));
  return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
}
1486 #elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0
1487 // Defined in llvm/lib/Support/Windows/Threading.inc
1488 int computeHostNumPhysicalCores();
1489 #else
// No detection is implemented for the remaining systems; -1 tells the caller
// that the physical core count is unknown.
static int computeHostNumPhysicalCores() {
  constexpr int UnknownCoreCount = -1;
  return UnknownCoreCount;
}
1492 #endif
1493 
1494 int sys::getHostNumPhysicalCores() {
1495   static int NumCores = computeHostNumPhysicalCores();
1496   return NumCores;
1497 }
1498 
1499 #if defined(__i386__) || defined(_M_IX86) || \
1500     defined(__x86_64__) || defined(_M_X64)
/// Populate \p Features with the x86 features of the host, keyed by LLVM
/// feature name, using CPUID and XGETBV.
///
/// Returns false (leaving \p Features untouched) when the leaf-0 CPUID query
/// fails or reports a maximum basic leaf below 1; returns true otherwise.
/// Every feature handled below gets an entry, set to true or false.
///
/// EAX/EBX/ECX/EDX are reused as scratch registers for each successive query,
/// so the statements below are order-dependent.
bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
  unsigned MaxLevel;

  // MaxLevel is only read if the query itself succeeded (short-circuit).
  if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1)
    return false;

  getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);

  // Leaf 1, EDX feature bits.
  Features["cx8"]    = (EDX >>  8) & 1;
  Features["cmov"]   = (EDX >> 15) & 1;
  Features["mmx"]    = (EDX >> 23) & 1;
  Features["fxsr"]   = (EDX >> 24) & 1;
  Features["sse"]    = (EDX >> 25) & 1;
  Features["sse2"]   = (EDX >> 26) & 1;

  // Leaf 1, ECX feature bits.
  Features["sse3"]   = (ECX >>  0) & 1;
  Features["pclmul"] = (ECX >>  1) & 1;
  Features["ssse3"]  = (ECX >>  9) & 1;
  Features["cx16"]   = (ECX >> 13) & 1;
  Features["sse4.1"] = (ECX >> 19) & 1;
  Features["sse4.2"] = (ECX >> 20) & 1;
  Features["crc32"]  = Features["sse4.2"];
  Features["movbe"]  = (ECX >> 22) & 1;
  Features["popcnt"] = (ECX >> 23) & 1;
  Features["aes"]    = (ECX >> 25) & 1;
  Features["rdrnd"]  = (ECX >> 30) & 1;

  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
  // indicates that the AVX registers will be saved and restored on context
  // switch, then we have full AVX support.
  // getX86XCR0 receives pointers to EAX and EDX; ECX still holds leaf-1 data.
  bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
  bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6);
#if defined(__APPLE__)
  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
  // save the AVX512 context if we use AVX512 instructions, even if the bit is
  // not set right now.
  bool HasAVX512Save = true;
#else
  // AVX512 requires additional context to be saved by the OS.
  bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
#endif
  // AMX requires additional context to be saved by the OS.
  const unsigned AMXBits = (1 << 17) | (1 << 18);
  bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);

  Features["avx"]   = HasAVXSave;
  Features["fma"]   = ((ECX >> 12) & 1) && HasAVXSave;
  // Only enable XSAVE if OS has enabled support for saving YMM state.
  Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave;
  Features["f16c"]  = ((ECX >> 29) & 1) && HasAVXSave;

  // Extended leaves: 0x80000000 reports the highest extended leaf in its
  // first output register, captured in MaxExtLevel.
  unsigned MaxExtLevel;
  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);

  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
  Features["sahf"]   = HasExtLeaf1 && ((ECX >>  0) & 1);
  Features["lzcnt"]  = HasExtLeaf1 && ((ECX >>  5) & 1);
  Features["sse4a"]  = HasExtLeaf1 && ((ECX >>  6) & 1);
  Features["prfchw"] = HasExtLeaf1 && ((ECX >>  8) & 1);
  Features["xop"]    = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
  Features["lwp"]    = HasExtLeaf1 && ((ECX >> 15) & 1);
  Features["fma4"]   = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
  Features["tbm"]    = HasExtLeaf1 && ((ECX >> 21) & 1);
  Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);

  Features["64bit"]  = HasExtLeaf1 && ((EDX >> 29) & 1);

  // Miscellaneous memory related features, detected by
  // using the 0x80000008 leaf of the CPUID instruction
  bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
                     !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
  Features["clzero"]   = HasExtLeaf8 && ((EBX >> 0) & 1);
  Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);

  // Leaf 7, subleaf 0: structured extended features in EBX, ECX and EDX.
  bool HasLeaf7 =
      MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);

  Features["fsgsbase"]   = HasLeaf7 && ((EBX >>  0) & 1);
  Features["sgx"]        = HasLeaf7 && ((EBX >>  2) & 1);
  Features["bmi"]        = HasLeaf7 && ((EBX >>  3) & 1);
  // AVX2 is only supported if we have the OS save support from AVX.
  Features["avx2"]       = HasLeaf7 && ((EBX >>  5) & 1) && HasAVXSave;
  Features["bmi2"]       = HasLeaf7 && ((EBX >>  8) & 1);
  Features["invpcid"]    = HasLeaf7 && ((EBX >> 10) & 1);
  Features["rtm"]        = HasLeaf7 && ((EBX >> 11) & 1);
  // AVX512 is only supported if the OS supports the context save for it.
  Features["avx512f"]    = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
  Features["avx512dq"]   = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
  Features["rdseed"]     = HasLeaf7 && ((EBX >> 18) & 1);
  Features["adx"]        = HasLeaf7 && ((EBX >> 19) & 1);
  Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
  Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
  Features["clwb"]       = HasLeaf7 && ((EBX >> 24) & 1);
  Features["avx512pf"]   = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
  Features["avx512er"]   = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
  Features["avx512cd"]   = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
  Features["sha"]        = HasLeaf7 && ((EBX >> 29) & 1);
  Features["avx512bw"]   = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
  Features["avx512vl"]   = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;

  Features["prefetchwt1"]     = HasLeaf7 && ((ECX >>  0) & 1);
  Features["avx512vbmi"]      = HasLeaf7 && ((ECX >>  1) & 1) && HasAVX512Save;
  Features["pku"]             = HasLeaf7 && ((ECX >>  4) & 1);
  Features["waitpkg"]         = HasLeaf7 && ((ECX >>  5) & 1);
  Features["avx512vbmi2"]     = HasLeaf7 && ((ECX >>  6) & 1) && HasAVX512Save;
  Features["shstk"]           = HasLeaf7 && ((ECX >>  7) & 1);
  Features["gfni"]            = HasLeaf7 && ((ECX >>  8) & 1);
  Features["vaes"]            = HasLeaf7 && ((ECX >>  9) & 1) && HasAVXSave;
  Features["vpclmulqdq"]      = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
  Features["avx512vnni"]      = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
  Features["avx512bitalg"]    = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
  Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
  Features["rdpid"]           = HasLeaf7 && ((ECX >> 22) & 1);
  Features["kl"]              = HasLeaf7 && ((ECX >> 23) & 1); // key locker
  Features["cldemote"]        = HasLeaf7 && ((ECX >> 25) & 1);
  Features["movdiri"]         = HasLeaf7 && ((ECX >> 27) & 1);
  Features["movdir64b"]       = HasLeaf7 && ((ECX >> 28) & 1);
  Features["enqcmd"]          = HasLeaf7 && ((ECX >> 29) & 1);

  Features["uintr"]           = HasLeaf7 && ((EDX >> 5) & 1);
  Features["avx512vp2intersect"] =
      HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
  Features["serialize"]       = HasLeaf7 && ((EDX >> 14) & 1);
  Features["tsxldtrk"]        = HasLeaf7 && ((EDX >> 16) & 1);
  // There are two CPUID leaves that carry information associated with the
  // pconfig instruction:
  // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th
  // bit of EDX), while the EAX=0x1b leaf returns information on the
  // availability of specific pconfig leaves.
  // The target feature here only refers to the first of these two.
  // Users might need to check for the availability of specific pconfig
  // leaves using cpuid, since that information is ignored while
  // detecting features using the "-march=native" flag.
  // For more info, see X86 ISA docs.
  Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
  Features["amx-bf16"]   = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
  Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save;
  Features["amx-tile"]   = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
  Features["amx-int8"]   = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
  // Leaf 7, subleaf 1 (overwrites the subleaf-0 register values).
  bool HasLeaf7Subleaf1 =
      MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
  Features["avxvnni"]    = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
  Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
  Features["hreset"]     = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);

  // Leaf 0xd, subleaf 1: XSAVE extensions.
  bool HasLeafD = MaxLevel >= 0xd &&
                  !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);

  // Only enable XSAVE if OS has enabled support for saving YMM state.
  Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
  Features["xsavec"]   = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
  Features["xsaves"]   = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;

  // Leaf 0x14, subleaf 0: source of the ptwrite bit.
  bool HasLeaf14 = MaxLevel >= 0x14 &&
                  !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);

  Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);

  // Leaf 0x19: source of the widekl bit; additionally gated on HasLeaf7.
  bool HasLeaf19 =
      MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
  Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);

  return true;
}
1667 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
1668 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1669   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1670   if (!P)
1671     return false;
1672 
1673   SmallVector<StringRef, 32> Lines;
1674   P->getBuffer().split(Lines, "\n");
1675 
1676   SmallVector<StringRef, 32> CPUFeatures;
1677 
1678   // Look for the CPU features.
1679   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1680     if (Lines[I].startswith("Features")) {
1681       Lines[I].split(CPUFeatures, ' ');
1682       break;
1683     }
1684 
1685 #if defined(__aarch64__)
1686   // Keep track of which crypto features we have seen
1687   enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
1688   uint32_t crypto = 0;
1689 #endif
1690 
1691   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
1692     StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
1693 #if defined(__aarch64__)
1694                                    .Case("asimd", "neon")
1695                                    .Case("fp", "fp-armv8")
1696                                    .Case("crc32", "crc")
1697 #else
1698                                    .Case("half", "fp16")
1699                                    .Case("neon", "neon")
1700                                    .Case("vfpv3", "vfp3")
1701                                    .Case("vfpv3d16", "d16")
1702                                    .Case("vfpv4", "vfp4")
1703                                    .Case("idiva", "hwdiv-arm")
1704                                    .Case("idivt", "hwdiv")
1705 #endif
1706                                    .Default("");
1707 
1708 #if defined(__aarch64__)
1709     // We need to check crypto separately since we need all of the crypto
1710     // extensions to enable the subtarget feature
1711     if (CPUFeatures[I] == "aes")
1712       crypto |= CAP_AES;
1713     else if (CPUFeatures[I] == "pmull")
1714       crypto |= CAP_PMULL;
1715     else if (CPUFeatures[I] == "sha1")
1716       crypto |= CAP_SHA1;
1717     else if (CPUFeatures[I] == "sha2")
1718       crypto |= CAP_SHA2;
1719 #endif
1720 
1721     if (LLVMFeatureStr != "")
1722       Features[LLVMFeatureStr] = true;
1723   }
1724 
1725 #if defined(__aarch64__)
1726   // If we have all crypto bits we can add the feature
1727   if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
1728     Features["crypto"] = true;
1729 #endif
1730 
1731   return true;
1732 }
1733 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
1734 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1735   if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
1736     Features["neon"] = true;
1737   if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
1738     Features["crc"] = true;
1739   if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
1740     Features["crypto"] = true;
1741 
1742   return true;
1743 }
1744 #else
1745 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
1746 #endif
1747 
1748 std::string sys::getProcessTriple() {
1749   std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
1750   Triple PT(Triple::normalize(TargetTripleString));
1751 
1752   if (sizeof(void *) == 8 && PT.isArch32Bit())
1753     PT = PT.get64BitArchVariant();
1754   if (sizeof(void *) == 4 && PT.isArch64Bit())
1755     PT = PT.get32BitArchVariant();
1756 
1757   return PT.str();
1758 }
1759