1 //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file implements the operating system Host concept.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/Host.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/ADT/Triple.h"
19 #include "llvm/Config/llvm-config.h"
20 #include "llvm/Support/MemoryBuffer.h"
21 #include "llvm/Support/X86TargetParser.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include <string.h>
24 
25 // Include the platform-specific parts of this class.
26 #ifdef LLVM_ON_UNIX
27 #include "Unix/Host.inc"
28 #include <sched.h>
29 #endif
30 #ifdef _WIN32
31 #include "Windows/Host.inc"
32 #endif
33 #ifdef _MSC_VER
34 #include <intrin.h>
35 #endif
36 #ifdef __MVS__
37 #include "llvm/Support/BCD.h"
38 #endif
39 #if defined(__APPLE__)
40 #include <mach/host_info.h>
41 #include <mach/mach.h>
42 #include <mach/mach_host.h>
43 #include <mach/machine.h>
44 #include <sys/param.h>
45 #include <sys/sysctl.h>
46 #endif
47 #ifdef _AIX
48 #include <sys/systemcfg.h>
49 #endif
50 #if defined(__sun__) && defined(__svr4__)
51 #include <kstat.h>
52 #endif
53 
54 #define DEBUG_TYPE "host-detection"
55 
56 //===----------------------------------------------------------------------===//
57 //
58 //  Implementations of the CPU detection routines
59 //
60 //===----------------------------------------------------------------------===//
61 
62 using namespace llvm;
63 
64 static std::unique_ptr<llvm::MemoryBuffer>
getProcCpuinfoContent()65     LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
66   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
67       llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
68   if (std::error_code EC = Text.getError()) {
69     llvm::errs() << "Can't read "
70                  << "/proc/cpuinfo: " << EC.message() << "\n";
71     return nullptr;
72   }
73   return std::move(*Text);
74 }
75 
getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent)76 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
77   // Access to the Processor Version Register (PVR) on PowerPC is privileged,
78   // and so we must use an operating-system interface to determine the current
79   // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
80   const char *generic = "generic";
81 
82   // The cpu line is second (after the 'processor: 0' line), so if this
83   // buffer is too small then something has changed (or is wrong).
84   StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
85   StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();
86 
87   StringRef::const_iterator CIP = CPUInfoStart;
88 
89   StringRef::const_iterator CPUStart = nullptr;
90   size_t CPULen = 0;
91 
92   // We need to find the first line which starts with cpu, spaces, and a colon.
93   // After the colon, there may be some additional spaces and then the cpu type.
94   while (CIP < CPUInfoEnd && CPUStart == nullptr) {
95     if (CIP < CPUInfoEnd && *CIP == '\n')
96       ++CIP;
97 
98     if (CIP < CPUInfoEnd && *CIP == 'c') {
99       ++CIP;
100       if (CIP < CPUInfoEnd && *CIP == 'p') {
101         ++CIP;
102         if (CIP < CPUInfoEnd && *CIP == 'u') {
103           ++CIP;
104           while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
105             ++CIP;
106 
107           if (CIP < CPUInfoEnd && *CIP == ':') {
108             ++CIP;
109             while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
110               ++CIP;
111 
112             if (CIP < CPUInfoEnd) {
113               CPUStart = CIP;
114               while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
115                                           *CIP != ',' && *CIP != '\n'))
116                 ++CIP;
117               CPULen = CIP - CPUStart;
118             }
119           }
120         }
121       }
122     }
123 
124     if (CPUStart == nullptr)
125       while (CIP < CPUInfoEnd && *CIP != '\n')
126         ++CIP;
127   }
128 
129   if (CPUStart == nullptr)
130     return generic;
131 
132   return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
133       .Case("604e", "604e")
134       .Case("604", "604")
135       .Case("7400", "7400")
136       .Case("7410", "7400")
137       .Case("7447", "7400")
138       .Case("7455", "7450")
139       .Case("G4", "g4")
140       .Case("POWER4", "970")
141       .Case("PPC970FX", "970")
142       .Case("PPC970MP", "970")
143       .Case("G5", "g5")
144       .Case("POWER5", "g5")
145       .Case("A2", "a2")
146       .Case("POWER6", "pwr6")
147       .Case("POWER7", "pwr7")
148       .Case("POWER8", "pwr8")
149       .Case("POWER8E", "pwr8")
150       .Case("POWER8NVL", "pwr8")
151       .Case("POWER9", "pwr9")
152       .Case("POWER10", "pwr10")
153       // FIXME: If we get a simulator or machine with the capabilities of
154       // mcpu=future, we should revisit this and add the name reported by the
155       // simulator/machine.
156       .Default(generic);
157 }
158 
getHostCPUNameForARM(StringRef ProcCpuinfoContent)159 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
160   // The cpuid register on arm is not accessible from user space. On Linux,
161   // it is exposed through the /proc/cpuinfo file.
162 
163   // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
164   // in all cases.
165   SmallVector<StringRef, 32> Lines;
166   ProcCpuinfoContent.split(Lines, "\n");
167 
168   // Look for the CPU implementer line.
169   StringRef Implementer;
170   StringRef Hardware;
171   StringRef Part;
172   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
173     if (Lines[I].startswith("CPU implementer"))
174       Implementer = Lines[I].substr(15).ltrim("\t :");
175     if (Lines[I].startswith("Hardware"))
176       Hardware = Lines[I].substr(8).ltrim("\t :");
177     if (Lines[I].startswith("CPU part"))
178       Part = Lines[I].substr(8).ltrim("\t :");
179   }
180 
181   if (Implementer == "0x41") { // ARM Ltd.
182     // MSM8992/8994 may give cpu part for the core that the kernel is running on,
183     // which is undeterministic and wrong. Always return cortex-a53 for these SoC.
184     if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996"))
185       return "cortex-a53";
186 
187 
188     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
189     // values correspond to the "Part number" in the CP15/c0 register. The
190     // contents are specified in the various processor manuals.
191     // This corresponds to the Main ID Register in Technical Reference Manuals.
192     // and is used in programs like sys-utils
193     return StringSwitch<const char *>(Part)
194         .Case("0x926", "arm926ej-s")
195         .Case("0xb02", "mpcore")
196         .Case("0xb36", "arm1136j-s")
197         .Case("0xb56", "arm1156t2-s")
198         .Case("0xb76", "arm1176jz-s")
199         .Case("0xc08", "cortex-a8")
200         .Case("0xc09", "cortex-a9")
201         .Case("0xc0f", "cortex-a15")
202         .Case("0xc20", "cortex-m0")
203         .Case("0xc23", "cortex-m3")
204         .Case("0xc24", "cortex-m4")
205         .Case("0xd22", "cortex-m55")
206         .Case("0xd02", "cortex-a34")
207         .Case("0xd04", "cortex-a35")
208         .Case("0xd03", "cortex-a53")
209         .Case("0xd07", "cortex-a57")
210         .Case("0xd08", "cortex-a72")
211         .Case("0xd09", "cortex-a73")
212         .Case("0xd0a", "cortex-a75")
213         .Case("0xd0b", "cortex-a76")
214         .Case("0xd0d", "cortex-a77")
215         .Case("0xd41", "cortex-a78")
216         .Case("0xd44", "cortex-x1")
217         .Case("0xd4c", "cortex-x1c")
218         .Case("0xd0c", "neoverse-n1")
219         .Case("0xd49", "neoverse-n2")
220         .Case("0xd40", "neoverse-v1")
221         .Default("generic");
222   }
223 
224   if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
225     return StringSwitch<const char *>(Part)
226       .Case("0x516", "thunderx2t99")
227       .Case("0x0516", "thunderx2t99")
228       .Case("0xaf", "thunderx2t99")
229       .Case("0x0af", "thunderx2t99")
230       .Case("0xa1", "thunderxt88")
231       .Case("0x0a1", "thunderxt88")
232       .Default("generic");
233   }
234 
235   if (Implementer == "0x46") { // Fujitsu Ltd.
236     return StringSwitch<const char *>(Part)
237       .Case("0x001", "a64fx")
238       .Default("generic");
239   }
240 
241   if (Implementer == "0x4e") { // NVIDIA Corporation
242     return StringSwitch<const char *>(Part)
243         .Case("0x004", "carmel")
244         .Default("generic");
245   }
246 
247   if (Implementer == "0x48") // HiSilicon Technologies, Inc.
248     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
249     // values correspond to the "Part number" in the CP15/c0 register. The
250     // contents are specified in the various processor manuals.
251     return StringSwitch<const char *>(Part)
252       .Case("0xd01", "tsv110")
253       .Default("generic");
254 
255   if (Implementer == "0x51") // Qualcomm Technologies, Inc.
256     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
257     // values correspond to the "Part number" in the CP15/c0 register. The
258     // contents are specified in the various processor manuals.
259     return StringSwitch<const char *>(Part)
260         .Case("0x06f", "krait") // APQ8064
261         .Case("0x201", "kryo")
262         .Case("0x205", "kryo")
263         .Case("0x211", "kryo")
264         .Case("0x800", "cortex-a73") // Kryo 2xx Gold
265         .Case("0x801", "cortex-a73") // Kryo 2xx Silver
266         .Case("0x802", "cortex-a75") // Kryo 3xx Gold
267         .Case("0x803", "cortex-a75") // Kryo 3xx Silver
268         .Case("0x804", "cortex-a76") // Kryo 4xx Gold
269         .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
270         .Case("0xc00", "falkor")
271         .Case("0xc01", "saphira")
272         .Default("generic");
273   if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
274     // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
275     // any predictive pattern across variants and parts.
276     unsigned Variant = 0, Part = 0;
277 
278     // Look for the CPU variant line, whose value is a 1 digit hexadecimal
279     // number, corresponding to the Variant bits in the CP15/C0 register.
280     for (auto I : Lines)
281       if (I.consume_front("CPU variant"))
282         I.ltrim("\t :").getAsInteger(0, Variant);
283 
284     // Look for the CPU part line, whose value is a 3 digit hexadecimal
285     // number, corresponding to the PartNum bits in the CP15/C0 register.
286     for (auto I : Lines)
287       if (I.consume_front("CPU part"))
288         I.ltrim("\t :").getAsInteger(0, Part);
289 
290     unsigned Exynos = (Variant << 12) | Part;
291     switch (Exynos) {
292     default:
293       // Default by falling through to Exynos M3.
294       LLVM_FALLTHROUGH;
295     case 0x1002:
296       return "exynos-m3";
297     case 0x1003:
298       return "exynos-m4";
299     }
300   }
301 
302   if (Implementer == "0xc0") { // Ampere Computing
303     return StringSwitch<const char *>(Part)
304         .Case("0xac3", "ampere1")
305         .Default("generic");
306   }
307 
308   return "generic";
309 }
310 
311 namespace {
getCPUNameFromS390Model(unsigned int Id,bool HaveVectorSupport)312 StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
313   switch (Id) {
314     case 2064:  // z900 not supported by LLVM
315     case 2066:
316     case 2084:  // z990 not supported by LLVM
317     case 2086:
318     case 2094:  // z9-109 not supported by LLVM
319     case 2096:
320       return "generic";
321     case 2097:
322     case 2098:
323       return "z10";
324     case 2817:
325     case 2818:
326       return "z196";
327     case 2827:
328     case 2828:
329       return "zEC12";
330     case 2964:
331     case 2965:
332       return HaveVectorSupport? "z13" : "zEC12";
333     case 3906:
334     case 3907:
335       return HaveVectorSupport? "z14" : "zEC12";
336     case 8561:
337     case 8562:
338       return HaveVectorSupport? "z15" : "zEC12";
339     case 3931:
340     case 3932:
341     default:
342       return HaveVectorSupport? "z16" : "zEC12";
343   }
344 }
345 } // end anonymous namespace
346 
getHostCPUNameForS390x(StringRef ProcCpuinfoContent)347 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
348   // STIDP is a privileged operation, so use /proc/cpuinfo instead.
349 
350   // The "processor 0:" line comes after a fair amount of other information,
351   // including a cache breakdown, but this should be plenty.
352   SmallVector<StringRef, 32> Lines;
353   ProcCpuinfoContent.split(Lines, "\n");
354 
355   // Look for the CPU features.
356   SmallVector<StringRef, 32> CPUFeatures;
357   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
358     if (Lines[I].startswith("features")) {
359       size_t Pos = Lines[I].find(':');
360       if (Pos != StringRef::npos) {
361         Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
362         break;
363       }
364     }
365 
366   // We need to check for the presence of vector support independently of
367   // the machine type, since we may only use the vector register set when
368   // supported by the kernel (and hypervisor).
369   bool HaveVectorSupport = false;
370   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
371     if (CPUFeatures[I] == "vx")
372       HaveVectorSupport = true;
373   }
374 
375   // Now check the processor machine type.
376   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
377     if (Lines[I].startswith("processor ")) {
378       size_t Pos = Lines[I].find("machine = ");
379       if (Pos != StringRef::npos) {
380         Pos += sizeof("machine = ") - 1;
381         unsigned int Id;
382         if (!Lines[I].drop_front(Pos).getAsInteger(10, Id))
383           return getCPUNameFromS390Model(Id, HaveVectorSupport);
384       }
385       break;
386     }
387   }
388 
389   return "generic";
390 }
391 
getHostCPUNameForRISCV(StringRef ProcCpuinfoContent)392 StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) {
393   // There are 24 lines in /proc/cpuinfo
394   SmallVector<StringRef> Lines;
395   ProcCpuinfoContent.split(Lines, "\n");
396 
397   // Look for uarch line to determine cpu name
398   StringRef UArch;
399   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
400     if (Lines[I].startswith("uarch")) {
401       UArch = Lines[I].substr(5).ltrim("\t :");
402       break;
403     }
404   }
405 
406   return StringSwitch<const char *>(UArch)
407       .Case("sifive,u74-mc", "sifive-u74")
408       .Case("sifive,bullet0", "sifive-u74")
409       .Default("generic");
410 }
411 
getHostCPUNameForBPF()412 StringRef sys::detail::getHostCPUNameForBPF() {
413 #if !defined(__linux__) || !defined(__x86_64__)
414   return "generic";
415 #else
416   uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
417       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
418     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
419       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
420       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
421       /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
422       0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
423       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
424       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
425       /* BPF_EXIT_INSN() */
426       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
427 
428   uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
429       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
430     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
431       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
432       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
433       /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
434       0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
435       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
436       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
437       /* BPF_EXIT_INSN() */
438       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
439 
440   struct bpf_prog_load_attr {
441     uint32_t prog_type;
442     uint32_t insn_cnt;
443     uint64_t insns;
444     uint64_t license;
445     uint32_t log_level;
446     uint32_t log_size;
447     uint64_t log_buf;
448     uint32_t kern_version;
449     uint32_t prog_flags;
450   } attr = {};
451   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
452   attr.insn_cnt = 5;
453   attr.insns = (uint64_t)v3_insns;
454   attr.license = (uint64_t)"DUMMY";
455 
456   int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
457                    sizeof(attr));
458   if (fd >= 0) {
459     close(fd);
460     return "v3";
461   }
462 
463   /* Clear the whole attr in case its content changed by syscall. */
464   memset(&attr, 0, sizeof(attr));
465   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
466   attr.insn_cnt = 5;
467   attr.insns = (uint64_t)v2_insns;
468   attr.license = (uint64_t)"DUMMY";
469   fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
470   if (fd >= 0) {
471     close(fd);
472     return "v2";
473   }
474   return "v1";
475 #endif
476 }
477 
478 #if defined(__i386__) || defined(_M_IX86) || \
479     defined(__x86_64__) || defined(_M_X64)
480 
// Report whether the CPUID instruction can be used on this host.
//
// The i386 check was copied from clang's cpuid.h (__get_cpuid_max). It was
// motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support, so for i386 the presence of CPUID is checked first via the
// corresponding eflags bit. In every other configuration the answer is
// always true.
//
// Removal of the cpuid.h header was motivated by PR30384. Header cpuid.h and
// method __get_cpuid_max are not used in llvm, clang, openmp or test-suite,
// but are used in external projects e.g. libstdcxx.
static bool isCpuIdSupported() {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Flip bit 0x00200000 in eflags and read eflags back; the result is set to
  // 1 only if the value read back differs from the original one.
  int HasCpuid;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(HasCpuid)
          :
          : "eax", "ecx");
  return HasCpuid != 0;
#else
  return true;
#endif
}
515 
/// getX86CpuIDAndInfo - Execute cpuid for leaf \p value and store the four
/// result registers through \p rEAX, \p rEBX, \p rECX and \p rEDX.
///
/// \returns false when cpuid was executed, true if we can't run cpuid on the
/// host (in which case the outputs are left untouched).
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually: the
  // register is parked in rsi, and the final xchg restores it while leaving
  // the cpuid result in rsi.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same ebx-preserving dance as on x86-64, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif !(defined(__GNUC__) || defined(__clang__)) && defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = static_cast<unsigned>(Regs[0]);
  *rEBX = static_cast<unsigned>(Regs[1]);
  *rECX = static_cast<unsigned>(Regs[2]);
  *rEDX = static_cast<unsigned>(Regs[3]);
  return false;
#else
  return true;
#endif
}
553 
554 namespace llvm {
555 namespace sys {
556 namespace detail {
557 namespace x86 {
558 
getVendorSignature(unsigned * MaxLeaf)559 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
560   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
561   if (MaxLeaf == nullptr)
562     MaxLeaf = &EAX;
563   else
564     *MaxLeaf = 0;
565 
566   if (!isCpuIdSupported())
567     return VendorSignatures::UNKNOWN;
568 
569   if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
570     return VendorSignatures::UNKNOWN;
571 
572   // "Genu ineI ntel"
573   if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
574     return VendorSignatures::GENUINE_INTEL;
575 
576   // "Auth enti cAMD"
577   if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
578     return VendorSignatures::AUTHENTIC_AMD;
579 
580   return VendorSignatures::UNKNOWN;
581 }
582 
583 } // namespace x86
584 } // namespace detail
585 } // namespace sys
586 } // namespace llvm
587 
588 using namespace llvm::sys::detail::x86;
589 
/// getX86CpuIDAndInfoEx - Execute cpuid for leaf \p value with sub-leaf
/// \p subleaf and store the four result registers through \p rEAX, \p rEBX,
/// \p rECX and \p rEDX.
///
/// \returns false when cpuid was executed, true if we can't run cpuid on the
/// host (in which case the outputs are left untouched).
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually: the
  // register is parked in rsi, and the final xchg restores it while leaving
  // the cpuid result in rsi.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same ebx-preserving dance as on x86-64, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif !(defined(__GNUC__) || defined(__clang__)) && defined(_MSC_VER)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = static_cast<unsigned>(Regs[0]);
  *rEBX = static_cast<unsigned>(Regs[1]);
  *rECX = static_cast<unsigned>(Regs[2]);
  *rEDX = static_cast<unsigned>(Regs[3]);
  return false;
#else
  return true;
#endif
}
628 
// Read control register 0 (XCR0). Used to detect features such as AVX.
//
// The low half of the register is stored through rEAX and the high half
// through rEDX. Returns false when the register was read and true when this
// build has no way of reading it (the outputs are then left untouched).
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // xgetbv is emitted as a raw .byte sequence instead of the instruction
  // itself because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler
  // used. ECX is loaded with 0 for the read.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  const unsigned long long XCR0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = static_cast<unsigned>(XCR0);
  *rEDX = static_cast<unsigned>(XCR0 >> 32);
  return false;
#else
  return true;
#endif
}
646 
/// Decode the family and model numbers from the EAX value of cpuid leaf 1.
///
/// \param EAX the raw register value.
/// \param Family receives the family ID; when the base family is 0xF the
/// extended family ID is added to it.
/// \param Model receives the model ID; when the base family is 6 or 0xF the
/// extended model ID supplies the upper four bits.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf; // Bits 8 - 11
  const unsigned BaseModel = (EAX >> 4) & 0xf;  // Bits 4 - 7
  const unsigned ExtFamily = (EAX >> 20) & 0xff; // Bits 20 - 27
  const unsigned ExtModel = (EAX >> 16) & 0xf;   // Bits 16 - 19

  unsigned FullFamily = BaseFamily;
  unsigned FullModel = BaseModel;

  // Examine extended family ID if family ID is F.
  if (BaseFamily == 0xf)
    FullFamily += ExtFamily;

  // Examine extended model ID if family ID is 6 or F.
  if (BaseFamily == 6 || BaseFamily == 0xf)
    FullModel += ExtModel << 4;

  *Family = FullFamily;
  *Model = FullModel;
}
659 
660 static StringRef
getIntelProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)661 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
662                                 const unsigned *Features,
663                                 unsigned *Type, unsigned *Subtype) {
664   auto testFeature = [&](unsigned F) {
665     return (Features[F / 32] & (1U << (F % 32))) != 0;
666   };
667 
668   StringRef CPU;
669 
670   switch (Family) {
671   case 3:
672     CPU = "i386";
673     break;
674   case 4:
675     CPU = "i486";
676     break;
677   case 5:
678     if (testFeature(X86::FEATURE_MMX)) {
679       CPU = "pentium-mmx";
680       break;
681     }
682     CPU = "pentium";
683     break;
684   case 6:
685     switch (Model) {
686     case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
687                // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
688                // mobile processor, Intel Core 2 Extreme processor, Intel
689                // Pentium Dual-Core processor, Intel Xeon processor, model
690                // 0Fh. All processors are manufactured using the 65 nm process.
691     case 0x16: // Intel Celeron processor model 16h. All processors are
692                // manufactured using the 65 nm process
693       CPU = "core2";
694       *Type = X86::INTEL_CORE2;
695       break;
696     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
697                // 17h. All processors are manufactured using the 45 nm process.
698                //
699                // 45nm: Penryn , Wolfdale, Yorkfield (XE)
700     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
701                // the 45 nm process.
702       CPU = "penryn";
703       *Type = X86::INTEL_CORE2;
704       break;
705     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
706                // processors are manufactured using the 45 nm process.
707     case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
708                // As found in a Summer 2010 model iMac.
709     case 0x1f:
710     case 0x2e:              // Nehalem EX
711       CPU = "nehalem";
712       *Type = X86::INTEL_COREI7;
713       *Subtype = X86::INTEL_COREI7_NEHALEM;
714       break;
715     case 0x25: // Intel Core i7, laptop version.
716     case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
717                // processors are manufactured using the 32 nm process.
718     case 0x2f: // Westmere EX
719       CPU = "westmere";
720       *Type = X86::INTEL_COREI7;
721       *Subtype = X86::INTEL_COREI7_WESTMERE;
722       break;
723     case 0x2a: // Intel Core i7 processor. All processors are manufactured
724                // using the 32 nm process.
725     case 0x2d:
726       CPU = "sandybridge";
727       *Type = X86::INTEL_COREI7;
728       *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
729       break;
730     case 0x3a:
731     case 0x3e:              // Ivy Bridge EP
732       CPU = "ivybridge";
733       *Type = X86::INTEL_COREI7;
734       *Subtype = X86::INTEL_COREI7_IVYBRIDGE;
735       break;
736 
737     // Haswell:
738     case 0x3c:
739     case 0x3f:
740     case 0x45:
741     case 0x46:
742       CPU = "haswell";
743       *Type = X86::INTEL_COREI7;
744       *Subtype = X86::INTEL_COREI7_HASWELL;
745       break;
746 
747     // Broadwell:
748     case 0x3d:
749     case 0x47:
750     case 0x4f:
751     case 0x56:
752       CPU = "broadwell";
753       *Type = X86::INTEL_COREI7;
754       *Subtype = X86::INTEL_COREI7_BROADWELL;
755       break;
756 
757     // Skylake:
758     case 0x4e:              // Skylake mobile
759     case 0x5e:              // Skylake desktop
760     case 0x8e:              // Kaby Lake mobile
761     case 0x9e:              // Kaby Lake desktop
762     case 0xa5:              // Comet Lake-H/S
763     case 0xa6:              // Comet Lake-U
764       CPU = "skylake";
765       *Type = X86::INTEL_COREI7;
766       *Subtype = X86::INTEL_COREI7_SKYLAKE;
767       break;
768 
769     // Rocketlake:
770     case 0xa7:
771       CPU = "rocketlake";
772       *Type = X86::INTEL_COREI7;
773       *Subtype = X86::INTEL_COREI7_ROCKETLAKE;
774       break;
775 
776     // Skylake Xeon:
777     case 0x55:
778       *Type = X86::INTEL_COREI7;
779       if (testFeature(X86::FEATURE_AVX512BF16)) {
780         CPU = "cooperlake";
781         *Subtype = X86::INTEL_COREI7_COOPERLAKE;
782       } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
783         CPU = "cascadelake";
784         *Subtype = X86::INTEL_COREI7_CASCADELAKE;
785       } else {
786         CPU = "skylake-avx512";
787         *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
788       }
789       break;
790 
791     // Cannonlake:
792     case 0x66:
793       CPU = "cannonlake";
794       *Type = X86::INTEL_COREI7;
795       *Subtype = X86::INTEL_COREI7_CANNONLAKE;
796       break;
797 
798     // Icelake:
799     case 0x7d:
800     case 0x7e:
801       CPU = "icelake-client";
802       *Type = X86::INTEL_COREI7;
803       *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
804       break;
805 
806     // Tigerlake:
807     case 0x8c:
808     case 0x8d:
809       CPU = "tigerlake";
810       *Type = X86::INTEL_COREI7;
811       *Subtype = X86::INTEL_COREI7_TIGERLAKE;
812       break;
813 
814     // Alderlake:
815     case 0x97:
816     case 0x9a:
817       CPU = "alderlake";
818       *Type = X86::INTEL_COREI7;
819       *Subtype = X86::INTEL_COREI7_ALDERLAKE;
820       break;
821 
822     // Icelake Xeon:
823     case 0x6a:
824     case 0x6c:
825       CPU = "icelake-server";
826       *Type = X86::INTEL_COREI7;
827       *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
828       break;
829 
830     // Sapphire Rapids:
831     case 0x8f:
832       CPU = "sapphirerapids";
833       *Type = X86::INTEL_COREI7;
834       *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
835       break;
836 
837     case 0x1c: // Most 45 nm Intel Atom processors
838     case 0x26: // 45 nm Atom Lincroft
839     case 0x27: // 32 nm Atom Medfield
840     case 0x35: // 32 nm Atom Midview
841     case 0x36: // 32 nm Atom Midview
842       CPU = "bonnell";
843       *Type = X86::INTEL_BONNELL;
844       break;
845 
846     // Atom Silvermont codes from the Intel software optimization guide.
847     case 0x37:
848     case 0x4a:
849     case 0x4d:
850     case 0x5a:
851     case 0x5d:
852     case 0x4c: // really airmont
853       CPU = "silvermont";
854       *Type = X86::INTEL_SILVERMONT;
855       break;
856     // Goldmont:
857     case 0x5c: // Apollo Lake
858     case 0x5f: // Denverton
859       CPU = "goldmont";
860       *Type = X86::INTEL_GOLDMONT;
861       break;
862     case 0x7a:
863       CPU = "goldmont-plus";
864       *Type = X86::INTEL_GOLDMONT_PLUS;
865       break;
866     case 0x86:
867       CPU = "tremont";
868       *Type = X86::INTEL_TREMONT;
869       break;
870 
871     // Xeon Phi (Knights Landing + Knights Mill):
872     case 0x57:
873       CPU = "knl";
874       *Type = X86::INTEL_KNL;
875       break;
876     case 0x85:
877       CPU = "knm";
878       *Type = X86::INTEL_KNM;
879       break;
880 
881     default: // Unknown family 6 CPU, try to guess.
882       // Don't both with Type/Subtype here, they aren't used by the caller.
883       // They're used above to keep the code in sync with compiler-rt.
884       // TODO detect tigerlake host from model
885       if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
886         CPU = "tigerlake";
887       } else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
888         CPU = "icelake-client";
889       } else if (testFeature(X86::FEATURE_AVX512VBMI)) {
890         CPU = "cannonlake";
891       } else if (testFeature(X86::FEATURE_AVX512BF16)) {
892         CPU = "cooperlake";
893       } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
894         CPU = "cascadelake";
895       } else if (testFeature(X86::FEATURE_AVX512VL)) {
896         CPU = "skylake-avx512";
897       } else if (testFeature(X86::FEATURE_AVX512ER)) {
898         CPU = "knl";
899       } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
900         if (testFeature(X86::FEATURE_SHA))
901           CPU = "goldmont";
902         else
903           CPU = "skylake";
904       } else if (testFeature(X86::FEATURE_ADX)) {
905         CPU = "broadwell";
906       } else if (testFeature(X86::FEATURE_AVX2)) {
907         CPU = "haswell";
908       } else if (testFeature(X86::FEATURE_AVX)) {
909         CPU = "sandybridge";
910       } else if (testFeature(X86::FEATURE_SSE4_2)) {
911         if (testFeature(X86::FEATURE_MOVBE))
912           CPU = "silvermont";
913         else
914           CPU = "nehalem";
915       } else if (testFeature(X86::FEATURE_SSE4_1)) {
916         CPU = "penryn";
917       } else if (testFeature(X86::FEATURE_SSSE3)) {
918         if (testFeature(X86::FEATURE_MOVBE))
919           CPU = "bonnell";
920         else
921           CPU = "core2";
922       } else if (testFeature(X86::FEATURE_64BIT)) {
923         CPU = "core2";
924       } else if (testFeature(X86::FEATURE_SSE3)) {
925         CPU = "yonah";
926       } else if (testFeature(X86::FEATURE_SSE2)) {
927         CPU = "pentium-m";
928       } else if (testFeature(X86::FEATURE_SSE)) {
929         CPU = "pentium3";
930       } else if (testFeature(X86::FEATURE_MMX)) {
931         CPU = "pentium2";
932       } else {
933         CPU = "pentiumpro";
934       }
935       break;
936     }
937     break;
938   case 15: {
939     if (testFeature(X86::FEATURE_64BIT)) {
940       CPU = "nocona";
941       break;
942     }
943     if (testFeature(X86::FEATURE_SSE3)) {
944       CPU = "prescott";
945       break;
946     }
947     CPU = "pentium4";
948     break;
949   }
950   default:
951     break; // Unknown.
952   }
953 
954   return CPU;
955 }
956 
957 static StringRef
getAMDProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)958 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
959                               const unsigned *Features,
960                               unsigned *Type, unsigned *Subtype) {
961   auto testFeature = [&](unsigned F) {
962     return (Features[F / 32] & (1U << (F % 32))) != 0;
963   };
964 
965   StringRef CPU;
966 
967   switch (Family) {
968   case 4:
969     CPU = "i486";
970     break;
971   case 5:
972     CPU = "pentium";
973     switch (Model) {
974     case 6:
975     case 7:
976       CPU = "k6";
977       break;
978     case 8:
979       CPU = "k6-2";
980       break;
981     case 9:
982     case 13:
983       CPU = "k6-3";
984       break;
985     case 10:
986       CPU = "geode";
987       break;
988     }
989     break;
990   case 6:
991     if (testFeature(X86::FEATURE_SSE)) {
992       CPU = "athlon-xp";
993       break;
994     }
995     CPU = "athlon";
996     break;
997   case 15:
998     if (testFeature(X86::FEATURE_SSE3)) {
999       CPU = "k8-sse3";
1000       break;
1001     }
1002     CPU = "k8";
1003     break;
1004   case 16:
1005     CPU = "amdfam10";
1006     *Type = X86::AMDFAM10H; // "amdfam10"
1007     switch (Model) {
1008     case 2:
1009       *Subtype = X86::AMDFAM10H_BARCELONA;
1010       break;
1011     case 4:
1012       *Subtype = X86::AMDFAM10H_SHANGHAI;
1013       break;
1014     case 8:
1015       *Subtype = X86::AMDFAM10H_ISTANBUL;
1016       break;
1017     }
1018     break;
1019   case 20:
1020     CPU = "btver1";
1021     *Type = X86::AMD_BTVER1;
1022     break;
1023   case 21:
1024     CPU = "bdver1";
1025     *Type = X86::AMDFAM15H;
1026     if (Model >= 0x60 && Model <= 0x7f) {
1027       CPU = "bdver4";
1028       *Subtype = X86::AMDFAM15H_BDVER4;
1029       break; // 60h-7Fh: Excavator
1030     }
1031     if (Model >= 0x30 && Model <= 0x3f) {
1032       CPU = "bdver3";
1033       *Subtype = X86::AMDFAM15H_BDVER3;
1034       break; // 30h-3Fh: Steamroller
1035     }
1036     if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
1037       CPU = "bdver2";
1038       *Subtype = X86::AMDFAM15H_BDVER2;
1039       break; // 02h, 10h-1Fh: Piledriver
1040     }
1041     if (Model <= 0x0f) {
1042       *Subtype = X86::AMDFAM15H_BDVER1;
1043       break; // 00h-0Fh: Bulldozer
1044     }
1045     break;
1046   case 22:
1047     CPU = "btver2";
1048     *Type = X86::AMD_BTVER2;
1049     break;
1050   case 23:
1051     CPU = "znver1";
1052     *Type = X86::AMDFAM17H;
1053     if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
1054       CPU = "znver2";
1055       *Subtype = X86::AMDFAM17H_ZNVER2;
1056       break; // 30h-3fh, 71h: Zen2
1057     }
1058     if (Model <= 0x0f) {
1059       *Subtype = X86::AMDFAM17H_ZNVER1;
1060       break; // 00h-0Fh: Zen1
1061     }
1062     break;
1063   case 25:
1064     CPU = "znver3";
1065     *Type = X86::AMDFAM19H;
1066     if (Model <= 0x0f || Model == 0x21) {
1067       *Subtype = X86::AMDFAM19H_ZNVER3;
1068       break; // 00h-0Fh, 21h: Zen3
1069     }
1070     break;
1071   default:
1072     break; // Unknown AMD CPU.
1073   }
1074 
1075   return CPU;
1076 }
1077 
/// Populate the packed feature bitmap \p Features from CPUID / XGETBV data.
///
/// \param ECX      Initial ECX value. The caller in this file passes the ECX
///                 result of CPUID leaf 1.
/// \param EDX      Initial EDX value. The caller in this file passes the EDX
///                 result of CPUID leaf 1.
/// \param MaxLeaf  Highest supported basic CPUID leaf; leaf 7 is only queried
///                 when this is at least 7.
/// \param Features Bitmap with 32 feature bits per word, indexed by
///                 X86::FEATURE_* values. Bits are only ever set here, never
///                 cleared, so the caller must zero it first.
///
/// NOTE: ECX and EDX are by-value parameters that are reused as scratch
/// registers. Each later CPUID/XGETBV query overwrites them, so the order of
/// the statements below matters.
static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
                                 unsigned *Features) {
  unsigned EAX, EBX;

  // Set bit F in the packed feature bitmap.
  auto setFeature = [&](unsigned F) {
    Features[F / 32] |= 1U << (F % 32);
  };

  // Feature bits taken from the incoming EDX value.
  if ((EDX >> 15) & 1)
    setFeature(X86::FEATURE_CMOV);
  if ((EDX >> 23) & 1)
    setFeature(X86::FEATURE_MMX);
  if ((EDX >> 25) & 1)
    setFeature(X86::FEATURE_SSE);
  if ((EDX >> 26) & 1)
    setFeature(X86::FEATURE_SSE2);

  // Feature bits taken from the incoming ECX value.
  if ((ECX >> 0) & 1)
    setFeature(X86::FEATURE_SSE3);
  if ((ECX >> 1) & 1)
    setFeature(X86::FEATURE_PCLMUL);
  if ((ECX >> 9) & 1)
    setFeature(X86::FEATURE_SSSE3);
  if ((ECX >> 12) & 1)
    setFeature(X86::FEATURE_FMA);
  if ((ECX >> 19) & 1)
    setFeature(X86::FEATURE_SSE4_1);
  if ((ECX >> 20) & 1) {
    // The SSE4.2 bit is also used to report CRC32.
    setFeature(X86::FEATURE_SSE4_2);
    setFeature(X86::FEATURE_CRC32);
  }
  if ((ECX >> 23) & 1)
    setFeature(X86::FEATURE_POPCNT);
  if ((ECX >> 25) & 1)
    setFeature(X86::FEATURE_AES);

  if ((ECX >> 22) & 1)
    setFeature(X86::FEATURE_MOVBE);

  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
  // indicates that the AVX registers will be saved and restored on context
  // switch, then we have full AVX support.
  // Note: getX86XCR0 writes EAX and EDX, so the incoming EDX value is gone
  // after this point. The helper is negated, i.e. a false return is treated
  // as success.
  const unsigned AVXBits = (1 << 27) | (1 << 28);
  bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
                ((EAX & 0x6) == 0x6);
#if defined(__APPLE__)
  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
  // save the AVX512 context if we use AVX512 instructions, even if the bit is
  // not set right now.
  bool HasAVX512Save = true;
#else
  // AVX512 requires additional context to be saved by the OS.
  bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
#endif

  if (HasAVX)
    setFeature(X86::FEATURE_AVX);

  // Query CPUID leaf 7, subleaf 0. This overwrites EAX/EBX/ECX/EDX; every
  // check below up to the subleaf 1 query reads these leaf 7 values.
  bool HasLeaf7 =
      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);

  // Leaf 7 EBX bits. AVX2 additionally requires HasAVX, and every AVX512*
  // feature additionally requires HasAVX512Save.
  if (HasLeaf7 && ((EBX >> 3) & 1))
    setFeature(X86::FEATURE_BMI);
  if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
    setFeature(X86::FEATURE_AVX2);
  if (HasLeaf7 && ((EBX >> 8) & 1))
    setFeature(X86::FEATURE_BMI2);
  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512F);
  if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512DQ);
  if (HasLeaf7 && ((EBX >> 19) & 1))
    setFeature(X86::FEATURE_ADX);
  if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512IFMA);
  if (HasLeaf7 && ((EBX >> 23) & 1))
    setFeature(X86::FEATURE_CLFLUSHOPT);
  if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512PF);
  if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512ER);
  if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512CD);
  if (HasLeaf7 && ((EBX >> 29) & 1))
    setFeature(X86::FEATURE_SHA);
  if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512BW);
  if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VL);

  // Leaf 7 ECX bits.
  if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VBMI);
  if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VBMI2);
  if (HasLeaf7 && ((ECX >> 8) & 1))
    setFeature(X86::FEATURE_GFNI);
  if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
    setFeature(X86::FEATURE_VPCLMULQDQ);
  if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VNNI);
  if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512BITALG);
  if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VPOPCNTDQ);

  // Leaf 7 EDX bits.
  if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX5124VNNIW);
  if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX5124FMAPS);
  if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VP2INTERSECT);

  // Query CPUID leaf 7, subleaf 1 (overwrites the registers again).
  bool HasLeaf7Subleaf1 =
      MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
  if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512BF16);

  // Leaf 0x80000000 stores its EAX result in MaxExtLevel, which is then used
  // as the highest available extended leaf.
  unsigned MaxExtLevel;
  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);

  // Extended leaf 0x80000001: ECX/EDX now hold the extended feature bits.
  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
  if (HasExtLeaf1 && ((ECX >> 6) & 1))
    setFeature(X86::FEATURE_SSE4_A);
  if (HasExtLeaf1 && ((ECX >> 11) & 1))
    setFeature(X86::FEATURE_XOP);
  if (HasExtLeaf1 && ((ECX >> 16) & 1))
    setFeature(X86::FEATURE_FMA4);

  if (HasExtLeaf1 && ((EDX >> 29) & 1))
    setFeature(X86::FEATURE_64BIT);
}
1210 
getHostCPUName()1211 StringRef sys::getHostCPUName() {
1212   unsigned MaxLeaf = 0;
1213   const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
1214   if (Vendor == VendorSignatures::UNKNOWN)
1215     return "generic";
1216 
1217   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1218   getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
1219 
1220   unsigned Family = 0, Model = 0;
1221   unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0};
1222   detectX86FamilyModel(EAX, &Family, &Model);
1223   getAvailableFeatures(ECX, EDX, MaxLeaf, Features);
1224 
1225   // These aren't consumed in this file, but we try to keep some source code the
1226   // same or similar to compiler-rt.
1227   unsigned Type = 0;
1228   unsigned Subtype = 0;
1229 
1230   StringRef CPU;
1231 
1232   if (Vendor == VendorSignatures::GENUINE_INTEL) {
1233     CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type,
1234                                           &Subtype);
1235   } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) {
1236     CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type,
1237                                         &Subtype);
1238   }
1239 
1240   if (!CPU.empty())
1241     return CPU;
1242 
1243   return "generic";
1244 }
1245 
1246 #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
getHostCPUName()1247 StringRef sys::getHostCPUName() {
1248   host_basic_info_data_t hostInfo;
1249   mach_msg_type_number_t infoCount;
1250 
1251   infoCount = HOST_BASIC_INFO_COUNT;
1252   mach_port_t hostPort = mach_host_self();
1253   host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
1254             &infoCount);
1255   mach_port_deallocate(mach_task_self(), hostPort);
1256 
1257   if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
1258     return "generic";
1259 
1260   switch (hostInfo.cpu_subtype) {
1261   case CPU_SUBTYPE_POWERPC_601:
1262     return "601";
1263   case CPU_SUBTYPE_POWERPC_602:
1264     return "602";
1265   case CPU_SUBTYPE_POWERPC_603:
1266     return "603";
1267   case CPU_SUBTYPE_POWERPC_603e:
1268     return "603e";
1269   case CPU_SUBTYPE_POWERPC_603ev:
1270     return "603ev";
1271   case CPU_SUBTYPE_POWERPC_604:
1272     return "604";
1273   case CPU_SUBTYPE_POWERPC_604e:
1274     return "604e";
1275   case CPU_SUBTYPE_POWERPC_620:
1276     return "620";
1277   case CPU_SUBTYPE_POWERPC_750:
1278     return "750";
1279   case CPU_SUBTYPE_POWERPC_7400:
1280     return "7400";
1281   case CPU_SUBTYPE_POWERPC_7450:
1282     return "7450";
1283   case CPU_SUBTYPE_POWERPC_970:
1284     return "970";
1285   default:;
1286   }
1287 
1288   return "generic";
1289 }
1290 #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__))
getHostCPUName()1291 StringRef sys::getHostCPUName() {
1292   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1293   StringRef Content = P ? P->getBuffer() : "";
1294   return detail::getHostCPUNameForPowerPC(Content);
1295 }
1296 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
getHostCPUName()1297 StringRef sys::getHostCPUName() {
1298   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1299   StringRef Content = P ? P->getBuffer() : "";
1300   return detail::getHostCPUNameForARM(Content);
1301 }
1302 #elif defined(__linux__) && defined(__s390x__)
getHostCPUName()1303 StringRef sys::getHostCPUName() {
1304   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1305   StringRef Content = P ? P->getBuffer() : "";
1306   return detail::getHostCPUNameForS390x(Content);
1307 }
1308 #elif defined(__MVS__)
getHostCPUName()1309 StringRef sys::getHostCPUName() {
1310   // Get pointer to Communications Vector Table (CVT).
1311   // The pointer is located at offset 16 of the Prefixed Save Area (PSA).
1312   // It is stored as 31 bit pointer and will be zero-extended to 64 bit.
1313   int *StartToCVTOffset = reinterpret_cast<int *>(0x10);
1314   // Since its stored as a 31-bit pointer, get the 4 bytes from the start
1315   // of address.
1316   int ReadValue = *StartToCVTOffset;
1317   // Explicitly clear the high order bit.
1318   ReadValue = (ReadValue & 0x7FFFFFFF);
1319   char *CVT = reinterpret_cast<char *>(ReadValue);
1320   // The model number is located in the CVT prefix at offset -6 and stored as
1321   // signless packed decimal.
1322   uint16_t Id = *(uint16_t *)&CVT[-6];
1323   // Convert number to integer.
1324   Id = decodePackedBCD<uint16_t>(Id, false);
1325   // Check for vector support. It's stored in field CVTFLAG5 (offset 244),
1326   // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector
1327   // extension can only be used if bit CVTVEF is on.
1328   bool HaveVectorSupport = CVT[244] & 0x80;
1329   return getCPUNameFromS390Model(Id, HaveVectorSupport);
1330 }
1331 #elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__))
1332 #define CPUFAMILY_ARM_SWIFT 0x1e2d6381
1333 #define CPUFAMILY_ARM_CYCLONE 0x37a09642
1334 #define CPUFAMILY_ARM_TYPHOON 0x2c91a47e
1335 #define CPUFAMILY_ARM_TWISTER 0x92fb37c8
1336 #define CPUFAMILY_ARM_HURRICANE 0x67ceee93
1337 #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6
1338 #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f
1339 #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2
1340 #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3
1341 
getHostCPUName()1342 StringRef sys::getHostCPUName() {
1343   uint32_t Family;
1344   size_t Length = sizeof(Family);
1345   sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0);
1346 
1347   switch (Family) {
1348   case CPUFAMILY_ARM_SWIFT:
1349     return "swift";
1350   case CPUFAMILY_ARM_CYCLONE:
1351     return "apple-a7";
1352   case CPUFAMILY_ARM_TYPHOON:
1353     return "apple-a8";
1354   case CPUFAMILY_ARM_TWISTER:
1355     return "apple-a9";
1356   case CPUFAMILY_ARM_HURRICANE:
1357     return "apple-a10";
1358   case CPUFAMILY_ARM_MONSOON_MISTRAL:
1359     return "apple-a11";
1360   case CPUFAMILY_ARM_VORTEX_TEMPEST:
1361     return "apple-a12";
1362   case CPUFAMILY_ARM_LIGHTNING_THUNDER:
1363     return "apple-a13";
1364   case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
1365     return "apple-m1";
1366   default:
1367     // Default to the newest CPU we know about.
1368     return "apple-m1";
1369   }
1370 }
1371 #elif defined(_AIX)
getHostCPUName()1372 StringRef sys::getHostCPUName() {
1373   switch (_system_configuration.implementation) {
1374   case POWER_4:
1375     if (_system_configuration.version == PV_4_3)
1376       return "970";
1377     return "pwr4";
1378   case POWER_5:
1379     if (_system_configuration.version == PV_5)
1380       return "pwr5";
1381     return "pwr5x";
1382   case POWER_6:
1383     if (_system_configuration.version == PV_6_Compat)
1384       return "pwr6";
1385     return "pwr6x";
1386   case POWER_7:
1387     return "pwr7";
1388   case POWER_8:
1389     return "pwr8";
1390   case POWER_9:
1391     return "pwr9";
1392 // TODO: simplify this once the macro is available in all OS levels.
1393 #ifdef POWER_10
1394   case POWER_10:
1395 #else
1396   case 0x40000:
1397 #endif
1398     return "pwr10";
1399   default:
1400     return "generic";
1401   }
1402 }
1403 #elif defined(__riscv)
getHostCPUName()1404 StringRef sys::getHostCPUName() {
1405 #if defined(__linux__)
1406   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1407   StringRef Content = P ? P->getBuffer() : "";
1408   return detail::getHostCPUNameForRISCV(Content);
1409 #else
1410 #if __riscv_xlen == 64
1411   return "generic-rv64";
1412 #elif __riscv_xlen == 32
1413   return "generic-rv32";
1414 #else
1415 #error "Unhandled value of __riscv_xlen"
1416 #endif
1417 #endif
1418 }
1419 #elif defined(__sparc__)
1420 #if defined(__linux__)
getHostCPUNameForSPARC(StringRef ProcCpuinfoContent)1421 StringRef sys::detail::getHostCPUNameForSPARC(StringRef ProcCpuinfoContent) {
1422   SmallVector<StringRef> Lines;
1423   ProcCpuinfoContent.split(Lines, "\n");
1424 
1425   // Look for cpu line to determine cpu name
1426   StringRef Cpu;
1427   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
1428     if (Lines[I].startswith("cpu")) {
1429       Cpu = Lines[I].substr(5).ltrim("\t :");
1430       break;
1431     }
1432   }
1433 
1434   return StringSwitch<const char *>(Cpu)
1435       .StartsWith("SuperSparc", "supersparc")
1436       .StartsWith("HyperSparc", "hypersparc")
1437       .StartsWith("SpitFire", "ultrasparc")
1438       .StartsWith("BlackBird", "ultrasparc")
1439       .StartsWith("Sabre", " ultrasparc")
1440       .StartsWith("Hummingbird", "ultrasparc")
1441       .StartsWith("Cheetah", "ultrasparc3")
1442       .StartsWith("Jalapeno", "ultrasparc3")
1443       .StartsWith("Jaguar", "ultrasparc3")
1444       .StartsWith("Panther", "ultrasparc3")
1445       .StartsWith("Serrano", "ultrasparc3")
1446       .StartsWith("UltraSparc T1", "niagara")
1447       .StartsWith("UltraSparc T2", "niagara2")
1448       .StartsWith("UltraSparc T3", "niagara3")
1449       .StartsWith("UltraSparc T4", "niagara4")
1450       .StartsWith("UltraSparc T5", "niagara4")
1451       .StartsWith("LEON", "leon3")
1452       // niagara7/m8 not supported by LLVM yet.
1453       .StartsWith("SPARC-M7", "niagara4" /* "niagara7" */)
1454       .StartsWith("SPARC-S7", "niagara4" /* "niagara7" */)
1455       .StartsWith("SPARC-M8", "niagara4" /* "m8" */)
1456       .Default("generic");
1457 }
1458 #endif
1459 
getHostCPUName()1460 StringRef sys::getHostCPUName() {
1461 #if defined(__linux__)
1462   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1463   StringRef Content = P ? P->getBuffer() : "";
1464   return detail::getHostCPUNameForSPARC(Content);
1465 #elif defined(__sun__) && defined(__svr4__)
1466   char *buf = NULL;
1467   kstat_ctl_t *kc;
1468   kstat_t *ksp;
1469   kstat_named_t *brand = NULL;
1470 
1471   kc = kstat_open();
1472   if (kc != NULL) {
1473     ksp = kstat_lookup(kc, const_cast<char *>("cpu_info"), -1, NULL);
1474     if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 &&
1475         ksp->ks_type == KSTAT_TYPE_NAMED)
1476       brand =
1477           (kstat_named_t *)kstat_data_lookup(ksp, const_cast<char *>("brand"));
1478     if (brand != NULL && brand->data_type == KSTAT_DATA_STRING)
1479       buf = KSTAT_NAMED_STR_PTR(brand);
1480   }
1481   kstat_close(kc);
1482 
1483   return StringSwitch<const char *>(buf)
1484       .Case("TMS390S10", "supersparc") // Texas Instruments microSPARC I
1485       .Case("TMS390Z50", "supersparc") // Texas Instruments SuperSPARC I
1486       .Case("TMS390Z55",
1487             "supersparc") // Texas Instruments SuperSPARC I with SuperCache
1488       .Case("MB86904", "supersparc") // Fujitsu microSPARC II
1489       .Case("MB86907", "supersparc") // Fujitsu TurboSPARC
1490       .Case("RT623", "hypersparc")   // Ross hyperSPARC
1491       .Case("RT625", "hypersparc")
1492       .Case("RT626", "hypersparc")
1493       .Case("UltraSPARC-I", "ultrasparc")
1494       .Case("UltraSPARC-II", "ultrasparc")
1495       .Case("UltraSPARC-IIe", "ultrasparc")
1496       .Case("UltraSPARC-IIi", "ultrasparc")
1497       .Case("SPARC64-III", "ultrasparc")
1498       .Case("SPARC64-IV", "ultrasparc")
1499       .Case("UltraSPARC-III", "ultrasparc3")
1500       .Case("UltraSPARC-III+", "ultrasparc3")
1501       .Case("UltraSPARC-IIIi", "ultrasparc3")
1502       .Case("UltraSPARC-IIIi+", "ultrasparc3")
1503       .Case("UltraSPARC-IV", "ultrasparc3")
1504       .Case("UltraSPARC-IV+", "ultrasparc3")
1505       .Case("SPARC64-V", "ultrasparc3")
1506       .Case("SPARC64-VI", "ultrasparc3")
1507       .Case("SPARC64-VII", "ultrasparc3")
1508       .Case("UltraSPARC-T1", "niagara")
1509       .Case("UltraSPARC-T2", "niagara2")
1510       .Case("UltraSPARC-T2", "niagara2")
1511       .Case("UltraSPARC-T2+", "niagara2")
1512       .Case("SPARC-T3", "niagara3")
1513       .Case("SPARC-T4", "niagara4")
1514       .Case("SPARC-T5", "niagara4")
1515       // niagara7/m8 not supported by LLVM yet.
1516       .Case("SPARC-M7", "niagara4" /* "niagara7" */)
1517       .Case("SPARC-S7", "niagara4" /* "niagara7" */)
1518       .Case("SPARC-M8", "niagara4" /* "m8" */)
1519       .Default("generic");
1520 #else
1521   return "generic";
1522 #endif
1523 }
1524 #else
getHostCPUName()1525 StringRef sys::getHostCPUName() { return "generic"; }
1526 namespace llvm {
1527 namespace sys {
1528 namespace detail {
1529 namespace x86 {
1530 
getVendorSignature(unsigned * MaxLeaf)1531 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
1532   return VendorSignatures::UNKNOWN;
1533 }
1534 
1535 } // namespace x86
1536 } // namespace detail
1537 } // namespace sys
1538 } // namespace llvm
1539 #endif
1540 
1541 #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
1542 // On Linux, the number of physical cores can be computed from /proc/cpuinfo,
1543 // using the number of unique physical/core id pairs. The following
1544 // implementation reads the /proc/cpuinfo format on an x86_64 system.
computeHostNumPhysicalCores()1545 int computeHostNumPhysicalCores() {
1546   // Enabled represents the number of physical id/core id pairs with at least
1547   // one processor id enabled by the CPU affinity mask.
1548   cpu_set_t Affinity, Enabled;
1549   if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
1550     return -1;
1551   CPU_ZERO(&Enabled);
1552 
1553   // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
1554   // mmapped because it appears to have 0 size.
1555   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
1556       llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
1557   if (std::error_code EC = Text.getError()) {
1558     llvm::errs() << "Can't read "
1559                  << "/proc/cpuinfo: " << EC.message() << "\n";
1560     return -1;
1561   }
1562   SmallVector<StringRef, 8> strs;
1563   (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
1564                              /*KeepEmpty=*/false);
1565   int CurProcessor = -1;
1566   int CurPhysicalId = -1;
1567   int CurSiblings = -1;
1568   int CurCoreId = -1;
1569   for (StringRef Line : strs) {
1570     std::pair<StringRef, StringRef> Data = Line.split(':');
1571     auto Name = Data.first.trim();
1572     auto Val = Data.second.trim();
1573     // These fields are available if the kernel is configured with CONFIG_SMP.
1574     if (Name == "processor")
1575       Val.getAsInteger(10, CurProcessor);
1576     else if (Name == "physical id")
1577       Val.getAsInteger(10, CurPhysicalId);
1578     else if (Name == "siblings")
1579       Val.getAsInteger(10, CurSiblings);
1580     else if (Name == "core id") {
1581       Val.getAsInteger(10, CurCoreId);
1582       // The processor id corresponds to an index into cpu_set_t.
1583       if (CPU_ISSET(CurProcessor, &Affinity))
1584         CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
1585     }
1586   }
1587   return CPU_COUNT(&Enabled);
1588 }
1589 #elif defined(__linux__) && defined(__powerpc__)
/// Linux/PowerPC implementation: report the number of CPUs in this process's
/// affinity mask.
///
/// \returns The CPU count, or -1 if the affinity mask cannot be obtained.
int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPU's on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
  const int MaxCPUs = 2048;
  cpu_set_t *DynAffinity = CPU_ALLOC(MaxCPUs);
  if (DynAffinity == nullptr)
    return -1;

  const size_t DynAffinitySize = CPU_ALLOC_SIZE(MaxCPUs);
  int NumCPUs = -1;
  // A dynamically sized set must be counted with the _S variant; plain
  // CPU_COUNT only looks at the first sizeof(cpu_set_t) bytes.
  if (sched_getaffinity(0, DynAffinitySize, DynAffinity) == 0)
    NumCPUs = CPU_COUNT_S(DynAffinitySize, DynAffinity);

  // Release the set on both the success and the failure path.
  CPU_FREE(DynAffinity);
  return NumCPUs;
}
1608 #elif defined(__linux__) && defined(__s390x__)
/// Linux/s390x implementation: report the value of
/// sysconf(_SC_NPROCESSORS_ONLN), converted to int.
int computeHostNumPhysicalCores() {
  const long OnlineProcessors = sysconf(_SC_NPROCESSORS_ONLN);
  return static_cast<int>(OnlineProcessors);
}
1610 #elif defined(__APPLE__)
1611 // Gets the number of *physical cores* on the machine.
computeHostNumPhysicalCores()1612 int computeHostNumPhysicalCores() {
1613   uint32_t count;
1614   size_t len = sizeof(count);
1615   sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0);
1616   if (count < 1) {
1617     int nm[2];
1618     nm[0] = CTL_HW;
1619     nm[1] = HW_AVAILCPU;
1620     sysctl(nm, 2, &count, &len, NULL, 0);
1621     if (count < 1)
1622       return -1;
1623   }
1624   return count;
1625 }
1626 #elif defined(__MVS__)
/// z/OS implementation: follow the PSA -> CVT -> CSD pointer chain and return
/// the number of online standard CPs recorded in the CSD.
int computeHostNumPhysicalCores() {
  enum {
    // Byte offset of the pointer to the Communications Vector Table (CVT) in
    // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
    // will be zero-extended to uintptr_t.
    FLCCVT = 16,
    // Byte offset of the pointer to the Common System Data Area (CSD) in the
    // CVT. The table entry is a 31-bit pointer and will be zero-extended to
    // uintptr_t.
    CVTCSD = 660,
    // Byte offset to the number of live CPs in the LPAR, stored as a signed
    // 32-bit value in the table.
    CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
  };

  // The PSA is addressed from location 0.
  char *PSA = 0;

  // Each hop reads a 32-bit table entry and widens it to a native pointer.
  const unsigned int CVTAddress = reinterpret_cast<unsigned int &>(PSA[FLCCVT]);
  char *CVT = reinterpret_cast<char *>(static_cast<uintptr_t>(CVTAddress));

  const unsigned int CSDAddress = reinterpret_cast<unsigned int &>(CVT[CVTCSD]);
  char *CSD = reinterpret_cast<char *>(static_cast<uintptr_t>(CSDAddress));

  return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
}
1648 #elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0
// Forward declaration only; the Windows implementation is defined in
// llvm/lib/Support/Windows/Threading.inc.
int computeHostNumPhysicalCores();
1651 #else
// Fallback for systems without a dedicated implementation above: the number
// of physical cores is unknown, which is signalled by returning -1.
static int computeHostNumPhysicalCores() {
  const int UnknownCoreCount = -1;
  return UnknownCoreCount;
}
1654 #endif
1655 
getHostNumPhysicalCores()1656 int sys::getHostNumPhysicalCores() {
1657   static int NumCores = computeHostNumPhysicalCores();
1658   return NumCores;
1659 }
1660 
1661 #if defined(__i386__) || defined(_M_IX86) || \
1662     defined(__x86_64__) || defined(_M_X64)
getHostCPUFeatures(StringMap<bool> & Features)1663 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1664   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1665   unsigned MaxLevel;
1666 
1667   if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1)
1668     return false;
1669 
1670   getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
1671 
1672   Features["cx8"]    = (EDX >>  8) & 1;
1673   Features["cmov"]   = (EDX >> 15) & 1;
1674   Features["mmx"]    = (EDX >> 23) & 1;
1675   Features["fxsr"]   = (EDX >> 24) & 1;
1676   Features["sse"]    = (EDX >> 25) & 1;
1677   Features["sse2"]   = (EDX >> 26) & 1;
1678 
1679   Features["sse3"]   = (ECX >>  0) & 1;
1680   Features["pclmul"] = (ECX >>  1) & 1;
1681   Features["ssse3"]  = (ECX >>  9) & 1;
1682   Features["cx16"]   = (ECX >> 13) & 1;
1683   Features["sse4.1"] = (ECX >> 19) & 1;
1684   Features["sse4.2"] = (ECX >> 20) & 1;
1685   Features["crc32"]  = Features["sse4.2"];
1686   Features["movbe"]  = (ECX >> 22) & 1;
1687   Features["popcnt"] = (ECX >> 23) & 1;
1688   Features["aes"]    = (ECX >> 25) & 1;
1689   Features["rdrnd"]  = (ECX >> 30) & 1;
1690 
1691   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1692   // indicates that the AVX registers will be saved and restored on context
1693   // switch, then we have full AVX support.
1694   bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
1695   bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6);
1696 #if defined(__APPLE__)
1697   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1698   // save the AVX512 context if we use AVX512 instructions, even the bit is not
1699   // set right now.
1700   bool HasAVX512Save = true;
1701 #else
1702   // AVX512 requires additional context to be saved by the OS.
1703   bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
1704 #endif
1705   // AMX requires additional context to be saved by the OS.
1706   const unsigned AMXBits = (1 << 17) | (1 << 18);
1707   bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
1708 
1709   Features["avx"]   = HasAVXSave;
1710   Features["fma"]   = ((ECX >> 12) & 1) && HasAVXSave;
1711   // Only enable XSAVE if OS has enabled support for saving YMM state.
1712   Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave;
1713   Features["f16c"]  = ((ECX >> 29) & 1) && HasAVXSave;
1714 
1715   unsigned MaxExtLevel;
1716   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1717 
1718   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1719                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1720   Features["sahf"]   = HasExtLeaf1 && ((ECX >>  0) & 1);
1721   Features["lzcnt"]  = HasExtLeaf1 && ((ECX >>  5) & 1);
1722   Features["sse4a"]  = HasExtLeaf1 && ((ECX >>  6) & 1);
1723   Features["prfchw"] = HasExtLeaf1 && ((ECX >>  8) & 1);
1724   Features["xop"]    = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
1725   Features["lwp"]    = HasExtLeaf1 && ((ECX >> 15) & 1);
1726   Features["fma4"]   = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
1727   Features["tbm"]    = HasExtLeaf1 && ((ECX >> 21) & 1);
1728   Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
1729 
1730   Features["64bit"]  = HasExtLeaf1 && ((EDX >> 29) & 1);
1731 
1732   // Miscellaneous memory related features, detected by
1733   // using the 0x80000008 leaf of the CPUID instruction
1734   bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
1735                      !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
1736   Features["clzero"]   = HasExtLeaf8 && ((EBX >> 0) & 1);
1737   Features["rdpru"]    = HasExtLeaf8 && ((EBX >> 4) & 1);
1738   Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);
1739 
1740   bool HasLeaf7 =
1741       MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1742 
1743   Features["fsgsbase"]   = HasLeaf7 && ((EBX >>  0) & 1);
1744   Features["sgx"]        = HasLeaf7 && ((EBX >>  2) & 1);
1745   Features["bmi"]        = HasLeaf7 && ((EBX >>  3) & 1);
1746   // AVX2 is only supported if we have the OS save support from AVX.
1747   Features["avx2"]       = HasLeaf7 && ((EBX >>  5) & 1) && HasAVXSave;
1748   Features["bmi2"]       = HasLeaf7 && ((EBX >>  8) & 1);
1749   Features["invpcid"]    = HasLeaf7 && ((EBX >> 10) & 1);
1750   Features["rtm"]        = HasLeaf7 && ((EBX >> 11) & 1);
1751   // AVX512 is only supported if the OS supports the context save for it.
1752   Features["avx512f"]    = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
1753   Features["avx512dq"]   = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
1754   Features["rdseed"]     = HasLeaf7 && ((EBX >> 18) & 1);
1755   Features["adx"]        = HasLeaf7 && ((EBX >> 19) & 1);
1756   Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
1757   Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
1758   Features["clwb"]       = HasLeaf7 && ((EBX >> 24) & 1);
1759   Features["avx512pf"]   = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
1760   Features["avx512er"]   = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
1761   Features["avx512cd"]   = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
1762   Features["sha"]        = HasLeaf7 && ((EBX >> 29) & 1);
1763   Features["avx512bw"]   = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
1764   Features["avx512vl"]   = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
1765 
1766   Features["prefetchwt1"]     = HasLeaf7 && ((ECX >>  0) & 1);
1767   Features["avx512vbmi"]      = HasLeaf7 && ((ECX >>  1) & 1) && HasAVX512Save;
1768   Features["pku"]             = HasLeaf7 && ((ECX >>  4) & 1);
1769   Features["waitpkg"]         = HasLeaf7 && ((ECX >>  5) & 1);
1770   Features["avx512vbmi2"]     = HasLeaf7 && ((ECX >>  6) & 1) && HasAVX512Save;
1771   Features["shstk"]           = HasLeaf7 && ((ECX >>  7) & 1);
1772   Features["gfni"]            = HasLeaf7 && ((ECX >>  8) & 1);
1773   Features["vaes"]            = HasLeaf7 && ((ECX >>  9) & 1) && HasAVXSave;
1774   Features["vpclmulqdq"]      = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
1775   Features["avx512vnni"]      = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
1776   Features["avx512bitalg"]    = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
1777   Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
1778   Features["rdpid"]           = HasLeaf7 && ((ECX >> 22) & 1);
1779   Features["kl"]              = HasLeaf7 && ((ECX >> 23) & 1); // key locker
1780   Features["cldemote"]        = HasLeaf7 && ((ECX >> 25) & 1);
1781   Features["movdiri"]         = HasLeaf7 && ((ECX >> 27) & 1);
1782   Features["movdir64b"]       = HasLeaf7 && ((ECX >> 28) & 1);
1783   Features["enqcmd"]          = HasLeaf7 && ((ECX >> 29) & 1);
1784 
1785   Features["uintr"]           = HasLeaf7 && ((EDX >> 5) & 1);
1786   Features["avx512vp2intersect"] =
1787       HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
1788   Features["serialize"]       = HasLeaf7 && ((EDX >> 14) & 1);
1789   Features["tsxldtrk"]        = HasLeaf7 && ((EDX >> 16) & 1);
  // There are two CPUID leaves which provide information associated with the
  // pconfig instruction:
  // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via
  // bit 18 of EDX), while the EAX=0x1b leaf returns information on the
  // availability of specific pconfig leaves.
  // The target feature here only refers to the first of these two.
  // Users might need to check for the availability of specific pconfig
  // leaves using cpuid, since that information is ignored while
  // detecting features using the "-march=native" flag.
  // For more info, see X86 ISA docs.
1800   Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
1801   Features["amx-bf16"]   = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
1802   Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save;
1803   Features["amx-tile"]   = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
1804   Features["amx-int8"]   = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
1805   bool HasLeaf7Subleaf1 =
1806       MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1807   Features["avxvnni"]    = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
1808   Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
1809   Features["hreset"]     = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
1810 
1811   bool HasLeafD = MaxLevel >= 0xd &&
1812                   !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
1813 
1814   // Only enable XSAVE if OS has enabled support for saving YMM state.
1815   Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
1816   Features["xsavec"]   = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
1817   Features["xsaves"]   = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;
1818 
1819   bool HasLeaf14 = MaxLevel >= 0x14 &&
1820                   !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
1821 
1822   Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);
1823 
1824   bool HasLeaf19 =
1825       MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
1826   Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);
1827 
1828   return true;
1829 }
1830 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
getHostCPUFeatures(StringMap<bool> & Features)1831 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1832   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1833   if (!P)
1834     return false;
1835 
1836   SmallVector<StringRef, 32> Lines;
1837   P->getBuffer().split(Lines, "\n");
1838 
1839   SmallVector<StringRef, 32> CPUFeatures;
1840 
1841   // Look for the CPU features.
1842   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1843     if (Lines[I].startswith("Features")) {
1844       Lines[I].split(CPUFeatures, ' ');
1845       break;
1846     }
1847 
1848 #if defined(__aarch64__)
1849   // Keep track of which crypto features we have seen
1850   enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
1851   uint32_t crypto = 0;
1852 #endif
1853 
1854   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
1855     StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
1856 #if defined(__aarch64__)
1857                                    .Case("asimd", "neon")
1858                                    .Case("fp", "fp-armv8")
1859                                    .Case("crc32", "crc")
1860                                    .Case("atomics", "lse")
1861                                    .Case("sve", "sve")
1862                                    .Case("sve2", "sve2")
1863 #else
1864                                    .Case("half", "fp16")
1865                                    .Case("neon", "neon")
1866                                    .Case("vfpv3", "vfp3")
1867                                    .Case("vfpv3d16", "d16")
1868                                    .Case("vfpv4", "vfp4")
1869                                    .Case("idiva", "hwdiv-arm")
1870                                    .Case("idivt", "hwdiv")
1871 #endif
1872                                    .Default("");
1873 
1874 #if defined(__aarch64__)
1875     // We need to check crypto separately since we need all of the crypto
1876     // extensions to enable the subtarget feature
1877     if (CPUFeatures[I] == "aes")
1878       crypto |= CAP_AES;
1879     else if (CPUFeatures[I] == "pmull")
1880       crypto |= CAP_PMULL;
1881     else if (CPUFeatures[I] == "sha1")
1882       crypto |= CAP_SHA1;
1883     else if (CPUFeatures[I] == "sha2")
1884       crypto |= CAP_SHA2;
1885 #endif
1886 
1887     if (LLVMFeatureStr != "")
1888       Features[LLVMFeatureStr] = true;
1889   }
1890 
1891 #if defined(__aarch64__)
1892   // If we have all crypto bits we can add the feature
1893   if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
1894     Features["crypto"] = true;
1895 #endif
1896 
1897   return true;
1898 }
1899 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
getHostCPUFeatures(StringMap<bool> & Features)1900 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1901   if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
1902     Features["neon"] = true;
1903   if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
1904     Features["crc"] = true;
1905   if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
1906     Features["crypto"] = true;
1907 
1908   return true;
1909 }
1910 #else
getHostCPUFeatures(StringMap<bool> & Features)1911 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
1912 #endif
1913 
getProcessTriple()1914 std::string sys::getProcessTriple() {
1915   std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
1916   Triple PT(Triple::normalize(TargetTripleString));
1917 
1918   if (sizeof(void *) == 8 && PT.isArch32Bit())
1919     PT = PT.get64BitArchVariant();
1920   if (sizeof(void *) == 4 && PT.isArch64Bit())
1921     PT = PT.get32BitArchVariant();
1922 
1923   return PT.str();
1924 }
1925