1 //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the operating system Host concept.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/Support/Host.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/ADT/Triple.h"
19 #include "llvm/Config/llvm-config.h"
20 #include "llvm/Support/MemoryBuffer.h"
21 #include "llvm/Support/X86TargetParser.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include <string.h>
24
25 // Include the platform-specific parts of this class.
26 #ifdef LLVM_ON_UNIX
27 #include "Unix/Host.inc"
28 #include <sched.h>
29 #endif
30 #ifdef _WIN32
31 #include "Windows/Host.inc"
32 #endif
33 #ifdef _MSC_VER
34 #include <intrin.h>
35 #endif
36 #ifdef __MVS__
37 #include "llvm/Support/BCD.h"
38 #endif
39 #if defined(__APPLE__)
40 #include <mach/host_info.h>
41 #include <mach/mach.h>
42 #include <mach/mach_host.h>
43 #include <mach/machine.h>
44 #include <sys/param.h>
45 #include <sys/sysctl.h>
46 #endif
47 #ifdef _AIX
48 #include <sys/systemcfg.h>
49 #endif
50 #if defined(__sun__) && defined(__svr4__)
51 #include <kstat.h>
52 #endif
53
54 #define DEBUG_TYPE "host-detection"
55
56 //===----------------------------------------------------------------------===//
57 //
58 // Implementations of the CPU detection routines
59 //
60 //===----------------------------------------------------------------------===//
61
62 using namespace llvm;
63
64 static std::unique_ptr<llvm::MemoryBuffer>
getProcCpuinfoContent()65 LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
66 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
67 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
68 if (std::error_code EC = Text.getError()) {
69 llvm::errs() << "Can't read "
70 << "/proc/cpuinfo: " << EC.message() << "\n";
71 return nullptr;
72 }
73 return std::move(*Text);
74 }
75
getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent)76 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
77 // Access to the Processor Version Register (PVR) on PowerPC is privileged,
78 // and so we must use an operating-system interface to determine the current
79 // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
80 const char *generic = "generic";
81
82 // The cpu line is second (after the 'processor: 0' line), so if this
83 // buffer is too small then something has changed (or is wrong).
84 StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
85 StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();
86
87 StringRef::const_iterator CIP = CPUInfoStart;
88
89 StringRef::const_iterator CPUStart = nullptr;
90 size_t CPULen = 0;
91
92 // We need to find the first line which starts with cpu, spaces, and a colon.
93 // After the colon, there may be some additional spaces and then the cpu type.
94 while (CIP < CPUInfoEnd && CPUStart == nullptr) {
95 if (CIP < CPUInfoEnd && *CIP == '\n')
96 ++CIP;
97
98 if (CIP < CPUInfoEnd && *CIP == 'c') {
99 ++CIP;
100 if (CIP < CPUInfoEnd && *CIP == 'p') {
101 ++CIP;
102 if (CIP < CPUInfoEnd && *CIP == 'u') {
103 ++CIP;
104 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
105 ++CIP;
106
107 if (CIP < CPUInfoEnd && *CIP == ':') {
108 ++CIP;
109 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
110 ++CIP;
111
112 if (CIP < CPUInfoEnd) {
113 CPUStart = CIP;
114 while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
115 *CIP != ',' && *CIP != '\n'))
116 ++CIP;
117 CPULen = CIP - CPUStart;
118 }
119 }
120 }
121 }
122 }
123
124 if (CPUStart == nullptr)
125 while (CIP < CPUInfoEnd && *CIP != '\n')
126 ++CIP;
127 }
128
129 if (CPUStart == nullptr)
130 return generic;
131
132 return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
133 .Case("604e", "604e")
134 .Case("604", "604")
135 .Case("7400", "7400")
136 .Case("7410", "7400")
137 .Case("7447", "7400")
138 .Case("7455", "7450")
139 .Case("G4", "g4")
140 .Case("POWER4", "970")
141 .Case("PPC970FX", "970")
142 .Case("PPC970MP", "970")
143 .Case("G5", "g5")
144 .Case("POWER5", "g5")
145 .Case("A2", "a2")
146 .Case("POWER6", "pwr6")
147 .Case("POWER7", "pwr7")
148 .Case("POWER8", "pwr8")
149 .Case("POWER8E", "pwr8")
150 .Case("POWER8NVL", "pwr8")
151 .Case("POWER9", "pwr9")
152 .Case("POWER10", "pwr10")
153 // FIXME: If we get a simulator or machine with the capabilities of
154 // mcpu=future, we should revisit this and add the name reported by the
155 // simulator/machine.
156 .Default(generic);
157 }
158
getHostCPUNameForARM(StringRef ProcCpuinfoContent)159 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
160 // The cpuid register on arm is not accessible from user space. On Linux,
161 // it is exposed through the /proc/cpuinfo file.
162
163 // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
164 // in all cases.
165 SmallVector<StringRef, 32> Lines;
166 ProcCpuinfoContent.split(Lines, "\n");
167
168 // Look for the CPU implementer line.
169 StringRef Implementer;
170 StringRef Hardware;
171 StringRef Part;
172 for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
173 if (Lines[I].startswith("CPU implementer"))
174 Implementer = Lines[I].substr(15).ltrim("\t :");
175 if (Lines[I].startswith("Hardware"))
176 Hardware = Lines[I].substr(8).ltrim("\t :");
177 if (Lines[I].startswith("CPU part"))
178 Part = Lines[I].substr(8).ltrim("\t :");
179 }
180
181 if (Implementer == "0x41") { // ARM Ltd.
182 // MSM8992/8994 may give cpu part for the core that the kernel is running on,
183 // which is undeterministic and wrong. Always return cortex-a53 for these SoC.
184 if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996"))
185 return "cortex-a53";
186
187
188 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
189 // values correspond to the "Part number" in the CP15/c0 register. The
190 // contents are specified in the various processor manuals.
191 // This corresponds to the Main ID Register in Technical Reference Manuals.
192 // and is used in programs like sys-utils
193 return StringSwitch<const char *>(Part)
194 .Case("0x926", "arm926ej-s")
195 .Case("0xb02", "mpcore")
196 .Case("0xb36", "arm1136j-s")
197 .Case("0xb56", "arm1156t2-s")
198 .Case("0xb76", "arm1176jz-s")
199 .Case("0xc08", "cortex-a8")
200 .Case("0xc09", "cortex-a9")
201 .Case("0xc0f", "cortex-a15")
202 .Case("0xc20", "cortex-m0")
203 .Case("0xc23", "cortex-m3")
204 .Case("0xc24", "cortex-m4")
205 .Case("0xd22", "cortex-m55")
206 .Case("0xd02", "cortex-a34")
207 .Case("0xd04", "cortex-a35")
208 .Case("0xd03", "cortex-a53")
209 .Case("0xd07", "cortex-a57")
210 .Case("0xd08", "cortex-a72")
211 .Case("0xd09", "cortex-a73")
212 .Case("0xd0a", "cortex-a75")
213 .Case("0xd0b", "cortex-a76")
214 .Case("0xd0d", "cortex-a77")
215 .Case("0xd41", "cortex-a78")
216 .Case("0xd44", "cortex-x1")
217 .Case("0xd4c", "cortex-x1c")
218 .Case("0xd0c", "neoverse-n1")
219 .Case("0xd49", "neoverse-n2")
220 .Case("0xd40", "neoverse-v1")
221 .Default("generic");
222 }
223
224 if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
225 return StringSwitch<const char *>(Part)
226 .Case("0x516", "thunderx2t99")
227 .Case("0x0516", "thunderx2t99")
228 .Case("0xaf", "thunderx2t99")
229 .Case("0x0af", "thunderx2t99")
230 .Case("0xa1", "thunderxt88")
231 .Case("0x0a1", "thunderxt88")
232 .Default("generic");
233 }
234
235 if (Implementer == "0x46") { // Fujitsu Ltd.
236 return StringSwitch<const char *>(Part)
237 .Case("0x001", "a64fx")
238 .Default("generic");
239 }
240
241 if (Implementer == "0x4e") { // NVIDIA Corporation
242 return StringSwitch<const char *>(Part)
243 .Case("0x004", "carmel")
244 .Default("generic");
245 }
246
247 if (Implementer == "0x48") // HiSilicon Technologies, Inc.
248 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
249 // values correspond to the "Part number" in the CP15/c0 register. The
250 // contents are specified in the various processor manuals.
251 return StringSwitch<const char *>(Part)
252 .Case("0xd01", "tsv110")
253 .Default("generic");
254
255 if (Implementer == "0x51") // Qualcomm Technologies, Inc.
256 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
257 // values correspond to the "Part number" in the CP15/c0 register. The
258 // contents are specified in the various processor manuals.
259 return StringSwitch<const char *>(Part)
260 .Case("0x06f", "krait") // APQ8064
261 .Case("0x201", "kryo")
262 .Case("0x205", "kryo")
263 .Case("0x211", "kryo")
264 .Case("0x800", "cortex-a73") // Kryo 2xx Gold
265 .Case("0x801", "cortex-a73") // Kryo 2xx Silver
266 .Case("0x802", "cortex-a75") // Kryo 3xx Gold
267 .Case("0x803", "cortex-a75") // Kryo 3xx Silver
268 .Case("0x804", "cortex-a76") // Kryo 4xx Gold
269 .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
270 .Case("0xc00", "falkor")
271 .Case("0xc01", "saphira")
272 .Default("generic");
273 if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
274 // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
275 // any predictive pattern across variants and parts.
276 unsigned Variant = 0, Part = 0;
277
278 // Look for the CPU variant line, whose value is a 1 digit hexadecimal
279 // number, corresponding to the Variant bits in the CP15/C0 register.
280 for (auto I : Lines)
281 if (I.consume_front("CPU variant"))
282 I.ltrim("\t :").getAsInteger(0, Variant);
283
284 // Look for the CPU part line, whose value is a 3 digit hexadecimal
285 // number, corresponding to the PartNum bits in the CP15/C0 register.
286 for (auto I : Lines)
287 if (I.consume_front("CPU part"))
288 I.ltrim("\t :").getAsInteger(0, Part);
289
290 unsigned Exynos = (Variant << 12) | Part;
291 switch (Exynos) {
292 default:
293 // Default by falling through to Exynos M3.
294 LLVM_FALLTHROUGH;
295 case 0x1002:
296 return "exynos-m3";
297 case 0x1003:
298 return "exynos-m4";
299 }
300 }
301
302 if (Implementer == "0xc0") { // Ampere Computing
303 return StringSwitch<const char *>(Part)
304 .Case("0xac3", "ampere1")
305 .Default("generic");
306 }
307
308 return "generic";
309 }
310
311 namespace {
getCPUNameFromS390Model(unsigned int Id,bool HaveVectorSupport)312 StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
313 switch (Id) {
314 case 2064: // z900 not supported by LLVM
315 case 2066:
316 case 2084: // z990 not supported by LLVM
317 case 2086:
318 case 2094: // z9-109 not supported by LLVM
319 case 2096:
320 return "generic";
321 case 2097:
322 case 2098:
323 return "z10";
324 case 2817:
325 case 2818:
326 return "z196";
327 case 2827:
328 case 2828:
329 return "zEC12";
330 case 2964:
331 case 2965:
332 return HaveVectorSupport? "z13" : "zEC12";
333 case 3906:
334 case 3907:
335 return HaveVectorSupport? "z14" : "zEC12";
336 case 8561:
337 case 8562:
338 return HaveVectorSupport? "z15" : "zEC12";
339 case 3931:
340 case 3932:
341 default:
342 return HaveVectorSupport? "z16" : "zEC12";
343 }
344 }
345 } // end anonymous namespace
346
getHostCPUNameForS390x(StringRef ProcCpuinfoContent)347 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
348 // STIDP is a privileged operation, so use /proc/cpuinfo instead.
349
350 // The "processor 0:" line comes after a fair amount of other information,
351 // including a cache breakdown, but this should be plenty.
352 SmallVector<StringRef, 32> Lines;
353 ProcCpuinfoContent.split(Lines, "\n");
354
355 // Look for the CPU features.
356 SmallVector<StringRef, 32> CPUFeatures;
357 for (unsigned I = 0, E = Lines.size(); I != E; ++I)
358 if (Lines[I].startswith("features")) {
359 size_t Pos = Lines[I].find(':');
360 if (Pos != StringRef::npos) {
361 Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
362 break;
363 }
364 }
365
366 // We need to check for the presence of vector support independently of
367 // the machine type, since we may only use the vector register set when
368 // supported by the kernel (and hypervisor).
369 bool HaveVectorSupport = false;
370 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
371 if (CPUFeatures[I] == "vx")
372 HaveVectorSupport = true;
373 }
374
375 // Now check the processor machine type.
376 for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
377 if (Lines[I].startswith("processor ")) {
378 size_t Pos = Lines[I].find("machine = ");
379 if (Pos != StringRef::npos) {
380 Pos += sizeof("machine = ") - 1;
381 unsigned int Id;
382 if (!Lines[I].drop_front(Pos).getAsInteger(10, Id))
383 return getCPUNameFromS390Model(Id, HaveVectorSupport);
384 }
385 break;
386 }
387 }
388
389 return "generic";
390 }
391
getHostCPUNameForRISCV(StringRef ProcCpuinfoContent)392 StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) {
393 // There are 24 lines in /proc/cpuinfo
394 SmallVector<StringRef> Lines;
395 ProcCpuinfoContent.split(Lines, "\n");
396
397 // Look for uarch line to determine cpu name
398 StringRef UArch;
399 for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
400 if (Lines[I].startswith("uarch")) {
401 UArch = Lines[I].substr(5).ltrim("\t :");
402 break;
403 }
404 }
405
406 return StringSwitch<const char *>(UArch)
407 .Case("sifive,u74-mc", "sifive-u74")
408 .Case("sifive,bullet0", "sifive-u74")
409 .Default("generic");
410 }
411
getHostCPUNameForBPF()412 StringRef sys::detail::getHostCPUNameForBPF() {
413 #if !defined(__linux__) || !defined(__x86_64__)
414 return "generic";
415 #else
416 uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
417 /* BPF_MOV64_IMM(BPF_REG_0, 0) */
418 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
419 /* BPF_MOV64_IMM(BPF_REG_2, 1) */
420 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
421 /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
422 0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
423 /* BPF_MOV64_IMM(BPF_REG_0, 1) */
424 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
425 /* BPF_EXIT_INSN() */
426 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
427
428 uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
429 /* BPF_MOV64_IMM(BPF_REG_0, 0) */
430 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
431 /* BPF_MOV64_IMM(BPF_REG_2, 1) */
432 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
433 /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
434 0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
435 /* BPF_MOV64_IMM(BPF_REG_0, 1) */
436 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
437 /* BPF_EXIT_INSN() */
438 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
439
440 struct bpf_prog_load_attr {
441 uint32_t prog_type;
442 uint32_t insn_cnt;
443 uint64_t insns;
444 uint64_t license;
445 uint32_t log_level;
446 uint32_t log_size;
447 uint64_t log_buf;
448 uint32_t kern_version;
449 uint32_t prog_flags;
450 } attr = {};
451 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
452 attr.insn_cnt = 5;
453 attr.insns = (uint64_t)v3_insns;
454 attr.license = (uint64_t)"DUMMY";
455
456 int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
457 sizeof(attr));
458 if (fd >= 0) {
459 close(fd);
460 return "v3";
461 }
462
463 /* Clear the whole attr in case its content changed by syscall. */
464 memset(&attr, 0, sizeof(attr));
465 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
466 attr.insn_cnt = 5;
467 attr.insns = (uint64_t)v2_insns;
468 attr.license = (uint64_t)"DUMMY";
469 fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
470 if (fd >= 0) {
471 close(fd);
472 return "v2";
473 }
474 return "v1";
475 #endif
476 }
477
478 #if defined(__i386__) || defined(_M_IX86) || \
479 defined(__x86_64__) || defined(_M_X64)
480
// The i386 check below was copied from clang's cpuid.h (__get_cpuid_max).
// It was motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support: on i386 the presence of CPUID is therefore probed first, by trying
// to flip bit 0x00200000 of eflags and checking whether the change sticks.
// The cpuid.h header itself is not used here (its removal was motivated by
// PR30384). That header and __get_cpuid_max are not used in llvm, clang,
// openmp or test-suite, but are used in external projects, e.g. libstdcxx.
//
// Returns false only when the i386 probe finds that the flag cannot be
// toggled; every other configuration returns true without probing.
static bool isCpuIdSupported() {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int HasCpuId;
  __asm__(" pushfl\n"
          " popl %%eax\n"
          " movl %%eax,%%ecx\n"
          " xorl $0x00200000,%%eax\n"
          " pushl %%eax\n"
          " popfl\n"
          " pushfl\n"
          " popl %%eax\n"
          " movl $0,%0\n"
          " cmpl %%eax,%%ecx\n"
          " je 1f\n"
          " movl $1,%0\n"
          "1:"
          : "=r"(HasCpuId)
          :
          : "eax", "ecx");
  return HasCpuId != 0;
#else
  return true;
#endif
}
515
/// getX86CpuIDAndInfo - Execute cpuid for leaf \p value and store the four
/// result registers through \p rEAX, \p rEBX, \p rECX and \p rEDX.
///
/// \returns false when cpuid was executed, and true when this build has no
/// way of running it on the host (the outputs are then left untouched).
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx, so it is preserved by hand:
  // rbx is parked in rsi around the instruction and the EBX result is
  // delivered through rsi.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same scheme as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif !defined(__GNUC__) && !defined(__clang__) && defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
553
554 namespace llvm {
555 namespace sys {
556 namespace detail {
557 namespace x86 {
558
getVendorSignature(unsigned * MaxLeaf)559 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
560 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
561 if (MaxLeaf == nullptr)
562 MaxLeaf = &EAX;
563 else
564 *MaxLeaf = 0;
565
566 if (!isCpuIdSupported())
567 return VendorSignatures::UNKNOWN;
568
569 if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
570 return VendorSignatures::UNKNOWN;
571
572 // "Genu ineI ntel"
573 if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
574 return VendorSignatures::GENUINE_INTEL;
575
576 // "Auth enti cAMD"
577 if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
578 return VendorSignatures::AUTHENTIC_AMD;
579
580 return VendorSignatures::UNKNOWN;
581 }
582
583 } // namespace x86
584 } // namespace detail
585 } // namespace sys
586 } // namespace llvm
587
588 using namespace llvm::sys::detail::x86;
589
/// getX86CpuIDAndInfoEx - Execute cpuid for leaf \p value and sub-leaf
/// \p subleaf and store the four result registers through \p rEAX, \p rEBX,
/// \p rECX and \p rEDX.
///
/// \returns false when cpuid was executed, and true when this build has no
/// way of running it on the host (the outputs are then left untouched).
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx, so it is preserved by hand:
  // rbx is parked in rsi around the instruction and the EBX result is
  // delivered through rsi.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same scheme as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif !defined(__GNUC__) && !defined(__clang__) && defined(_MSC_VER)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
628
// Read control register 0 (XCR0). Used to detect features such as AVX.
//
// The low 32 bits of the register are stored through rEAX and the high 32
// bits through rEDX. Returns false when the register was read and true when
// this build has no way of reading it (the outputs are then left untouched).
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // xgetbv is emitted as a raw .byte sequence rather than by mnemonic because
  // older assemblers do not support the instruction and there is no easy way
  // to conditionally compile based on the assembler used.
  unsigned Low, High;
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(Low), "=d"(High) : "c"(0));
  *rEAX = Low;
  *rEDX = High;
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  const unsigned long long XCR0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = static_cast<unsigned>(XCR0);
  *rEDX = static_cast<unsigned>(XCR0 >> 32);
  return false;
#else
  return true;
#endif
}
646
/// Decodes the family and model numbers from a cpuid EAX value.
///
/// \param EAX    the value to decode.
/// \param Family receives bits 8-11, plus the extended family (bits 20-27)
///               when bits 8-11 equal 0xF.
/// \param Model  receives bits 4-7, plus the extended model (bits 16-19)
///               shifted left by four when bits 8-11 equal 6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseModel = (EAX >> 4) & 0xf;   // Bits 4 - 7
  const unsigned BaseFamily = (EAX >> 8) & 0xf;  // Bits 8 - 11
  const unsigned ExtModel = (EAX >> 16) & 0xf;   // Bits 16 - 19
  const unsigned ExtFamily = (EAX >> 20) & 0xff; // Bits 20 - 27

  unsigned FamilyValue = BaseFamily;
  unsigned ModelValue = BaseModel;

  // The extended family ID only applies when the family ID is F.
  if (BaseFamily == 0xf)
    FamilyValue += ExtFamily;

  // The extended model ID applies when the family ID is 6 or F.
  if (BaseFamily == 6 || BaseFamily == 0xf)
    ModelValue += ExtModel << 4;

  *Family = FamilyValue;
  *Model = ModelValue;
}
659
660 static StringRef
getIntelProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)661 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
662 const unsigned *Features,
663 unsigned *Type, unsigned *Subtype) {
664 auto testFeature = [&](unsigned F) {
665 return (Features[F / 32] & (1U << (F % 32))) != 0;
666 };
667
668 StringRef CPU;
669
670 switch (Family) {
671 case 3:
672 CPU = "i386";
673 break;
674 case 4:
675 CPU = "i486";
676 break;
677 case 5:
678 if (testFeature(X86::FEATURE_MMX)) {
679 CPU = "pentium-mmx";
680 break;
681 }
682 CPU = "pentium";
683 break;
684 case 6:
685 switch (Model) {
686 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
687 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
688 // mobile processor, Intel Core 2 Extreme processor, Intel
689 // Pentium Dual-Core processor, Intel Xeon processor, model
690 // 0Fh. All processors are manufactured using the 65 nm process.
691 case 0x16: // Intel Celeron processor model 16h. All processors are
692 // manufactured using the 65 nm process
693 CPU = "core2";
694 *Type = X86::INTEL_CORE2;
695 break;
696 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
697 // 17h. All processors are manufactured using the 45 nm process.
698 //
699 // 45nm: Penryn , Wolfdale, Yorkfield (XE)
700 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
701 // the 45 nm process.
702 CPU = "penryn";
703 *Type = X86::INTEL_CORE2;
704 break;
705 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
706 // processors are manufactured using the 45 nm process.
707 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
708 // As found in a Summer 2010 model iMac.
709 case 0x1f:
710 case 0x2e: // Nehalem EX
711 CPU = "nehalem";
712 *Type = X86::INTEL_COREI7;
713 *Subtype = X86::INTEL_COREI7_NEHALEM;
714 break;
715 case 0x25: // Intel Core i7, laptop version.
716 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
717 // processors are manufactured using the 32 nm process.
718 case 0x2f: // Westmere EX
719 CPU = "westmere";
720 *Type = X86::INTEL_COREI7;
721 *Subtype = X86::INTEL_COREI7_WESTMERE;
722 break;
723 case 0x2a: // Intel Core i7 processor. All processors are manufactured
724 // using the 32 nm process.
725 case 0x2d:
726 CPU = "sandybridge";
727 *Type = X86::INTEL_COREI7;
728 *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
729 break;
730 case 0x3a:
731 case 0x3e: // Ivy Bridge EP
732 CPU = "ivybridge";
733 *Type = X86::INTEL_COREI7;
734 *Subtype = X86::INTEL_COREI7_IVYBRIDGE;
735 break;
736
737 // Haswell:
738 case 0x3c:
739 case 0x3f:
740 case 0x45:
741 case 0x46:
742 CPU = "haswell";
743 *Type = X86::INTEL_COREI7;
744 *Subtype = X86::INTEL_COREI7_HASWELL;
745 break;
746
747 // Broadwell:
748 case 0x3d:
749 case 0x47:
750 case 0x4f:
751 case 0x56:
752 CPU = "broadwell";
753 *Type = X86::INTEL_COREI7;
754 *Subtype = X86::INTEL_COREI7_BROADWELL;
755 break;
756
757 // Skylake:
758 case 0x4e: // Skylake mobile
759 case 0x5e: // Skylake desktop
760 case 0x8e: // Kaby Lake mobile
761 case 0x9e: // Kaby Lake desktop
762 case 0xa5: // Comet Lake-H/S
763 case 0xa6: // Comet Lake-U
764 CPU = "skylake";
765 *Type = X86::INTEL_COREI7;
766 *Subtype = X86::INTEL_COREI7_SKYLAKE;
767 break;
768
769 // Rocketlake:
770 case 0xa7:
771 CPU = "rocketlake";
772 *Type = X86::INTEL_COREI7;
773 *Subtype = X86::INTEL_COREI7_ROCKETLAKE;
774 break;
775
776 // Skylake Xeon:
777 case 0x55:
778 *Type = X86::INTEL_COREI7;
779 if (testFeature(X86::FEATURE_AVX512BF16)) {
780 CPU = "cooperlake";
781 *Subtype = X86::INTEL_COREI7_COOPERLAKE;
782 } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
783 CPU = "cascadelake";
784 *Subtype = X86::INTEL_COREI7_CASCADELAKE;
785 } else {
786 CPU = "skylake-avx512";
787 *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
788 }
789 break;
790
791 // Cannonlake:
792 case 0x66:
793 CPU = "cannonlake";
794 *Type = X86::INTEL_COREI7;
795 *Subtype = X86::INTEL_COREI7_CANNONLAKE;
796 break;
797
798 // Icelake:
799 case 0x7d:
800 case 0x7e:
801 CPU = "icelake-client";
802 *Type = X86::INTEL_COREI7;
803 *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
804 break;
805
806 // Tigerlake:
807 case 0x8c:
808 case 0x8d:
809 CPU = "tigerlake";
810 *Type = X86::INTEL_COREI7;
811 *Subtype = X86::INTEL_COREI7_TIGERLAKE;
812 break;
813
814 // Alderlake:
815 case 0x97:
816 case 0x9a:
817 CPU = "alderlake";
818 *Type = X86::INTEL_COREI7;
819 *Subtype = X86::INTEL_COREI7_ALDERLAKE;
820 break;
821
822 // Icelake Xeon:
823 case 0x6a:
824 case 0x6c:
825 CPU = "icelake-server";
826 *Type = X86::INTEL_COREI7;
827 *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
828 break;
829
830 // Sapphire Rapids:
831 case 0x8f:
832 CPU = "sapphirerapids";
833 *Type = X86::INTEL_COREI7;
834 *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
835 break;
836
837 case 0x1c: // Most 45 nm Intel Atom processors
838 case 0x26: // 45 nm Atom Lincroft
839 case 0x27: // 32 nm Atom Medfield
840 case 0x35: // 32 nm Atom Midview
841 case 0x36: // 32 nm Atom Midview
842 CPU = "bonnell";
843 *Type = X86::INTEL_BONNELL;
844 break;
845
846 // Atom Silvermont codes from the Intel software optimization guide.
847 case 0x37:
848 case 0x4a:
849 case 0x4d:
850 case 0x5a:
851 case 0x5d:
852 case 0x4c: // really airmont
853 CPU = "silvermont";
854 *Type = X86::INTEL_SILVERMONT;
855 break;
856 // Goldmont:
857 case 0x5c: // Apollo Lake
858 case 0x5f: // Denverton
859 CPU = "goldmont";
860 *Type = X86::INTEL_GOLDMONT;
861 break;
862 case 0x7a:
863 CPU = "goldmont-plus";
864 *Type = X86::INTEL_GOLDMONT_PLUS;
865 break;
866 case 0x86:
867 CPU = "tremont";
868 *Type = X86::INTEL_TREMONT;
869 break;
870
871 // Xeon Phi (Knights Landing + Knights Mill):
872 case 0x57:
873 CPU = "knl";
874 *Type = X86::INTEL_KNL;
875 break;
876 case 0x85:
877 CPU = "knm";
878 *Type = X86::INTEL_KNM;
879 break;
880
881 default: // Unknown family 6 CPU, try to guess.
882 // Don't both with Type/Subtype here, they aren't used by the caller.
883 // They're used above to keep the code in sync with compiler-rt.
884 // TODO detect tigerlake host from model
885 if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
886 CPU = "tigerlake";
887 } else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
888 CPU = "icelake-client";
889 } else if (testFeature(X86::FEATURE_AVX512VBMI)) {
890 CPU = "cannonlake";
891 } else if (testFeature(X86::FEATURE_AVX512BF16)) {
892 CPU = "cooperlake";
893 } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
894 CPU = "cascadelake";
895 } else if (testFeature(X86::FEATURE_AVX512VL)) {
896 CPU = "skylake-avx512";
897 } else if (testFeature(X86::FEATURE_AVX512ER)) {
898 CPU = "knl";
899 } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
900 if (testFeature(X86::FEATURE_SHA))
901 CPU = "goldmont";
902 else
903 CPU = "skylake";
904 } else if (testFeature(X86::FEATURE_ADX)) {
905 CPU = "broadwell";
906 } else if (testFeature(X86::FEATURE_AVX2)) {
907 CPU = "haswell";
908 } else if (testFeature(X86::FEATURE_AVX)) {
909 CPU = "sandybridge";
910 } else if (testFeature(X86::FEATURE_SSE4_2)) {
911 if (testFeature(X86::FEATURE_MOVBE))
912 CPU = "silvermont";
913 else
914 CPU = "nehalem";
915 } else if (testFeature(X86::FEATURE_SSE4_1)) {
916 CPU = "penryn";
917 } else if (testFeature(X86::FEATURE_SSSE3)) {
918 if (testFeature(X86::FEATURE_MOVBE))
919 CPU = "bonnell";
920 else
921 CPU = "core2";
922 } else if (testFeature(X86::FEATURE_64BIT)) {
923 CPU = "core2";
924 } else if (testFeature(X86::FEATURE_SSE3)) {
925 CPU = "yonah";
926 } else if (testFeature(X86::FEATURE_SSE2)) {
927 CPU = "pentium-m";
928 } else if (testFeature(X86::FEATURE_SSE)) {
929 CPU = "pentium3";
930 } else if (testFeature(X86::FEATURE_MMX)) {
931 CPU = "pentium2";
932 } else {
933 CPU = "pentiumpro";
934 }
935 break;
936 }
937 break;
938 case 15: {
939 if (testFeature(X86::FEATURE_64BIT)) {
940 CPU = "nocona";
941 break;
942 }
943 if (testFeature(X86::FEATURE_SSE3)) {
944 CPU = "prescott";
945 break;
946 }
947 CPU = "pentium4";
948 break;
949 }
950 default:
951 break; // Unknown.
952 }
953
954 return CPU;
955 }
956
957 static StringRef
getAMDProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)958 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
959 const unsigned *Features,
960 unsigned *Type, unsigned *Subtype) {
961 auto testFeature = [&](unsigned F) {
962 return (Features[F / 32] & (1U << (F % 32))) != 0;
963 };
964
965 StringRef CPU;
966
967 switch (Family) {
968 case 4:
969 CPU = "i486";
970 break;
971 case 5:
972 CPU = "pentium";
973 switch (Model) {
974 case 6:
975 case 7:
976 CPU = "k6";
977 break;
978 case 8:
979 CPU = "k6-2";
980 break;
981 case 9:
982 case 13:
983 CPU = "k6-3";
984 break;
985 case 10:
986 CPU = "geode";
987 break;
988 }
989 break;
990 case 6:
991 if (testFeature(X86::FEATURE_SSE)) {
992 CPU = "athlon-xp";
993 break;
994 }
995 CPU = "athlon";
996 break;
997 case 15:
998 if (testFeature(X86::FEATURE_SSE3)) {
999 CPU = "k8-sse3";
1000 break;
1001 }
1002 CPU = "k8";
1003 break;
1004 case 16:
1005 CPU = "amdfam10";
1006 *Type = X86::AMDFAM10H; // "amdfam10"
1007 switch (Model) {
1008 case 2:
1009 *Subtype = X86::AMDFAM10H_BARCELONA;
1010 break;
1011 case 4:
1012 *Subtype = X86::AMDFAM10H_SHANGHAI;
1013 break;
1014 case 8:
1015 *Subtype = X86::AMDFAM10H_ISTANBUL;
1016 break;
1017 }
1018 break;
1019 case 20:
1020 CPU = "btver1";
1021 *Type = X86::AMD_BTVER1;
1022 break;
1023 case 21:
1024 CPU = "bdver1";
1025 *Type = X86::AMDFAM15H;
1026 if (Model >= 0x60 && Model <= 0x7f) {
1027 CPU = "bdver4";
1028 *Subtype = X86::AMDFAM15H_BDVER4;
1029 break; // 60h-7Fh: Excavator
1030 }
1031 if (Model >= 0x30 && Model <= 0x3f) {
1032 CPU = "bdver3";
1033 *Subtype = X86::AMDFAM15H_BDVER3;
1034 break; // 30h-3Fh: Steamroller
1035 }
1036 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
1037 CPU = "bdver2";
1038 *Subtype = X86::AMDFAM15H_BDVER2;
1039 break; // 02h, 10h-1Fh: Piledriver
1040 }
1041 if (Model <= 0x0f) {
1042 *Subtype = X86::AMDFAM15H_BDVER1;
1043 break; // 00h-0Fh: Bulldozer
1044 }
1045 break;
1046 case 22:
1047 CPU = "btver2";
1048 *Type = X86::AMD_BTVER2;
1049 break;
1050 case 23:
1051 CPU = "znver1";
1052 *Type = X86::AMDFAM17H;
1053 if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
1054 CPU = "znver2";
1055 *Subtype = X86::AMDFAM17H_ZNVER2;
1056 break; // 30h-3fh, 71h: Zen2
1057 }
1058 if (Model <= 0x0f) {
1059 *Subtype = X86::AMDFAM17H_ZNVER1;
1060 break; // 00h-0Fh: Zen1
1061 }
1062 break;
1063 case 25:
1064 CPU = "znver3";
1065 *Type = X86::AMDFAM19H;
1066 if (Model <= 0x0f || Model == 0x21) {
1067 *Subtype = X86::AMDFAM19H_ZNVER3;
1068 break; // 00h-0Fh, 21h: Zen3
1069 }
1070 break;
1071 default:
1072 break; // Unknown AMD CPU.
1073 }
1074
1075 return CPU;
1076 }
1077
getAvailableFeatures(unsigned ECX,unsigned EDX,unsigned MaxLeaf,unsigned * Features)1078 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
1079 unsigned *Features) {
1080 unsigned EAX, EBX;
1081
1082 auto setFeature = [&](unsigned F) {
1083 Features[F / 32] |= 1U << (F % 32);
1084 };
1085
1086 if ((EDX >> 15) & 1)
1087 setFeature(X86::FEATURE_CMOV);
1088 if ((EDX >> 23) & 1)
1089 setFeature(X86::FEATURE_MMX);
1090 if ((EDX >> 25) & 1)
1091 setFeature(X86::FEATURE_SSE);
1092 if ((EDX >> 26) & 1)
1093 setFeature(X86::FEATURE_SSE2);
1094
1095 if ((ECX >> 0) & 1)
1096 setFeature(X86::FEATURE_SSE3);
1097 if ((ECX >> 1) & 1)
1098 setFeature(X86::FEATURE_PCLMUL);
1099 if ((ECX >> 9) & 1)
1100 setFeature(X86::FEATURE_SSSE3);
1101 if ((ECX >> 12) & 1)
1102 setFeature(X86::FEATURE_FMA);
1103 if ((ECX >> 19) & 1)
1104 setFeature(X86::FEATURE_SSE4_1);
1105 if ((ECX >> 20) & 1) {
1106 setFeature(X86::FEATURE_SSE4_2);
1107 setFeature(X86::FEATURE_CRC32);
1108 }
1109 if ((ECX >> 23) & 1)
1110 setFeature(X86::FEATURE_POPCNT);
1111 if ((ECX >> 25) & 1)
1112 setFeature(X86::FEATURE_AES);
1113
1114 if ((ECX >> 22) & 1)
1115 setFeature(X86::FEATURE_MOVBE);
1116
1117 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1118 // indicates that the AVX registers will be saved and restored on context
1119 // switch, then we have full AVX support.
1120 const unsigned AVXBits = (1 << 27) | (1 << 28);
1121 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
1122 ((EAX & 0x6) == 0x6);
1123 #if defined(__APPLE__)
1124 // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1125 // save the AVX512 context if we use AVX512 instructions, even the bit is not
1126 // set right now.
1127 bool HasAVX512Save = true;
1128 #else
1129 // AVX512 requires additional context to be saved by the OS.
1130 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
1131 #endif
1132
1133 if (HasAVX)
1134 setFeature(X86::FEATURE_AVX);
1135
1136 bool HasLeaf7 =
1137 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1138
1139 if (HasLeaf7 && ((EBX >> 3) & 1))
1140 setFeature(X86::FEATURE_BMI);
1141 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
1142 setFeature(X86::FEATURE_AVX2);
1143 if (HasLeaf7 && ((EBX >> 8) & 1))
1144 setFeature(X86::FEATURE_BMI2);
1145 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
1146 setFeature(X86::FEATURE_AVX512F);
1147 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
1148 setFeature(X86::FEATURE_AVX512DQ);
1149 if (HasLeaf7 && ((EBX >> 19) & 1))
1150 setFeature(X86::FEATURE_ADX);
1151 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
1152 setFeature(X86::FEATURE_AVX512IFMA);
1153 if (HasLeaf7 && ((EBX >> 23) & 1))
1154 setFeature(X86::FEATURE_CLFLUSHOPT);
1155 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
1156 setFeature(X86::FEATURE_AVX512PF);
1157 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
1158 setFeature(X86::FEATURE_AVX512ER);
1159 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
1160 setFeature(X86::FEATURE_AVX512CD);
1161 if (HasLeaf7 && ((EBX >> 29) & 1))
1162 setFeature(X86::FEATURE_SHA);
1163 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
1164 setFeature(X86::FEATURE_AVX512BW);
1165 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
1166 setFeature(X86::FEATURE_AVX512VL);
1167
1168 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
1169 setFeature(X86::FEATURE_AVX512VBMI);
1170 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
1171 setFeature(X86::FEATURE_AVX512VBMI2);
1172 if (HasLeaf7 && ((ECX >> 8) & 1))
1173 setFeature(X86::FEATURE_GFNI);
1174 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
1175 setFeature(X86::FEATURE_VPCLMULQDQ);
1176 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
1177 setFeature(X86::FEATURE_AVX512VNNI);
1178 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
1179 setFeature(X86::FEATURE_AVX512BITALG);
1180 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
1181 setFeature(X86::FEATURE_AVX512VPOPCNTDQ);
1182
1183 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
1184 setFeature(X86::FEATURE_AVX5124VNNIW);
1185 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
1186 setFeature(X86::FEATURE_AVX5124FMAPS);
1187 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
1188 setFeature(X86::FEATURE_AVX512VP2INTERSECT);
1189
1190 bool HasLeaf7Subleaf1 =
1191 MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1192 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
1193 setFeature(X86::FEATURE_AVX512BF16);
1194
1195 unsigned MaxExtLevel;
1196 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1197
1198 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1199 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1200 if (HasExtLeaf1 && ((ECX >> 6) & 1))
1201 setFeature(X86::FEATURE_SSE4_A);
1202 if (HasExtLeaf1 && ((ECX >> 11) & 1))
1203 setFeature(X86::FEATURE_XOP);
1204 if (HasExtLeaf1 && ((ECX >> 16) & 1))
1205 setFeature(X86::FEATURE_FMA4);
1206
1207 if (HasExtLeaf1 && ((EDX >> 29) & 1))
1208 setFeature(X86::FEATURE_64BIT);
1209 }
1210
getHostCPUName()1211 StringRef sys::getHostCPUName() {
1212 unsigned MaxLeaf = 0;
1213 const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
1214 if (Vendor == VendorSignatures::UNKNOWN)
1215 return "generic";
1216
1217 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1218 getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
1219
1220 unsigned Family = 0, Model = 0;
1221 unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0};
1222 detectX86FamilyModel(EAX, &Family, &Model);
1223 getAvailableFeatures(ECX, EDX, MaxLeaf, Features);
1224
1225 // These aren't consumed in this file, but we try to keep some source code the
1226 // same or similar to compiler-rt.
1227 unsigned Type = 0;
1228 unsigned Subtype = 0;
1229
1230 StringRef CPU;
1231
1232 if (Vendor == VendorSignatures::GENUINE_INTEL) {
1233 CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type,
1234 &Subtype);
1235 } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) {
1236 CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type,
1237 &Subtype);
1238 }
1239
1240 if (!CPU.empty())
1241 return CPU;
1242
1243 return "generic";
1244 }
1245
1246 #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
getHostCPUName()1247 StringRef sys::getHostCPUName() {
1248 host_basic_info_data_t hostInfo;
1249 mach_msg_type_number_t infoCount;
1250
1251 infoCount = HOST_BASIC_INFO_COUNT;
1252 mach_port_t hostPort = mach_host_self();
1253 host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
1254 &infoCount);
1255 mach_port_deallocate(mach_task_self(), hostPort);
1256
1257 if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
1258 return "generic";
1259
1260 switch (hostInfo.cpu_subtype) {
1261 case CPU_SUBTYPE_POWERPC_601:
1262 return "601";
1263 case CPU_SUBTYPE_POWERPC_602:
1264 return "602";
1265 case CPU_SUBTYPE_POWERPC_603:
1266 return "603";
1267 case CPU_SUBTYPE_POWERPC_603e:
1268 return "603e";
1269 case CPU_SUBTYPE_POWERPC_603ev:
1270 return "603ev";
1271 case CPU_SUBTYPE_POWERPC_604:
1272 return "604";
1273 case CPU_SUBTYPE_POWERPC_604e:
1274 return "604e";
1275 case CPU_SUBTYPE_POWERPC_620:
1276 return "620";
1277 case CPU_SUBTYPE_POWERPC_750:
1278 return "750";
1279 case CPU_SUBTYPE_POWERPC_7400:
1280 return "7400";
1281 case CPU_SUBTYPE_POWERPC_7450:
1282 return "7450";
1283 case CPU_SUBTYPE_POWERPC_970:
1284 return "970";
1285 default:;
1286 }
1287
1288 return "generic";
1289 }
1290 #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__))
getHostCPUName()1291 StringRef sys::getHostCPUName() {
1292 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1293 StringRef Content = P ? P->getBuffer() : "";
1294 return detail::getHostCPUNameForPowerPC(Content);
1295 }
1296 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
getHostCPUName()1297 StringRef sys::getHostCPUName() {
1298 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1299 StringRef Content = P ? P->getBuffer() : "";
1300 return detail::getHostCPUNameForARM(Content);
1301 }
1302 #elif defined(__linux__) && defined(__s390x__)
getHostCPUName()1303 StringRef sys::getHostCPUName() {
1304 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1305 StringRef Content = P ? P->getBuffer() : "";
1306 return detail::getHostCPUNameForS390x(Content);
1307 }
1308 #elif defined(__MVS__)
getHostCPUName()1309 StringRef sys::getHostCPUName() {
1310 // Get pointer to Communications Vector Table (CVT).
1311 // The pointer is located at offset 16 of the Prefixed Save Area (PSA).
1312 // It is stored as 31 bit pointer and will be zero-extended to 64 bit.
1313 int *StartToCVTOffset = reinterpret_cast<int *>(0x10);
1314 // Since its stored as a 31-bit pointer, get the 4 bytes from the start
1315 // of address.
1316 int ReadValue = *StartToCVTOffset;
1317 // Explicitly clear the high order bit.
1318 ReadValue = (ReadValue & 0x7FFFFFFF);
1319 char *CVT = reinterpret_cast<char *>(ReadValue);
1320 // The model number is located in the CVT prefix at offset -6 and stored as
1321 // signless packed decimal.
1322 uint16_t Id = *(uint16_t *)&CVT[-6];
1323 // Convert number to integer.
1324 Id = decodePackedBCD<uint16_t>(Id, false);
1325 // Check for vector support. It's stored in field CVTFLAG5 (offset 244),
1326 // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector
1327 // extension can only be used if bit CVTVEF is on.
1328 bool HaveVectorSupport = CVT[244] & 0x80;
1329 return getCPUNameFromS390Model(Id, HaveVectorSupport);
1330 }
1331 #elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__))
1332 #define CPUFAMILY_ARM_SWIFT 0x1e2d6381
1333 #define CPUFAMILY_ARM_CYCLONE 0x37a09642
1334 #define CPUFAMILY_ARM_TYPHOON 0x2c91a47e
1335 #define CPUFAMILY_ARM_TWISTER 0x92fb37c8
1336 #define CPUFAMILY_ARM_HURRICANE 0x67ceee93
1337 #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6
1338 #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f
1339 #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2
1340 #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3
1341
getHostCPUName()1342 StringRef sys::getHostCPUName() {
1343 uint32_t Family;
1344 size_t Length = sizeof(Family);
1345 sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0);
1346
1347 switch (Family) {
1348 case CPUFAMILY_ARM_SWIFT:
1349 return "swift";
1350 case CPUFAMILY_ARM_CYCLONE:
1351 return "apple-a7";
1352 case CPUFAMILY_ARM_TYPHOON:
1353 return "apple-a8";
1354 case CPUFAMILY_ARM_TWISTER:
1355 return "apple-a9";
1356 case CPUFAMILY_ARM_HURRICANE:
1357 return "apple-a10";
1358 case CPUFAMILY_ARM_MONSOON_MISTRAL:
1359 return "apple-a11";
1360 case CPUFAMILY_ARM_VORTEX_TEMPEST:
1361 return "apple-a12";
1362 case CPUFAMILY_ARM_LIGHTNING_THUNDER:
1363 return "apple-a13";
1364 case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
1365 return "apple-m1";
1366 default:
1367 // Default to the newest CPU we know about.
1368 return "apple-m1";
1369 }
1370 }
1371 #elif defined(_AIX)
getHostCPUName()1372 StringRef sys::getHostCPUName() {
1373 switch (_system_configuration.implementation) {
1374 case POWER_4:
1375 if (_system_configuration.version == PV_4_3)
1376 return "970";
1377 return "pwr4";
1378 case POWER_5:
1379 if (_system_configuration.version == PV_5)
1380 return "pwr5";
1381 return "pwr5x";
1382 case POWER_6:
1383 if (_system_configuration.version == PV_6_Compat)
1384 return "pwr6";
1385 return "pwr6x";
1386 case POWER_7:
1387 return "pwr7";
1388 case POWER_8:
1389 return "pwr8";
1390 case POWER_9:
1391 return "pwr9";
1392 // TODO: simplify this once the macro is available in all OS levels.
1393 #ifdef POWER_10
1394 case POWER_10:
1395 #else
1396 case 0x40000:
1397 #endif
1398 return "pwr10";
1399 default:
1400 return "generic";
1401 }
1402 }
1403 #elif defined(__riscv)
getHostCPUName()1404 StringRef sys::getHostCPUName() {
1405 #if defined(__linux__)
1406 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1407 StringRef Content = P ? P->getBuffer() : "";
1408 return detail::getHostCPUNameForRISCV(Content);
1409 #else
1410 #if __riscv_xlen == 64
1411 return "generic-rv64";
1412 #elif __riscv_xlen == 32
1413 return "generic-rv32";
1414 #else
1415 #error "Unhandled value of __riscv_xlen"
1416 #endif
1417 #endif
1418 }
1419 #elif defined(__sparc__)
1420 #if defined(__linux__)
getHostCPUNameForSPARC(StringRef ProcCpuinfoContent)1421 StringRef sys::detail::getHostCPUNameForSPARC(StringRef ProcCpuinfoContent) {
1422 SmallVector<StringRef> Lines;
1423 ProcCpuinfoContent.split(Lines, "\n");
1424
1425 // Look for cpu line to determine cpu name
1426 StringRef Cpu;
1427 for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
1428 if (Lines[I].startswith("cpu")) {
1429 Cpu = Lines[I].substr(5).ltrim("\t :");
1430 break;
1431 }
1432 }
1433
1434 return StringSwitch<const char *>(Cpu)
1435 .StartsWith("SuperSparc", "supersparc")
1436 .StartsWith("HyperSparc", "hypersparc")
1437 .StartsWith("SpitFire", "ultrasparc")
1438 .StartsWith("BlackBird", "ultrasparc")
1439 .StartsWith("Sabre", " ultrasparc")
1440 .StartsWith("Hummingbird", "ultrasparc")
1441 .StartsWith("Cheetah", "ultrasparc3")
1442 .StartsWith("Jalapeno", "ultrasparc3")
1443 .StartsWith("Jaguar", "ultrasparc3")
1444 .StartsWith("Panther", "ultrasparc3")
1445 .StartsWith("Serrano", "ultrasparc3")
1446 .StartsWith("UltraSparc T1", "niagara")
1447 .StartsWith("UltraSparc T2", "niagara2")
1448 .StartsWith("UltraSparc T3", "niagara3")
1449 .StartsWith("UltraSparc T4", "niagara4")
1450 .StartsWith("UltraSparc T5", "niagara4")
1451 .StartsWith("LEON", "leon3")
1452 // niagara7/m8 not supported by LLVM yet.
1453 .StartsWith("SPARC-M7", "niagara4" /* "niagara7" */)
1454 .StartsWith("SPARC-S7", "niagara4" /* "niagara7" */)
1455 .StartsWith("SPARC-M8", "niagara4" /* "m8" */)
1456 .Default("generic");
1457 }
1458 #endif
1459
getHostCPUName()1460 StringRef sys::getHostCPUName() {
1461 #if defined(__linux__)
1462 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1463 StringRef Content = P ? P->getBuffer() : "";
1464 return detail::getHostCPUNameForSPARC(Content);
1465 #elif defined(__sun__) && defined(__svr4__)
1466 char *buf = NULL;
1467 kstat_ctl_t *kc;
1468 kstat_t *ksp;
1469 kstat_named_t *brand = NULL;
1470
1471 kc = kstat_open();
1472 if (kc != NULL) {
1473 ksp = kstat_lookup(kc, const_cast<char *>("cpu_info"), -1, NULL);
1474 if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 &&
1475 ksp->ks_type == KSTAT_TYPE_NAMED)
1476 brand =
1477 (kstat_named_t *)kstat_data_lookup(ksp, const_cast<char *>("brand"));
1478 if (brand != NULL && brand->data_type == KSTAT_DATA_STRING)
1479 buf = KSTAT_NAMED_STR_PTR(brand);
1480 }
1481 kstat_close(kc);
1482
1483 return StringSwitch<const char *>(buf)
1484 .Case("TMS390S10", "supersparc") // Texas Instruments microSPARC I
1485 .Case("TMS390Z50", "supersparc") // Texas Instruments SuperSPARC I
1486 .Case("TMS390Z55",
1487 "supersparc") // Texas Instruments SuperSPARC I with SuperCache
1488 .Case("MB86904", "supersparc") // Fujitsu microSPARC II
1489 .Case("MB86907", "supersparc") // Fujitsu TurboSPARC
1490 .Case("RT623", "hypersparc") // Ross hyperSPARC
1491 .Case("RT625", "hypersparc")
1492 .Case("RT626", "hypersparc")
1493 .Case("UltraSPARC-I", "ultrasparc")
1494 .Case("UltraSPARC-II", "ultrasparc")
1495 .Case("UltraSPARC-IIe", "ultrasparc")
1496 .Case("UltraSPARC-IIi", "ultrasparc")
1497 .Case("SPARC64-III", "ultrasparc")
1498 .Case("SPARC64-IV", "ultrasparc")
1499 .Case("UltraSPARC-III", "ultrasparc3")
1500 .Case("UltraSPARC-III+", "ultrasparc3")
1501 .Case("UltraSPARC-IIIi", "ultrasparc3")
1502 .Case("UltraSPARC-IIIi+", "ultrasparc3")
1503 .Case("UltraSPARC-IV", "ultrasparc3")
1504 .Case("UltraSPARC-IV+", "ultrasparc3")
1505 .Case("SPARC64-V", "ultrasparc3")
1506 .Case("SPARC64-VI", "ultrasparc3")
1507 .Case("SPARC64-VII", "ultrasparc3")
1508 .Case("UltraSPARC-T1", "niagara")
1509 .Case("UltraSPARC-T2", "niagara2")
1510 .Case("UltraSPARC-T2", "niagara2")
1511 .Case("UltraSPARC-T2+", "niagara2")
1512 .Case("SPARC-T3", "niagara3")
1513 .Case("SPARC-T4", "niagara4")
1514 .Case("SPARC-T5", "niagara4")
1515 // niagara7/m8 not supported by LLVM yet.
1516 .Case("SPARC-M7", "niagara4" /* "niagara7" */)
1517 .Case("SPARC-S7", "niagara4" /* "niagara7" */)
1518 .Case("SPARC-M8", "niagara4" /* "m8" */)
1519 .Default("generic");
1520 #else
1521 return "generic";
1522 #endif
1523 }
1524 #else
getHostCPUName()1525 StringRef sys::getHostCPUName() { return "generic"; }
1526 namespace llvm {
1527 namespace sys {
1528 namespace detail {
1529 namespace x86 {
1530
getVendorSignature(unsigned * MaxLeaf)1531 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
1532 return VendorSignatures::UNKNOWN;
1533 }
1534
1535 } // namespace x86
1536 } // namespace detail
1537 } // namespace sys
1538 } // namespace llvm
1539 #endif
1540
1541 #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
1542 // On Linux, the number of physical cores can be computed from /proc/cpuinfo,
1543 // using the number of unique physical/core id pairs. The following
1544 // implementation reads the /proc/cpuinfo format on an x86_64 system.
computeHostNumPhysicalCores()1545 int computeHostNumPhysicalCores() {
1546 // Enabled represents the number of physical id/core id pairs with at least
1547 // one processor id enabled by the CPU affinity mask.
1548 cpu_set_t Affinity, Enabled;
1549 if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
1550 return -1;
1551 CPU_ZERO(&Enabled);
1552
1553 // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
1554 // mmapped because it appears to have 0 size.
1555 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
1556 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
1557 if (std::error_code EC = Text.getError()) {
1558 llvm::errs() << "Can't read "
1559 << "/proc/cpuinfo: " << EC.message() << "\n";
1560 return -1;
1561 }
1562 SmallVector<StringRef, 8> strs;
1563 (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
1564 /*KeepEmpty=*/false);
1565 int CurProcessor = -1;
1566 int CurPhysicalId = -1;
1567 int CurSiblings = -1;
1568 int CurCoreId = -1;
1569 for (StringRef Line : strs) {
1570 std::pair<StringRef, StringRef> Data = Line.split(':');
1571 auto Name = Data.first.trim();
1572 auto Val = Data.second.trim();
1573 // These fields are available if the kernel is configured with CONFIG_SMP.
1574 if (Name == "processor")
1575 Val.getAsInteger(10, CurProcessor);
1576 else if (Name == "physical id")
1577 Val.getAsInteger(10, CurPhysicalId);
1578 else if (Name == "siblings")
1579 Val.getAsInteger(10, CurSiblings);
1580 else if (Name == "core id") {
1581 Val.getAsInteger(10, CurCoreId);
1582 // The processor id corresponds to an index into cpu_set_t.
1583 if (CPU_ISSET(CurProcessor, &Affinity))
1584 CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
1585 }
1586 }
1587 return CPU_COUNT(&Enabled);
1588 }
1589 #elif defined(__linux__) && defined(__powerpc__)
/// Count the CPUs in this process's affinity mask (Linux/PowerPC).
///
/// \returns the number of CPUs in the mask, or -1 if the mask cannot be
///          obtained.
int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPU's on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
  const int MaxCPUs = 2048;
  cpu_set_t *DynAffinity = CPU_ALLOC(MaxCPUs);
  if (DynAffinity == nullptr)
    return -1;

  const size_t DynAffinitySize = CPU_ALLOC_SIZE(MaxCPUs);
  int NumCPUs = -1;
  // A dynamically sized set must be counted with the _S variant; plain
  // CPU_COUNT only looks at sizeof(cpu_set_t) bytes.
  if (sched_getaffinity(0, DynAffinitySize, DynAffinity) == 0)
    NumCPUs = CPU_COUNT_S(DynAffinitySize, DynAffinity);

  // Release the mask on both the success and the failure path.
  CPU_FREE(DynAffinity);
  return NumCPUs;
}
1608 #elif defined(__linux__) && defined(__s390x__)
/// Linux/s390x: report the value of sysconf(_SC_NPROCESSORS_ONLN).
int computeHostNumPhysicalCores() {
  const long OnlineProcessors = sysconf(_SC_NPROCESSORS_ONLN);
  return static_cast<int>(OnlineProcessors);
}
1610 #elif defined(__APPLE__)
1611 // Gets the number of *physical cores* on the machine.
/// Darwin: query the number of physical cores via sysctl.
///
/// Tries "hw.physicalcpu" first and falls back to HW_AVAILCPU.
/// \returns the core count, or -1 if neither query yields a positive value.
int computeHostNumPhysicalCores() {
  // Zero-initialise so that a failed query is seen as "no answer" rather
  // than leaving an indeterminate value behind.
  uint32_t count = 0;
  size_t len = sizeof(count);
  if (sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0) != 0 ||
      count < 1) {
    int nm[2] = {CTL_HW, HW_AVAILCPU};
    // Reset the outputs: the first call may have modified them.
    count = 0;
    len = sizeof(count);
    if (sysctl(nm, 2, &count, &len, NULL, 0) != 0 || count < 1)
      return -1;
  }
  return static_cast<int>(count);
}
1626 #elif defined(__MVS__)
// z/OS: read the number of online standard CPs by following the control
// block chain PSA -> CVT -> CSD and returning the 32-bit counter stored in
// the CSD.
int computeHostNumPhysicalCores() {
  enum {
    // Byte offset of the pointer to the Communications Vector Table (CVT) in
    // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
    // will be zero-extended to uintptr_t.
    FLCCVT = 16,
    // Byte offset of the pointer to the Common System Data Area (CSD) in the
    // CVT. The table entry is a 31-bit pointer and will be zero-extended to
    // uintptr_t.
    CVTCSD = 660,
    // Byte offset to the number of live CPs in the LPAR, stored as a signed
    // 32-bit value in the table.
    CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
  };
  // The PSA is addressed from location 0; fields are read at byte offsets
  // from it.
  char *PSA = 0;
  // Load the 32-bit CVT pointer field from the PSA and widen it to a pointer.
  char *CVT = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT])));
  // Load the 32-bit CSD pointer field from the CVT and widen it to a pointer.
  char *CSD = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD])));
  // Reinterpret the counter field in the CSD as an int and return it.
  return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
}
1648 #elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0
1649 // Defined in llvm/lib/Support/Windows/Threading.inc
1650 int computeHostNumPhysicalCores();
1651 #else
1652 // On other systems, return -1 to indicate unknown.
/// Fallback for hosts without a core-counting routine: -1 means "unknown".
static int computeHostNumPhysicalCores() {
  const int UnknownCoreCount = -1;
  return UnknownCoreCount;
}
1654 #endif
1655
getHostNumPhysicalCores()1656 int sys::getHostNumPhysicalCores() {
1657 static int NumCores = computeHostNumPhysicalCores();
1658 return NumCores;
1659 }
1660
1661 #if defined(__i386__) || defined(_M_IX86) || \
1662 defined(__x86_64__) || defined(_M_X64)
/// x86 host feature detection.
///
/// Queries CPUID (and XGETBV for OS state-save support) and records one
/// boolean entry per known feature name in \p Features.
///
/// \param Features Map that receives the feature name -> availability pairs.
/// \returns false if CPUID leaf 0 cannot be queried or reports a maximum
///          basic leaf below 1; true otherwise.
///
/// The EAX/EBX/ECX/EDX variables are reused as outputs of successive
/// queries, so each group of assignments below must stay directly after the
/// query whose results it reads.
bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
  unsigned MaxLevel;

  // Leaf 0 reports the highest supported basic leaf in EAX (MaxLevel).
  if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1)
    return false;

  // Leaf 1: basic feature bits in EDX and ECX.
  getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);

  Features["cx8"] = (EDX >> 8) & 1;
  Features["cmov"] = (EDX >> 15) & 1;
  Features["mmx"] = (EDX >> 23) & 1;
  Features["fxsr"] = (EDX >> 24) & 1;
  Features["sse"] = (EDX >> 25) & 1;
  Features["sse2"] = (EDX >> 26) & 1;

  Features["sse3"] = (ECX >> 0) & 1;
  Features["pclmul"] = (ECX >> 1) & 1;
  Features["ssse3"] = (ECX >> 9) & 1;
  Features["cx16"] = (ECX >> 13) & 1;
  Features["sse4.1"] = (ECX >> 19) & 1;
  Features["sse4.2"] = (ECX >> 20) & 1;
  // crc32 simply mirrors the sse4.2 result.
  Features["crc32"] = Features["sse4.2"];
  Features["movbe"] = (ECX >> 22) & 1;
  Features["popcnt"] = (ECX >> 23) & 1;
  Features["aes"] = (ECX >> 25) & 1;
  Features["rdrnd"] = (ECX >> 30) & 1;

  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
  // indicates that the AVX registers will be saved and restored on context
  // switch, then we have full AVX support.
  // Note: the XCR0 query overwrites EAX and EDX; ECX still holds leaf 1.
  bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
  bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6);
#if defined(__APPLE__)
  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
  // save the AVX512 context if we use AVX512 instructions, even if the bit is
  // not set right now.
  bool HasAVX512Save = true;
#else
  // AVX512 requires additional context to be saved by the OS.
  bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
#endif
  // AMX requires additional context to be saved by the OS.
  const unsigned AMXBits = (1 << 17) | (1 << 18);
  bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);

  // The remaining leaf 1 ECX bits depend on OS support for saving YMM state.
  Features["avx"] = HasAVXSave;
  Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave;
  // Only enable XSAVE if OS has enabled support for saving YMM state.
  Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave;
  Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave;

  // Extended leaf 0x80000000 reports the highest extended leaf in EAX.
  unsigned MaxExtLevel;
  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);

  // Extended leaf 0x80000001.
  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
  Features["sahf"] = HasExtLeaf1 && ((ECX >> 0) & 1);
  Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1);
  Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1);
  Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1);
  Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
  Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1);
  Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
  Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1);
  Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);

  Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1);

  // Miscellaneous memory related features, detected by
  // using the 0x80000008 leaf of the CPUID instruction
  bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
                     !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
  Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1);
  Features["rdpru"] = HasExtLeaf8 && ((EBX >> 4) & 1);
  Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);

  // Leaf 7, subleaf 0: structured extended feature flags in EBX/ECX/EDX.
  bool HasLeaf7 =
      MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);

  Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1);
  Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1);
  Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1);
  // AVX2 is only supported if we have the OS save support from AVX.
  Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave;
  Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1);
  Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1);
  Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1);
  // AVX512 is only supported if the OS supports the context save for it.
  Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
  Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
  Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1);
  Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1);
  Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
  Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
  Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1);
  Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
  Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
  Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
  Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);
  Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
  Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;

  Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1);
  Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
  Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
  Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1);
  Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save;
  Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1);
  Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1);
  Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave;
  Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
  Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
  Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
  Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
  Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1);
  Features["kl"] = HasLeaf7 && ((ECX >> 23) & 1); // key locker
  Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1);
  Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1);
  Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1);
  Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1);

  Features["uintr"] = HasLeaf7 && ((EDX >> 5) & 1);
  Features["avx512vp2intersect"] =
      HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
  Features["serialize"] = HasLeaf7 && ((EDX >> 14) & 1);
  Features["tsxldtrk"] = HasLeaf7 && ((EDX >> 16) & 1);
  // There are two CPUID leaves which carry information associated with the
  // pconfig instruction:
  // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th
  // bit of EDX), while the EAX=0x1b leaf returns information on the
  // availability of specific pconfig leaves.
  // The target feature here only refers to the first of these two.
  // Users might need to check for the availability of specific pconfig
  // leaves using cpuid, since that information is ignored while
  // detecting features using the "-march=native" flag.
  // For more info, see X86 ISA docs.
  Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
  Features["amx-bf16"] = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
  Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save;
  Features["amx-tile"] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
  Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
  // Leaf 7, subleaf 1: more feature flags in EAX.
  bool HasLeaf7Subleaf1 =
      MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
  Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
  Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
  Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);

  // Leaf 0xd, subleaf 1: XSAVE extensions in EAX.
  bool HasLeafD = MaxLevel >= 0xd &&
                  !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);

  // Only enable XSAVE if OS has enabled support for saving YMM state.
  Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
  Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
  Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;

  // Leaf 0x14, subleaf 0: ptwrite is reported in EBX bit 4.
  bool HasLeaf14 = MaxLevel >= 0x14 &&
                   !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);

  Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);

  // Leaf 0x19: widekl is reported in EBX bit 2 and additionally requires
  // leaf 7 to be present.
  bool HasLeaf19 =
      MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
  Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);

  return true;
}
1830 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
getHostCPUFeatures(StringMap<bool> & Features)1831 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1832 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1833 if (!P)
1834 return false;
1835
1836 SmallVector<StringRef, 32> Lines;
1837 P->getBuffer().split(Lines, "\n");
1838
1839 SmallVector<StringRef, 32> CPUFeatures;
1840
1841 // Look for the CPU features.
1842 for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1843 if (Lines[I].startswith("Features")) {
1844 Lines[I].split(CPUFeatures, ' ');
1845 break;
1846 }
1847
1848 #if defined(__aarch64__)
1849 // Keep track of which crypto features we have seen
1850 enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
1851 uint32_t crypto = 0;
1852 #endif
1853
1854 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
1855 StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
1856 #if defined(__aarch64__)
1857 .Case("asimd", "neon")
1858 .Case("fp", "fp-armv8")
1859 .Case("crc32", "crc")
1860 .Case("atomics", "lse")
1861 .Case("sve", "sve")
1862 .Case("sve2", "sve2")
1863 #else
1864 .Case("half", "fp16")
1865 .Case("neon", "neon")
1866 .Case("vfpv3", "vfp3")
1867 .Case("vfpv3d16", "d16")
1868 .Case("vfpv4", "vfp4")
1869 .Case("idiva", "hwdiv-arm")
1870 .Case("idivt", "hwdiv")
1871 #endif
1872 .Default("");
1873
1874 #if defined(__aarch64__)
1875 // We need to check crypto separately since we need all of the crypto
1876 // extensions to enable the subtarget feature
1877 if (CPUFeatures[I] == "aes")
1878 crypto |= CAP_AES;
1879 else if (CPUFeatures[I] == "pmull")
1880 crypto |= CAP_PMULL;
1881 else if (CPUFeatures[I] == "sha1")
1882 crypto |= CAP_SHA1;
1883 else if (CPUFeatures[I] == "sha2")
1884 crypto |= CAP_SHA2;
1885 #endif
1886
1887 if (LLVMFeatureStr != "")
1888 Features[LLVMFeatureStr] = true;
1889 }
1890
1891 #if defined(__aarch64__)
1892 // If we have all crypto bits we can add the feature
1893 if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
1894 Features["crypto"] = true;
1895 #endif
1896
1897 return true;
1898 }
1899 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
getHostCPUFeatures(StringMap<bool> & Features)1900 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1901 if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
1902 Features["neon"] = true;
1903 if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
1904 Features["crc"] = true;
1905 if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
1906 Features["crypto"] = true;
1907
1908 return true;
1909 }
1910 #else
getHostCPUFeatures(StringMap<bool> & Features)1911 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
1912 #endif
1913
getProcessTriple()1914 std::string sys::getProcessTriple() {
1915 std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
1916 Triple PT(Triple::normalize(TargetTripleString));
1917
1918 if (sizeof(void *) == 8 && PT.isArch32Bit())
1919 PT = PT.get64BitArchVariant();
1920 if (sizeof(void *) == 4 && PT.isArch64Bit())
1921 PT = PT.get32BitArchVariant();
1922
1923 return PT.str();
1924 }
1925