1 //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the operating system Host concept. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Support/Host.h" 14 #include "llvm/ADT/SmallSet.h" 15 #include "llvm/ADT/SmallVector.h" 16 #include "llvm/ADT/StringMap.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/ADT/StringSwitch.h" 19 #include "llvm/ADT/Triple.h" 20 #include "llvm/Config/llvm-config.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/FileSystem.h" 23 #include "llvm/Support/MemoryBuffer.h" 24 #include "llvm/Support/X86TargetParser.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include <assert.h> 27 #include <string.h> 28 29 // Include the platform-specific parts of this class. 30 #ifdef LLVM_ON_UNIX 31 #include "Unix/Host.inc" 32 #include <sched.h> 33 #endif 34 #ifdef _WIN32 35 #include "Windows/Host.inc" 36 #endif 37 #ifdef _MSC_VER 38 #include <intrin.h> 39 #endif 40 #if defined(__APPLE__) && (!defined(__x86_64__)) 41 #include <mach/host_info.h> 42 #include <mach/mach.h> 43 #include <mach/mach_host.h> 44 #include <mach/machine.h> 45 #endif 46 47 #define DEBUG_TYPE "host-detection" 48 49 //===----------------------------------------------------------------------===// 50 // 51 // Implementations of the CPU detection routines 52 // 53 //===----------------------------------------------------------------------===// 54 55 using namespace llvm; 56 57 static std::unique_ptr<llvm::MemoryBuffer> 58 LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() { 59 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 60 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); 61 if (std::error_code EC = Text.getError()) { 62 llvm::errs() << "Can't read " 63 << "/proc/cpuinfo: " << EC.message() << "\n"; 64 return nullptr; 65 } 66 return std::move(*Text); 67 } 68 69 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) { 70 // Access to the Processor Version Register (PVR) on PowerPC is privileged, 71 // and so we must use an operating-system interface to determine the current 72 // processor type. On Linux, this is exposed through the /proc/cpuinfo file. 73 const char *generic = "generic"; 74 75 // The cpu line is second (after the 'processor: 0' line), so if this 76 // buffer is too small then something has changed (or is wrong). 77 StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin(); 78 StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end(); 79 80 StringRef::const_iterator CIP = CPUInfoStart; 81 82 StringRef::const_iterator CPUStart = 0; 83 size_t CPULen = 0; 84 85 // We need to find the first line which starts with cpu, spaces, and a colon. 86 // After the colon, there may be some additional spaces and then the cpu type. 87 while (CIP < CPUInfoEnd && CPUStart == 0) { 88 if (CIP < CPUInfoEnd && *CIP == '\n') 89 ++CIP; 90 91 if (CIP < CPUInfoEnd && *CIP == 'c') { 92 ++CIP; 93 if (CIP < CPUInfoEnd && *CIP == 'p') { 94 ++CIP; 95 if (CIP < CPUInfoEnd && *CIP == 'u') { 96 ++CIP; 97 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 98 ++CIP; 99 100 if (CIP < CPUInfoEnd && *CIP == ':') { 101 ++CIP; 102 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 103 ++CIP; 104 105 if (CIP < CPUInfoEnd) { 106 CPUStart = CIP; 107 while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' && 108 *CIP != ',' && *CIP != '\n')) 109 ++CIP; 110 CPULen = CIP - CPUStart; 111 } 112 } 113 } 114 } 115 } 116 117 if (CPUStart == 0) 118 while (CIP < CPUInfoEnd && *CIP != '\n') 119 ++CIP; 120 } 121 122 if (CPUStart == 0) 123 return generic; 124 125 return StringSwitch<const char *>(StringRef(CPUStart, CPULen)) 126 .Case("604e", "604e") 127 .Case("604", "604") 128 .Case("7400", "7400") 129 .Case("7410", "7400") 130 .Case("7447", "7400") 131 .Case("7455", "7450") 132 .Case("G4", "g4") 133 .Case("POWER4", "970") 134 .Case("PPC970FX", "970") 135 .Case("PPC970MP", "970") 136 .Case("G5", "g5") 137 .Case("POWER5", "g5") 138 .Case("A2", "a2") 139 .Case("POWER6", "pwr6") 140 .Case("POWER7", "pwr7") 141 .Case("POWER8", "pwr8") 142 .Case("POWER8E", "pwr8") 143 .Case("POWER8NVL", "pwr8") 144 .Case("POWER9", "pwr9") 145 .Case("POWER10", "pwr10") 146 // FIXME: If we get a simulator or machine with the capabilities of 147 // mcpu=future, we should revisit this and add the name reported by the 148 // simulator/machine. 149 .Default(generic); 150 } 151 152 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { 153 // The cpuid register on arm is not accessible from user space. On Linux, 154 // it is exposed through the /proc/cpuinfo file. 155 156 // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line 157 // in all cases. 158 SmallVector<StringRef, 32> Lines; 159 ProcCpuinfoContent.split(Lines, "\n"); 160 161 // Look for the CPU implementer line. 162 StringRef Implementer; 163 StringRef Hardware; 164 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 165 if (Lines[I].startswith("CPU implementer")) 166 Implementer = Lines[I].substr(15).ltrim("\t :"); 167 if (Lines[I].startswith("Hardware")) 168 Hardware = Lines[I].substr(8).ltrim("\t :"); 169 } 170 171 if (Implementer == "0x41") { // ARM Ltd. 172 // MSM8992/8994 may give cpu part for the core that the kernel is running on, 173 // which is undeterministic and wrong. Always return cortex-a53 for these SoC. 174 if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996")) 175 return "cortex-a53"; 176 177 178 // Look for the CPU part line. 179 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 180 if (Lines[I].startswith("CPU part")) 181 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 182 // values correspond to the "Part number" in the CP15/c0 register. The 183 // contents are specified in the various processor manuals. 184 // This corresponds to the Main ID Register in Technical Reference Manuals. 185 // and is used in programs like sys-utils 186 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 187 .Case("0x926", "arm926ej-s") 188 .Case("0xb02", "mpcore") 189 .Case("0xb36", "arm1136j-s") 190 .Case("0xb56", "arm1156t2-s") 191 .Case("0xb76", "arm1176jz-s") 192 .Case("0xc08", "cortex-a8") 193 .Case("0xc09", "cortex-a9") 194 .Case("0xc0f", "cortex-a15") 195 .Case("0xc20", "cortex-m0") 196 .Case("0xc23", "cortex-m3") 197 .Case("0xc24", "cortex-m4") 198 .Case("0xd22", "cortex-m55") 199 .Case("0xd02", "cortex-a34") 200 .Case("0xd04", "cortex-a35") 201 .Case("0xd03", "cortex-a53") 202 .Case("0xd07", "cortex-a57") 203 .Case("0xd08", "cortex-a72") 204 .Case("0xd09", "cortex-a73") 205 .Case("0xd0a", "cortex-a75") 206 .Case("0xd0b", "cortex-a76") 207 .Case("0xd0d", "cortex-a77") 208 .Case("0xd41", "cortex-a78") 209 .Case("0xd44", "cortex-x1") 210 .Case("0xd0c", "neoverse-n1") 211 .Default("generic"); 212 } 213 214 if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium. 215 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 216 if (Lines[I].startswith("CPU part")) { 217 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 218 .Case("0x516", "thunderx2t99") 219 .Case("0x0516", "thunderx2t99") 220 .Case("0xaf", "thunderx2t99") 221 .Case("0x0af", "thunderx2t99") 222 .Case("0xa1", "thunderxt88") 223 .Case("0x0a1", "thunderxt88") 224 .Default("generic"); 225 } 226 } 227 } 228 229 if (Implementer == "0x46") { // Fujitsu Ltd. 230 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 231 if (Lines[I].startswith("CPU part")) { 232 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 233 .Case("0x001", "a64fx") 234 .Default("generic"); 235 } 236 } 237 } 238 239 if (Implementer == "0x4e") { // NVIDIA Corporation 240 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 241 if (Lines[I].startswith("CPU part")) { 242 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 243 .Case("0x004", "carmel") 244 .Default("generic"); 245 } 246 } 247 } 248 249 if (Implementer == "0x48") // HiSilicon Technologies, Inc. 250 // Look for the CPU part line. 251 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 252 if (Lines[I].startswith("CPU part")) 253 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 254 // values correspond to the "Part number" in the CP15/c0 register. The 255 // contents are specified in the various processor manuals. 256 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 257 .Case("0xd01", "tsv110") 258 .Default("generic"); 259 260 if (Implementer == "0x51") // Qualcomm Technologies, Inc. 261 // Look for the CPU part line. 262 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 263 if (Lines[I].startswith("CPU part")) 264 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 265 // values correspond to the "Part number" in the CP15/c0 register. The 266 // contents are specified in the various processor manuals. 267 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 268 .Case("0x06f", "krait") // APQ8064 269 .Case("0x201", "kryo") 270 .Case("0x205", "kryo") 271 .Case("0x211", "kryo") 272 .Case("0x800", "cortex-a73") 273 .Case("0x801", "cortex-a73") 274 .Case("0x802", "cortex-a73") 275 .Case("0x803", "cortex-a73") 276 .Case("0x804", "cortex-a73") 277 .Case("0x805", "cortex-a73") 278 .Case("0xc00", "falkor") 279 .Case("0xc01", "saphira") 280 .Default("generic"); 281 282 if (Implementer == "0x53") { // Samsung Electronics Co., Ltd. 283 // The Exynos chips have a convoluted ID scheme that doesn't seem to follow 284 // any predictive pattern across variants and parts. 285 unsigned Variant = 0, Part = 0; 286 287 // Look for the CPU variant line, whose value is a 1 digit hexadecimal 288 // number, corresponding to the Variant bits in the CP15/C0 register. 289 for (auto I : Lines) 290 if (I.consume_front("CPU variant")) 291 I.ltrim("\t :").getAsInteger(0, Variant); 292 293 // Look for the CPU part line, whose value is a 3 digit hexadecimal 294 // number, corresponding to the PartNum bits in the CP15/C0 register. 295 for (auto I : Lines) 296 if (I.consume_front("CPU part")) 297 I.ltrim("\t :").getAsInteger(0, Part); 298 299 unsigned Exynos = (Variant << 12) | Part; 300 switch (Exynos) { 301 default: 302 // Default by falling through to Exynos M3. 303 LLVM_FALLTHROUGH; 304 case 0x1002: 305 return "exynos-m3"; 306 case 0x1003: 307 return "exynos-m4"; 308 } 309 } 310 311 return "generic"; 312 } 313 314 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) { 315 // STIDP is a privileged operation, so use /proc/cpuinfo instead. 316 317 // The "processor 0:" line comes after a fair amount of other information, 318 // including a cache breakdown, but this should be plenty. 319 SmallVector<StringRef, 32> Lines; 320 ProcCpuinfoContent.split(Lines, "\n"); 321 322 // Look for the CPU features. 323 SmallVector<StringRef, 32> CPUFeatures; 324 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 325 if (Lines[I].startswith("features")) { 326 size_t Pos = Lines[I].find(":"); 327 if (Pos != StringRef::npos) { 328 Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' '); 329 break; 330 } 331 } 332 333 // We need to check for the presence of vector support independently of 334 // the machine type, since we may only use the vector register set when 335 // supported by the kernel (and hypervisor). 336 bool HaveVectorSupport = false; 337 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 338 if (CPUFeatures[I] == "vx") 339 HaveVectorSupport = true; 340 } 341 342 // Now check the processor machine type. 343 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 344 if (Lines[I].startswith("processor ")) { 345 size_t Pos = Lines[I].find("machine = "); 346 if (Pos != StringRef::npos) { 347 Pos += sizeof("machine = ") - 1; 348 unsigned int Id; 349 if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) { 350 if (Id >= 8561 && HaveVectorSupport) 351 return "z15"; 352 if (Id >= 3906 && HaveVectorSupport) 353 return "z14"; 354 if (Id >= 2964 && HaveVectorSupport) 355 return "z13"; 356 if (Id >= 2827) 357 return "zEC12"; 358 if (Id >= 2817) 359 return "z196"; 360 } 361 } 362 break; 363 } 364 } 365 366 return "generic"; 367 } 368 369 StringRef sys::detail::getHostCPUNameForBPF() { 370 #if !defined(__linux__) || !defined(__x86_64__) 371 return "generic"; 372 #else 373 uint8_t v3_insns[40] __attribute__ ((aligned (8))) = 374 /* BPF_MOV64_IMM(BPF_REG_0, 0) */ 375 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 376 /* BPF_MOV64_IMM(BPF_REG_2, 1) */ 377 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 378 /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ 379 0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 380 /* BPF_MOV64_IMM(BPF_REG_0, 1) */ 381 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 382 /* BPF_EXIT_INSN() */ 383 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 384 385 uint8_t v2_insns[40] __attribute__ ((aligned (8))) = 386 /* BPF_MOV64_IMM(BPF_REG_0, 0) */ 387 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 388 /* BPF_MOV64_IMM(BPF_REG_2, 1) */ 389 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 390 /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ 391 0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 392 /* BPF_MOV64_IMM(BPF_REG_0, 1) */ 393 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 394 /* BPF_EXIT_INSN() */ 395 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 396 397 struct bpf_prog_load_attr { 398 uint32_t prog_type; 399 uint32_t insn_cnt; 400 uint64_t insns; 401 uint64_t license; 402 uint32_t log_level; 403 uint32_t log_size; 404 uint64_t log_buf; 405 uint32_t kern_version; 406 uint32_t prog_flags; 407 } attr = {}; 408 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ 409 attr.insn_cnt = 5; 410 attr.insns = (uint64_t)v3_insns; 411 attr.license = (uint64_t)"DUMMY"; 412 413 int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, 414 sizeof(attr)); 415 if (fd >= 0) { 416 close(fd); 417 return "v3"; 418 } 419 420 /* Clear the whole attr in case its content changed by syscall. */ 421 memset(&attr, 0, sizeof(attr)); 422 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ 423 attr.insn_cnt = 5; 424 attr.insns = (uint64_t)v2_insns; 425 attr.license = (uint64_t)"DUMMY"; 426 fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr)); 427 if (fd >= 0) { 428 close(fd); 429 return "v2"; 430 } 431 return "v1"; 432 #endif 433 } 434 435 #if defined(__i386__) || defined(_M_IX86) || \ 436 defined(__x86_64__) || defined(_M_X64) 437 438 enum VendorSignatures { 439 SIG_INTEL = 0x756e6547 /* Genu */, 440 SIG_AMD = 0x68747541 /* Auth */ 441 }; 442 443 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). 444 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID 445 // support. Consequently, for i386, the presence of CPUID is checked first 446 // via the corresponding eflags bit. 447 // Removal of cpuid.h header motivated by PR30384 448 // Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp 449 // or test-suite, but are used in external projects e.g. libstdcxx 450 static bool isCpuIdSupported() { 451 #if defined(__GNUC__) || defined(__clang__) 452 #if defined(__i386__) 453 int __cpuid_supported; 454 __asm__(" pushfl\n" 455 " popl %%eax\n" 456 " movl %%eax,%%ecx\n" 457 " xorl $0x00200000,%%eax\n" 458 " pushl %%eax\n" 459 " popfl\n" 460 " pushfl\n" 461 " popl %%eax\n" 462 " movl $0,%0\n" 463 " cmpl %%eax,%%ecx\n" 464 " je 1f\n" 465 " movl $1,%0\n" 466 "1:" 467 : "=r"(__cpuid_supported) 468 : 469 : "eax", "ecx"); 470 if (!__cpuid_supported) 471 return false; 472 #endif 473 return true; 474 #endif 475 return true; 476 } 477 478 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in 479 /// the specified arguments. If we can't run cpuid on the host, return true. 480 static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, 481 unsigned *rECX, unsigned *rEDX) { 482 #if defined(__GNUC__) || defined(__clang__) 483 #if defined(__x86_64__) 484 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 485 // FIXME: should we save this for Clang? 486 __asm__("movq\t%%rbx, %%rsi\n\t" 487 "cpuid\n\t" 488 "xchgq\t%%rbx, %%rsi\n\t" 489 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 490 : "a"(value)); 491 return false; 492 #elif defined(__i386__) 493 __asm__("movl\t%%ebx, %%esi\n\t" 494 "cpuid\n\t" 495 "xchgl\t%%ebx, %%esi\n\t" 496 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 497 : "a"(value)); 498 return false; 499 #else 500 return true; 501 #endif 502 #elif defined(_MSC_VER) 503 // The MSVC intrinsic is portable across x86 and x64. 504 int registers[4]; 505 __cpuid(registers, value); 506 *rEAX = registers[0]; 507 *rEBX = registers[1]; 508 *rECX = registers[2]; 509 *rEDX = registers[3]; 510 return false; 511 #else 512 return true; 513 #endif 514 } 515 516 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return 517 /// the 4 values in the specified arguments. If we can't run cpuid on the host, 518 /// return true. 519 static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, 520 unsigned *rEAX, unsigned *rEBX, unsigned *rECX, 521 unsigned *rEDX) { 522 #if defined(__GNUC__) || defined(__clang__) 523 #if defined(__x86_64__) 524 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 525 // FIXME: should we save this for Clang? 526 __asm__("movq\t%%rbx, %%rsi\n\t" 527 "cpuid\n\t" 528 "xchgq\t%%rbx, %%rsi\n\t" 529 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 530 : "a"(value), "c"(subleaf)); 531 return false; 532 #elif defined(__i386__) 533 __asm__("movl\t%%ebx, %%esi\n\t" 534 "cpuid\n\t" 535 "xchgl\t%%ebx, %%esi\n\t" 536 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 537 : "a"(value), "c"(subleaf)); 538 return false; 539 #else 540 return true; 541 #endif 542 #elif defined(_MSC_VER) 543 int registers[4]; 544 __cpuidex(registers, value, subleaf); 545 *rEAX = registers[0]; 546 *rEBX = registers[1]; 547 *rECX = registers[2]; 548 *rEDX = registers[3]; 549 return false; 550 #else 551 return true; 552 #endif 553 } 554 555 // Read control register 0 (XCR0). Used to detect features such as AVX. 556 static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { 557 #if defined(__GNUC__) || defined(__clang__) 558 // Check xgetbv; this uses a .byte sequence instead of the instruction 559 // directly because older assemblers do not include support for xgetbv and 560 // there is no easy way to conditionally compile based on the assembler used. 561 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); 562 return false; 563 #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) 564 unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); 565 *rEAX = Result; 566 *rEDX = Result >> 32; 567 return false; 568 #else 569 return true; 570 #endif 571 } 572 573 static void detectX86FamilyModel(unsigned EAX, unsigned *Family, 574 unsigned *Model) { 575 *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 576 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 577 if (*Family == 6 || *Family == 0xf) { 578 if (*Family == 0xf) 579 // Examine extended family ID if family ID is F. 580 *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 581 // Examine extended model ID if family ID is 6 or F. 582 *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 583 } 584 } 585 586 static StringRef 587 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, 588 const unsigned *Features, 589 unsigned *Type, unsigned *Subtype) { 590 auto testFeature = [&](unsigned F) { 591 return (Features[F / 32] & (1U << (F % 32))) != 0; 592 }; 593 594 StringRef CPU; 595 596 switch (Family) { 597 case 3: 598 CPU = "i386"; 599 break; 600 case 4: 601 CPU = "i486"; 602 break; 603 case 5: 604 if (testFeature(X86::FEATURE_MMX)) { 605 CPU = "pentium-mmx"; 606 break; 607 } 608 CPU = "pentium"; 609 break; 610 case 6: 611 switch (Model) { 612 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile 613 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad 614 // mobile processor, Intel Core 2 Extreme processor, Intel 615 // Pentium Dual-Core processor, Intel Xeon processor, model 616 // 0Fh. All processors are manufactured using the 65 nm process. 617 case 0x16: // Intel Celeron processor model 16h. All processors are 618 // manufactured using the 65 nm process 619 CPU = "core2"; 620 *Type = X86::INTEL_CORE2; 621 break; 622 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model 623 // 17h. All processors are manufactured using the 45 nm process. 624 // 625 // 45nm: Penryn , Wolfdale, Yorkfield (XE) 626 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using 627 // the 45 nm process. 628 CPU = "penryn"; 629 *Type = X86::INTEL_CORE2; 630 break; 631 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All 632 // processors are manufactured using the 45 nm process. 633 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. 634 // As found in a Summer 2010 model iMac. 635 case 0x1f: 636 case 0x2e: // Nehalem EX 637 CPU = "nehalem"; 638 *Type = X86::INTEL_COREI7; 639 *Subtype = X86::INTEL_COREI7_NEHALEM; 640 break; 641 case 0x25: // Intel Core i7, laptop version. 642 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All 643 // processors are manufactured using the 32 nm process. 644 case 0x2f: // Westmere EX 645 CPU = "westmere"; 646 *Type = X86::INTEL_COREI7; 647 *Subtype = X86::INTEL_COREI7_WESTMERE; 648 break; 649 case 0x2a: // Intel Core i7 processor. All processors are manufactured 650 // using the 32 nm process. 651 case 0x2d: 652 CPU = "sandybridge"; 653 *Type = X86::INTEL_COREI7; 654 *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; 655 break; 656 case 0x3a: 657 case 0x3e: // Ivy Bridge EP 658 CPU = "ivybridge"; 659 *Type = X86::INTEL_COREI7; 660 *Subtype = X86::INTEL_COREI7_IVYBRIDGE; 661 break; 662 663 // Haswell: 664 case 0x3c: 665 case 0x3f: 666 case 0x45: 667 case 0x46: 668 CPU = "haswell"; 669 *Type = X86::INTEL_COREI7; 670 *Subtype = X86::INTEL_COREI7_HASWELL; 671 break; 672 673 // Broadwell: 674 case 0x3d: 675 case 0x47: 676 case 0x4f: 677 case 0x56: 678 CPU = "broadwell"; 679 *Type = X86::INTEL_COREI7; 680 *Subtype = X86::INTEL_COREI7_BROADWELL; 681 break; 682 683 // Skylake: 684 case 0x4e: // Skylake mobile 685 case 0x5e: // Skylake desktop 686 case 0x8e: // Kaby Lake mobile 687 case 0x9e: // Kaby Lake desktop 688 case 0xa5: // Comet Lake-H/S 689 case 0xa6: // Comet Lake-U 690 CPU = "skylake"; 691 *Type = X86::INTEL_COREI7; 692 *Subtype = X86::INTEL_COREI7_SKYLAKE; 693 break; 694 695 // Skylake Xeon: 696 case 0x55: 697 *Type = X86::INTEL_COREI7; 698 if (testFeature(X86::FEATURE_AVX512BF16)) { 699 CPU = "cooperlake"; 700 *Subtype = X86::INTEL_COREI7_COOPERLAKE; 701 } else if (testFeature(X86::FEATURE_AVX512VNNI)) { 702 CPU = "cascadelake"; 703 *Subtype = X86::INTEL_COREI7_CASCADELAKE; 704 } else { 705 CPU = "skylake-avx512"; 706 *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; 707 } 708 break; 709 710 // Cannonlake: 711 case 0x66: 712 CPU = "cannonlake"; 713 *Type = X86::INTEL_COREI7; 714 *Subtype = X86::INTEL_COREI7_CANNONLAKE; 715 break; 716 717 // Icelake: 718 case 0x7d: 719 case 0x7e: 720 CPU = "icelake-client"; 721 *Type = X86::INTEL_COREI7; 722 *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; 723 break; 724 725 // Icelake Xeon: 726 case 0x6a: 727 case 0x6c: 728 CPU = "icelake-server"; 729 *Type = X86::INTEL_COREI7; 730 *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER; 731 break; 732 733 case 0x1c: // Most 45 nm Intel Atom processors 734 case 0x26: // 45 nm Atom Lincroft 735 case 0x27: // 32 nm Atom Medfield 736 case 0x35: // 32 nm Atom Midview 737 case 0x36: // 32 nm Atom Midview 738 CPU = "bonnell"; 739 *Type = X86::INTEL_BONNELL; 740 break; 741 742 // Atom Silvermont codes from the Intel software optimization guide. 743 case 0x37: 744 case 0x4a: 745 case 0x4d: 746 case 0x5a: 747 case 0x5d: 748 case 0x4c: // really airmont 749 CPU = "silvermont"; 750 *Type = X86::INTEL_SILVERMONT; 751 break; 752 // Goldmont: 753 case 0x5c: // Apollo Lake 754 case 0x5f: // Denverton 755 CPU = "goldmont"; 756 *Type = X86::INTEL_GOLDMONT; 757 break; 758 case 0x7a: 759 CPU = "goldmont-plus"; 760 *Type = X86::INTEL_GOLDMONT_PLUS; 761 break; 762 case 0x86: 763 *Type = X86::INTEL_TREMONT; 764 break; 765 766 case 0x57: 767 CPU = "tremont"; 768 *Type = X86::INTEL_KNL; 769 break; 770 771 case 0x85: 772 CPU = "knm"; 773 *Type = X86::INTEL_KNM; 774 break; 775 776 default: // Unknown family 6 CPU, try to guess. 777 // Don't both with Type/Subtype here, they aren't used by the caller. 778 // They're used above to keep the code in sync with compiler-rt. 779 // TODO detect tigerlake host from model 780 if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) { 781 CPU = "tigerlake"; 782 } else if (testFeature(X86::FEATURE_AVX512VBMI2)) { 783 CPU = "icelake-client"; 784 } else if (testFeature(X86::FEATURE_AVX512VBMI)) { 785 CPU = "cannonlake"; 786 } else if (testFeature(X86::FEATURE_AVX512BF16)) { 787 CPU = "cooperlake"; 788 } else if (testFeature(X86::FEATURE_AVX512VNNI)) { 789 CPU = "cascadelake"; 790 } else if (testFeature(X86::FEATURE_AVX512VL)) { 791 CPU = "skylake-avx512"; 792 } else if (testFeature(X86::FEATURE_AVX512ER)) { 793 CPU = "knl"; 794 } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) { 795 if (testFeature(X86::FEATURE_SHA)) 796 CPU = "goldmont"; 797 else 798 CPU = "skylake"; 799 } else if (testFeature(X86::FEATURE_ADX)) { 800 CPU = "broadwell"; 801 } else if (testFeature(X86::FEATURE_AVX2)) { 802 CPU = "haswell"; 803 } else if (testFeature(X86::FEATURE_AVX)) { 804 CPU = "sandybridge"; 805 } else if (testFeature(X86::FEATURE_SSE4_2)) { 806 if (testFeature(X86::FEATURE_MOVBE)) 807 CPU = "silvermont"; 808 else 809 CPU = "nehalem"; 810 } else if (testFeature(X86::FEATURE_SSE4_1)) { 811 CPU = "penryn"; 812 } else if (testFeature(X86::FEATURE_SSSE3)) { 813 if (testFeature(X86::FEATURE_MOVBE)) 814 CPU = "bonnell"; 815 else 816 CPU = "core2"; 817 } else if (testFeature(X86::FEATURE_64BIT)) { 818 CPU = "core2"; 819 } else if (testFeature(X86::FEATURE_SSE3)) { 820 CPU = "yonah"; 821 } else if (testFeature(X86::FEATURE_SSE2)) { 822 CPU = "pentium-m"; 823 } else if (testFeature(X86::FEATURE_SSE)) { 824 CPU = "pentium3"; 825 } else if (testFeature(X86::FEATURE_MMX)) { 826 CPU = "pentium2"; 827 } else { 828 CPU = "pentiumpro"; 829 } 830 break; 831 } 832 break; 833 case 15: { 834 if (testFeature(X86::FEATURE_64BIT)) { 835 CPU = "nocona"; 836 break; 837 } 838 if (testFeature(X86::FEATURE_SSE3)) { 839 CPU = "prescott"; 840 break; 841 } 842 CPU = "pentium4"; 843 break; 844 } 845 default: 846 break; // Unknown. 847 } 848 849 return CPU; 850 } 851 852 static StringRef 853 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, 854 const unsigned *Features, 855 unsigned *Type, unsigned *Subtype) { 856 auto testFeature = [&](unsigned F) { 857 return (Features[F / 32] & (1U << (F % 32))) != 0; 858 }; 859 860 StringRef CPU; 861 862 switch (Family) { 863 case 4: 864 CPU = "i486"; 865 break; 866 case 5: 867 CPU = "pentium"; 868 switch (Model) { 869 case 6: 870 case 7: 871 CPU = "k6"; 872 break; 873 case 8: 874 CPU = "k6-2"; 875 break; 876 case 9: 877 case 13: 878 CPU = "k6-3"; 879 break; 880 case 10: 881 CPU = "geode"; 882 break; 883 } 884 break; 885 case 6: 886 if (testFeature(X86::FEATURE_SSE)) { 887 CPU = "athlon-xp"; 888 break; 889 } 890 CPU = "athlon"; 891 break; 892 case 15: 893 if (testFeature(X86::FEATURE_SSE3)) { 894 CPU = "k8-sse3"; 895 break; 896 } 897 CPU = "k8"; 898 break; 899 case 16: 900 CPU = "amdfam10"; 901 *Type = X86::AMDFAM10H; // "amdfam10" 902 switch (Model) { 903 case 2: 904 *Subtype = X86::AMDFAM10H_BARCELONA; 905 break; 906 case 4: 907 *Subtype = X86::AMDFAM10H_SHANGHAI; 908 break; 909 case 8: 910 *Subtype = X86::AMDFAM10H_ISTANBUL; 911 break; 912 } 913 break; 914 case 20: 915 CPU = "btver1"; 916 *Type = X86::AMD_BTVER1; 917 break; 918 case 21: 919 CPU = "bdver1"; 920 *Type = X86::AMDFAM15H; 921 if (Model >= 0x60 && Model <= 0x7f) { 922 CPU = "bdver4"; 923 *Subtype = X86::AMDFAM15H_BDVER4; 924 break; // 60h-7Fh: Excavator 925 } 926 if (Model >= 0x30 && Model <= 0x3f) { 927 CPU = "bdver3"; 928 *Subtype = X86::AMDFAM15H_BDVER3; 929 break; // 30h-3Fh: Steamroller 930 } 931 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { 932 CPU = "bdver2"; 933 *Subtype = X86::AMDFAM15H_BDVER2; 934 break; // 02h, 10h-1Fh: Piledriver 935 } 936 if (Model <= 0x0f) { 937 *Subtype = X86::AMDFAM15H_BDVER1; 938 break; // 00h-0Fh: Bulldozer 939 } 940 break; 941 case 22: 942 CPU = "btver2"; 943 *Type = X86::AMD_BTVER2; 944 break; 945 case 23: 946 CPU = "znver1"; 947 *Type = X86::AMDFAM17H; 948 if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) { 949 CPU = "znver2"; 950 *Subtype = X86::AMDFAM17H_ZNVER2; 951 break; // 30h-3fh, 71h: Zen2 952 } 953 if (Model <= 0x0f) { 954 *Subtype = X86::AMDFAM17H_ZNVER1; 955 break; // 00h-0Fh: Zen1 956 } 957 break; 958 default: 959 break; // Unknown AMD CPU. 960 } 961 962 return CPU; 963 } 964 965 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, 966 unsigned *Features) { 967 unsigned EAX, EBX; 968 969 auto setFeature = [&](unsigned F) { 970 Features[F / 32] |= 1U << (F % 32); 971 }; 972 973 if ((EDX >> 15) & 1) 974 setFeature(X86::FEATURE_CMOV); 975 if ((EDX >> 23) & 1) 976 setFeature(X86::FEATURE_MMX); 977 if ((EDX >> 25) & 1) 978 setFeature(X86::FEATURE_SSE); 979 if ((EDX >> 26) & 1) 980 setFeature(X86::FEATURE_SSE2); 981 982 if ((ECX >> 0) & 1) 983 setFeature(X86::FEATURE_SSE3); 984 if ((ECX >> 1) & 1) 985 setFeature(X86::FEATURE_PCLMUL); 986 if ((ECX >> 9) & 1) 987 setFeature(X86::FEATURE_SSSE3); 988 if ((ECX >> 12) & 1) 989 setFeature(X86::FEATURE_FMA); 990 if ((ECX >> 19) & 1) 991 setFeature(X86::FEATURE_SSE4_1); 992 if ((ECX >> 20) & 1) 993 setFeature(X86::FEATURE_SSE4_2); 994 if ((ECX >> 23) & 1) 995 setFeature(X86::FEATURE_POPCNT); 996 if ((ECX >> 25) & 1) 997 setFeature(X86::FEATURE_AES); 998 999 if ((ECX >> 22) & 1) 1000 setFeature(X86::FEATURE_MOVBE); 1001 1002 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 1003 // indicates that the AVX registers will be saved and restored on context 1004 // switch, then we have full AVX support. 1005 const unsigned AVXBits = (1 << 27) | (1 << 28); 1006 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && 1007 ((EAX & 0x6) == 0x6); 1008 #if defined(__APPLE__) 1009 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 1010 // save the AVX512 context if we use AVX512 instructions, even the bit is not 1011 // set right now. 1012 bool HasAVX512Save = true; 1013 #else 1014 // AVX512 requires additional context to be saved by the OS. 1015 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); 1016 #endif 1017 1018 if (HasAVX) 1019 setFeature(X86::FEATURE_AVX); 1020 1021 bool HasLeaf7 = 1022 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1023 1024 if (HasLeaf7 && ((EBX >> 3) & 1)) 1025 setFeature(X86::FEATURE_BMI); 1026 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) 1027 setFeature(X86::FEATURE_AVX2); 1028 if (HasLeaf7 && ((EBX >> 8) & 1)) 1029 setFeature(X86::FEATURE_BMI2); 1030 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) 1031 setFeature(X86::FEATURE_AVX512F); 1032 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) 1033 setFeature(X86::FEATURE_AVX512DQ); 1034 if (HasLeaf7 && ((EBX >> 19) & 1)) 1035 setFeature(X86::FEATURE_ADX); 1036 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) 1037 setFeature(X86::FEATURE_AVX512IFMA); 1038 if (HasLeaf7 && ((EBX >> 23) & 1)) 1039 setFeature(X86::FEATURE_CLFLUSHOPT); 1040 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) 1041 setFeature(X86::FEATURE_AVX512PF); 1042 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) 1043 setFeature(X86::FEATURE_AVX512ER); 1044 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) 1045 setFeature(X86::FEATURE_AVX512CD); 1046 if (HasLeaf7 && ((EBX >> 29) & 1)) 1047 setFeature(X86::FEATURE_SHA); 1048 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) 1049 setFeature(X86::FEATURE_AVX512BW); 1050 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) 1051 setFeature(X86::FEATURE_AVX512VL); 1052 1053 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) 1054 setFeature(X86::FEATURE_AVX512VBMI); 1055 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) 1056 setFeature(X86::FEATURE_AVX512VBMI2); 1057 if (HasLeaf7 && ((ECX >> 8) & 1)) 1058 setFeature(X86::FEATURE_GFNI); 1059 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) 1060 setFeature(X86::FEATURE_VPCLMULQDQ); 1061 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) 1062 setFeature(X86::FEATURE_AVX512VNNI); 1063 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save) 1064 setFeature(X86::FEATURE_AVX512BITALG); 1065 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) 1066 setFeature(X86::FEATURE_AVX512VPOPCNTDQ); 1067 1068 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) 1069 setFeature(X86::FEATURE_AVX5124VNNIW); 1070 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) 1071 setFeature(X86::FEATURE_AVX5124FMAPS); 1072 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) 1073 setFeature(X86::FEATURE_AVX512VP2INTERSECT); 1074 1075 bool HasLeaf7Subleaf1 = 1076 MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 1077 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) 1078 setFeature(X86::FEATURE_AVX512BF16); 1079 1080 unsigned MaxExtLevel; 1081 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1082 1083 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1084 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1085 if (HasExtLeaf1 && ((ECX >> 6) & 1)) 1086 setFeature(X86::FEATURE_SSE4_A); 1087 if (HasExtLeaf1 && ((ECX >> 11) & 1)) 1088 setFeature(X86::FEATURE_XOP); 1089 if (HasExtLeaf1 && ((ECX >> 16) & 1)) 1090 setFeature(X86::FEATURE_FMA4); 1091 1092 if (HasExtLeaf1 && ((EDX >> 29) & 1)) 1093 setFeature(X86::FEATURE_64BIT); 1094 } 1095 1096 StringRef sys::getHostCPUName() { 1097 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1098 unsigned MaxLeaf, Vendor; 1099 1100 if (!isCpuIdSupported()) 1101 return "generic"; 1102 1103 if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) 1104 return "generic"; 1105 getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); 1106 1107 unsigned Family = 0, Model = 0; 1108 unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0}; 1109 detectX86FamilyModel(EAX, &Family, &Model); 1110 getAvailableFeatures(ECX, EDX, MaxLeaf, Features); 1111 1112 // These aren't consumed in this file, but we try to keep some source code the 1113 // same or similar to compiler-rt. 1114 unsigned Type = 0; 1115 unsigned Subtype = 0; 1116 1117 StringRef CPU; 1118 1119 if (Vendor == SIG_INTEL) { 1120 CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type, 1121 &Subtype); 1122 } else if (Vendor == SIG_AMD) { 1123 CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, 1124 &Subtype); 1125 } 1126 1127 if (!CPU.empty()) 1128 return CPU; 1129 1130 return "generic"; 1131 } 1132 1133 #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) 1134 StringRef sys::getHostCPUName() { 1135 host_basic_info_data_t hostInfo; 1136 mach_msg_type_number_t infoCount; 1137 1138 infoCount = HOST_BASIC_INFO_COUNT; 1139 mach_port_t hostPort = mach_host_self(); 1140 host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, 1141 &infoCount); 1142 mach_port_deallocate(mach_task_self(), hostPort); 1143 1144 if (hostInfo.cpu_type != CPU_TYPE_POWERPC) 1145 return "generic"; 1146 1147 switch (hostInfo.cpu_subtype) { 1148 case CPU_SUBTYPE_POWERPC_601: 1149 return "601"; 1150 case CPU_SUBTYPE_POWERPC_602: 1151 return "602"; 1152 case CPU_SUBTYPE_POWERPC_603: 1153 return "603"; 1154 case CPU_SUBTYPE_POWERPC_603e: 1155 return "603e"; 1156 case CPU_SUBTYPE_POWERPC_603ev: 1157 return "603ev"; 1158 case CPU_SUBTYPE_POWERPC_604: 1159 return "604"; 1160 case CPU_SUBTYPE_POWERPC_604e: 1161 return "604e"; 1162 case CPU_SUBTYPE_POWERPC_620: 1163 return "620"; 1164 case CPU_SUBTYPE_POWERPC_750: 1165 return "750"; 1166 case CPU_SUBTYPE_POWERPC_7400: 1167 return "7400"; 1168 case CPU_SUBTYPE_POWERPC_7450: 1169 return "7450"; 1170 case CPU_SUBTYPE_POWERPC_970: 1171 return "970"; 1172 default:; 1173 } 1174 1175 return "generic"; 1176 } 1177 #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__)) 1178 StringRef sys::getHostCPUName() { 1179 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1180 StringRef Content = P ? P->getBuffer() : ""; 1181 return detail::getHostCPUNameForPowerPC(Content); 1182 } 1183 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1184 StringRef sys::getHostCPUName() { 1185 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1186 StringRef Content = P ? P->getBuffer() : ""; 1187 return detail::getHostCPUNameForARM(Content); 1188 } 1189 #elif defined(__linux__) && defined(__s390x__) 1190 StringRef sys::getHostCPUName() { 1191 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1192 StringRef Content = P ? P->getBuffer() : ""; 1193 return detail::getHostCPUNameForS390x(Content); 1194 } 1195 #elif defined(__APPLE__) && defined(__aarch64__) 1196 StringRef sys::getHostCPUName() { 1197 return "cyclone"; 1198 } 1199 #elif defined(__APPLE__) && defined(__arm__) 1200 StringRef sys::getHostCPUName() { 1201 host_basic_info_data_t hostInfo; 1202 mach_msg_type_number_t infoCount; 1203 1204 infoCount = HOST_BASIC_INFO_COUNT; 1205 mach_port_t hostPort = mach_host_self(); 1206 host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, 1207 &infoCount); 1208 mach_port_deallocate(mach_task_self(), hostPort); 1209 1210 if (hostInfo.cpu_type != CPU_TYPE_ARM) { 1211 assert(false && "CPUType not equal to ARM should not be possible on ARM"); 1212 return "generic"; 1213 } 1214 switch (hostInfo.cpu_subtype) { 1215 case CPU_SUBTYPE_ARM_V7S: 1216 return "swift"; 1217 default:; 1218 } 1219 1220 return "generic"; 1221 } 1222 #else 1223 StringRef sys::getHostCPUName() { return "generic"; } 1224 #endif 1225 1226 #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__)) 1227 // On Linux, the number of physical cores can be computed from /proc/cpuinfo, 1228 // using the number of unique physical/core id pairs. The following 1229 // implementation reads the /proc/cpuinfo format on an x86_64 system. 1230 int computeHostNumPhysicalCores() { 1231 // Enabled represents the number of physical id/core id pairs with at least 1232 // one processor id enabled by the CPU affinity mask. 1233 cpu_set_t Affinity, Enabled; 1234 if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0) 1235 return -1; 1236 CPU_ZERO(&Enabled); 1237 1238 // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be 1239 // mmapped because it appears to have 0 size. 1240 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 1241 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); 1242 if (std::error_code EC = Text.getError()) { 1243 llvm::errs() << "Can't read " 1244 << "/proc/cpuinfo: " << EC.message() << "\n"; 1245 return -1; 1246 } 1247 SmallVector<StringRef, 8> strs; 1248 (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1, 1249 /*KeepEmpty=*/false); 1250 int CurProcessor = -1; 1251 int CurPhysicalId = -1; 1252 int CurSiblings = -1; 1253 int CurCoreId = -1; 1254 for (StringRef Line : strs) { 1255 std::pair<StringRef, StringRef> Data = Line.split(':'); 1256 auto Name = Data.first.trim(); 1257 auto Val = Data.second.trim(); 1258 // These fields are available if the kernel is configured with CONFIG_SMP. 1259 if (Name == "processor") 1260 Val.getAsInteger(10, CurProcessor); 1261 else if (Name == "physical id") 1262 Val.getAsInteger(10, CurPhysicalId); 1263 else if (Name == "siblings") 1264 Val.getAsInteger(10, CurSiblings); 1265 else if (Name == "core id") { 1266 Val.getAsInteger(10, CurCoreId); 1267 // The processor id corresponds to an index into cpu_set_t. 1268 if (CPU_ISSET(CurProcessor, &Affinity)) 1269 CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled); 1270 } 1271 } 1272 return CPU_COUNT(&Enabled); 1273 } 1274 #elif defined(__linux__) && defined(__powerpc__) 1275 int computeHostNumPhysicalCores() { 1276 cpu_set_t Affinity; 1277 if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0) 1278 return CPU_COUNT(&Affinity); 1279 1280 // The call to sched_getaffinity() may have failed because the Affinity 1281 // mask is too small for the number of CPU's on the system (i.e. the 1282 // system has more than 1024 CPUs). Allocate a mask large enough for 1283 // twice as many CPUs. 1284 cpu_set_t *DynAffinity; 1285 DynAffinity = CPU_ALLOC(2048); 1286 if (sched_getaffinity(0, CPU_ALLOC_SIZE(2048), DynAffinity) == 0) { 1287 int NumCPUs = CPU_COUNT(DynAffinity); 1288 CPU_FREE(DynAffinity); 1289 return NumCPUs; 1290 } 1291 return -1; 1292 } 1293 #elif defined(__linux__) && defined(__s390x__) 1294 int computeHostNumPhysicalCores() { return sysconf(_SC_NPROCESSORS_ONLN); } 1295 #elif defined(__APPLE__) && defined(__x86_64__) 1296 #include <sys/param.h> 1297 #include <sys/sysctl.h> 1298 1299 // Gets the number of *physical cores* on the machine. 1300 int computeHostNumPhysicalCores() { 1301 uint32_t count; 1302 size_t len = sizeof(count); 1303 sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0); 1304 if (count < 1) { 1305 int nm[2]; 1306 nm[0] = CTL_HW; 1307 nm[1] = HW_AVAILCPU; 1308 sysctl(nm, 2, &count, &len, NULL, 0); 1309 if (count < 1) 1310 return -1; 1311 } 1312 return count; 1313 } 1314 #elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0 1315 // Defined in llvm/lib/Support/Windows/Threading.inc 1316 int computeHostNumPhysicalCores(); 1317 #else 1318 // On other systems, return -1 to indicate unknown. 1319 static int computeHostNumPhysicalCores() { return -1; } 1320 #endif 1321 1322 int sys::getHostNumPhysicalCores() { 1323 static int NumCores = computeHostNumPhysicalCores(); 1324 return NumCores; 1325 } 1326 1327 #if defined(__i386__) || defined(_M_IX86) || \ 1328 defined(__x86_64__) || defined(_M_X64) 1329 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1330 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1331 unsigned MaxLevel; 1332 1333 if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1) 1334 return false; 1335 1336 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); 1337 1338 Features["cx8"] = (EDX >> 8) & 1; 1339 Features["cmov"] = (EDX >> 15) & 1; 1340 Features["mmx"] = (EDX >> 23) & 1; 1341 Features["fxsr"] = (EDX >> 24) & 1; 1342 Features["sse"] = (EDX >> 25) & 1; 1343 Features["sse2"] = (EDX >> 26) & 1; 1344 1345 Features["sse3"] = (ECX >> 0) & 1; 1346 Features["pclmul"] = (ECX >> 1) & 1; 1347 Features["ssse3"] = (ECX >> 9) & 1; 1348 Features["cx16"] = (ECX >> 13) & 1; 1349 Features["sse4.1"] = (ECX >> 19) & 1; 1350 Features["sse4.2"] = (ECX >> 20) & 1; 1351 Features["movbe"] = (ECX >> 22) & 1; 1352 Features["popcnt"] = (ECX >> 23) & 1; 1353 Features["aes"] = (ECX >> 25) & 1; 1354 Features["rdrnd"] = (ECX >> 30) & 1; 1355 1356 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 1357 // indicates that the AVX registers will be saved and restored on context 1358 // switch, then we have full AVX support. 1359 bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX); 1360 bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6); 1361 #if defined(__APPLE__) 1362 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 1363 // save the AVX512 context if we use AVX512 instructions, even the bit is not 1364 // set right now. 1365 bool HasAVX512Save = true; 1366 #else 1367 // AVX512 requires additional context to be saved by the OS. 1368 bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); 1369 #endif 1370 // AMX requires additional context to be saved by the OS. 1371 const unsigned AMXBits = (1 << 17) | (1 << 18); 1372 bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits); 1373 1374 Features["avx"] = HasAVXSave; 1375 Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave; 1376 // Only enable XSAVE if OS has enabled support for saving YMM state. 1377 Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave; 1378 Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave; 1379 1380 unsigned MaxExtLevel; 1381 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1382 1383 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1384 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1385 Features["sahf"] = HasExtLeaf1 && ((ECX >> 0) & 1); 1386 Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); 1387 Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); 1388 Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); 1389 Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; 1390 Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1); 1391 Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; 1392 Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); 1393 Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1); 1394 1395 Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1); 1396 1397 // Miscellaneous memory related features, detected by 1398 // using the 0x80000008 leaf of the CPUID instruction 1399 bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 && 1400 !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX); 1401 Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1); 1402 Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1); 1403 1404 bool HasLeaf7 = 1405 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1406 1407 Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); 1408 Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1); 1409 Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); 1410 // AVX2 is only supported if we have the OS save support from AVX. 1411 Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave; 1412 Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); 1413 Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1); 1414 Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); 1415 // AVX512 is only supported if the OS supports the context save for it. 1416 Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; 1417 Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; 1418 Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); 1419 Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); 1420 Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save; 1421 Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1); 1422 Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1); 1423 Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; 1424 Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; 1425 Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; 1426 Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); 1427 Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; 1428 Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; 1429 1430 Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1); 1431 Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; 1432 Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); 1433 Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1); 1434 Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save; 1435 Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1); 1436 Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1); 1437 Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave; 1438 Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave; 1439 Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save; 1440 Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save; 1441 Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; 1442 Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1); 1443 Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1); 1444 Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1); 1445 Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1); 1446 Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1); 1447 1448 Features["avx512vp2intersect"] = 1449 HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save; 1450 Features["serialize"] = HasLeaf7 && ((EDX >> 14) & 1); 1451 Features["tsxldtrk"] = HasLeaf7 && ((EDX >> 16) & 1); 1452 // There are two CPUID leafs which information associated with the pconfig 1453 // instruction: 1454 // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th 1455 // bit of EDX), while the EAX=0x1b leaf returns information on the 1456 // availability of specific pconfig leafs. 1457 // The target feature here only refers to the the first of these two. 1458 // Users might need to check for the availability of specific pconfig 1459 // leaves using cpuid, since that information is ignored while 1460 // detecting features using the "-march=native" flag. 1461 // For more info, see X86 ISA docs. 1462 Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1); 1463 Features["amx-bf16"] = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave; 1464 Features["amx-tile"] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave; 1465 Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave; 1466 bool HasLeaf7Subleaf1 = 1467 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 1468 Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save; 1469 1470 bool HasLeafD = MaxLevel >= 0xd && 1471 !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); 1472 1473 // Only enable XSAVE if OS has enabled support for saving YMM state. 1474 Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave; 1475 Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave; 1476 Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave; 1477 1478 bool HasLeaf14 = MaxLevel >= 0x14 && 1479 !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX); 1480 1481 Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1); 1482 1483 return true; 1484 } 1485 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1486 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1487 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1488 if (!P) 1489 return false; 1490 1491 SmallVector<StringRef, 32> Lines; 1492 P->getBuffer().split(Lines, "\n"); 1493 1494 SmallVector<StringRef, 32> CPUFeatures; 1495 1496 // Look for the CPU features. 1497 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 1498 if (Lines[I].startswith("Features")) { 1499 Lines[I].split(CPUFeatures, ' '); 1500 break; 1501 } 1502 1503 #if defined(__aarch64__) 1504 // Keep track of which crypto features we have seen 1505 enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 }; 1506 uint32_t crypto = 0; 1507 #endif 1508 1509 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 1510 StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I]) 1511 #if defined(__aarch64__) 1512 .Case("asimd", "neon") 1513 .Case("fp", "fp-armv8") 1514 .Case("crc32", "crc") 1515 #else 1516 .Case("half", "fp16") 1517 .Case("neon", "neon") 1518 .Case("vfpv3", "vfp3") 1519 .Case("vfpv3d16", "d16") 1520 .Case("vfpv4", "vfp4") 1521 .Case("idiva", "hwdiv-arm") 1522 .Case("idivt", "hwdiv") 1523 #endif 1524 .Default(""); 1525 1526 #if defined(__aarch64__) 1527 // We need to check crypto separately since we need all of the crypto 1528 // extensions to enable the subtarget feature 1529 if (CPUFeatures[I] == "aes") 1530 crypto |= CAP_AES; 1531 else if (CPUFeatures[I] == "pmull") 1532 crypto |= CAP_PMULL; 1533 else if (CPUFeatures[I] == "sha1") 1534 crypto |= CAP_SHA1; 1535 else if (CPUFeatures[I] == "sha2") 1536 crypto |= CAP_SHA2; 1537 #endif 1538 1539 if (LLVMFeatureStr != "") 1540 Features[LLVMFeatureStr] = true; 1541 } 1542 1543 #if defined(__aarch64__) 1544 // If we have all crypto bits we can add the feature 1545 if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2)) 1546 Features["crypto"] = true; 1547 #endif 1548 1549 return true; 1550 } 1551 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64)) 1552 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1553 if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) 1554 Features["neon"] = true; 1555 if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) 1556 Features["crc"] = true; 1557 if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) 1558 Features["crypto"] = true; 1559 1560 return true; 1561 } 1562 #else 1563 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; } 1564 #endif 1565 1566 std::string sys::getProcessTriple() { 1567 std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE); 1568 Triple PT(Triple::normalize(TargetTripleString)); 1569 1570 if (sizeof(void *) == 8 && PT.isArch32Bit()) 1571 PT = PT.get64BitArchVariant(); 1572 if (sizeof(void *) == 4 && PT.isArch64Bit()) 1573 PT = PT.get32BitArchVariant(); 1574 1575 return PT.str(); 1576 } 1577