1 //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the operating system Host concept. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Support/Host.h" 14 #include "llvm/Support/TargetParser.h" 15 #include "llvm/ADT/SmallSet.h" 16 #include "llvm/ADT/SmallVector.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/ADT/StringSwitch.h" 19 #include "llvm/ADT/Triple.h" 20 #include "llvm/Config/llvm-config.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/FileSystem.h" 23 #include "llvm/Support/MemoryBuffer.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include <assert.h> 26 #include <string.h> 27 28 // Include the platform-specific parts of this class. 29 #ifdef LLVM_ON_UNIX 30 #include "Unix/Host.inc" 31 #endif 32 #ifdef _WIN32 33 #include "Windows/Host.inc" 34 #endif 35 #ifdef _MSC_VER 36 #include <intrin.h> 37 #endif 38 #if defined(__APPLE__) && (!defined(__x86_64__)) 39 #include <mach/host_info.h> 40 #include <mach/mach.h> 41 #include <mach/mach_host.h> 42 #include <mach/machine.h> 43 #endif 44 45 #define DEBUG_TYPE "host-detection" 46 47 //===----------------------------------------------------------------------===// 48 // 49 // Implementations of the CPU detection routines 50 // 51 //===----------------------------------------------------------------------===// 52 53 using namespace llvm; 54 55 static std::unique_ptr<llvm::MemoryBuffer> 56 LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() { 57 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 58 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); 59 if (std::error_code EC = Text.getError()) { 60 llvm::errs() << "Can't read " 61 << "/proc/cpuinfo: " << EC.message() << "\n"; 62 return nullptr; 63 } 64 return std::move(*Text); 65 } 66 67 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) { 68 // Access to the Processor Version Register (PVR) on PowerPC is privileged, 69 // and so we must use an operating-system interface to determine the current 70 // processor type. On Linux, this is exposed through the /proc/cpuinfo file. 71 const char *generic = "generic"; 72 73 // The cpu line is second (after the 'processor: 0' line), so if this 74 // buffer is too small then something has changed (or is wrong). 75 StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin(); 76 StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end(); 77 78 StringRef::const_iterator CIP = CPUInfoStart; 79 80 StringRef::const_iterator CPUStart = 0; 81 size_t CPULen = 0; 82 83 // We need to find the first line which starts with cpu, spaces, and a colon. 84 // After the colon, there may be some additional spaces and then the cpu type. 85 while (CIP < CPUInfoEnd && CPUStart == 0) { 86 if (CIP < CPUInfoEnd && *CIP == '\n') 87 ++CIP; 88 89 if (CIP < CPUInfoEnd && *CIP == 'c') { 90 ++CIP; 91 if (CIP < CPUInfoEnd && *CIP == 'p') { 92 ++CIP; 93 if (CIP < CPUInfoEnd && *CIP == 'u') { 94 ++CIP; 95 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 96 ++CIP; 97 98 if (CIP < CPUInfoEnd && *CIP == ':') { 99 ++CIP; 100 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 101 ++CIP; 102 103 if (CIP < CPUInfoEnd) { 104 CPUStart = CIP; 105 while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' && 106 *CIP != ',' && *CIP != '\n')) 107 ++CIP; 108 CPULen = CIP - CPUStart; 109 } 110 } 111 } 112 } 113 } 114 115 if (CPUStart == 0) 116 while (CIP < CPUInfoEnd && *CIP != '\n') 117 ++CIP; 118 } 119 120 if (CPUStart == 0) 121 return generic; 122 123 return StringSwitch<const char *>(StringRef(CPUStart, CPULen)) 124 .Case("604e", "604e") 125 .Case("604", "604") 126 .Case("7400", "7400") 127 .Case("7410", "7400") 128 .Case("7447", "7400") 129 .Case("7455", "7450") 130 .Case("G4", "g4") 131 .Case("POWER4", "970") 132 .Case("PPC970FX", "970") 133 .Case("PPC970MP", "970") 134 .Case("G5", "g5") 135 .Case("POWER5", "g5") 136 .Case("A2", "a2") 137 .Case("POWER6", "pwr6") 138 .Case("POWER7", "pwr7") 139 .Case("POWER8", "pwr8") 140 .Case("POWER8E", "pwr8") 141 .Case("POWER8NVL", "pwr8") 142 .Case("POWER9", "pwr9") 143 // FIXME: If we get a simulator or machine with the capabilities of 144 // mcpu=future, we should revisit this and add the name reported by the 145 // simulator/machine. 146 .Default(generic); 147 } 148 149 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { 150 // The cpuid register on arm is not accessible from user space. On Linux, 151 // it is exposed through the /proc/cpuinfo file. 152 153 // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line 154 // in all cases. 155 SmallVector<StringRef, 32> Lines; 156 ProcCpuinfoContent.split(Lines, "\n"); 157 158 // Look for the CPU implementer line. 159 StringRef Implementer; 160 StringRef Hardware; 161 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 162 if (Lines[I].startswith("CPU implementer")) 163 Implementer = Lines[I].substr(15).ltrim("\t :"); 164 if (Lines[I].startswith("Hardware")) 165 Hardware = Lines[I].substr(8).ltrim("\t :"); 166 } 167 168 if (Implementer == "0x41") { // ARM Ltd. 169 // MSM8992/8994 may give cpu part for the core that the kernel is running on, 170 // which is undeterministic and wrong. Always return cortex-a53 for these SoC. 171 if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996")) 172 return "cortex-a53"; 173 174 175 // Look for the CPU part line. 176 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 177 if (Lines[I].startswith("CPU part")) 178 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 179 // values correspond to the "Part number" in the CP15/c0 register. The 180 // contents are specified in the various processor manuals. 181 // This corresponds to the Main ID Register in Technical Reference Manuals. 182 // and is used in programs like sys-utils 183 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 184 .Case("0x926", "arm926ej-s") 185 .Case("0xb02", "mpcore") 186 .Case("0xb36", "arm1136j-s") 187 .Case("0xb56", "arm1156t2-s") 188 .Case("0xb76", "arm1176jz-s") 189 .Case("0xc08", "cortex-a8") 190 .Case("0xc09", "cortex-a9") 191 .Case("0xc0f", "cortex-a15") 192 .Case("0xc20", "cortex-m0") 193 .Case("0xc23", "cortex-m3") 194 .Case("0xc24", "cortex-m4") 195 .Case("0xd22", "cortex-m55") 196 .Case("0xd02", "cortex-a34") 197 .Case("0xd04", "cortex-a35") 198 .Case("0xd03", "cortex-a53") 199 .Case("0xd07", "cortex-a57") 200 .Case("0xd08", "cortex-a72") 201 .Case("0xd09", "cortex-a73") 202 .Case("0xd0a", "cortex-a75") 203 .Case("0xd0b", "cortex-a76") 204 .Default("generic"); 205 } 206 207 if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium. 208 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 209 if (Lines[I].startswith("CPU part")) { 210 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 211 .Case("0x516", "thunderx2t99") 212 .Case("0x0516", "thunderx2t99") 213 .Case("0xaf", "thunderx2t99") 214 .Case("0x0af", "thunderx2t99") 215 .Case("0xa1", "thunderxt88") 216 .Case("0x0a1", "thunderxt88") 217 .Default("generic"); 218 } 219 } 220 } 221 222 if (Implementer == "0x48") // HiSilicon Technologies, Inc. 223 // Look for the CPU part line. 224 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 225 if (Lines[I].startswith("CPU part")) 226 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 227 // values correspond to the "Part number" in the CP15/c0 register. The 228 // contents are specified in the various processor manuals. 229 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 230 .Case("0xd01", "tsv110") 231 .Default("generic"); 232 233 if (Implementer == "0x51") // Qualcomm Technologies, Inc. 234 // Look for the CPU part line. 235 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 236 if (Lines[I].startswith("CPU part")) 237 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 238 // values correspond to the "Part number" in the CP15/c0 register. The 239 // contents are specified in the various processor manuals. 240 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 241 .Case("0x06f", "krait") // APQ8064 242 .Case("0x201", "kryo") 243 .Case("0x205", "kryo") 244 .Case("0x211", "kryo") 245 .Case("0x800", "cortex-a73") 246 .Case("0x801", "cortex-a73") 247 .Case("0x802", "cortex-a73") 248 .Case("0x803", "cortex-a73") 249 .Case("0x804", "cortex-a73") 250 .Case("0x805", "cortex-a73") 251 .Case("0xc00", "falkor") 252 .Case("0xc01", "saphira") 253 .Default("generic"); 254 255 if (Implementer == "0x53") { // Samsung Electronics Co., Ltd. 256 // The Exynos chips have a convoluted ID scheme that doesn't seem to follow 257 // any predictive pattern across variants and parts. 258 unsigned Variant = 0, Part = 0; 259 260 // Look for the CPU variant line, whose value is a 1 digit hexadecimal 261 // number, corresponding to the Variant bits in the CP15/C0 register. 262 for (auto I : Lines) 263 if (I.consume_front("CPU variant")) 264 I.ltrim("\t :").getAsInteger(0, Variant); 265 266 // Look for the CPU part line, whose value is a 3 digit hexadecimal 267 // number, corresponding to the PartNum bits in the CP15/C0 register. 268 for (auto I : Lines) 269 if (I.consume_front("CPU part")) 270 I.ltrim("\t :").getAsInteger(0, Part); 271 272 unsigned Exynos = (Variant << 12) | Part; 273 switch (Exynos) { 274 default: 275 // Default by falling through to Exynos M3. 276 LLVM_FALLTHROUGH; 277 case 0x1002: 278 return "exynos-m3"; 279 case 0x1003: 280 return "exynos-m4"; 281 } 282 } 283 284 return "generic"; 285 } 286 287 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) { 288 // STIDP is a privileged operation, so use /proc/cpuinfo instead. 289 290 // The "processor 0:" line comes after a fair amount of other information, 291 // including a cache breakdown, but this should be plenty. 292 SmallVector<StringRef, 32> Lines; 293 ProcCpuinfoContent.split(Lines, "\n"); 294 295 // Look for the CPU features. 296 SmallVector<StringRef, 32> CPUFeatures; 297 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 298 if (Lines[I].startswith("features")) { 299 size_t Pos = Lines[I].find(":"); 300 if (Pos != StringRef::npos) { 301 Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' '); 302 break; 303 } 304 } 305 306 // We need to check for the presence of vector support independently of 307 // the machine type, since we may only use the vector register set when 308 // supported by the kernel (and hypervisor). 309 bool HaveVectorSupport = false; 310 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 311 if (CPUFeatures[I] == "vx") 312 HaveVectorSupport = true; 313 } 314 315 // Now check the processor machine type. 316 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 317 if (Lines[I].startswith("processor ")) { 318 size_t Pos = Lines[I].find("machine = "); 319 if (Pos != StringRef::npos) { 320 Pos += sizeof("machine = ") - 1; 321 unsigned int Id; 322 if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) { 323 if (Id >= 8561 && HaveVectorSupport) 324 return "z15"; 325 if (Id >= 3906 && HaveVectorSupport) 326 return "z14"; 327 if (Id >= 2964 && HaveVectorSupport) 328 return "z13"; 329 if (Id >= 2827) 330 return "zEC12"; 331 if (Id >= 2817) 332 return "z196"; 333 } 334 } 335 break; 336 } 337 } 338 339 return "generic"; 340 } 341 342 StringRef sys::detail::getHostCPUNameForBPF() { 343 #if !defined(__linux__) || !defined(__x86_64__) 344 return "generic"; 345 #else 346 uint8_t v3_insns[40] __attribute__ ((aligned (8))) = 347 /* BPF_MOV64_IMM(BPF_REG_0, 0) */ 348 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 349 /* BPF_MOV64_IMM(BPF_REG_2, 1) */ 350 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 351 /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ 352 0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 353 /* BPF_MOV64_IMM(BPF_REG_0, 1) */ 354 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 355 /* BPF_EXIT_INSN() */ 356 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 357 358 uint8_t v2_insns[40] __attribute__ ((aligned (8))) = 359 /* BPF_MOV64_IMM(BPF_REG_0, 0) */ 360 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 361 /* BPF_MOV64_IMM(BPF_REG_2, 1) */ 362 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 363 /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ 364 0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 365 /* BPF_MOV64_IMM(BPF_REG_0, 1) */ 366 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 367 /* BPF_EXIT_INSN() */ 368 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 369 370 struct bpf_prog_load_attr { 371 uint32_t prog_type; 372 uint32_t insn_cnt; 373 uint64_t insns; 374 uint64_t license; 375 uint32_t log_level; 376 uint32_t log_size; 377 uint64_t log_buf; 378 uint32_t kern_version; 379 uint32_t prog_flags; 380 } attr = {}; 381 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ 382 attr.insn_cnt = 5; 383 attr.insns = (uint64_t)v3_insns; 384 attr.license = (uint64_t)"DUMMY"; 385 386 int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, 387 sizeof(attr)); 388 if (fd >= 0) { 389 close(fd); 390 return "v3"; 391 } 392 393 /* Clear the whole attr in case its content changed by syscall. */ 394 memset(&attr, 0, sizeof(attr)); 395 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ 396 attr.insn_cnt = 5; 397 attr.insns = (uint64_t)v2_insns; 398 attr.license = (uint64_t)"DUMMY"; 399 fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr)); 400 if (fd >= 0) { 401 close(fd); 402 return "v2"; 403 } 404 return "v1"; 405 #endif 406 } 407 408 #if defined(__i386__) || defined(_M_IX86) || \ 409 defined(__x86_64__) || defined(_M_X64) 410 411 enum VendorSignatures { 412 SIG_INTEL = 0x756e6547 /* Genu */, 413 SIG_AMD = 0x68747541 /* Auth */ 414 }; 415 416 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). 417 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID 418 // support. Consequently, for i386, the presence of CPUID is checked first 419 // via the corresponding eflags bit. 420 // Removal of cpuid.h header motivated by PR30384 421 // Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp 422 // or test-suite, but are used in external projects e.g. libstdcxx 423 static bool isCpuIdSupported() { 424 #if defined(__GNUC__) || defined(__clang__) 425 #if defined(__i386__) 426 int __cpuid_supported; 427 __asm__(" pushfl\n" 428 " popl %%eax\n" 429 " movl %%eax,%%ecx\n" 430 " xorl $0x00200000,%%eax\n" 431 " pushl %%eax\n" 432 " popfl\n" 433 " pushfl\n" 434 " popl %%eax\n" 435 " movl $0,%0\n" 436 " cmpl %%eax,%%ecx\n" 437 " je 1f\n" 438 " movl $1,%0\n" 439 "1:" 440 : "=r"(__cpuid_supported) 441 : 442 : "eax", "ecx"); 443 if (!__cpuid_supported) 444 return false; 445 #endif 446 return true; 447 #endif 448 return true; 449 } 450 451 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in 452 /// the specified arguments. If we can't run cpuid on the host, return true. 453 static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, 454 unsigned *rECX, unsigned *rEDX) { 455 #if defined(__GNUC__) || defined(__clang__) 456 #if defined(__x86_64__) 457 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 458 // FIXME: should we save this for Clang? 459 __asm__("movq\t%%rbx, %%rsi\n\t" 460 "cpuid\n\t" 461 "xchgq\t%%rbx, %%rsi\n\t" 462 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 463 : "a"(value)); 464 return false; 465 #elif defined(__i386__) 466 __asm__("movl\t%%ebx, %%esi\n\t" 467 "cpuid\n\t" 468 "xchgl\t%%ebx, %%esi\n\t" 469 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 470 : "a"(value)); 471 return false; 472 #else 473 return true; 474 #endif 475 #elif defined(_MSC_VER) 476 // The MSVC intrinsic is portable across x86 and x64. 477 int registers[4]; 478 __cpuid(registers, value); 479 *rEAX = registers[0]; 480 *rEBX = registers[1]; 481 *rECX = registers[2]; 482 *rEDX = registers[3]; 483 return false; 484 #else 485 return true; 486 #endif 487 } 488 489 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return 490 /// the 4 values in the specified arguments. If we can't run cpuid on the host, 491 /// return true. 492 static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, 493 unsigned *rEAX, unsigned *rEBX, unsigned *rECX, 494 unsigned *rEDX) { 495 #if defined(__GNUC__) || defined(__clang__) 496 #if defined(__x86_64__) 497 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 498 // FIXME: should we save this for Clang? 499 __asm__("movq\t%%rbx, %%rsi\n\t" 500 "cpuid\n\t" 501 "xchgq\t%%rbx, %%rsi\n\t" 502 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 503 : "a"(value), "c"(subleaf)); 504 return false; 505 #elif defined(__i386__) 506 __asm__("movl\t%%ebx, %%esi\n\t" 507 "cpuid\n\t" 508 "xchgl\t%%ebx, %%esi\n\t" 509 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 510 : "a"(value), "c"(subleaf)); 511 return false; 512 #else 513 return true; 514 #endif 515 #elif defined(_MSC_VER) 516 int registers[4]; 517 __cpuidex(registers, value, subleaf); 518 *rEAX = registers[0]; 519 *rEBX = registers[1]; 520 *rECX = registers[2]; 521 *rEDX = registers[3]; 522 return false; 523 #else 524 return true; 525 #endif 526 } 527 528 // Read control register 0 (XCR0). Used to detect features such as AVX. 529 static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { 530 #if defined(__GNUC__) || defined(__clang__) 531 // Check xgetbv; this uses a .byte sequence instead of the instruction 532 // directly because older assemblers do not include support for xgetbv and 533 // there is no easy way to conditionally compile based on the assembler used. 534 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); 535 return false; 536 #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) 537 unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); 538 *rEAX = Result; 539 *rEDX = Result >> 32; 540 return false; 541 #else 542 return true; 543 #endif 544 } 545 546 static void detectX86FamilyModel(unsigned EAX, unsigned *Family, 547 unsigned *Model) { 548 *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 549 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 550 if (*Family == 6 || *Family == 0xf) { 551 if (*Family == 0xf) 552 // Examine extended family ID if family ID is F. 553 *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 554 // Examine extended model ID if family ID is 6 or F. 555 *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 556 } 557 } 558 559 static void 560 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, 561 unsigned Brand_id, unsigned Features, 562 unsigned Features2, unsigned Features3, 563 unsigned *Type, unsigned *Subtype) { 564 if (Brand_id != 0) 565 return; 566 switch (Family) { 567 case 3: 568 *Type = X86::INTEL_i386; 569 break; 570 case 4: 571 *Type = X86::INTEL_i486; 572 break; 573 case 5: 574 if (Features & (1 << X86::FEATURE_MMX)) { 575 *Type = X86::INTEL_PENTIUM_MMX; 576 break; 577 } 578 *Type = X86::INTEL_PENTIUM; 579 break; 580 case 6: 581 switch (Model) { 582 case 0x01: // Pentium Pro processor 583 *Type = X86::INTEL_PENTIUM_PRO; 584 break; 585 case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor, 586 // model 03 587 case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor, 588 // model 05, and Intel Celeron processor, model 05 589 case 0x06: // Celeron processor, model 06 590 *Type = X86::INTEL_PENTIUM_II; 591 break; 592 case 0x07: // Pentium III processor, model 07, and Pentium III Xeon 593 // processor, model 07 594 case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor, 595 // model 08, and Celeron processor, model 08 596 case 0x0a: // Pentium III Xeon processor, model 0Ah 597 case 0x0b: // Pentium III processor, model 0Bh 598 *Type = X86::INTEL_PENTIUM_III; 599 break; 600 case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09. 601 case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model 602 // 0Dh. All processors are manufactured using the 90 nm process. 603 case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579 604 // Integrated Processor with Intel QuickAssist Technology 605 *Type = X86::INTEL_PENTIUM_M; 606 break; 607 case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model 608 // 0Eh. All processors are manufactured using the 65 nm process. 609 *Type = X86::INTEL_CORE_DUO; 610 break; // yonah 611 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile 612 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad 613 // mobile processor, Intel Core 2 Extreme processor, Intel 614 // Pentium Dual-Core processor, Intel Xeon processor, model 615 // 0Fh. All processors are manufactured using the 65 nm process. 616 case 0x16: // Intel Celeron processor model 16h. All processors are 617 // manufactured using the 65 nm process 618 *Type = X86::INTEL_CORE2; // "core2" 619 *Subtype = X86::INTEL_CORE2_65; 620 break; 621 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model 622 // 17h. All processors are manufactured using the 45 nm process. 623 // 624 // 45nm: Penryn , Wolfdale, Yorkfield (XE) 625 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using 626 // the 45 nm process. 627 *Type = X86::INTEL_CORE2; // "penryn" 628 *Subtype = X86::INTEL_CORE2_45; 629 break; 630 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All 631 // processors are manufactured using the 45 nm process. 632 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. 633 // As found in a Summer 2010 model iMac. 634 case 0x1f: 635 case 0x2e: // Nehalem EX 636 *Type = X86::INTEL_COREI7; // "nehalem" 637 *Subtype = X86::INTEL_COREI7_NEHALEM; 638 break; 639 case 0x25: // Intel Core i7, laptop version. 640 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All 641 // processors are manufactured using the 32 nm process. 642 case 0x2f: // Westmere EX 643 *Type = X86::INTEL_COREI7; // "westmere" 644 *Subtype = X86::INTEL_COREI7_WESTMERE; 645 break; 646 case 0x2a: // Intel Core i7 processor. All processors are manufactured 647 // using the 32 nm process. 648 case 0x2d: 649 *Type = X86::INTEL_COREI7; //"sandybridge" 650 *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; 651 break; 652 case 0x3a: 653 case 0x3e: // Ivy Bridge EP 654 *Type = X86::INTEL_COREI7; // "ivybridge" 655 *Subtype = X86::INTEL_COREI7_IVYBRIDGE; 656 break; 657 658 // Haswell: 659 case 0x3c: 660 case 0x3f: 661 case 0x45: 662 case 0x46: 663 *Type = X86::INTEL_COREI7; // "haswell" 664 *Subtype = X86::INTEL_COREI7_HASWELL; 665 break; 666 667 // Broadwell: 668 case 0x3d: 669 case 0x47: 670 case 0x4f: 671 case 0x56: 672 *Type = X86::INTEL_COREI7; // "broadwell" 673 *Subtype = X86::INTEL_COREI7_BROADWELL; 674 break; 675 676 // Skylake: 677 case 0x4e: // Skylake mobile 678 case 0x5e: // Skylake desktop 679 case 0x8e: // Kaby Lake mobile 680 case 0x9e: // Kaby Lake desktop 681 *Type = X86::INTEL_COREI7; // "skylake" 682 *Subtype = X86::INTEL_COREI7_SKYLAKE; 683 break; 684 685 // Skylake Xeon: 686 case 0x55: 687 *Type = X86::INTEL_COREI7; 688 if (Features2 & (1 << (X86::FEATURE_AVX512BF16 - 32))) 689 *Subtype = X86::INTEL_COREI7_COOPERLAKE; // "cooperlake" 690 else if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32))) 691 *Subtype = X86::INTEL_COREI7_CASCADELAKE; // "cascadelake" 692 else 693 *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" 694 break; 695 696 // Cannonlake: 697 case 0x66: 698 *Type = X86::INTEL_COREI7; 699 *Subtype = X86::INTEL_COREI7_CANNONLAKE; // "cannonlake" 700 break; 701 702 // Icelake: 703 case 0x7d: 704 case 0x7e: 705 *Type = X86::INTEL_COREI7; 706 *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; // "icelake-client" 707 break; 708 709 // Icelake Xeon: 710 case 0x6a: 711 case 0x6c: 712 *Type = X86::INTEL_COREI7; 713 *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER; // "icelake-server" 714 break; 715 716 case 0x1c: // Most 45 nm Intel Atom processors 717 case 0x26: // 45 nm Atom Lincroft 718 case 0x27: // 32 nm Atom Medfield 719 case 0x35: // 32 nm Atom Midview 720 case 0x36: // 32 nm Atom Midview 721 *Type = X86::INTEL_BONNELL; 722 break; // "bonnell" 723 724 // Atom Silvermont codes from the Intel software optimization guide. 725 case 0x37: 726 case 0x4a: 727 case 0x4d: 728 case 0x5a: 729 case 0x5d: 730 case 0x4c: // really airmont 731 *Type = X86::INTEL_SILVERMONT; 732 break; // "silvermont" 733 // Goldmont: 734 case 0x5c: // Apollo Lake 735 case 0x5f: // Denverton 736 *Type = X86::INTEL_GOLDMONT; 737 break; // "goldmont" 738 case 0x7a: 739 *Type = X86::INTEL_GOLDMONT_PLUS; 740 break; 741 case 0x86: 742 *Type = X86::INTEL_TREMONT; 743 break; 744 745 case 0x57: 746 *Type = X86::INTEL_KNL; // knl 747 break; 748 749 case 0x85: 750 *Type = X86::INTEL_KNM; // knm 751 break; 752 753 default: // Unknown family 6 CPU, try to guess. 754 // TODO detect tigerlake host 755 if (Features3 & (1 << (X86::FEATURE_AVX512VP2INTERSECT - 64))) { 756 *Type = X86::INTEL_COREI7; 757 *Subtype = X86::INTEL_COREI7_TIGERLAKE; 758 break; 759 } 760 761 if (Features & (1 << X86::FEATURE_AVX512VBMI2)) { 762 *Type = X86::INTEL_COREI7; 763 *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; 764 break; 765 } 766 767 if (Features & (1 << X86::FEATURE_AVX512VBMI)) { 768 *Type = X86::INTEL_COREI7; 769 *Subtype = X86::INTEL_COREI7_CANNONLAKE; 770 break; 771 } 772 773 if (Features2 & (1 << (X86::FEATURE_AVX512BF16 - 32))) { 774 *Type = X86::INTEL_COREI7; 775 *Subtype = X86::INTEL_COREI7_COOPERLAKE; 776 break; 777 } 778 779 if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32))) { 780 *Type = X86::INTEL_COREI7; 781 *Subtype = X86::INTEL_COREI7_CASCADELAKE; 782 break; 783 } 784 785 if (Features & (1 << X86::FEATURE_AVX512VL)) { 786 *Type = X86::INTEL_COREI7; 787 *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; 788 break; 789 } 790 791 if (Features & (1 << X86::FEATURE_AVX512ER)) { 792 *Type = X86::INTEL_KNL; // knl 793 break; 794 } 795 796 if (Features3 & (1 << (X86::FEATURE_CLFLUSHOPT - 64))) { 797 if (Features3 & (1 << (X86::FEATURE_SHA - 64))) { 798 *Type = X86::INTEL_GOLDMONT; 799 } else { 800 *Type = X86::INTEL_COREI7; 801 *Subtype = X86::INTEL_COREI7_SKYLAKE; 802 } 803 break; 804 } 805 if (Features3 & (1 << (X86::FEATURE_ADX - 64))) { 806 *Type = X86::INTEL_COREI7; 807 *Subtype = X86::INTEL_COREI7_BROADWELL; 808 break; 809 } 810 if (Features & (1 << X86::FEATURE_AVX2)) { 811 *Type = X86::INTEL_COREI7; 812 *Subtype = X86::INTEL_COREI7_HASWELL; 813 break; 814 } 815 if (Features & (1 << X86::FEATURE_AVX)) { 816 *Type = X86::INTEL_COREI7; 817 *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; 818 break; 819 } 820 if (Features & (1 << X86::FEATURE_SSE4_2)) { 821 if (Features3 & (1 << (X86::FEATURE_MOVBE - 64))) { 822 *Type = X86::INTEL_SILVERMONT; 823 } else { 824 *Type = X86::INTEL_COREI7; 825 *Subtype = X86::INTEL_COREI7_NEHALEM; 826 } 827 break; 828 } 829 if (Features & (1 << X86::FEATURE_SSE4_1)) { 830 *Type = X86::INTEL_CORE2; // "penryn" 831 *Subtype = X86::INTEL_CORE2_45; 832 break; 833 } 834 if (Features & (1 << X86::FEATURE_SSSE3)) { 835 if (Features3 & (1 << (X86::FEATURE_MOVBE - 64))) { 836 *Type = X86::INTEL_BONNELL; // "bonnell" 837 } else { 838 *Type = X86::INTEL_CORE2; // "core2" 839 *Subtype = X86::INTEL_CORE2_65; 840 } 841 break; 842 } 843 if (Features3 & (1 << (X86::FEATURE_EM64T - 64))) { 844 *Type = X86::INTEL_CORE2; // "core2" 845 *Subtype = X86::INTEL_CORE2_65; 846 break; 847 } 848 if (Features & (1 << X86::FEATURE_SSE3)) { 849 *Type = X86::INTEL_CORE_DUO; 850 break; 851 } 852 if (Features & (1 << X86::FEATURE_SSE2)) { 853 *Type = X86::INTEL_PENTIUM_M; 854 break; 855 } 856 if (Features & (1 << X86::FEATURE_SSE)) { 857 *Type = X86::INTEL_PENTIUM_III; 858 break; 859 } 860 if (Features & (1 << X86::FEATURE_MMX)) { 861 *Type = X86::INTEL_PENTIUM_II; 862 break; 863 } 864 *Type = X86::INTEL_PENTIUM_PRO; 865 break; 866 } 867 break; 868 case 15: { 869 if (Features3 & (1 << (X86::FEATURE_EM64T - 64))) { 870 *Type = X86::INTEL_NOCONA; 871 break; 872 } 873 if (Features & (1 << X86::FEATURE_SSE3)) { 874 *Type = X86::INTEL_PRESCOTT; 875 break; 876 } 877 *Type = X86::INTEL_PENTIUM_IV; 878 break; 879 } 880 default: 881 break; /*"generic"*/ 882 } 883 } 884 885 static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, 886 unsigned Features, unsigned *Type, 887 unsigned *Subtype) { 888 // FIXME: this poorly matches the generated SubtargetFeatureKV table. There 889 // appears to be no way to generate the wide variety of AMD-specific targets 890 // from the information returned from CPUID. 891 switch (Family) { 892 case 4: 893 *Type = X86::AMD_i486; 894 break; 895 case 5: 896 *Type = X86::AMDPENTIUM; 897 switch (Model) { 898 case 6: 899 case 7: 900 *Subtype = X86::AMDPENTIUM_K6; 901 break; // "k6" 902 case 8: 903 *Subtype = X86::AMDPENTIUM_K62; 904 break; // "k6-2" 905 case 9: 906 case 13: 907 *Subtype = X86::AMDPENTIUM_K63; 908 break; // "k6-3" 909 case 10: 910 *Subtype = X86::AMDPENTIUM_GEODE; 911 break; // "geode" 912 } 913 break; 914 case 6: 915 if (Features & (1 << X86::FEATURE_SSE)) { 916 *Type = X86::AMD_ATHLON_XP; 917 break; // "athlon-xp" 918 } 919 *Type = X86::AMD_ATHLON; 920 break; // "athlon" 921 case 15: 922 if (Features & (1 << X86::FEATURE_SSE3)) { 923 *Type = X86::AMD_K8SSE3; 924 break; // "k8-sse3" 925 } 926 *Type = X86::AMD_K8; 927 break; // "k8" 928 case 16: 929 *Type = X86::AMDFAM10H; // "amdfam10" 930 switch (Model) { 931 case 2: 932 *Subtype = X86::AMDFAM10H_BARCELONA; 933 break; 934 case 4: 935 *Subtype = X86::AMDFAM10H_SHANGHAI; 936 break; 937 case 8: 938 *Subtype = X86::AMDFAM10H_ISTANBUL; 939 break; 940 } 941 break; 942 case 20: 943 *Type = X86::AMD_BTVER1; 944 break; // "btver1"; 945 case 21: 946 *Type = X86::AMDFAM15H; 947 if (Model >= 0x60 && Model <= 0x7f) { 948 *Subtype = X86::AMDFAM15H_BDVER4; 949 break; // "bdver4"; 60h-7Fh: Excavator 950 } 951 if (Model >= 0x30 && Model <= 0x3f) { 952 *Subtype = X86::AMDFAM15H_BDVER3; 953 break; // "bdver3"; 30h-3Fh: Steamroller 954 } 955 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { 956 *Subtype = X86::AMDFAM15H_BDVER2; 957 break; // "bdver2"; 02h, 10h-1Fh: Piledriver 958 } 959 if (Model <= 0x0f) { 960 *Subtype = X86::AMDFAM15H_BDVER1; 961 break; // "bdver1"; 00h-0Fh: Bulldozer 962 } 963 break; 964 case 22: 965 *Type = X86::AMD_BTVER2; 966 break; // "btver2" 967 case 23: 968 *Type = X86::AMDFAM17H; 969 if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) { 970 *Subtype = X86::AMDFAM17H_ZNVER2; 971 break; // "znver2"; 30h-3fh, 71h: Zen2 972 } 973 if (Model <= 0x0f) { 974 *Subtype = X86::AMDFAM17H_ZNVER1; 975 break; // "znver1"; 00h-0Fh: Zen1 976 } 977 break; 978 default: 979 break; // "generic" 980 } 981 } 982 983 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, 984 unsigned *FeaturesOut, unsigned *Features2Out, 985 unsigned *Features3Out) { 986 unsigned Features = 0; 987 unsigned Features2 = 0; 988 unsigned Features3 = 0; 989 unsigned EAX, EBX; 990 991 auto setFeature = [&](unsigned F) { 992 if (F < 32) 993 Features |= 1U << (F & 0x1f); 994 else if (F < 64) 995 Features2 |= 1U << ((F - 32) & 0x1f); 996 else if (F < 96) 997 Features3 |= 1U << ((F - 64) & 0x1f); 998 else 999 llvm_unreachable("Unexpected FeatureBit"); 1000 }; 1001 1002 if ((EDX >> 15) & 1) 1003 setFeature(X86::FEATURE_CMOV); 1004 if ((EDX >> 23) & 1) 1005 setFeature(X86::FEATURE_MMX); 1006 if ((EDX >> 25) & 1) 1007 setFeature(X86::FEATURE_SSE); 1008 if ((EDX >> 26) & 1) 1009 setFeature(X86::FEATURE_SSE2); 1010 1011 if ((ECX >> 0) & 1) 1012 setFeature(X86::FEATURE_SSE3); 1013 if ((ECX >> 1) & 1) 1014 setFeature(X86::FEATURE_PCLMUL); 1015 if ((ECX >> 9) & 1) 1016 setFeature(X86::FEATURE_SSSE3); 1017 if ((ECX >> 12) & 1) 1018 setFeature(X86::FEATURE_FMA); 1019 if ((ECX >> 19) & 1) 1020 setFeature(X86::FEATURE_SSE4_1); 1021 if ((ECX >> 20) & 1) 1022 setFeature(X86::FEATURE_SSE4_2); 1023 if ((ECX >> 23) & 1) 1024 setFeature(X86::FEATURE_POPCNT); 1025 if ((ECX >> 25) & 1) 1026 setFeature(X86::FEATURE_AES); 1027 1028 if ((ECX >> 22) & 1) 1029 setFeature(X86::FEATURE_MOVBE); 1030 1031 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 1032 // indicates that the AVX registers will be saved and restored on context 1033 // switch, then we have full AVX support. 1034 const unsigned AVXBits = (1 << 27) | (1 << 28); 1035 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && 1036 ((EAX & 0x6) == 0x6); 1037 #if defined(__APPLE__) 1038 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 1039 // save the AVX512 context if we use AVX512 instructions, even the bit is not 1040 // set right now. 1041 bool HasAVX512Save = true; 1042 #else 1043 // AVX512 requires additional context to be saved by the OS. 1044 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); 1045 #endif 1046 1047 if (HasAVX) 1048 setFeature(X86::FEATURE_AVX); 1049 1050 bool HasLeaf7 = 1051 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1052 1053 if (HasLeaf7 && ((EBX >> 3) & 1)) 1054 setFeature(X86::FEATURE_BMI); 1055 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) 1056 setFeature(X86::FEATURE_AVX2); 1057 if (HasLeaf7 && ((EBX >> 8) & 1)) 1058 setFeature(X86::FEATURE_BMI2); 1059 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) 1060 setFeature(X86::FEATURE_AVX512F); 1061 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) 1062 setFeature(X86::FEATURE_AVX512DQ); 1063 if (HasLeaf7 && ((EBX >> 19) & 1)) 1064 setFeature(X86::FEATURE_ADX); 1065 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) 1066 setFeature(X86::FEATURE_AVX512IFMA); 1067 if (HasLeaf7 && ((EBX >> 23) & 1)) 1068 setFeature(X86::FEATURE_CLFLUSHOPT); 1069 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) 1070 setFeature(X86::FEATURE_AVX512PF); 1071 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) 1072 setFeature(X86::FEATURE_AVX512ER); 1073 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) 1074 setFeature(X86::FEATURE_AVX512CD); 1075 if (HasLeaf7 && ((EBX >> 29) & 1)) 1076 setFeature(X86::FEATURE_SHA); 1077 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) 1078 setFeature(X86::FEATURE_AVX512BW); 1079 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) 1080 setFeature(X86::FEATURE_AVX512VL); 1081 1082 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) 1083 setFeature(X86::FEATURE_AVX512VBMI); 1084 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) 1085 setFeature(X86::FEATURE_AVX512VBMI2); 1086 if (HasLeaf7 && ((ECX >> 8) & 1)) 1087 setFeature(X86::FEATURE_GFNI); 1088 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) 1089 setFeature(X86::FEATURE_VPCLMULQDQ); 1090 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) 1091 setFeature(X86::FEATURE_AVX512VNNI); 1092 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save) 1093 setFeature(X86::FEATURE_AVX512BITALG); 1094 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) 1095 setFeature(X86::FEATURE_AVX512VPOPCNTDQ); 1096 1097 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) 1098 setFeature(X86::FEATURE_AVX5124VNNIW); 1099 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) 1100 setFeature(X86::FEATURE_AVX5124FMAPS); 1101 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) 1102 setFeature(X86::FEATURE_AVX512VP2INTERSECT); 1103 1104 bool HasLeaf7Subleaf1 = 1105 MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 1106 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) 1107 setFeature(X86::FEATURE_AVX512BF16); 1108 1109 unsigned MaxExtLevel; 1110 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1111 1112 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1113 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1114 if (HasExtLeaf1 && ((ECX >> 6) & 1)) 1115 setFeature(X86::FEATURE_SSE4_A); 1116 if (HasExtLeaf1 && ((ECX >> 11) & 1)) 1117 setFeature(X86::FEATURE_XOP); 1118 if (HasExtLeaf1 && ((ECX >> 16) & 1)) 1119 setFeature(X86::FEATURE_FMA4); 1120 1121 if (HasExtLeaf1 && ((EDX >> 29) & 1)) 1122 setFeature(X86::FEATURE_EM64T); 1123 1124 *FeaturesOut = Features; 1125 *Features2Out = Features2; 1126 *Features3Out = Features3; 1127 } 1128 1129 StringRef sys::getHostCPUName() { 1130 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1131 unsigned MaxLeaf, Vendor; 1132 1133 #if defined(__GNUC__) || defined(__clang__) 1134 //FIXME: include cpuid.h from clang or copy __get_cpuid_max here 1135 // and simplify it to not invoke __cpuid (like cpu_model.c in 1136 // compiler-rt/lib/builtins/cpu_model.c? 1137 // Opting for the second option. 1138 if(!isCpuIdSupported()) 1139 return "generic"; 1140 #endif 1141 if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) 1142 return "generic"; 1143 getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); 1144 1145 unsigned Brand_id = EBX & 0xff; 1146 unsigned Family = 0, Model = 0; 1147 unsigned Features = 0, Features2 = 0, Features3 = 0; 1148 detectX86FamilyModel(EAX, &Family, &Model); 1149 getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2, &Features3); 1150 1151 unsigned Type = 0; 1152 unsigned Subtype = 0; 1153 1154 if (Vendor == SIG_INTEL) { 1155 getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, 1156 Features2, Features3, &Type, &Subtype); 1157 } else if (Vendor == SIG_AMD) { 1158 getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); 1159 } 1160 1161 // Check subtypes first since those are more specific. 1162 #define X86_CPU_SUBTYPE(ARCHNAME, ENUM) \ 1163 if (Subtype == X86::ENUM) \ 1164 return ARCHNAME; 1165 #include "llvm/Support/X86TargetParser.def" 1166 1167 // Now check types. 1168 #define X86_CPU_TYPE(ARCHNAME, ENUM) \ 1169 if (Type == X86::ENUM) \ 1170 return ARCHNAME; 1171 #include "llvm/Support/X86TargetParser.def" 1172 1173 return "generic"; 1174 } 1175 1176 #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) 1177 StringRef sys::getHostCPUName() { 1178 host_basic_info_data_t hostInfo; 1179 mach_msg_type_number_t infoCount; 1180 1181 infoCount = HOST_BASIC_INFO_COUNT; 1182 mach_port_t hostPort = mach_host_self(); 1183 host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, 1184 &infoCount); 1185 mach_port_deallocate(mach_task_self(), hostPort); 1186 1187 if (hostInfo.cpu_type != CPU_TYPE_POWERPC) 1188 return "generic"; 1189 1190 switch (hostInfo.cpu_subtype) { 1191 case CPU_SUBTYPE_POWERPC_601: 1192 return "601"; 1193 case CPU_SUBTYPE_POWERPC_602: 1194 return "602"; 1195 case CPU_SUBTYPE_POWERPC_603: 1196 return "603"; 1197 case CPU_SUBTYPE_POWERPC_603e: 1198 return "603e"; 1199 case CPU_SUBTYPE_POWERPC_603ev: 1200 return "603ev"; 1201 case CPU_SUBTYPE_POWERPC_604: 1202 return "604"; 1203 case CPU_SUBTYPE_POWERPC_604e: 1204 return "604e"; 1205 case CPU_SUBTYPE_POWERPC_620: 1206 return "620"; 1207 case CPU_SUBTYPE_POWERPC_750: 1208 return "750"; 1209 case CPU_SUBTYPE_POWERPC_7400: 1210 return "7400"; 1211 case CPU_SUBTYPE_POWERPC_7450: 1212 return "7450"; 1213 case CPU_SUBTYPE_POWERPC_970: 1214 return "970"; 1215 default:; 1216 } 1217 1218 return "generic"; 1219 } 1220 #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__)) 1221 StringRef sys::getHostCPUName() { 1222 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1223 StringRef Content = P ? P->getBuffer() : ""; 1224 return detail::getHostCPUNameForPowerPC(Content); 1225 } 1226 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1227 StringRef sys::getHostCPUName() { 1228 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1229 StringRef Content = P ? P->getBuffer() : ""; 1230 return detail::getHostCPUNameForARM(Content); 1231 } 1232 #elif defined(__linux__) && defined(__s390x__) 1233 StringRef sys::getHostCPUName() { 1234 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1235 StringRef Content = P ? P->getBuffer() : ""; 1236 return detail::getHostCPUNameForS390x(Content); 1237 } 1238 #elif defined(__APPLE__) && defined(__aarch64__) 1239 StringRef sys::getHostCPUName() { 1240 return "cyclone"; 1241 } 1242 #elif defined(__APPLE__) && defined(__arm__) 1243 StringRef sys::getHostCPUName() { 1244 host_basic_info_data_t hostInfo; 1245 mach_msg_type_number_t infoCount; 1246 1247 infoCount = HOST_BASIC_INFO_COUNT; 1248 mach_port_t hostPort = mach_host_self(); 1249 host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, 1250 &infoCount); 1251 mach_port_deallocate(mach_task_self(), hostPort); 1252 1253 if (hostInfo.cpu_type != CPU_TYPE_ARM) { 1254 assert(false && "CPUType not equal to ARM should not be possible on ARM"); 1255 return "generic"; 1256 } 1257 switch (hostInfo.cpu_subtype) { 1258 case CPU_SUBTYPE_ARM_V7S: 1259 return "swift"; 1260 default:; 1261 } 1262 1263 return "generic"; 1264 } 1265 #else 1266 StringRef sys::getHostCPUName() { return "generic"; } 1267 #endif 1268 1269 #if defined(__linux__) && defined(__x86_64__) 1270 // On Linux, the number of physical cores can be computed from /proc/cpuinfo, 1271 // using the number of unique physical/core id pairs. The following 1272 // implementation reads the /proc/cpuinfo format on an x86_64 system. 1273 int computeHostNumPhysicalCores() { 1274 // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be 1275 // mmapped because it appears to have 0 size. 1276 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 1277 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); 1278 if (std::error_code EC = Text.getError()) { 1279 llvm::errs() << "Can't read " 1280 << "/proc/cpuinfo: " << EC.message() << "\n"; 1281 return -1; 1282 } 1283 SmallVector<StringRef, 8> strs; 1284 (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1, 1285 /*KeepEmpty=*/false); 1286 int CurPhysicalId = -1; 1287 int CurCoreId = -1; 1288 SmallSet<std::pair<int, int>, 32> UniqueItems; 1289 for (auto &Line : strs) { 1290 Line = Line.trim(); 1291 if (!Line.startswith("physical id") && !Line.startswith("core id")) 1292 continue; 1293 std::pair<StringRef, StringRef> Data = Line.split(':'); 1294 auto Name = Data.first.trim(); 1295 auto Val = Data.second.trim(); 1296 if (Name == "physical id") { 1297 assert(CurPhysicalId == -1 && 1298 "Expected a core id before seeing another physical id"); 1299 Val.getAsInteger(10, CurPhysicalId); 1300 } 1301 if (Name == "core id") { 1302 assert(CurCoreId == -1 && 1303 "Expected a physical id before seeing another core id"); 1304 Val.getAsInteger(10, CurCoreId); 1305 } 1306 if (CurPhysicalId != -1 && CurCoreId != -1) { 1307 UniqueItems.insert(std::make_pair(CurPhysicalId, CurCoreId)); 1308 CurPhysicalId = -1; 1309 CurCoreId = -1; 1310 } 1311 } 1312 return UniqueItems.size(); 1313 } 1314 #elif defined(__APPLE__) && defined(__x86_64__) 1315 #include <sys/param.h> 1316 #include <sys/sysctl.h> 1317 1318 // Gets the number of *physical cores* on the machine. 1319 int computeHostNumPhysicalCores() { 1320 uint32_t count; 1321 size_t len = sizeof(count); 1322 sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0); 1323 if (count < 1) { 1324 int nm[2]; 1325 nm[0] = CTL_HW; 1326 nm[1] = HW_AVAILCPU; 1327 sysctl(nm, 2, &count, &len, NULL, 0); 1328 if (count < 1) 1329 return -1; 1330 } 1331 return count; 1332 } 1333 #elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0 1334 // Defined in llvm/lib/Support/Windows/Threading.inc 1335 int computeHostNumPhysicalCores(); 1336 #else 1337 // On other systems, return -1 to indicate unknown. 1338 static int computeHostNumPhysicalCores() { return -1; } 1339 #endif 1340 1341 int sys::getHostNumPhysicalCores() { 1342 static int NumCores = computeHostNumPhysicalCores(); 1343 return NumCores; 1344 } 1345 1346 #if defined(__i386__) || defined(_M_IX86) || \ 1347 defined(__x86_64__) || defined(_M_X64) 1348 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1349 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1350 unsigned MaxLevel; 1351 union { 1352 unsigned u[3]; 1353 char c[12]; 1354 } text; 1355 1356 if (getX86CpuIDAndInfo(0, &MaxLevel, text.u + 0, text.u + 2, text.u + 1) || 1357 MaxLevel < 1) 1358 return false; 1359 1360 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); 1361 1362 Features["cx8"] = (EDX >> 8) & 1; 1363 Features["cmov"] = (EDX >> 15) & 1; 1364 Features["mmx"] = (EDX >> 23) & 1; 1365 Features["fxsr"] = (EDX >> 24) & 1; 1366 Features["sse"] = (EDX >> 25) & 1; 1367 Features["sse2"] = (EDX >> 26) & 1; 1368 1369 Features["sse3"] = (ECX >> 0) & 1; 1370 Features["pclmul"] = (ECX >> 1) & 1; 1371 Features["ssse3"] = (ECX >> 9) & 1; 1372 Features["cx16"] = (ECX >> 13) & 1; 1373 Features["sse4.1"] = (ECX >> 19) & 1; 1374 Features["sse4.2"] = (ECX >> 20) & 1; 1375 Features["movbe"] = (ECX >> 22) & 1; 1376 Features["popcnt"] = (ECX >> 23) & 1; 1377 Features["aes"] = (ECX >> 25) & 1; 1378 Features["rdrnd"] = (ECX >> 30) & 1; 1379 1380 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 1381 // indicates that the AVX registers will be saved and restored on context 1382 // switch, then we have full AVX support. 1383 bool HasAVXSave = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) && 1384 !getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6); 1385 #if defined(__APPLE__) 1386 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 1387 // save the AVX512 context if we use AVX512 instructions, even the bit is not 1388 // set right now. 1389 bool HasAVX512Save = true; 1390 #else 1391 // AVX512 requires additional context to be saved by the OS. 1392 bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); 1393 #endif 1394 1395 Features["avx"] = HasAVXSave; 1396 Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave; 1397 // Only enable XSAVE if OS has enabled support for saving YMM state. 1398 Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave; 1399 Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave; 1400 1401 unsigned MaxExtLevel; 1402 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1403 1404 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1405 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1406 Features["sahf"] = HasExtLeaf1 && ((ECX >> 0) & 1); 1407 Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); 1408 Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); 1409 Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); 1410 Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; 1411 Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1); 1412 Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; 1413 Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); 1414 Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1); 1415 1416 Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1); 1417 1418 // Miscellaneous memory related features, detected by 1419 // using the 0x80000008 leaf of the CPUID instruction 1420 bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 && 1421 !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX); 1422 Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1); 1423 Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1); 1424 1425 bool HasLeaf7 = 1426 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1427 1428 Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); 1429 Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1); 1430 Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); 1431 // AVX2 is only supported if we have the OS save support from AVX. 1432 Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave; 1433 Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); 1434 Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1); 1435 Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); 1436 // AVX512 is only supported if the OS supports the context save for it. 1437 Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; 1438 Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; 1439 Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); 1440 Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); 1441 Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save; 1442 Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1); 1443 Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1); 1444 Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; 1445 Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; 1446 Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; 1447 Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); 1448 Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; 1449 Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; 1450 1451 Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1); 1452 Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; 1453 Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); 1454 Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1); 1455 Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save; 1456 Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1); 1457 Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1); 1458 Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave; 1459 Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave; 1460 Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save; 1461 Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save; 1462 Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; 1463 Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1); 1464 Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1); 1465 Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1); 1466 Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1); 1467 Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1); 1468 1469 // There are two CPUID leafs which information associated with the pconfig 1470 // instruction: 1471 // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th 1472 // bit of EDX), while the EAX=0x1b leaf returns information on the 1473 // availability of specific pconfig leafs. 1474 // The target feature here only refers to the the first of these two. 1475 // Users might need to check for the availability of specific pconfig 1476 // leaves using cpuid, since that information is ignored while 1477 // detecting features using the "-march=native" flag. 1478 // For more info, see X86 ISA docs. 1479 Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1); 1480 bool HasLeaf7Subleaf1 = 1481 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 1482 Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save; 1483 1484 bool HasLeafD = MaxLevel >= 0xd && 1485 !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); 1486 1487 // Only enable XSAVE if OS has enabled support for saving YMM state. 1488 Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave; 1489 Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave; 1490 Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave; 1491 1492 bool HasLeaf14 = MaxLevel >= 0x14 && 1493 !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX); 1494 1495 Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1); 1496 1497 return true; 1498 } 1499 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1500 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1501 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1502 if (!P) 1503 return false; 1504 1505 SmallVector<StringRef, 32> Lines; 1506 P->getBuffer().split(Lines, "\n"); 1507 1508 SmallVector<StringRef, 32> CPUFeatures; 1509 1510 // Look for the CPU features. 1511 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 1512 if (Lines[I].startswith("Features")) { 1513 Lines[I].split(CPUFeatures, ' '); 1514 break; 1515 } 1516 1517 #if defined(__aarch64__) 1518 // Keep track of which crypto features we have seen 1519 enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 }; 1520 uint32_t crypto = 0; 1521 #endif 1522 1523 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 1524 StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I]) 1525 #if defined(__aarch64__) 1526 .Case("asimd", "neon") 1527 .Case("fp", "fp-armv8") 1528 .Case("crc32", "crc") 1529 #else 1530 .Case("half", "fp16") 1531 .Case("neon", "neon") 1532 .Case("vfpv3", "vfp3") 1533 .Case("vfpv3d16", "d16") 1534 .Case("vfpv4", "vfp4") 1535 .Case("idiva", "hwdiv-arm") 1536 .Case("idivt", "hwdiv") 1537 #endif 1538 .Default(""); 1539 1540 #if defined(__aarch64__) 1541 // We need to check crypto separately since we need all of the crypto 1542 // extensions to enable the subtarget feature 1543 if (CPUFeatures[I] == "aes") 1544 crypto |= CAP_AES; 1545 else if (CPUFeatures[I] == "pmull") 1546 crypto |= CAP_PMULL; 1547 else if (CPUFeatures[I] == "sha1") 1548 crypto |= CAP_SHA1; 1549 else if (CPUFeatures[I] == "sha2") 1550 crypto |= CAP_SHA2; 1551 #endif 1552 1553 if (LLVMFeatureStr != "") 1554 Features[LLVMFeatureStr] = true; 1555 } 1556 1557 #if defined(__aarch64__) 1558 // If we have all crypto bits we can add the feature 1559 if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2)) 1560 Features["crypto"] = true; 1561 #endif 1562 1563 return true; 1564 } 1565 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64)) 1566 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1567 if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) 1568 Features["neon"] = true; 1569 if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) 1570 Features["crc"] = true; 1571 if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) 1572 Features["crypto"] = true; 1573 1574 return true; 1575 } 1576 #else 1577 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; } 1578 #endif 1579 1580 std::string sys::getProcessTriple() { 1581 std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE); 1582 Triple PT(Triple::normalize(TargetTripleString)); 1583 1584 if (sizeof(void *) == 8 && PT.isArch32Bit()) 1585 PT = PT.get64BitArchVariant(); 1586 if (sizeof(void *) == 4 && PT.isArch64Bit()) 1587 PT = PT.get32BitArchVariant(); 1588 1589 return PT.str(); 1590 } 1591