1 //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the operating system Host concept. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Support/Host.h" 14 #include "llvm/ADT/SmallSet.h" 15 #include "llvm/ADT/SmallVector.h" 16 #include "llvm/ADT/StringMap.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/ADT/StringSwitch.h" 19 #include "llvm/ADT/Triple.h" 20 #include "llvm/Config/llvm-config.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/FileSystem.h" 23 #include "llvm/Support/MemoryBuffer.h" 24 #include "llvm/Support/X86TargetParser.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include <assert.h> 27 #include <string.h> 28 29 // Include the platform-specific parts of this class. 30 #ifdef LLVM_ON_UNIX 31 #include "Unix/Host.inc" 32 #include <sched.h> 33 #endif 34 #ifdef _WIN32 35 #include "Windows/Host.inc" 36 #endif 37 #ifdef _MSC_VER 38 #include <intrin.h> 39 #endif 40 #if defined(__APPLE__) && (!defined(__x86_64__)) 41 #include <mach/host_info.h> 42 #include <mach/mach.h> 43 #include <mach/mach_host.h> 44 #include <mach/machine.h> 45 #endif 46 47 #define DEBUG_TYPE "host-detection" 48 49 //===----------------------------------------------------------------------===// 50 // 51 // Implementations of the CPU detection routines 52 // 53 //===----------------------------------------------------------------------===// 54 55 using namespace llvm; 56 57 static std::unique_ptr<llvm::MemoryBuffer> 58 LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() { 59 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 60 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); 61 if (std::error_code EC = Text.getError()) { 62 llvm::errs() << "Can't read " 63 << "/proc/cpuinfo: " << EC.message() << "\n"; 64 return nullptr; 65 } 66 return std::move(*Text); 67 } 68 69 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) { 70 // Access to the Processor Version Register (PVR) on PowerPC is privileged, 71 // and so we must use an operating-system interface to determine the current 72 // processor type. On Linux, this is exposed through the /proc/cpuinfo file. 73 const char *generic = "generic"; 74 75 // The cpu line is second (after the 'processor: 0' line), so if this 76 // buffer is too small then something has changed (or is wrong). 77 StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin(); 78 StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end(); 79 80 StringRef::const_iterator CIP = CPUInfoStart; 81 82 StringRef::const_iterator CPUStart = 0; 83 size_t CPULen = 0; 84 85 // We need to find the first line which starts with cpu, spaces, and a colon. 86 // After the colon, there may be some additional spaces and then the cpu type. 87 while (CIP < CPUInfoEnd && CPUStart == 0) { 88 if (CIP < CPUInfoEnd && *CIP == '\n') 89 ++CIP; 90 91 if (CIP < CPUInfoEnd && *CIP == 'c') { 92 ++CIP; 93 if (CIP < CPUInfoEnd && *CIP == 'p') { 94 ++CIP; 95 if (CIP < CPUInfoEnd && *CIP == 'u') { 96 ++CIP; 97 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 98 ++CIP; 99 100 if (CIP < CPUInfoEnd && *CIP == ':') { 101 ++CIP; 102 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 103 ++CIP; 104 105 if (CIP < CPUInfoEnd) { 106 CPUStart = CIP; 107 while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' && 108 *CIP != ',' && *CIP != '\n')) 109 ++CIP; 110 CPULen = CIP - CPUStart; 111 } 112 } 113 } 114 } 115 } 116 117 if (CPUStart == 0) 118 while (CIP < CPUInfoEnd && *CIP != '\n') 119 ++CIP; 120 } 121 122 if (CPUStart == 0) 123 return generic; 124 125 return StringSwitch<const char *>(StringRef(CPUStart, CPULen)) 126 .Case("604e", "604e") 127 .Case("604", "604") 128 .Case("7400", "7400") 129 .Case("7410", "7400") 130 .Case("7447", "7400") 131 .Case("7455", "7450") 132 .Case("G4", "g4") 133 .Case("POWER4", "970") 134 .Case("PPC970FX", "970") 135 .Case("PPC970MP", "970") 136 .Case("G5", "g5") 137 .Case("POWER5", "g5") 138 .Case("A2", "a2") 139 .Case("POWER6", "pwr6") 140 .Case("POWER7", "pwr7") 141 .Case("POWER8", "pwr8") 142 .Case("POWER8E", "pwr8") 143 .Case("POWER8NVL", "pwr8") 144 .Case("POWER9", "pwr9") 145 .Case("POWER10", "pwr10") 146 // FIXME: If we get a simulator or machine with the capabilities of 147 // mcpu=future, we should revisit this and add the name reported by the 148 // simulator/machine. 149 .Default(generic); 150 } 151 152 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { 153 // The cpuid register on arm is not accessible from user space. On Linux, 154 // it is exposed through the /proc/cpuinfo file. 155 156 // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line 157 // in all cases. 158 SmallVector<StringRef, 32> Lines; 159 ProcCpuinfoContent.split(Lines, "\n"); 160 161 // Look for the CPU implementer line. 162 StringRef Implementer; 163 StringRef Hardware; 164 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 165 if (Lines[I].startswith("CPU implementer")) 166 Implementer = Lines[I].substr(15).ltrim("\t :"); 167 if (Lines[I].startswith("Hardware")) 168 Hardware = Lines[I].substr(8).ltrim("\t :"); 169 } 170 171 if (Implementer == "0x41") { // ARM Ltd. 172 // MSM8992/8994 may give cpu part for the core that the kernel is running on, 173 // which is undeterministic and wrong. Always return cortex-a53 for these SoC. 174 if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996")) 175 return "cortex-a53"; 176 177 178 // Look for the CPU part line. 179 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 180 if (Lines[I].startswith("CPU part")) 181 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 182 // values correspond to the "Part number" in the CP15/c0 register. The 183 // contents are specified in the various processor manuals. 184 // This corresponds to the Main ID Register in Technical Reference Manuals. 185 // and is used in programs like sys-utils 186 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 187 .Case("0x926", "arm926ej-s") 188 .Case("0xb02", "mpcore") 189 .Case("0xb36", "arm1136j-s") 190 .Case("0xb56", "arm1156t2-s") 191 .Case("0xb76", "arm1176jz-s") 192 .Case("0xc08", "cortex-a8") 193 .Case("0xc09", "cortex-a9") 194 .Case("0xc0f", "cortex-a15") 195 .Case("0xc20", "cortex-m0") 196 .Case("0xc23", "cortex-m3") 197 .Case("0xc24", "cortex-m4") 198 .Case("0xd22", "cortex-m55") 199 .Case("0xd02", "cortex-a34") 200 .Case("0xd04", "cortex-a35") 201 .Case("0xd03", "cortex-a53") 202 .Case("0xd07", "cortex-a57") 203 .Case("0xd08", "cortex-a72") 204 .Case("0xd09", "cortex-a73") 205 .Case("0xd0a", "cortex-a75") 206 .Case("0xd0b", "cortex-a76") 207 .Case("0xd0d", "cortex-a77") 208 .Case("0xd41", "cortex-a78") 209 .Case("0xd44", "cortex-x1") 210 .Case("0xd0c", "neoverse-n1") 211 .Default("generic"); 212 } 213 214 if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium. 215 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 216 if (Lines[I].startswith("CPU part")) { 217 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 218 .Case("0x516", "thunderx2t99") 219 .Case("0x0516", "thunderx2t99") 220 .Case("0xaf", "thunderx2t99") 221 .Case("0x0af", "thunderx2t99") 222 .Case("0xa1", "thunderxt88") 223 .Case("0x0a1", "thunderxt88") 224 .Default("generic"); 225 } 226 } 227 } 228 229 if (Implementer == "0x46") { // Fujitsu Ltd. 230 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 231 if (Lines[I].startswith("CPU part")) { 232 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 233 .Case("0x001", "a64fx") 234 .Default("generic"); 235 } 236 } 237 } 238 239 if (Implementer == "0x4e") { // NVIDIA Corporation 240 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 241 if (Lines[I].startswith("CPU part")) { 242 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 243 .Case("0x004", "carmel") 244 .Default("generic"); 245 } 246 } 247 } 248 249 if (Implementer == "0x48") // HiSilicon Technologies, Inc. 250 // Look for the CPU part line. 251 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 252 if (Lines[I].startswith("CPU part")) 253 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 254 // values correspond to the "Part number" in the CP15/c0 register. The 255 // contents are specified in the various processor manuals. 256 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 257 .Case("0xd01", "tsv110") 258 .Default("generic"); 259 260 if (Implementer == "0x51") // Qualcomm Technologies, Inc. 261 // Look for the CPU part line. 262 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 263 if (Lines[I].startswith("CPU part")) 264 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 265 // values correspond to the "Part number" in the CP15/c0 register. The 266 // contents are specified in the various processor manuals. 267 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 268 .Case("0x06f", "krait") // APQ8064 269 .Case("0x201", "kryo") 270 .Case("0x205", "kryo") 271 .Case("0x211", "kryo") 272 .Case("0x800", "cortex-a73") 273 .Case("0x801", "cortex-a73") 274 .Case("0x802", "cortex-a73") 275 .Case("0x803", "cortex-a73") 276 .Case("0x804", "cortex-a73") 277 .Case("0x805", "cortex-a73") 278 .Case("0xc00", "falkor") 279 .Case("0xc01", "saphira") 280 .Default("generic"); 281 282 if (Implementer == "0x53") { // Samsung Electronics Co., Ltd. 283 // The Exynos chips have a convoluted ID scheme that doesn't seem to follow 284 // any predictive pattern across variants and parts. 285 unsigned Variant = 0, Part = 0; 286 287 // Look for the CPU variant line, whose value is a 1 digit hexadecimal 288 // number, corresponding to the Variant bits in the CP15/C0 register. 289 for (auto I : Lines) 290 if (I.consume_front("CPU variant")) 291 I.ltrim("\t :").getAsInteger(0, Variant); 292 293 // Look for the CPU part line, whose value is a 3 digit hexadecimal 294 // number, corresponding to the PartNum bits in the CP15/C0 register. 295 for (auto I : Lines) 296 if (I.consume_front("CPU part")) 297 I.ltrim("\t :").getAsInteger(0, Part); 298 299 unsigned Exynos = (Variant << 12) | Part; 300 switch (Exynos) { 301 default: 302 // Default by falling through to Exynos M3. 303 LLVM_FALLTHROUGH; 304 case 0x1002: 305 return "exynos-m3"; 306 case 0x1003: 307 return "exynos-m4"; 308 } 309 } 310 311 return "generic"; 312 } 313 314 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) { 315 // STIDP is a privileged operation, so use /proc/cpuinfo instead. 316 317 // The "processor 0:" line comes after a fair amount of other information, 318 // including a cache breakdown, but this should be plenty. 319 SmallVector<StringRef, 32> Lines; 320 ProcCpuinfoContent.split(Lines, "\n"); 321 322 // Look for the CPU features. 323 SmallVector<StringRef, 32> CPUFeatures; 324 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 325 if (Lines[I].startswith("features")) { 326 size_t Pos = Lines[I].find(":"); 327 if (Pos != StringRef::npos) { 328 Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' '); 329 break; 330 } 331 } 332 333 // We need to check for the presence of vector support independently of 334 // the machine type, since we may only use the vector register set when 335 // supported by the kernel (and hypervisor). 336 bool HaveVectorSupport = false; 337 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 338 if (CPUFeatures[I] == "vx") 339 HaveVectorSupport = true; 340 } 341 342 // Now check the processor machine type. 343 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 344 if (Lines[I].startswith("processor ")) { 345 size_t Pos = Lines[I].find("machine = "); 346 if (Pos != StringRef::npos) { 347 Pos += sizeof("machine = ") - 1; 348 unsigned int Id; 349 if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) { 350 if (Id >= 8561 && HaveVectorSupport) 351 return "z15"; 352 if (Id >= 3906 && HaveVectorSupport) 353 return "z14"; 354 if (Id >= 2964 && HaveVectorSupport) 355 return "z13"; 356 if (Id >= 2827) 357 return "zEC12"; 358 if (Id >= 2817) 359 return "z196"; 360 } 361 } 362 break; 363 } 364 } 365 366 return "generic"; 367 } 368 369 StringRef sys::detail::getHostCPUNameForBPF() { 370 #if !defined(__linux__) || !defined(__x86_64__) 371 return "generic"; 372 #else 373 uint8_t v3_insns[40] __attribute__ ((aligned (8))) = 374 /* BPF_MOV64_IMM(BPF_REG_0, 0) */ 375 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 376 /* BPF_MOV64_IMM(BPF_REG_2, 1) */ 377 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 378 /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ 379 0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 380 /* BPF_MOV64_IMM(BPF_REG_0, 1) */ 381 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 382 /* BPF_EXIT_INSN() */ 383 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 384 385 uint8_t v2_insns[40] __attribute__ ((aligned (8))) = 386 /* BPF_MOV64_IMM(BPF_REG_0, 0) */ 387 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 388 /* BPF_MOV64_IMM(BPF_REG_2, 1) */ 389 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 390 /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ 391 0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 392 /* BPF_MOV64_IMM(BPF_REG_0, 1) */ 393 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 394 /* BPF_EXIT_INSN() */ 395 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 396 397 struct bpf_prog_load_attr { 398 uint32_t prog_type; 399 uint32_t insn_cnt; 400 uint64_t insns; 401 uint64_t license; 402 uint32_t log_level; 403 uint32_t log_size; 404 uint64_t log_buf; 405 uint32_t kern_version; 406 uint32_t prog_flags; 407 } attr = {}; 408 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ 409 attr.insn_cnt = 5; 410 attr.insns = (uint64_t)v3_insns; 411 attr.license = (uint64_t)"DUMMY"; 412 413 int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, 414 sizeof(attr)); 415 if (fd >= 0) { 416 close(fd); 417 return "v3"; 418 } 419 420 /* Clear the whole attr in case its content changed by syscall. */ 421 memset(&attr, 0, sizeof(attr)); 422 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ 423 attr.insn_cnt = 5; 424 attr.insns = (uint64_t)v2_insns; 425 attr.license = (uint64_t)"DUMMY"; 426 fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr)); 427 if (fd >= 0) { 428 close(fd); 429 return "v2"; 430 } 431 return "v1"; 432 #endif 433 } 434 435 #if defined(__i386__) || defined(_M_IX86) || \ 436 defined(__x86_64__) || defined(_M_X64) 437 438 enum VendorSignatures { 439 SIG_INTEL = 0x756e6547 /* Genu */, 440 SIG_AMD = 0x68747541 /* Auth */ 441 }; 442 443 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). 444 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID 445 // support. Consequently, for i386, the presence of CPUID is checked first 446 // via the corresponding eflags bit. 447 // Removal of cpuid.h header motivated by PR30384 448 // Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp 449 // or test-suite, but are used in external projects e.g. libstdcxx 450 static bool isCpuIdSupported() { 451 #if defined(__GNUC__) || defined(__clang__) 452 #if defined(__i386__) 453 int __cpuid_supported; 454 __asm__(" pushfl\n" 455 " popl %%eax\n" 456 " movl %%eax,%%ecx\n" 457 " xorl $0x00200000,%%eax\n" 458 " pushl %%eax\n" 459 " popfl\n" 460 " pushfl\n" 461 " popl %%eax\n" 462 " movl $0,%0\n" 463 " cmpl %%eax,%%ecx\n" 464 " je 1f\n" 465 " movl $1,%0\n" 466 "1:" 467 : "=r"(__cpuid_supported) 468 : 469 : "eax", "ecx"); 470 if (!__cpuid_supported) 471 return false; 472 #endif 473 return true; 474 #endif 475 return true; 476 } 477 478 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in 479 /// the specified arguments. If we can't run cpuid on the host, return true. 480 static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, 481 unsigned *rECX, unsigned *rEDX) { 482 #if defined(__GNUC__) || defined(__clang__) 483 #if defined(__x86_64__) 484 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 485 // FIXME: should we save this for Clang? 486 __asm__("movq\t%%rbx, %%rsi\n\t" 487 "cpuid\n\t" 488 "xchgq\t%%rbx, %%rsi\n\t" 489 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 490 : "a"(value)); 491 return false; 492 #elif defined(__i386__) 493 __asm__("movl\t%%ebx, %%esi\n\t" 494 "cpuid\n\t" 495 "xchgl\t%%ebx, %%esi\n\t" 496 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 497 : "a"(value)); 498 return false; 499 #else 500 return true; 501 #endif 502 #elif defined(_MSC_VER) 503 // The MSVC intrinsic is portable across x86 and x64. 504 int registers[4]; 505 __cpuid(registers, value); 506 *rEAX = registers[0]; 507 *rEBX = registers[1]; 508 *rECX = registers[2]; 509 *rEDX = registers[3]; 510 return false; 511 #else 512 return true; 513 #endif 514 } 515 516 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return 517 /// the 4 values in the specified arguments. If we can't run cpuid on the host, 518 /// return true. 519 static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, 520 unsigned *rEAX, unsigned *rEBX, unsigned *rECX, 521 unsigned *rEDX) { 522 #if defined(__GNUC__) || defined(__clang__) 523 #if defined(__x86_64__) 524 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 525 // FIXME: should we save this for Clang? 526 __asm__("movq\t%%rbx, %%rsi\n\t" 527 "cpuid\n\t" 528 "xchgq\t%%rbx, %%rsi\n\t" 529 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 530 : "a"(value), "c"(subleaf)); 531 return false; 532 #elif defined(__i386__) 533 __asm__("movl\t%%ebx, %%esi\n\t" 534 "cpuid\n\t" 535 "xchgl\t%%ebx, %%esi\n\t" 536 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 537 : "a"(value), "c"(subleaf)); 538 return false; 539 #else 540 return true; 541 #endif 542 #elif defined(_MSC_VER) 543 int registers[4]; 544 __cpuidex(registers, value, subleaf); 545 *rEAX = registers[0]; 546 *rEBX = registers[1]; 547 *rECX = registers[2]; 548 *rEDX = registers[3]; 549 return false; 550 #else 551 return true; 552 #endif 553 } 554 555 // Read control register 0 (XCR0). Used to detect features such as AVX. 556 static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { 557 #if defined(__GNUC__) || defined(__clang__) 558 // Check xgetbv; this uses a .byte sequence instead of the instruction 559 // directly because older assemblers do not include support for xgetbv and 560 // there is no easy way to conditionally compile based on the assembler used. 561 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); 562 return false; 563 #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) 564 unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); 565 *rEAX = Result; 566 *rEDX = Result >> 32; 567 return false; 568 #else 569 return true; 570 #endif 571 } 572 573 static void detectX86FamilyModel(unsigned EAX, unsigned *Family, 574 unsigned *Model) { 575 *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 576 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 577 if (*Family == 6 || *Family == 0xf) { 578 if (*Family == 0xf) 579 // Examine extended family ID if family ID is F. 580 *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 581 // Examine extended model ID if family ID is 6 or F. 582 *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 583 } 584 } 585 586 static StringRef 587 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, 588 const unsigned *Features, 589 unsigned *Type, unsigned *Subtype) { 590 auto testFeature = [&](unsigned F) { 591 return (Features[F / 32] & (1U << (F % 32))) != 0; 592 }; 593 594 StringRef CPU; 595 596 switch (Family) { 597 case 3: 598 CPU = "i386"; 599 break; 600 case 4: 601 CPU = "i486"; 602 break; 603 case 5: 604 if (testFeature(X86::FEATURE_MMX)) { 605 CPU = "pentium-mmx"; 606 break; 607 } 608 CPU = "pentium"; 609 break; 610 case 6: 611 switch (Model) { 612 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile 613 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad 614 // mobile processor, Intel Core 2 Extreme processor, Intel 615 // Pentium Dual-Core processor, Intel Xeon processor, model 616 // 0Fh. All processors are manufactured using the 65 nm process. 617 case 0x16: // Intel Celeron processor model 16h. All processors are 618 // manufactured using the 65 nm process 619 CPU = "core2"; 620 *Type = X86::INTEL_CORE2; 621 break; 622 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model 623 // 17h. All processors are manufactured using the 45 nm process. 624 // 625 // 45nm: Penryn , Wolfdale, Yorkfield (XE) 626 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using 627 // the 45 nm process. 628 CPU = "penryn"; 629 *Type = X86::INTEL_CORE2; 630 break; 631 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All 632 // processors are manufactured using the 45 nm process. 633 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. 634 // As found in a Summer 2010 model iMac. 635 case 0x1f: 636 case 0x2e: // Nehalem EX 637 CPU = "nehalem"; 638 *Type = X86::INTEL_COREI7; 639 *Subtype = X86::INTEL_COREI7_NEHALEM; 640 break; 641 case 0x25: // Intel Core i7, laptop version. 642 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All 643 // processors are manufactured using the 32 nm process. 644 case 0x2f: // Westmere EX 645 CPU = "westmere"; 646 *Type = X86::INTEL_COREI7; 647 *Subtype = X86::INTEL_COREI7_WESTMERE; 648 break; 649 case 0x2a: // Intel Core i7 processor. All processors are manufactured 650 // using the 32 nm process. 651 case 0x2d: 652 CPU = "sandybridge"; 653 *Type = X86::INTEL_COREI7; 654 *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; 655 break; 656 case 0x3a: 657 case 0x3e: // Ivy Bridge EP 658 CPU = "ivybridge"; 659 *Type = X86::INTEL_COREI7; 660 *Subtype = X86::INTEL_COREI7_IVYBRIDGE; 661 break; 662 663 // Haswell: 664 case 0x3c: 665 case 0x3f: 666 case 0x45: 667 case 0x46: 668 CPU = "haswell"; 669 *Type = X86::INTEL_COREI7; 670 *Subtype = X86::INTEL_COREI7_HASWELL; 671 break; 672 673 // Broadwell: 674 case 0x3d: 675 case 0x47: 676 case 0x4f: 677 case 0x56: 678 CPU = "broadwell"; 679 *Type = X86::INTEL_COREI7; 680 *Subtype = X86::INTEL_COREI7_BROADWELL; 681 break; 682 683 // Skylake: 684 case 0x4e: // Skylake mobile 685 case 0x5e: // Skylake desktop 686 case 0x8e: // Kaby Lake mobile 687 case 0x9e: // Kaby Lake desktop 688 case 0xa5: // Comet Lake-H/S 689 case 0xa6: // Comet Lake-U 690 CPU = "skylake"; 691 *Type = X86::INTEL_COREI7; 692 *Subtype = X86::INTEL_COREI7_SKYLAKE; 693 break; 694 695 // Skylake Xeon: 696 case 0x55: 697 *Type = X86::INTEL_COREI7; 698 if (testFeature(X86::FEATURE_AVX512BF16)) { 699 CPU = "cooperlake"; 700 *Subtype = X86::INTEL_COREI7_COOPERLAKE; 701 } else if (testFeature(X86::FEATURE_AVX512VNNI)) { 702 CPU = "cascadelake"; 703 *Subtype = X86::INTEL_COREI7_CASCADELAKE; 704 } else { 705 CPU = "skylake-avx512"; 706 *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; 707 } 708 break; 709 710 // Cannonlake: 711 case 0x66: 712 CPU = "cannonlake"; 713 *Type = X86::INTEL_COREI7; 714 *Subtype = X86::INTEL_COREI7_CANNONLAKE; 715 break; 716 717 // Icelake: 718 case 0x7d: 719 case 0x7e: 720 CPU = "icelake-client"; 721 *Type = X86::INTEL_COREI7; 722 *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; 723 break; 724 725 // Icelake Xeon: 726 case 0x6a: 727 case 0x6c: 728 CPU = "icelake-server"; 729 *Type = X86::INTEL_COREI7; 730 *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER; 731 break; 732 733 // Sapphire Rapids: 734 case 0x8f: 735 CPU = "sapphirerapids"; 736 *Type = X86::INTEL_COREI7; 737 *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS; 738 break; 739 740 case 0x1c: // Most 45 nm Intel Atom processors 741 case 0x26: // 45 nm Atom Lincroft 742 case 0x27: // 32 nm Atom Medfield 743 case 0x35: // 32 nm Atom Midview 744 case 0x36: // 32 nm Atom Midview 745 CPU = "bonnell"; 746 *Type = X86::INTEL_BONNELL; 747 break; 748 749 // Atom Silvermont codes from the Intel software optimization guide. 750 case 0x37: 751 case 0x4a: 752 case 0x4d: 753 case 0x5a: 754 case 0x5d: 755 case 0x4c: // really airmont 756 CPU = "silvermont"; 757 *Type = X86::INTEL_SILVERMONT; 758 break; 759 // Goldmont: 760 case 0x5c: // Apollo Lake 761 case 0x5f: // Denverton 762 CPU = "goldmont"; 763 *Type = X86::INTEL_GOLDMONT; 764 break; 765 case 0x7a: 766 CPU = "goldmont-plus"; 767 *Type = X86::INTEL_GOLDMONT_PLUS; 768 break; 769 case 0x86: 770 *Type = X86::INTEL_TREMONT; 771 break; 772 773 case 0x57: 774 CPU = "tremont"; 775 *Type = X86::INTEL_KNL; 776 break; 777 778 case 0x85: 779 CPU = "knm"; 780 *Type = X86::INTEL_KNM; 781 break; 782 783 default: // Unknown family 6 CPU, try to guess. 784 // Don't both with Type/Subtype here, they aren't used by the caller. 785 // They're used above to keep the code in sync with compiler-rt. 786 // TODO detect tigerlake host from model 787 if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) { 788 CPU = "tigerlake"; 789 } else if (testFeature(X86::FEATURE_AVX512VBMI2)) { 790 CPU = "icelake-client"; 791 } else if (testFeature(X86::FEATURE_AVX512VBMI)) { 792 CPU = "cannonlake"; 793 } else if (testFeature(X86::FEATURE_AVX512BF16)) { 794 CPU = "cooperlake"; 795 } else if (testFeature(X86::FEATURE_AVX512VNNI)) { 796 CPU = "cascadelake"; 797 } else if (testFeature(X86::FEATURE_AVX512VL)) { 798 CPU = "skylake-avx512"; 799 } else if (testFeature(X86::FEATURE_AVX512ER)) { 800 CPU = "knl"; 801 } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) { 802 if (testFeature(X86::FEATURE_SHA)) 803 CPU = "goldmont"; 804 else 805 CPU = "skylake"; 806 } else if (testFeature(X86::FEATURE_ADX)) { 807 CPU = "broadwell"; 808 } else if (testFeature(X86::FEATURE_AVX2)) { 809 CPU = "haswell"; 810 } else if (testFeature(X86::FEATURE_AVX)) { 811 CPU = "sandybridge"; 812 } else if (testFeature(X86::FEATURE_SSE4_2)) { 813 if (testFeature(X86::FEATURE_MOVBE)) 814 CPU = "silvermont"; 815 else 816 CPU = "nehalem"; 817 } else if (testFeature(X86::FEATURE_SSE4_1)) { 818 CPU = "penryn"; 819 } else if (testFeature(X86::FEATURE_SSSE3)) { 820 if (testFeature(X86::FEATURE_MOVBE)) 821 CPU = "bonnell"; 822 else 823 CPU = "core2"; 824 } else if (testFeature(X86::FEATURE_64BIT)) { 825 CPU = "core2"; 826 } else if (testFeature(X86::FEATURE_SSE3)) { 827 CPU = "yonah"; 828 } else if (testFeature(X86::FEATURE_SSE2)) { 829 CPU = "pentium-m"; 830 } else if (testFeature(X86::FEATURE_SSE)) { 831 CPU = "pentium3"; 832 } else if (testFeature(X86::FEATURE_MMX)) { 833 CPU = "pentium2"; 834 } else { 835 CPU = "pentiumpro"; 836 } 837 break; 838 } 839 break; 840 case 15: { 841 if (testFeature(X86::FEATURE_64BIT)) { 842 CPU = "nocona"; 843 break; 844 } 845 if (testFeature(X86::FEATURE_SSE3)) { 846 CPU = "prescott"; 847 break; 848 } 849 CPU = "pentium4"; 850 break; 851 } 852 default: 853 break; // Unknown. 854 } 855 856 return CPU; 857 } 858 859 static StringRef 860 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, 861 const unsigned *Features, 862 unsigned *Type, unsigned *Subtype) { 863 auto testFeature = [&](unsigned F) { 864 return (Features[F / 32] & (1U << (F % 32))) != 0; 865 }; 866 867 StringRef CPU; 868 869 switch (Family) { 870 case 4: 871 CPU = "i486"; 872 break; 873 case 5: 874 CPU = "pentium"; 875 switch (Model) { 876 case 6: 877 case 7: 878 CPU = "k6"; 879 break; 880 case 8: 881 CPU = "k6-2"; 882 break; 883 case 9: 884 case 13: 885 CPU = "k6-3"; 886 break; 887 case 10: 888 CPU = "geode"; 889 break; 890 } 891 break; 892 case 6: 893 if (testFeature(X86::FEATURE_SSE)) { 894 CPU = "athlon-xp"; 895 break; 896 } 897 CPU = "athlon"; 898 break; 899 case 15: 900 if (testFeature(X86::FEATURE_SSE3)) { 901 CPU = "k8-sse3"; 902 break; 903 } 904 CPU = "k8"; 905 break; 906 case 16: 907 CPU = "amdfam10"; 908 *Type = X86::AMDFAM10H; // "amdfam10" 909 switch (Model) { 910 case 2: 911 *Subtype = X86::AMDFAM10H_BARCELONA; 912 break; 913 case 4: 914 *Subtype = X86::AMDFAM10H_SHANGHAI; 915 break; 916 case 8: 917 *Subtype = X86::AMDFAM10H_ISTANBUL; 918 break; 919 } 920 break; 921 case 20: 922 CPU = "btver1"; 923 *Type = X86::AMD_BTVER1; 924 break; 925 case 21: 926 CPU = "bdver1"; 927 *Type = X86::AMDFAM15H; 928 if (Model >= 0x60 && Model <= 0x7f) { 929 CPU = "bdver4"; 930 *Subtype = X86::AMDFAM15H_BDVER4; 931 break; // 60h-7Fh: Excavator 932 } 933 if (Model >= 0x30 && Model <= 0x3f) { 934 CPU = "bdver3"; 935 *Subtype = X86::AMDFAM15H_BDVER3; 936 break; // 30h-3Fh: Steamroller 937 } 938 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { 939 CPU = "bdver2"; 940 *Subtype = X86::AMDFAM15H_BDVER2; 941 break; // 02h, 10h-1Fh: Piledriver 942 } 943 if (Model <= 0x0f) { 944 *Subtype = X86::AMDFAM15H_BDVER1; 945 break; // 00h-0Fh: Bulldozer 946 } 947 break; 948 case 22: 949 CPU = "btver2"; 950 *Type = X86::AMD_BTVER2; 951 break; 952 case 23: 953 CPU = "znver1"; 954 *Type = X86::AMDFAM17H; 955 if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) { 956 CPU = "znver2"; 957 *Subtype = X86::AMDFAM17H_ZNVER2; 958 break; // 30h-3fh, 71h: Zen2 959 } 960 if (Model <= 0x0f) { 961 *Subtype = X86::AMDFAM17H_ZNVER1; 962 break; // 00h-0Fh: Zen1 963 } 964 break; 965 default: 966 break; // Unknown AMD CPU. 967 } 968 969 return CPU; 970 } 971 972 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, 973 unsigned *Features) { 974 unsigned EAX, EBX; 975 976 auto setFeature = [&](unsigned F) { 977 Features[F / 32] |= 1U << (F % 32); 978 }; 979 980 if ((EDX >> 15) & 1) 981 setFeature(X86::FEATURE_CMOV); 982 if ((EDX >> 23) & 1) 983 setFeature(X86::FEATURE_MMX); 984 if ((EDX >> 25) & 1) 985 setFeature(X86::FEATURE_SSE); 986 if ((EDX >> 26) & 1) 987 setFeature(X86::FEATURE_SSE2); 988 989 if ((ECX >> 0) & 1) 990 setFeature(X86::FEATURE_SSE3); 991 if ((ECX >> 1) & 1) 992 setFeature(X86::FEATURE_PCLMUL); 993 if ((ECX >> 9) & 1) 994 setFeature(X86::FEATURE_SSSE3); 995 if ((ECX >> 12) & 1) 996 setFeature(X86::FEATURE_FMA); 997 if ((ECX >> 19) & 1) 998 setFeature(X86::FEATURE_SSE4_1); 999 if ((ECX >> 20) & 1) 1000 setFeature(X86::FEATURE_SSE4_2); 1001 if ((ECX >> 23) & 1) 1002 setFeature(X86::FEATURE_POPCNT); 1003 if ((ECX >> 25) & 1) 1004 setFeature(X86::FEATURE_AES); 1005 1006 if ((ECX >> 22) & 1) 1007 setFeature(X86::FEATURE_MOVBE); 1008 1009 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 1010 // indicates that the AVX registers will be saved and restored on context 1011 // switch, then we have full AVX support. 1012 const unsigned AVXBits = (1 << 27) | (1 << 28); 1013 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && 1014 ((EAX & 0x6) == 0x6); 1015 #if defined(__APPLE__) 1016 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 1017 // save the AVX512 context if we use AVX512 instructions, even the bit is not 1018 // set right now. 1019 bool HasAVX512Save = true; 1020 #else 1021 // AVX512 requires additional context to be saved by the OS. 1022 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); 1023 #endif 1024 1025 if (HasAVX) 1026 setFeature(X86::FEATURE_AVX); 1027 1028 bool HasLeaf7 = 1029 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1030 1031 if (HasLeaf7 && ((EBX >> 3) & 1)) 1032 setFeature(X86::FEATURE_BMI); 1033 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) 1034 setFeature(X86::FEATURE_AVX2); 1035 if (HasLeaf7 && ((EBX >> 8) & 1)) 1036 setFeature(X86::FEATURE_BMI2); 1037 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) 1038 setFeature(X86::FEATURE_AVX512F); 1039 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) 1040 setFeature(X86::FEATURE_AVX512DQ); 1041 if (HasLeaf7 && ((EBX >> 19) & 1)) 1042 setFeature(X86::FEATURE_ADX); 1043 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) 1044 setFeature(X86::FEATURE_AVX512IFMA); 1045 if (HasLeaf7 && ((EBX >> 23) & 1)) 1046 setFeature(X86::FEATURE_CLFLUSHOPT); 1047 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) 1048 setFeature(X86::FEATURE_AVX512PF); 1049 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) 1050 setFeature(X86::FEATURE_AVX512ER); 1051 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) 1052 setFeature(X86::FEATURE_AVX512CD); 1053 if (HasLeaf7 && ((EBX >> 29) & 1)) 1054 setFeature(X86::FEATURE_SHA); 1055 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) 1056 setFeature(X86::FEATURE_AVX512BW); 1057 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) 1058 setFeature(X86::FEATURE_AVX512VL); 1059 1060 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) 1061 setFeature(X86::FEATURE_AVX512VBMI); 1062 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) 1063 setFeature(X86::FEATURE_AVX512VBMI2); 1064 if (HasLeaf7 && ((ECX >> 8) & 1)) 1065 setFeature(X86::FEATURE_GFNI); 1066 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) 1067 setFeature(X86::FEATURE_VPCLMULQDQ); 1068 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) 1069 setFeature(X86::FEATURE_AVX512VNNI); 1070 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save) 1071 setFeature(X86::FEATURE_AVX512BITALG); 1072 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) 1073 setFeature(X86::FEATURE_AVX512VPOPCNTDQ); 1074 1075 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) 1076 setFeature(X86::FEATURE_AVX5124VNNIW); 1077 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) 1078 setFeature(X86::FEATURE_AVX5124FMAPS); 1079 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) 1080 setFeature(X86::FEATURE_AVX512VP2INTERSECT); 1081 1082 bool HasLeaf7Subleaf1 = 1083 MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 1084 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) 1085 setFeature(X86::FEATURE_AVX512BF16); 1086 1087 unsigned MaxExtLevel; 1088 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1089 1090 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1091 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1092 if (HasExtLeaf1 && ((ECX >> 6) & 1)) 1093 setFeature(X86::FEATURE_SSE4_A); 1094 if (HasExtLeaf1 && ((ECX >> 11) & 1)) 1095 setFeature(X86::FEATURE_XOP); 1096 if (HasExtLeaf1 && ((ECX >> 16) & 1)) 1097 setFeature(X86::FEATURE_FMA4); 1098 1099 if (HasExtLeaf1 && ((EDX >> 29) & 1)) 1100 setFeature(X86::FEATURE_64BIT); 1101 } 1102 1103 StringRef sys::getHostCPUName() { 1104 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1105 unsigned MaxLeaf, Vendor; 1106 1107 if (!isCpuIdSupported()) 1108 return "generic"; 1109 1110 if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) 1111 return "generic"; 1112 getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); 1113 1114 unsigned Family = 0, Model = 0; 1115 unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0}; 1116 detectX86FamilyModel(EAX, &Family, &Model); 1117 getAvailableFeatures(ECX, EDX, MaxLeaf, Features); 1118 1119 // These aren't consumed in this file, but we try to keep some source code the 1120 // same or similar to compiler-rt. 1121 unsigned Type = 0; 1122 unsigned Subtype = 0; 1123 1124 StringRef CPU; 1125 1126 if (Vendor == SIG_INTEL) { 1127 CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type, 1128 &Subtype); 1129 } else if (Vendor == SIG_AMD) { 1130 CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, 1131 &Subtype); 1132 } 1133 1134 if (!CPU.empty()) 1135 return CPU; 1136 1137 return "generic"; 1138 } 1139 1140 #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) 1141 StringRef sys::getHostCPUName() { 1142 host_basic_info_data_t hostInfo; 1143 mach_msg_type_number_t infoCount; 1144 1145 infoCount = HOST_BASIC_INFO_COUNT; 1146 mach_port_t hostPort = mach_host_self(); 1147 host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, 1148 &infoCount); 1149 mach_port_deallocate(mach_task_self(), hostPort); 1150 1151 if (hostInfo.cpu_type != CPU_TYPE_POWERPC) 1152 return "generic"; 1153 1154 switch (hostInfo.cpu_subtype) { 1155 case CPU_SUBTYPE_POWERPC_601: 1156 return "601"; 1157 case CPU_SUBTYPE_POWERPC_602: 1158 return "602"; 1159 case CPU_SUBTYPE_POWERPC_603: 1160 return "603"; 1161 case CPU_SUBTYPE_POWERPC_603e: 1162 return "603e"; 1163 case CPU_SUBTYPE_POWERPC_603ev: 1164 return "603ev"; 1165 case CPU_SUBTYPE_POWERPC_604: 1166 return "604"; 1167 case CPU_SUBTYPE_POWERPC_604e: 1168 return "604e"; 1169 case CPU_SUBTYPE_POWERPC_620: 1170 return "620"; 1171 case CPU_SUBTYPE_POWERPC_750: 1172 return "750"; 1173 case CPU_SUBTYPE_POWERPC_7400: 1174 return "7400"; 1175 case CPU_SUBTYPE_POWERPC_7450: 1176 return "7450"; 1177 case CPU_SUBTYPE_POWERPC_970: 1178 return "970"; 1179 default:; 1180 } 1181 1182 return "generic"; 1183 } 1184 #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__)) 1185 StringRef sys::getHostCPUName() { 1186 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1187 StringRef Content = P ? P->getBuffer() : ""; 1188 return detail::getHostCPUNameForPowerPC(Content); 1189 } 1190 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1191 StringRef sys::getHostCPUName() { 1192 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1193 StringRef Content = P ? P->getBuffer() : ""; 1194 return detail::getHostCPUNameForARM(Content); 1195 } 1196 #elif defined(__linux__) && defined(__s390x__) 1197 StringRef sys::getHostCPUName() { 1198 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1199 StringRef Content = P ? P->getBuffer() : ""; 1200 return detail::getHostCPUNameForS390x(Content); 1201 } 1202 #elif defined(__APPLE__) && defined(__aarch64__) 1203 StringRef sys::getHostCPUName() { 1204 return "cyclone"; 1205 } 1206 #elif defined(__APPLE__) && defined(__arm__) 1207 StringRef sys::getHostCPUName() { 1208 host_basic_info_data_t hostInfo; 1209 mach_msg_type_number_t infoCount; 1210 1211 infoCount = HOST_BASIC_INFO_COUNT; 1212 mach_port_t hostPort = mach_host_self(); 1213 host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, 1214 &infoCount); 1215 mach_port_deallocate(mach_task_self(), hostPort); 1216 1217 if (hostInfo.cpu_type != CPU_TYPE_ARM) { 1218 assert(false && "CPUType not equal to ARM should not be possible on ARM"); 1219 return "generic"; 1220 } 1221 switch (hostInfo.cpu_subtype) { 1222 case CPU_SUBTYPE_ARM_V7S: 1223 return "swift"; 1224 default:; 1225 } 1226 1227 return "generic"; 1228 } 1229 #else 1230 StringRef sys::getHostCPUName() { return "generic"; } 1231 #endif 1232 1233 #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__)) 1234 // On Linux, the number of physical cores can be computed from /proc/cpuinfo, 1235 // using the number of unique physical/core id pairs. The following 1236 // implementation reads the /proc/cpuinfo format on an x86_64 system. 1237 int computeHostNumPhysicalCores() { 1238 // Enabled represents the number of physical id/core id pairs with at least 1239 // one processor id enabled by the CPU affinity mask. 1240 cpu_set_t Affinity, Enabled; 1241 if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0) 1242 return -1; 1243 CPU_ZERO(&Enabled); 1244 1245 // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be 1246 // mmapped because it appears to have 0 size. 1247 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 1248 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); 1249 if (std::error_code EC = Text.getError()) { 1250 llvm::errs() << "Can't read " 1251 << "/proc/cpuinfo: " << EC.message() << "\n"; 1252 return -1; 1253 } 1254 SmallVector<StringRef, 8> strs; 1255 (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1, 1256 /*KeepEmpty=*/false); 1257 int CurProcessor = -1; 1258 int CurPhysicalId = -1; 1259 int CurSiblings = -1; 1260 int CurCoreId = -1; 1261 for (StringRef Line : strs) { 1262 std::pair<StringRef, StringRef> Data = Line.split(':'); 1263 auto Name = Data.first.trim(); 1264 auto Val = Data.second.trim(); 1265 // These fields are available if the kernel is configured with CONFIG_SMP. 1266 if (Name == "processor") 1267 Val.getAsInteger(10, CurProcessor); 1268 else if (Name == "physical id") 1269 Val.getAsInteger(10, CurPhysicalId); 1270 else if (Name == "siblings") 1271 Val.getAsInteger(10, CurSiblings); 1272 else if (Name == "core id") { 1273 Val.getAsInteger(10, CurCoreId); 1274 // The processor id corresponds to an index into cpu_set_t. 1275 if (CPU_ISSET(CurProcessor, &Affinity)) 1276 CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled); 1277 } 1278 } 1279 return CPU_COUNT(&Enabled); 1280 } 1281 #elif defined(__linux__) && defined(__powerpc__) 1282 int computeHostNumPhysicalCores() { 1283 cpu_set_t Affinity; 1284 if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0) 1285 return CPU_COUNT(&Affinity); 1286 1287 // The call to sched_getaffinity() may have failed because the Affinity 1288 // mask is too small for the number of CPU's on the system (i.e. the 1289 // system has more than 1024 CPUs). Allocate a mask large enough for 1290 // twice as many CPUs. 1291 cpu_set_t *DynAffinity; 1292 DynAffinity = CPU_ALLOC(2048); 1293 if (sched_getaffinity(0, CPU_ALLOC_SIZE(2048), DynAffinity) == 0) { 1294 int NumCPUs = CPU_COUNT(DynAffinity); 1295 CPU_FREE(DynAffinity); 1296 return NumCPUs; 1297 } 1298 return -1; 1299 } 1300 #elif defined(__linux__) && defined(__s390x__) 1301 int computeHostNumPhysicalCores() { return sysconf(_SC_NPROCESSORS_ONLN); } 1302 #elif defined(__APPLE__) && defined(__x86_64__) 1303 #include <sys/param.h> 1304 #include <sys/sysctl.h> 1305 1306 // Gets the number of *physical cores* on the machine. 1307 int computeHostNumPhysicalCores() { 1308 uint32_t count; 1309 size_t len = sizeof(count); 1310 sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0); 1311 if (count < 1) { 1312 int nm[2]; 1313 nm[0] = CTL_HW; 1314 nm[1] = HW_AVAILCPU; 1315 sysctl(nm, 2, &count, &len, NULL, 0); 1316 if (count < 1) 1317 return -1; 1318 } 1319 return count; 1320 } 1321 #elif defined(__MVS__) 1322 int computeHostNumPhysicalCores() { 1323 enum { 1324 // Byte offset of the pointer to the Communications Vector Table (CVT) in 1325 // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and 1326 // will be zero-extended to uintptr_t. 1327 FLCCVT = 16, 1328 // Byte offset of the pointer to the Common System Data Area (CSD) in the 1329 // CVT. The table entry is a 31-bit pointer and will be zero-extended to 1330 // uintptr_t. 1331 CVTCSD = 660, 1332 // Byte offset to the number of live CPs in the LPAR, stored as a signed 1333 // 32-bit value in the table. 1334 CSD_NUMBER_ONLINE_STANDARD_CPS = 264, 1335 }; 1336 char *PSA = 0; 1337 char *CVT = reinterpret_cast<char *>( 1338 static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT]))); 1339 char *CSD = reinterpret_cast<char *>( 1340 static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD]))); 1341 return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]); 1342 } 1343 #elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0 1344 // Defined in llvm/lib/Support/Windows/Threading.inc 1345 int computeHostNumPhysicalCores(); 1346 #else 1347 // On other systems, return -1 to indicate unknown. 1348 static int computeHostNumPhysicalCores() { return -1; } 1349 #endif 1350 1351 int sys::getHostNumPhysicalCores() { 1352 static int NumCores = computeHostNumPhysicalCores(); 1353 return NumCores; 1354 } 1355 1356 #if defined(__i386__) || defined(_M_IX86) || \ 1357 defined(__x86_64__) || defined(_M_X64) 1358 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1359 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1360 unsigned MaxLevel; 1361 1362 if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1) 1363 return false; 1364 1365 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); 1366 1367 Features["cx8"] = (EDX >> 8) & 1; 1368 Features["cmov"] = (EDX >> 15) & 1; 1369 Features["mmx"] = (EDX >> 23) & 1; 1370 Features["fxsr"] = (EDX >> 24) & 1; 1371 Features["sse"] = (EDX >> 25) & 1; 1372 Features["sse2"] = (EDX >> 26) & 1; 1373 1374 Features["sse3"] = (ECX >> 0) & 1; 1375 Features["pclmul"] = (ECX >> 1) & 1; 1376 Features["ssse3"] = (ECX >> 9) & 1; 1377 Features["cx16"] = (ECX >> 13) & 1; 1378 Features["sse4.1"] = (ECX >> 19) & 1; 1379 Features["sse4.2"] = (ECX >> 20) & 1; 1380 Features["movbe"] = (ECX >> 22) & 1; 1381 Features["popcnt"] = (ECX >> 23) & 1; 1382 Features["aes"] = (ECX >> 25) & 1; 1383 Features["rdrnd"] = (ECX >> 30) & 1; 1384 1385 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 1386 // indicates that the AVX registers will be saved and restored on context 1387 // switch, then we have full AVX support. 1388 bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX); 1389 bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6); 1390 #if defined(__APPLE__) 1391 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 1392 // save the AVX512 context if we use AVX512 instructions, even the bit is not 1393 // set right now. 1394 bool HasAVX512Save = true; 1395 #else 1396 // AVX512 requires additional context to be saved by the OS. 1397 bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); 1398 #endif 1399 // AMX requires additional context to be saved by the OS. 1400 const unsigned AMXBits = (1 << 17) | (1 << 18); 1401 bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits); 1402 1403 Features["avx"] = HasAVXSave; 1404 Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave; 1405 // Only enable XSAVE if OS has enabled support for saving YMM state. 1406 Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave; 1407 Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave; 1408 1409 unsigned MaxExtLevel; 1410 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1411 1412 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1413 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1414 Features["sahf"] = HasExtLeaf1 && ((ECX >> 0) & 1); 1415 Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); 1416 Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); 1417 Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); 1418 Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; 1419 Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1); 1420 Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; 1421 Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); 1422 Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1); 1423 1424 Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1); 1425 1426 // Miscellaneous memory related features, detected by 1427 // using the 0x80000008 leaf of the CPUID instruction 1428 bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 && 1429 !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX); 1430 Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1); 1431 Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1); 1432 1433 bool HasLeaf7 = 1434 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1435 1436 Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); 1437 Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1); 1438 Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); 1439 // AVX2 is only supported if we have the OS save support from AVX. 1440 Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave; 1441 Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); 1442 Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1); 1443 Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); 1444 // AVX512 is only supported if the OS supports the context save for it. 1445 Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; 1446 Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; 1447 Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); 1448 Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); 1449 Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save; 1450 Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1); 1451 Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1); 1452 Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; 1453 Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; 1454 Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; 1455 Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); 1456 Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; 1457 Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; 1458 1459 Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1); 1460 Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; 1461 Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); 1462 Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1); 1463 Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save; 1464 Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1); 1465 Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1); 1466 Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave; 1467 Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave; 1468 Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save; 1469 Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save; 1470 Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; 1471 Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1); 1472 Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1); 1473 Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1); 1474 Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1); 1475 Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1); 1476 1477 Features["avx512vp2intersect"] = 1478 HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save; 1479 Features["serialize"] = HasLeaf7 && ((EDX >> 14) & 1); 1480 Features["tsxldtrk"] = HasLeaf7 && ((EDX >> 16) & 1); 1481 // There are two CPUID leafs which information associated with the pconfig 1482 // instruction: 1483 // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th 1484 // bit of EDX), while the EAX=0x1b leaf returns information on the 1485 // availability of specific pconfig leafs. 1486 // The target feature here only refers to the the first of these two. 1487 // Users might need to check for the availability of specific pconfig 1488 // leaves using cpuid, since that information is ignored while 1489 // detecting features using the "-march=native" flag. 1490 // For more info, see X86 ISA docs. 1491 Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1); 1492 Features["amx-bf16"] = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave; 1493 Features["amx-tile"] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave; 1494 Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave; 1495 bool HasLeaf7Subleaf1 = 1496 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 1497 Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save; 1498 1499 bool HasLeafD = MaxLevel >= 0xd && 1500 !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); 1501 1502 // Only enable XSAVE if OS has enabled support for saving YMM state. 1503 Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave; 1504 Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave; 1505 Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave; 1506 1507 bool HasLeaf14 = MaxLevel >= 0x14 && 1508 !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX); 1509 1510 Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1); 1511 1512 return true; 1513 } 1514 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1515 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1516 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1517 if (!P) 1518 return false; 1519 1520 SmallVector<StringRef, 32> Lines; 1521 P->getBuffer().split(Lines, "\n"); 1522 1523 SmallVector<StringRef, 32> CPUFeatures; 1524 1525 // Look for the CPU features. 1526 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 1527 if (Lines[I].startswith("Features")) { 1528 Lines[I].split(CPUFeatures, ' '); 1529 break; 1530 } 1531 1532 #if defined(__aarch64__) 1533 // Keep track of which crypto features we have seen 1534 enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 }; 1535 uint32_t crypto = 0; 1536 #endif 1537 1538 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 1539 StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I]) 1540 #if defined(__aarch64__) 1541 .Case("asimd", "neon") 1542 .Case("fp", "fp-armv8") 1543 .Case("crc32", "crc") 1544 #else 1545 .Case("half", "fp16") 1546 .Case("neon", "neon") 1547 .Case("vfpv3", "vfp3") 1548 .Case("vfpv3d16", "d16") 1549 .Case("vfpv4", "vfp4") 1550 .Case("idiva", "hwdiv-arm") 1551 .Case("idivt", "hwdiv") 1552 #endif 1553 .Default(""); 1554 1555 #if defined(__aarch64__) 1556 // We need to check crypto separately since we need all of the crypto 1557 // extensions to enable the subtarget feature 1558 if (CPUFeatures[I] == "aes") 1559 crypto |= CAP_AES; 1560 else if (CPUFeatures[I] == "pmull") 1561 crypto |= CAP_PMULL; 1562 else if (CPUFeatures[I] == "sha1") 1563 crypto |= CAP_SHA1; 1564 else if (CPUFeatures[I] == "sha2") 1565 crypto |= CAP_SHA2; 1566 #endif 1567 1568 if (LLVMFeatureStr != "") 1569 Features[LLVMFeatureStr] = true; 1570 } 1571 1572 #if defined(__aarch64__) 1573 // If we have all crypto bits we can add the feature 1574 if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2)) 1575 Features["crypto"] = true; 1576 #endif 1577 1578 return true; 1579 } 1580 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64)) 1581 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1582 if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) 1583 Features["neon"] = true; 1584 if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) 1585 Features["crc"] = true; 1586 if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) 1587 Features["crypto"] = true; 1588 1589 return true; 1590 } 1591 #else 1592 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; } 1593 #endif 1594 1595 std::string sys::getProcessTriple() { 1596 std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE); 1597 Triple PT(Triple::normalize(TargetTripleString)); 1598 1599 if (sizeof(void *) == 8 && PT.isArch32Bit()) 1600 PT = PT.get64BitArchVariant(); 1601 if (sizeof(void *) == 4 && PT.isArch64Bit()) 1602 PT = PT.get32BitArchVariant(); 1603 1604 return PT.str(); 1605 } 1606