1 //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the operating system Host concept. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Support/Host.h" 14 #include "llvm/ADT/SmallSet.h" 15 #include "llvm/ADT/SmallVector.h" 16 #include "llvm/ADT/StringMap.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/ADT/StringSwitch.h" 19 #include "llvm/ADT/Triple.h" 20 #include "llvm/Config/llvm-config.h" 21 #include "llvm/Support/BCD.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/FileSystem.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include "llvm/Support/X86TargetParser.h" 26 #include "llvm/Support/raw_ostream.h" 27 #include <assert.h> 28 #include <string.h> 29 30 // Include the platform-specific parts of this class. 31 #ifdef LLVM_ON_UNIX 32 #include "Unix/Host.inc" 33 #include <sched.h> 34 #endif 35 #ifdef _WIN32 36 #include "Windows/Host.inc" 37 #endif 38 #ifdef _MSC_VER 39 #include <intrin.h> 40 #endif 41 #if defined(__APPLE__) && (!defined(__x86_64__)) 42 #include <mach/host_info.h> 43 #include <mach/mach.h> 44 #include <mach/mach_host.h> 45 #include <mach/machine.h> 46 #endif 47 #ifdef _AIX 48 #include <sys/systemcfg.h> 49 #endif 50 51 #define DEBUG_TYPE "host-detection" 52 53 //===----------------------------------------------------------------------===// 54 // 55 // Implementations of the CPU detection routines 56 // 57 //===----------------------------------------------------------------------===// 58 59 using namespace llvm; 60 61 static std::unique_ptr<llvm::MemoryBuffer> 62 LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() { 63 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 64 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); 65 if (std::error_code EC = Text.getError()) { 66 llvm::errs() << "Can't read " 67 << "/proc/cpuinfo: " << EC.message() << "\n"; 68 return nullptr; 69 } 70 return std::move(*Text); 71 } 72 73 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) { 74 // Access to the Processor Version Register (PVR) on PowerPC is privileged, 75 // and so we must use an operating-system interface to determine the current 76 // processor type. On Linux, this is exposed through the /proc/cpuinfo file. 77 const char *generic = "generic"; 78 79 // The cpu line is second (after the 'processor: 0' line), so if this 80 // buffer is too small then something has changed (or is wrong). 81 StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin(); 82 StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end(); 83 84 StringRef::const_iterator CIP = CPUInfoStart; 85 86 StringRef::const_iterator CPUStart = 0; 87 size_t CPULen = 0; 88 89 // We need to find the first line which starts with cpu, spaces, and a colon. 90 // After the colon, there may be some additional spaces and then the cpu type. 91 while (CIP < CPUInfoEnd && CPUStart == 0) { 92 if (CIP < CPUInfoEnd && *CIP == '\n') 93 ++CIP; 94 95 if (CIP < CPUInfoEnd && *CIP == 'c') { 96 ++CIP; 97 if (CIP < CPUInfoEnd && *CIP == 'p') { 98 ++CIP; 99 if (CIP < CPUInfoEnd && *CIP == 'u') { 100 ++CIP; 101 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 102 ++CIP; 103 104 if (CIP < CPUInfoEnd && *CIP == ':') { 105 ++CIP; 106 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 107 ++CIP; 108 109 if (CIP < CPUInfoEnd) { 110 CPUStart = CIP; 111 while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' && 112 *CIP != ',' && *CIP != '\n')) 113 ++CIP; 114 CPULen = CIP - CPUStart; 115 } 116 } 117 } 118 } 119 } 120 121 if (CPUStart == 0) 122 while (CIP < CPUInfoEnd && *CIP != '\n') 123 ++CIP; 124 } 125 126 if (CPUStart == 0) 127 return generic; 128 129 return StringSwitch<const char *>(StringRef(CPUStart, CPULen)) 130 .Case("604e", "604e") 131 .Case("604", "604") 132 .Case("7400", "7400") 133 .Case("7410", "7400") 134 .Case("7447", "7400") 135 .Case("7455", "7450") 136 .Case("G4", "g4") 137 .Case("POWER4", "970") 138 .Case("PPC970FX", "970") 139 .Case("PPC970MP", "970") 140 .Case("G5", "g5") 141 .Case("POWER5", "g5") 142 .Case("A2", "a2") 143 .Case("POWER6", "pwr6") 144 .Case("POWER7", "pwr7") 145 .Case("POWER8", "pwr8") 146 .Case("POWER8E", "pwr8") 147 .Case("POWER8NVL", "pwr8") 148 .Case("POWER9", "pwr9") 149 .Case("POWER10", "pwr10") 150 // FIXME: If we get a simulator or machine with the capabilities of 151 // mcpu=future, we should revisit this and add the name reported by the 152 // simulator/machine. 153 .Default(generic); 154 } 155 156 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { 157 // The cpuid register on arm is not accessible from user space. On Linux, 158 // it is exposed through the /proc/cpuinfo file. 159 160 // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line 161 // in all cases. 162 SmallVector<StringRef, 32> Lines; 163 ProcCpuinfoContent.split(Lines, "\n"); 164 165 // Look for the CPU implementer line. 166 StringRef Implementer; 167 StringRef Hardware; 168 StringRef Part; 169 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 170 if (Lines[I].startswith("CPU implementer")) 171 Implementer = Lines[I].substr(15).ltrim("\t :"); 172 if (Lines[I].startswith("Hardware")) 173 Hardware = Lines[I].substr(8).ltrim("\t :"); 174 if (Lines[I].startswith("CPU part")) 175 Part = Lines[I].substr(8).ltrim("\t :"); 176 } 177 178 if (Implementer == "0x41") { // ARM Ltd. 179 // MSM8992/8994 may give cpu part for the core that the kernel is running on, 180 // which is undeterministic and wrong. Always return cortex-a53 for these SoC. 181 if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996")) 182 return "cortex-a53"; 183 184 185 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 186 // values correspond to the "Part number" in the CP15/c0 register. The 187 // contents are specified in the various processor manuals. 188 // This corresponds to the Main ID Register in Technical Reference Manuals. 189 // and is used in programs like sys-utils 190 return StringSwitch<const char *>(Part) 191 .Case("0x926", "arm926ej-s") 192 .Case("0xb02", "mpcore") 193 .Case("0xb36", "arm1136j-s") 194 .Case("0xb56", "arm1156t2-s") 195 .Case("0xb76", "arm1176jz-s") 196 .Case("0xc08", "cortex-a8") 197 .Case("0xc09", "cortex-a9") 198 .Case("0xc0f", "cortex-a15") 199 .Case("0xc20", "cortex-m0") 200 .Case("0xc23", "cortex-m3") 201 .Case("0xc24", "cortex-m4") 202 .Case("0xd22", "cortex-m55") 203 .Case("0xd02", "cortex-a34") 204 .Case("0xd04", "cortex-a35") 205 .Case("0xd03", "cortex-a53") 206 .Case("0xd07", "cortex-a57") 207 .Case("0xd08", "cortex-a72") 208 .Case("0xd09", "cortex-a73") 209 .Case("0xd0a", "cortex-a75") 210 .Case("0xd0b", "cortex-a76") 211 .Case("0xd0d", "cortex-a77") 212 .Case("0xd41", "cortex-a78") 213 .Case("0xd44", "cortex-x1") 214 .Case("0xd0c", "neoverse-n1") 215 .Case("0xd49", "neoverse-n2") 216 .Default("generic"); 217 } 218 219 if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium. 220 return StringSwitch<const char *>(Part) 221 .Case("0x516", "thunderx2t99") 222 .Case("0x0516", "thunderx2t99") 223 .Case("0xaf", "thunderx2t99") 224 .Case("0x0af", "thunderx2t99") 225 .Case("0xa1", "thunderxt88") 226 .Case("0x0a1", "thunderxt88") 227 .Default("generic"); 228 } 229 230 if (Implementer == "0x46") { // Fujitsu Ltd. 231 return StringSwitch<const char *>(Part) 232 .Case("0x001", "a64fx") 233 .Default("generic"); 234 } 235 236 if (Implementer == "0x4e") { // NVIDIA Corporation 237 return StringSwitch<const char *>(Part) 238 .Case("0x004", "carmel") 239 .Default("generic"); 240 } 241 242 if (Implementer == "0x48") // HiSilicon Technologies, Inc. 243 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 244 // values correspond to the "Part number" in the CP15/c0 register. The 245 // contents are specified in the various processor manuals. 246 return StringSwitch<const char *>(Part) 247 .Case("0xd01", "tsv110") 248 .Default("generic"); 249 250 if (Implementer == "0x51") // Qualcomm Technologies, Inc. 251 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 252 // values correspond to the "Part number" in the CP15/c0 register. The 253 // contents are specified in the various processor manuals. 254 return StringSwitch<const char *>(Part) 255 .Case("0x06f", "krait") // APQ8064 256 .Case("0x201", "kryo") 257 .Case("0x205", "kryo") 258 .Case("0x211", "kryo") 259 .Case("0x800", "cortex-a73") // Kryo 2xx Gold 260 .Case("0x801", "cortex-a73") // Kryo 2xx Silver 261 .Case("0x802", "cortex-a75") // Kryo 3xx Gold 262 .Case("0x803", "cortex-a75") // Kryo 3xx Silver 263 .Case("0x804", "cortex-a76") // Kryo 4xx Gold 264 .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver 265 .Case("0xc00", "falkor") 266 .Case("0xc01", "saphira") 267 .Default("generic"); 268 if (Implementer == "0x53") { // Samsung Electronics Co., Ltd. 269 // The Exynos chips have a convoluted ID scheme that doesn't seem to follow 270 // any predictive pattern across variants and parts. 271 unsigned Variant = 0, Part = 0; 272 273 // Look for the CPU variant line, whose value is a 1 digit hexadecimal 274 // number, corresponding to the Variant bits in the CP15/C0 register. 275 for (auto I : Lines) 276 if (I.consume_front("CPU variant")) 277 I.ltrim("\t :").getAsInteger(0, Variant); 278 279 // Look for the CPU part line, whose value is a 3 digit hexadecimal 280 // number, corresponding to the PartNum bits in the CP15/C0 register. 281 for (auto I : Lines) 282 if (I.consume_front("CPU part")) 283 I.ltrim("\t :").getAsInteger(0, Part); 284 285 unsigned Exynos = (Variant << 12) | Part; 286 switch (Exynos) { 287 default: 288 // Default by falling through to Exynos M3. 289 LLVM_FALLTHROUGH; 290 case 0x1002: 291 return "exynos-m3"; 292 case 0x1003: 293 return "exynos-m4"; 294 } 295 } 296 297 return "generic"; 298 } 299 300 namespace { 301 StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) { 302 switch (Id) { 303 case 2064: // z900 not supported by LLVM 304 case 2066: 305 case 2084: // z990 not supported by LLVM 306 case 2086: 307 case 2094: // z9-109 not supported by LLVM 308 case 2096: 309 return "generic"; 310 case 2097: 311 case 2098: 312 return "z10"; 313 case 2817: 314 case 2818: 315 return "z196"; 316 case 2827: 317 case 2828: 318 return "zEC12"; 319 case 2964: 320 case 2965: 321 return HaveVectorSupport? "z13" : "zEC12"; 322 case 3906: 323 case 3907: 324 return HaveVectorSupport? "z14" : "zEC12"; 325 case 8561: 326 case 8562: 327 default: 328 return HaveVectorSupport? "z15" : "zEC12"; 329 } 330 } 331 } // end anonymous namespace 332 333 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) { 334 // STIDP is a privileged operation, so use /proc/cpuinfo instead. 335 336 // The "processor 0:" line comes after a fair amount of other information, 337 // including a cache breakdown, but this should be plenty. 338 SmallVector<StringRef, 32> Lines; 339 ProcCpuinfoContent.split(Lines, "\n"); 340 341 // Look for the CPU features. 342 SmallVector<StringRef, 32> CPUFeatures; 343 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 344 if (Lines[I].startswith("features")) { 345 size_t Pos = Lines[I].find(':'); 346 if (Pos != StringRef::npos) { 347 Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' '); 348 break; 349 } 350 } 351 352 // We need to check for the presence of vector support independently of 353 // the machine type, since we may only use the vector register set when 354 // supported by the kernel (and hypervisor). 355 bool HaveVectorSupport = false; 356 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 357 if (CPUFeatures[I] == "vx") 358 HaveVectorSupport = true; 359 } 360 361 // Now check the processor machine type. 362 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 363 if (Lines[I].startswith("processor ")) { 364 size_t Pos = Lines[I].find("machine = "); 365 if (Pos != StringRef::npos) { 366 Pos += sizeof("machine = ") - 1; 367 unsigned int Id; 368 if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) 369 return getCPUNameFromS390Model(Id, HaveVectorSupport); 370 } 371 break; 372 } 373 } 374 375 return "generic"; 376 } 377 378 StringRef sys::detail::getHostCPUNameForBPF() { 379 #if !defined(__linux__) || !defined(__x86_64__) 380 return "generic"; 381 #else 382 uint8_t v3_insns[40] __attribute__ ((aligned (8))) = 383 /* BPF_MOV64_IMM(BPF_REG_0, 0) */ 384 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 385 /* BPF_MOV64_IMM(BPF_REG_2, 1) */ 386 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 387 /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ 388 0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 389 /* BPF_MOV64_IMM(BPF_REG_0, 1) */ 390 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 391 /* BPF_EXIT_INSN() */ 392 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 393 394 uint8_t v2_insns[40] __attribute__ ((aligned (8))) = 395 /* BPF_MOV64_IMM(BPF_REG_0, 0) */ 396 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 397 /* BPF_MOV64_IMM(BPF_REG_2, 1) */ 398 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 399 /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ 400 0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 401 /* BPF_MOV64_IMM(BPF_REG_0, 1) */ 402 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 403 /* BPF_EXIT_INSN() */ 404 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 405 406 struct bpf_prog_load_attr { 407 uint32_t prog_type; 408 uint32_t insn_cnt; 409 uint64_t insns; 410 uint64_t license; 411 uint32_t log_level; 412 uint32_t log_size; 413 uint64_t log_buf; 414 uint32_t kern_version; 415 uint32_t prog_flags; 416 } attr = {}; 417 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ 418 attr.insn_cnt = 5; 419 attr.insns = (uint64_t)v3_insns; 420 attr.license = (uint64_t)"DUMMY"; 421 422 int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, 423 sizeof(attr)); 424 if (fd >= 0) { 425 close(fd); 426 return "v3"; 427 } 428 429 /* Clear the whole attr in case its content changed by syscall. */ 430 memset(&attr, 0, sizeof(attr)); 431 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ 432 attr.insn_cnt = 5; 433 attr.insns = (uint64_t)v2_insns; 434 attr.license = (uint64_t)"DUMMY"; 435 fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr)); 436 if (fd >= 0) { 437 close(fd); 438 return "v2"; 439 } 440 return "v1"; 441 #endif 442 } 443 444 #if defined(__i386__) || defined(_M_IX86) || \ 445 defined(__x86_64__) || defined(_M_X64) 446 447 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). 448 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID 449 // support. Consequently, for i386, the presence of CPUID is checked first 450 // via the corresponding eflags bit. 451 // Removal of cpuid.h header motivated by PR30384 452 // Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp 453 // or test-suite, but are used in external projects e.g. libstdcxx 454 static bool isCpuIdSupported() { 455 #if defined(__GNUC__) || defined(__clang__) 456 #if defined(__i386__) 457 int __cpuid_supported; 458 __asm__(" pushfl\n" 459 " popl %%eax\n" 460 " movl %%eax,%%ecx\n" 461 " xorl $0x00200000,%%eax\n" 462 " pushl %%eax\n" 463 " popfl\n" 464 " pushfl\n" 465 " popl %%eax\n" 466 " movl $0,%0\n" 467 " cmpl %%eax,%%ecx\n" 468 " je 1f\n" 469 " movl $1,%0\n" 470 "1:" 471 : "=r"(__cpuid_supported) 472 : 473 : "eax", "ecx"); 474 if (!__cpuid_supported) 475 return false; 476 #endif 477 return true; 478 #endif 479 return true; 480 } 481 482 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in 483 /// the specified arguments. If we can't run cpuid on the host, return true. 484 static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, 485 unsigned *rECX, unsigned *rEDX) { 486 #if defined(__GNUC__) || defined(__clang__) 487 #if defined(__x86_64__) 488 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 489 // FIXME: should we save this for Clang? 490 __asm__("movq\t%%rbx, %%rsi\n\t" 491 "cpuid\n\t" 492 "xchgq\t%%rbx, %%rsi\n\t" 493 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 494 : "a"(value)); 495 return false; 496 #elif defined(__i386__) 497 __asm__("movl\t%%ebx, %%esi\n\t" 498 "cpuid\n\t" 499 "xchgl\t%%ebx, %%esi\n\t" 500 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 501 : "a"(value)); 502 return false; 503 #else 504 return true; 505 #endif 506 #elif defined(_MSC_VER) 507 // The MSVC intrinsic is portable across x86 and x64. 508 int registers[4]; 509 __cpuid(registers, value); 510 *rEAX = registers[0]; 511 *rEBX = registers[1]; 512 *rECX = registers[2]; 513 *rEDX = registers[3]; 514 return false; 515 #else 516 return true; 517 #endif 518 } 519 520 namespace llvm { 521 namespace sys { 522 namespace detail { 523 namespace x86 { 524 525 VendorSignatures getVendorSignature(unsigned *MaxLeaf) { 526 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 527 if (MaxLeaf == nullptr) 528 MaxLeaf = &EAX; 529 else 530 *MaxLeaf = 0; 531 532 if (!isCpuIdSupported()) 533 return VendorSignatures::UNKNOWN; 534 535 if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1) 536 return VendorSignatures::UNKNOWN; 537 538 // "Genu ineI ntel" 539 if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e) 540 return VendorSignatures::GENUINE_INTEL; 541 542 // "Auth enti cAMD" 543 if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163) 544 return VendorSignatures::AUTHENTIC_AMD; 545 546 return VendorSignatures::UNKNOWN; 547 } 548 549 } // namespace x86 550 } // namespace detail 551 } // namespace sys 552 } // namespace llvm 553 554 using namespace llvm::sys::detail::x86; 555 556 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return 557 /// the 4 values in the specified arguments. If we can't run cpuid on the host, 558 /// return true. 559 static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, 560 unsigned *rEAX, unsigned *rEBX, unsigned *rECX, 561 unsigned *rEDX) { 562 #if defined(__GNUC__) || defined(__clang__) 563 #if defined(__x86_64__) 564 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 565 // FIXME: should we save this for Clang? 566 __asm__("movq\t%%rbx, %%rsi\n\t" 567 "cpuid\n\t" 568 "xchgq\t%%rbx, %%rsi\n\t" 569 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 570 : "a"(value), "c"(subleaf)); 571 return false; 572 #elif defined(__i386__) 573 __asm__("movl\t%%ebx, %%esi\n\t" 574 "cpuid\n\t" 575 "xchgl\t%%ebx, %%esi\n\t" 576 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 577 : "a"(value), "c"(subleaf)); 578 return false; 579 #else 580 return true; 581 #endif 582 #elif defined(_MSC_VER) 583 int registers[4]; 584 __cpuidex(registers, value, subleaf); 585 *rEAX = registers[0]; 586 *rEBX = registers[1]; 587 *rECX = registers[2]; 588 *rEDX = registers[3]; 589 return false; 590 #else 591 return true; 592 #endif 593 } 594 595 // Read control register 0 (XCR0). Used to detect features such as AVX. 596 static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { 597 #if defined(__GNUC__) || defined(__clang__) 598 // Check xgetbv; this uses a .byte sequence instead of the instruction 599 // directly because older assemblers do not include support for xgetbv and 600 // there is no easy way to conditionally compile based on the assembler used. 601 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); 602 return false; 603 #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) 604 unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); 605 *rEAX = Result; 606 *rEDX = Result >> 32; 607 return false; 608 #else 609 return true; 610 #endif 611 } 612 613 static void detectX86FamilyModel(unsigned EAX, unsigned *Family, 614 unsigned *Model) { 615 *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 616 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 617 if (*Family == 6 || *Family == 0xf) { 618 if (*Family == 0xf) 619 // Examine extended family ID if family ID is F. 620 *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 621 // Examine extended model ID if family ID is 6 or F. 622 *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 623 } 624 } 625 626 static StringRef 627 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, 628 const unsigned *Features, 629 unsigned *Type, unsigned *Subtype) { 630 auto testFeature = [&](unsigned F) { 631 return (Features[F / 32] & (1U << (F % 32))) != 0; 632 }; 633 634 StringRef CPU; 635 636 switch (Family) { 637 case 3: 638 CPU = "i386"; 639 break; 640 case 4: 641 CPU = "i486"; 642 break; 643 case 5: 644 if (testFeature(X86::FEATURE_MMX)) { 645 CPU = "pentium-mmx"; 646 break; 647 } 648 CPU = "pentium"; 649 break; 650 case 6: 651 switch (Model) { 652 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile 653 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad 654 // mobile processor, Intel Core 2 Extreme processor, Intel 655 // Pentium Dual-Core processor, Intel Xeon processor, model 656 // 0Fh. All processors are manufactured using the 65 nm process. 657 case 0x16: // Intel Celeron processor model 16h. All processors are 658 // manufactured using the 65 nm process 659 CPU = "core2"; 660 *Type = X86::INTEL_CORE2; 661 break; 662 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model 663 // 17h. All processors are manufactured using the 45 nm process. 664 // 665 // 45nm: Penryn , Wolfdale, Yorkfield (XE) 666 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using 667 // the 45 nm process. 668 CPU = "penryn"; 669 *Type = X86::INTEL_CORE2; 670 break; 671 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All 672 // processors are manufactured using the 45 nm process. 673 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. 674 // As found in a Summer 2010 model iMac. 675 case 0x1f: 676 case 0x2e: // Nehalem EX 677 CPU = "nehalem"; 678 *Type = X86::INTEL_COREI7; 679 *Subtype = X86::INTEL_COREI7_NEHALEM; 680 break; 681 case 0x25: // Intel Core i7, laptop version. 682 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All 683 // processors are manufactured using the 32 nm process. 684 case 0x2f: // Westmere EX 685 CPU = "westmere"; 686 *Type = X86::INTEL_COREI7; 687 *Subtype = X86::INTEL_COREI7_WESTMERE; 688 break; 689 case 0x2a: // Intel Core i7 processor. All processors are manufactured 690 // using the 32 nm process. 691 case 0x2d: 692 CPU = "sandybridge"; 693 *Type = X86::INTEL_COREI7; 694 *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; 695 break; 696 case 0x3a: 697 case 0x3e: // Ivy Bridge EP 698 CPU = "ivybridge"; 699 *Type = X86::INTEL_COREI7; 700 *Subtype = X86::INTEL_COREI7_IVYBRIDGE; 701 break; 702 703 // Haswell: 704 case 0x3c: 705 case 0x3f: 706 case 0x45: 707 case 0x46: 708 CPU = "haswell"; 709 *Type = X86::INTEL_COREI7; 710 *Subtype = X86::INTEL_COREI7_HASWELL; 711 break; 712 713 // Broadwell: 714 case 0x3d: 715 case 0x47: 716 case 0x4f: 717 case 0x56: 718 CPU = "broadwell"; 719 *Type = X86::INTEL_COREI7; 720 *Subtype = X86::INTEL_COREI7_BROADWELL; 721 break; 722 723 // Skylake: 724 case 0x4e: // Skylake mobile 725 case 0x5e: // Skylake desktop 726 case 0x8e: // Kaby Lake mobile 727 case 0x9e: // Kaby Lake desktop 728 case 0xa5: // Comet Lake-H/S 729 case 0xa6: // Comet Lake-U 730 CPU = "skylake"; 731 *Type = X86::INTEL_COREI7; 732 *Subtype = X86::INTEL_COREI7_SKYLAKE; 733 break; 734 735 // Rocketlake: 736 case 0xa7: 737 CPU = "rocketlake"; 738 *Type = X86::INTEL_COREI7; 739 *Subtype = X86::INTEL_COREI7_ROCKETLAKE; 740 break; 741 742 // Skylake Xeon: 743 case 0x55: 744 *Type = X86::INTEL_COREI7; 745 if (testFeature(X86::FEATURE_AVX512BF16)) { 746 CPU = "cooperlake"; 747 *Subtype = X86::INTEL_COREI7_COOPERLAKE; 748 } else if (testFeature(X86::FEATURE_AVX512VNNI)) { 749 CPU = "cascadelake"; 750 *Subtype = X86::INTEL_COREI7_CASCADELAKE; 751 } else { 752 CPU = "skylake-avx512"; 753 *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; 754 } 755 break; 756 757 // Cannonlake: 758 case 0x66: 759 CPU = "cannonlake"; 760 *Type = X86::INTEL_COREI7; 761 *Subtype = X86::INTEL_COREI7_CANNONLAKE; 762 break; 763 764 // Icelake: 765 case 0x7d: 766 case 0x7e: 767 CPU = "icelake-client"; 768 *Type = X86::INTEL_COREI7; 769 *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; 770 break; 771 772 // Icelake Xeon: 773 case 0x6a: 774 case 0x6c: 775 CPU = "icelake-server"; 776 *Type = X86::INTEL_COREI7; 777 *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER; 778 break; 779 780 // Sapphire Rapids: 781 case 0x8f: 782 CPU = "sapphirerapids"; 783 *Type = X86::INTEL_COREI7; 784 *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS; 785 break; 786 787 case 0x1c: // Most 45 nm Intel Atom processors 788 case 0x26: // 45 nm Atom Lincroft 789 case 0x27: // 32 nm Atom Medfield 790 case 0x35: // 32 nm Atom Midview 791 case 0x36: // 32 nm Atom Midview 792 CPU = "bonnell"; 793 *Type = X86::INTEL_BONNELL; 794 break; 795 796 // Atom Silvermont codes from the Intel software optimization guide. 797 case 0x37: 798 case 0x4a: 799 case 0x4d: 800 case 0x5a: 801 case 0x5d: 802 case 0x4c: // really airmont 803 CPU = "silvermont"; 804 *Type = X86::INTEL_SILVERMONT; 805 break; 806 // Goldmont: 807 case 0x5c: // Apollo Lake 808 case 0x5f: // Denverton 809 CPU = "goldmont"; 810 *Type = X86::INTEL_GOLDMONT; 811 break; 812 case 0x7a: 813 CPU = "goldmont-plus"; 814 *Type = X86::INTEL_GOLDMONT_PLUS; 815 break; 816 case 0x86: 817 CPU = "tremont"; 818 *Type = X86::INTEL_TREMONT; 819 break; 820 821 // Xeon Phi (Knights Landing + Knights Mill): 822 case 0x57: 823 CPU = "knl"; 824 *Type = X86::INTEL_KNL; 825 break; 826 case 0x85: 827 CPU = "knm"; 828 *Type = X86::INTEL_KNM; 829 break; 830 831 default: // Unknown family 6 CPU, try to guess. 832 // Don't both with Type/Subtype here, they aren't used by the caller. 833 // They're used above to keep the code in sync with compiler-rt. 834 // TODO detect tigerlake host from model 835 if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) { 836 CPU = "tigerlake"; 837 } else if (testFeature(X86::FEATURE_AVX512VBMI2)) { 838 CPU = "icelake-client"; 839 } else if (testFeature(X86::FEATURE_AVX512VBMI)) { 840 CPU = "cannonlake"; 841 } else if (testFeature(X86::FEATURE_AVX512BF16)) { 842 CPU = "cooperlake"; 843 } else if (testFeature(X86::FEATURE_AVX512VNNI)) { 844 CPU = "cascadelake"; 845 } else if (testFeature(X86::FEATURE_AVX512VL)) { 846 CPU = "skylake-avx512"; 847 } else if (testFeature(X86::FEATURE_AVX512ER)) { 848 CPU = "knl"; 849 } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) { 850 if (testFeature(X86::FEATURE_SHA)) 851 CPU = "goldmont"; 852 else 853 CPU = "skylake"; 854 } else if (testFeature(X86::FEATURE_ADX)) { 855 CPU = "broadwell"; 856 } else if (testFeature(X86::FEATURE_AVX2)) { 857 CPU = "haswell"; 858 } else if (testFeature(X86::FEATURE_AVX)) { 859 CPU = "sandybridge"; 860 } else if (testFeature(X86::FEATURE_SSE4_2)) { 861 if (testFeature(X86::FEATURE_MOVBE)) 862 CPU = "silvermont"; 863 else 864 CPU = "nehalem"; 865 } else if (testFeature(X86::FEATURE_SSE4_1)) { 866 CPU = "penryn"; 867 } else if (testFeature(X86::FEATURE_SSSE3)) { 868 if (testFeature(X86::FEATURE_MOVBE)) 869 CPU = "bonnell"; 870 else 871 CPU = "core2"; 872 } else if (testFeature(X86::FEATURE_64BIT)) { 873 CPU = "core2"; 874 } else if (testFeature(X86::FEATURE_SSE3)) { 875 CPU = "yonah"; 876 } else if (testFeature(X86::FEATURE_SSE2)) { 877 CPU = "pentium-m"; 878 } else if (testFeature(X86::FEATURE_SSE)) { 879 CPU = "pentium3"; 880 } else if (testFeature(X86::FEATURE_MMX)) { 881 CPU = "pentium2"; 882 } else { 883 CPU = "pentiumpro"; 884 } 885 break; 886 } 887 break; 888 case 15: { 889 if (testFeature(X86::FEATURE_64BIT)) { 890 CPU = "nocona"; 891 break; 892 } 893 if (testFeature(X86::FEATURE_SSE3)) { 894 CPU = "prescott"; 895 break; 896 } 897 CPU = "pentium4"; 898 break; 899 } 900 default: 901 break; // Unknown. 902 } 903 904 return CPU; 905 } 906 907 static StringRef 908 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, 909 const unsigned *Features, 910 unsigned *Type, unsigned *Subtype) { 911 auto testFeature = [&](unsigned F) { 912 return (Features[F / 32] & (1U << (F % 32))) != 0; 913 }; 914 915 StringRef CPU; 916 917 switch (Family) { 918 case 4: 919 CPU = "i486"; 920 break; 921 case 5: 922 CPU = "pentium"; 923 switch (Model) { 924 case 6: 925 case 7: 926 CPU = "k6"; 927 break; 928 case 8: 929 CPU = "k6-2"; 930 break; 931 case 9: 932 case 13: 933 CPU = "k6-3"; 934 break; 935 case 10: 936 CPU = "geode"; 937 break; 938 } 939 break; 940 case 6: 941 if (testFeature(X86::FEATURE_SSE)) { 942 CPU = "athlon-xp"; 943 break; 944 } 945 CPU = "athlon"; 946 break; 947 case 15: 948 if (testFeature(X86::FEATURE_SSE3)) { 949 CPU = "k8-sse3"; 950 break; 951 } 952 CPU = "k8"; 953 break; 954 case 16: 955 CPU = "amdfam10"; 956 *Type = X86::AMDFAM10H; // "amdfam10" 957 switch (Model) { 958 case 2: 959 *Subtype = X86::AMDFAM10H_BARCELONA; 960 break; 961 case 4: 962 *Subtype = X86::AMDFAM10H_SHANGHAI; 963 break; 964 case 8: 965 *Subtype = X86::AMDFAM10H_ISTANBUL; 966 break; 967 } 968 break; 969 case 20: 970 CPU = "btver1"; 971 *Type = X86::AMD_BTVER1; 972 break; 973 case 21: 974 CPU = "bdver1"; 975 *Type = X86::AMDFAM15H; 976 if (Model >= 0x60 && Model <= 0x7f) { 977 CPU = "bdver4"; 978 *Subtype = X86::AMDFAM15H_BDVER4; 979 break; // 60h-7Fh: Excavator 980 } 981 if (Model >= 0x30 && Model <= 0x3f) { 982 CPU = "bdver3"; 983 *Subtype = X86::AMDFAM15H_BDVER3; 984 break; // 30h-3Fh: Steamroller 985 } 986 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { 987 CPU = "bdver2"; 988 *Subtype = X86::AMDFAM15H_BDVER2; 989 break; // 02h, 10h-1Fh: Piledriver 990 } 991 if (Model <= 0x0f) { 992 *Subtype = X86::AMDFAM15H_BDVER1; 993 break; // 00h-0Fh: Bulldozer 994 } 995 break; 996 case 22: 997 CPU = "btver2"; 998 *Type = X86::AMD_BTVER2; 999 break; 1000 case 23: 1001 CPU = "znver1"; 1002 *Type = X86::AMDFAM17H; 1003 if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) { 1004 CPU = "znver2"; 1005 *Subtype = X86::AMDFAM17H_ZNVER2; 1006 break; // 30h-3fh, 71h: Zen2 1007 } 1008 if (Model <= 0x0f) { 1009 *Subtype = X86::AMDFAM17H_ZNVER1; 1010 break; // 00h-0Fh: Zen1 1011 } 1012 break; 1013 case 25: 1014 CPU = "znver3"; 1015 *Type = X86::AMDFAM19H; 1016 if (Model <= 0x0f) { 1017 *Subtype = X86::AMDFAM19H_ZNVER3; 1018 break; // 00h-0Fh: Zen3 1019 } 1020 break; 1021 default: 1022 break; // Unknown AMD CPU. 1023 } 1024 1025 return CPU; 1026 } 1027 1028 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, 1029 unsigned *Features) { 1030 unsigned EAX, EBX; 1031 1032 auto setFeature = [&](unsigned F) { 1033 Features[F / 32] |= 1U << (F % 32); 1034 }; 1035 1036 if ((EDX >> 15) & 1) 1037 setFeature(X86::FEATURE_CMOV); 1038 if ((EDX >> 23) & 1) 1039 setFeature(X86::FEATURE_MMX); 1040 if ((EDX >> 25) & 1) 1041 setFeature(X86::FEATURE_SSE); 1042 if ((EDX >> 26) & 1) 1043 setFeature(X86::FEATURE_SSE2); 1044 1045 if ((ECX >> 0) & 1) 1046 setFeature(X86::FEATURE_SSE3); 1047 if ((ECX >> 1) & 1) 1048 setFeature(X86::FEATURE_PCLMUL); 1049 if ((ECX >> 9) & 1) 1050 setFeature(X86::FEATURE_SSSE3); 1051 if ((ECX >> 12) & 1) 1052 setFeature(X86::FEATURE_FMA); 1053 if ((ECX >> 19) & 1) 1054 setFeature(X86::FEATURE_SSE4_1); 1055 if ((ECX >> 20) & 1) 1056 setFeature(X86::FEATURE_SSE4_2); 1057 if ((ECX >> 23) & 1) 1058 setFeature(X86::FEATURE_POPCNT); 1059 if ((ECX >> 25) & 1) 1060 setFeature(X86::FEATURE_AES); 1061 1062 if ((ECX >> 22) & 1) 1063 setFeature(X86::FEATURE_MOVBE); 1064 1065 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 1066 // indicates that the AVX registers will be saved and restored on context 1067 // switch, then we have full AVX support. 1068 const unsigned AVXBits = (1 << 27) | (1 << 28); 1069 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && 1070 ((EAX & 0x6) == 0x6); 1071 #if defined(__APPLE__) 1072 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 1073 // save the AVX512 context if we use AVX512 instructions, even the bit is not 1074 // set right now. 1075 bool HasAVX512Save = true; 1076 #else 1077 // AVX512 requires additional context to be saved by the OS. 1078 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); 1079 #endif 1080 1081 if (HasAVX) 1082 setFeature(X86::FEATURE_AVX); 1083 1084 bool HasLeaf7 = 1085 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1086 1087 if (HasLeaf7 && ((EBX >> 3) & 1)) 1088 setFeature(X86::FEATURE_BMI); 1089 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) 1090 setFeature(X86::FEATURE_AVX2); 1091 if (HasLeaf7 && ((EBX >> 8) & 1)) 1092 setFeature(X86::FEATURE_BMI2); 1093 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) 1094 setFeature(X86::FEATURE_AVX512F); 1095 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) 1096 setFeature(X86::FEATURE_AVX512DQ); 1097 if (HasLeaf7 && ((EBX >> 19) & 1)) 1098 setFeature(X86::FEATURE_ADX); 1099 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) 1100 setFeature(X86::FEATURE_AVX512IFMA); 1101 if (HasLeaf7 && ((EBX >> 23) & 1)) 1102 setFeature(X86::FEATURE_CLFLUSHOPT); 1103 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) 1104 setFeature(X86::FEATURE_AVX512PF); 1105 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) 1106 setFeature(X86::FEATURE_AVX512ER); 1107 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) 1108 setFeature(X86::FEATURE_AVX512CD); 1109 if (HasLeaf7 && ((EBX >> 29) & 1)) 1110 setFeature(X86::FEATURE_SHA); 1111 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) 1112 setFeature(X86::FEATURE_AVX512BW); 1113 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) 1114 setFeature(X86::FEATURE_AVX512VL); 1115 1116 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) 1117 setFeature(X86::FEATURE_AVX512VBMI); 1118 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) 1119 setFeature(X86::FEATURE_AVX512VBMI2); 1120 if (HasLeaf7 && ((ECX >> 8) & 1)) 1121 setFeature(X86::FEATURE_GFNI); 1122 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) 1123 setFeature(X86::FEATURE_VPCLMULQDQ); 1124 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) 1125 setFeature(X86::FEATURE_AVX512VNNI); 1126 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save) 1127 setFeature(X86::FEATURE_AVX512BITALG); 1128 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) 1129 setFeature(X86::FEATURE_AVX512VPOPCNTDQ); 1130 1131 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) 1132 setFeature(X86::FEATURE_AVX5124VNNIW); 1133 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) 1134 setFeature(X86::FEATURE_AVX5124FMAPS); 1135 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) 1136 setFeature(X86::FEATURE_AVX512VP2INTERSECT); 1137 1138 bool HasLeaf7Subleaf1 = 1139 MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 1140 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) 1141 setFeature(X86::FEATURE_AVX512BF16); 1142 1143 unsigned MaxExtLevel; 1144 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1145 1146 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1147 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1148 if (HasExtLeaf1 && ((ECX >> 6) & 1)) 1149 setFeature(X86::FEATURE_SSE4_A); 1150 if (HasExtLeaf1 && ((ECX >> 11) & 1)) 1151 setFeature(X86::FEATURE_XOP); 1152 if (HasExtLeaf1 && ((ECX >> 16) & 1)) 1153 setFeature(X86::FEATURE_FMA4); 1154 1155 if (HasExtLeaf1 && ((EDX >> 29) & 1)) 1156 setFeature(X86::FEATURE_64BIT); 1157 } 1158 1159 StringRef sys::getHostCPUName() { 1160 unsigned MaxLeaf = 0; 1161 const VendorSignatures Vendor = getVendorSignature(&MaxLeaf); 1162 if (Vendor == VendorSignatures::UNKNOWN) 1163 return "generic"; 1164 1165 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1166 getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); 1167 1168 unsigned Family = 0, Model = 0; 1169 unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0}; 1170 detectX86FamilyModel(EAX, &Family, &Model); 1171 getAvailableFeatures(ECX, EDX, MaxLeaf, Features); 1172 1173 // These aren't consumed in this file, but we try to keep some source code the 1174 // same or similar to compiler-rt. 1175 unsigned Type = 0; 1176 unsigned Subtype = 0; 1177 1178 StringRef CPU; 1179 1180 if (Vendor == VendorSignatures::GENUINE_INTEL) { 1181 CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type, 1182 &Subtype); 1183 } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) { 1184 CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, 1185 &Subtype); 1186 } 1187 1188 if (!CPU.empty()) 1189 return CPU; 1190 1191 return "generic"; 1192 } 1193 1194 #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) 1195 StringRef sys::getHostCPUName() { 1196 host_basic_info_data_t hostInfo; 1197 mach_msg_type_number_t infoCount; 1198 1199 infoCount = HOST_BASIC_INFO_COUNT; 1200 mach_port_t hostPort = mach_host_self(); 1201 host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, 1202 &infoCount); 1203 mach_port_deallocate(mach_task_self(), hostPort); 1204 1205 if (hostInfo.cpu_type != CPU_TYPE_POWERPC) 1206 return "generic"; 1207 1208 switch (hostInfo.cpu_subtype) { 1209 case CPU_SUBTYPE_POWERPC_601: 1210 return "601"; 1211 case CPU_SUBTYPE_POWERPC_602: 1212 return "602"; 1213 case CPU_SUBTYPE_POWERPC_603: 1214 return "603"; 1215 case CPU_SUBTYPE_POWERPC_603e: 1216 return "603e"; 1217 case CPU_SUBTYPE_POWERPC_603ev: 1218 return "603ev"; 1219 case CPU_SUBTYPE_POWERPC_604: 1220 return "604"; 1221 case CPU_SUBTYPE_POWERPC_604e: 1222 return "604e"; 1223 case CPU_SUBTYPE_POWERPC_620: 1224 return "620"; 1225 case CPU_SUBTYPE_POWERPC_750: 1226 return "750"; 1227 case CPU_SUBTYPE_POWERPC_7400: 1228 return "7400"; 1229 case CPU_SUBTYPE_POWERPC_7450: 1230 return "7450"; 1231 case CPU_SUBTYPE_POWERPC_970: 1232 return "970"; 1233 default:; 1234 } 1235 1236 return "generic"; 1237 } 1238 #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__)) 1239 StringRef sys::getHostCPUName() { 1240 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1241 StringRef Content = P ? P->getBuffer() : ""; 1242 return detail::getHostCPUNameForPowerPC(Content); 1243 } 1244 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1245 StringRef sys::getHostCPUName() { 1246 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1247 StringRef Content = P ? P->getBuffer() : ""; 1248 return detail::getHostCPUNameForARM(Content); 1249 } 1250 #elif defined(__linux__) && defined(__s390x__) 1251 StringRef sys::getHostCPUName() { 1252 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1253 StringRef Content = P ? P->getBuffer() : ""; 1254 return detail::getHostCPUNameForS390x(Content); 1255 } 1256 #elif defined(__MVS__) 1257 StringRef sys::getHostCPUName() { 1258 // Get pointer to Communications Vector Table (CVT). 1259 // The pointer is located at offset 16 of the Prefixed Save Area (PSA). 1260 // It is stored as 31 bit pointer and will be zero-extended to 64 bit. 1261 int *StartToCVTOffset = reinterpret_cast<int *>(0x10); 1262 // Since its stored as a 31-bit pointer, get the 4 bytes from the start 1263 // of address. 1264 int ReadValue = *StartToCVTOffset; 1265 // Explicitly clear the high order bit. 1266 ReadValue = (ReadValue & 0x7FFFFFFF); 1267 char *CVT = reinterpret_cast<char *>(ReadValue); 1268 // The model number is located in the CVT prefix at offset -6 and stored as 1269 // signless packed decimal. 1270 uint16_t Id = *(uint16_t *)&CVT[-6]; 1271 // Convert number to integer. 1272 Id = decodePackedBCD<uint16_t>(Id, false); 1273 // Check for vector support. It's stored in field CVTFLAG5 (offset 244), 1274 // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector 1275 // extension can only be used if bit CVTVEF is on. 1276 bool HaveVectorSupport = CVT[244] & 0x80; 1277 return getCPUNameFromS390Model(Id, HaveVectorSupport); 1278 } 1279 #elif defined(__APPLE__) && defined(__aarch64__) 1280 StringRef sys::getHostCPUName() { 1281 return "cyclone"; 1282 } 1283 #elif defined(__APPLE__) && defined(__arm__) 1284 StringRef sys::getHostCPUName() { 1285 host_basic_info_data_t hostInfo; 1286 mach_msg_type_number_t infoCount; 1287 1288 infoCount = HOST_BASIC_INFO_COUNT; 1289 mach_port_t hostPort = mach_host_self(); 1290 host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, 1291 &infoCount); 1292 mach_port_deallocate(mach_task_self(), hostPort); 1293 1294 if (hostInfo.cpu_type != CPU_TYPE_ARM) { 1295 assert(false && "CPUType not equal to ARM should not be possible on ARM"); 1296 return "generic"; 1297 } 1298 switch (hostInfo.cpu_subtype) { 1299 case CPU_SUBTYPE_ARM_V7S: 1300 return "swift"; 1301 default:; 1302 } 1303 1304 return "generic"; 1305 } 1306 #elif defined(_AIX) 1307 StringRef sys::getHostCPUName() { 1308 switch (_system_configuration.implementation) { 1309 case POWER_4: 1310 if (_system_configuration.version == PV_4_3) 1311 return "970"; 1312 return "pwr4"; 1313 case POWER_5: 1314 if (_system_configuration.version == PV_5) 1315 return "pwr5"; 1316 return "pwr5x"; 1317 case POWER_6: 1318 if (_system_configuration.version == PV_6_Compat) 1319 return "pwr6"; 1320 return "pwr6x"; 1321 case POWER_7: 1322 return "pwr7"; 1323 case POWER_8: 1324 return "pwr8"; 1325 case POWER_9: 1326 return "pwr9"; 1327 // TODO: simplify this once the macro is available in all OS levels. 1328 #ifdef POWER_10 1329 case POWER_10: 1330 #else 1331 case 0x40000: 1332 #endif 1333 return "pwr10"; 1334 default: 1335 return "generic"; 1336 } 1337 } 1338 #else 1339 StringRef sys::getHostCPUName() { return "generic"; } 1340 namespace llvm { 1341 namespace sys { 1342 namespace detail { 1343 namespace x86 { 1344 1345 VendorSignatures getVendorSignature(unsigned *MaxLeaf) { 1346 return VendorSignatures::UNKNOWN; 1347 } 1348 1349 } // namespace x86 1350 } // namespace detail 1351 } // namespace sys 1352 } // namespace llvm 1353 #endif 1354 1355 #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__)) 1356 // On Linux, the number of physical cores can be computed from /proc/cpuinfo, 1357 // using the number of unique physical/core id pairs. The following 1358 // implementation reads the /proc/cpuinfo format on an x86_64 system. 1359 int computeHostNumPhysicalCores() { 1360 // Enabled represents the number of physical id/core id pairs with at least 1361 // one processor id enabled by the CPU affinity mask. 1362 cpu_set_t Affinity, Enabled; 1363 if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0) 1364 return -1; 1365 CPU_ZERO(&Enabled); 1366 1367 // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be 1368 // mmapped because it appears to have 0 size. 1369 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 1370 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); 1371 if (std::error_code EC = Text.getError()) { 1372 llvm::errs() << "Can't read " 1373 << "/proc/cpuinfo: " << EC.message() << "\n"; 1374 return -1; 1375 } 1376 SmallVector<StringRef, 8> strs; 1377 (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1, 1378 /*KeepEmpty=*/false); 1379 int CurProcessor = -1; 1380 int CurPhysicalId = -1; 1381 int CurSiblings = -1; 1382 int CurCoreId = -1; 1383 for (StringRef Line : strs) { 1384 std::pair<StringRef, StringRef> Data = Line.split(':'); 1385 auto Name = Data.first.trim(); 1386 auto Val = Data.second.trim(); 1387 // These fields are available if the kernel is configured with CONFIG_SMP. 1388 if (Name == "processor") 1389 Val.getAsInteger(10, CurProcessor); 1390 else if (Name == "physical id") 1391 Val.getAsInteger(10, CurPhysicalId); 1392 else if (Name == "siblings") 1393 Val.getAsInteger(10, CurSiblings); 1394 else if (Name == "core id") { 1395 Val.getAsInteger(10, CurCoreId); 1396 // The processor id corresponds to an index into cpu_set_t. 1397 if (CPU_ISSET(CurProcessor, &Affinity)) 1398 CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled); 1399 } 1400 } 1401 return CPU_COUNT(&Enabled); 1402 } 1403 #elif defined(__linux__) && defined(__powerpc__) 1404 int computeHostNumPhysicalCores() { 1405 cpu_set_t Affinity; 1406 if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0) 1407 return CPU_COUNT(&Affinity); 1408 1409 // The call to sched_getaffinity() may have failed because the Affinity 1410 // mask is too small for the number of CPU's on the system (i.e. the 1411 // system has more than 1024 CPUs). Allocate a mask large enough for 1412 // twice as many CPUs. 1413 cpu_set_t *DynAffinity; 1414 DynAffinity = CPU_ALLOC(2048); 1415 if (sched_getaffinity(0, CPU_ALLOC_SIZE(2048), DynAffinity) == 0) { 1416 int NumCPUs = CPU_COUNT(DynAffinity); 1417 CPU_FREE(DynAffinity); 1418 return NumCPUs; 1419 } 1420 return -1; 1421 } 1422 #elif defined(__linux__) && defined(__s390x__) 1423 int computeHostNumPhysicalCores() { return sysconf(_SC_NPROCESSORS_ONLN); } 1424 #elif defined(__APPLE__) 1425 #include <sys/param.h> 1426 #include <sys/sysctl.h> 1427 1428 // Gets the number of *physical cores* on the machine. 1429 int computeHostNumPhysicalCores() { 1430 uint32_t count; 1431 size_t len = sizeof(count); 1432 sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0); 1433 if (count < 1) { 1434 int nm[2]; 1435 nm[0] = CTL_HW; 1436 nm[1] = HW_AVAILCPU; 1437 sysctl(nm, 2, &count, &len, NULL, 0); 1438 if (count < 1) 1439 return -1; 1440 } 1441 return count; 1442 } 1443 #elif defined(__MVS__) 1444 int computeHostNumPhysicalCores() { 1445 enum { 1446 // Byte offset of the pointer to the Communications Vector Table (CVT) in 1447 // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and 1448 // will be zero-extended to uintptr_t. 1449 FLCCVT = 16, 1450 // Byte offset of the pointer to the Common System Data Area (CSD) in the 1451 // CVT. The table entry is a 31-bit pointer and will be zero-extended to 1452 // uintptr_t. 1453 CVTCSD = 660, 1454 // Byte offset to the number of live CPs in the LPAR, stored as a signed 1455 // 32-bit value in the table. 1456 CSD_NUMBER_ONLINE_STANDARD_CPS = 264, 1457 }; 1458 char *PSA = 0; 1459 char *CVT = reinterpret_cast<char *>( 1460 static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT]))); 1461 char *CSD = reinterpret_cast<char *>( 1462 static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD]))); 1463 return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]); 1464 } 1465 #elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0 1466 // Defined in llvm/lib/Support/Windows/Threading.inc 1467 int computeHostNumPhysicalCores(); 1468 #else 1469 // On other systems, return -1 to indicate unknown. 1470 static int computeHostNumPhysicalCores() { return -1; } 1471 #endif 1472 1473 int sys::getHostNumPhysicalCores() { 1474 static int NumCores = computeHostNumPhysicalCores(); 1475 return NumCores; 1476 } 1477 1478 #if defined(__i386__) || defined(_M_IX86) || \ 1479 defined(__x86_64__) || defined(_M_X64) 1480 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1481 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1482 unsigned MaxLevel; 1483 1484 if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1) 1485 return false; 1486 1487 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); 1488 1489 Features["cx8"] = (EDX >> 8) & 1; 1490 Features["cmov"] = (EDX >> 15) & 1; 1491 Features["mmx"] = (EDX >> 23) & 1; 1492 Features["fxsr"] = (EDX >> 24) & 1; 1493 Features["sse"] = (EDX >> 25) & 1; 1494 Features["sse2"] = (EDX >> 26) & 1; 1495 1496 Features["sse3"] = (ECX >> 0) & 1; 1497 Features["pclmul"] = (ECX >> 1) & 1; 1498 Features["ssse3"] = (ECX >> 9) & 1; 1499 Features["cx16"] = (ECX >> 13) & 1; 1500 Features["sse4.1"] = (ECX >> 19) & 1; 1501 Features["sse4.2"] = (ECX >> 20) & 1; 1502 Features["movbe"] = (ECX >> 22) & 1; 1503 Features["popcnt"] = (ECX >> 23) & 1; 1504 Features["aes"] = (ECX >> 25) & 1; 1505 Features["rdrnd"] = (ECX >> 30) & 1; 1506 1507 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 1508 // indicates that the AVX registers will be saved and restored on context 1509 // switch, then we have full AVX support. 1510 bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX); 1511 bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6); 1512 #if defined(__APPLE__) 1513 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 1514 // save the AVX512 context if we use AVX512 instructions, even the bit is not 1515 // set right now. 1516 bool HasAVX512Save = true; 1517 #else 1518 // AVX512 requires additional context to be saved by the OS. 1519 bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); 1520 #endif 1521 // AMX requires additional context to be saved by the OS. 1522 const unsigned AMXBits = (1 << 17) | (1 << 18); 1523 bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits); 1524 1525 Features["avx"] = HasAVXSave; 1526 Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave; 1527 // Only enable XSAVE if OS has enabled support for saving YMM state. 1528 Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave; 1529 Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave; 1530 1531 unsigned MaxExtLevel; 1532 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1533 1534 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1535 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1536 Features["sahf"] = HasExtLeaf1 && ((ECX >> 0) & 1); 1537 Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); 1538 Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); 1539 Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); 1540 Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; 1541 Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1); 1542 Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; 1543 Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); 1544 Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1); 1545 1546 Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1); 1547 1548 // Miscellaneous memory related features, detected by 1549 // using the 0x80000008 leaf of the CPUID instruction 1550 bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 && 1551 !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX); 1552 Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1); 1553 Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1); 1554 1555 bool HasLeaf7 = 1556 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1557 1558 Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); 1559 Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1); 1560 Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); 1561 // AVX2 is only supported if we have the OS save support from AVX. 1562 Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave; 1563 Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); 1564 Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1); 1565 Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); 1566 // AVX512 is only supported if the OS supports the context save for it. 1567 Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; 1568 Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; 1569 Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); 1570 Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); 1571 Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save; 1572 Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1); 1573 Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1); 1574 Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; 1575 Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; 1576 Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; 1577 Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); 1578 Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; 1579 Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; 1580 1581 Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1); 1582 Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; 1583 Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); 1584 Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1); 1585 Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save; 1586 Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1); 1587 Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1); 1588 Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave; 1589 Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave; 1590 Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save; 1591 Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save; 1592 Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; 1593 Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1); 1594 Features["kl"] = HasLeaf7 && ((ECX >> 23) & 1); // key locker 1595 Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1); 1596 Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1); 1597 Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1); 1598 Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1); 1599 1600 Features["uintr"] = HasLeaf7 && ((EDX >> 5) & 1); 1601 Features["avx512vp2intersect"] = 1602 HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save; 1603 Features["serialize"] = HasLeaf7 && ((EDX >> 14) & 1); 1604 Features["tsxldtrk"] = HasLeaf7 && ((EDX >> 16) & 1); 1605 // There are two CPUID leafs which information associated with the pconfig 1606 // instruction: 1607 // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th 1608 // bit of EDX), while the EAX=0x1b leaf returns information on the 1609 // availability of specific pconfig leafs. 1610 // The target feature here only refers to the the first of these two. 1611 // Users might need to check for the availability of specific pconfig 1612 // leaves using cpuid, since that information is ignored while 1613 // detecting features using the "-march=native" flag. 1614 // For more info, see X86 ISA docs. 1615 Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1); 1616 Features["amx-bf16"] = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave; 1617 Features["amx-tile"] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave; 1618 Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave; 1619 bool HasLeaf7Subleaf1 = 1620 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 1621 Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave; 1622 Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save; 1623 Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1); 1624 1625 bool HasLeafD = MaxLevel >= 0xd && 1626 !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); 1627 1628 // Only enable XSAVE if OS has enabled support for saving YMM state. 1629 Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave; 1630 Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave; 1631 Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave; 1632 1633 bool HasLeaf14 = MaxLevel >= 0x14 && 1634 !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX); 1635 1636 Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1); 1637 1638 bool HasLeaf19 = 1639 MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX); 1640 Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1); 1641 1642 return true; 1643 } 1644 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1645 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1646 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1647 if (!P) 1648 return false; 1649 1650 SmallVector<StringRef, 32> Lines; 1651 P->getBuffer().split(Lines, "\n"); 1652 1653 SmallVector<StringRef, 32> CPUFeatures; 1654 1655 // Look for the CPU features. 1656 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 1657 if (Lines[I].startswith("Features")) { 1658 Lines[I].split(CPUFeatures, ' '); 1659 break; 1660 } 1661 1662 #if defined(__aarch64__) 1663 // Keep track of which crypto features we have seen 1664 enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 }; 1665 uint32_t crypto = 0; 1666 #endif 1667 1668 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 1669 StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I]) 1670 #if defined(__aarch64__) 1671 .Case("asimd", "neon") 1672 .Case("fp", "fp-armv8") 1673 .Case("crc32", "crc") 1674 #else 1675 .Case("half", "fp16") 1676 .Case("neon", "neon") 1677 .Case("vfpv3", "vfp3") 1678 .Case("vfpv3d16", "d16") 1679 .Case("vfpv4", "vfp4") 1680 .Case("idiva", "hwdiv-arm") 1681 .Case("idivt", "hwdiv") 1682 #endif 1683 .Default(""); 1684 1685 #if defined(__aarch64__) 1686 // We need to check crypto separately since we need all of the crypto 1687 // extensions to enable the subtarget feature 1688 if (CPUFeatures[I] == "aes") 1689 crypto |= CAP_AES; 1690 else if (CPUFeatures[I] == "pmull") 1691 crypto |= CAP_PMULL; 1692 else if (CPUFeatures[I] == "sha1") 1693 crypto |= CAP_SHA1; 1694 else if (CPUFeatures[I] == "sha2") 1695 crypto |= CAP_SHA2; 1696 #endif 1697 1698 if (LLVMFeatureStr != "") 1699 Features[LLVMFeatureStr] = true; 1700 } 1701 1702 #if defined(__aarch64__) 1703 // If we have all crypto bits we can add the feature 1704 if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2)) 1705 Features["crypto"] = true; 1706 #endif 1707 1708 return true; 1709 } 1710 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64)) 1711 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1712 if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) 1713 Features["neon"] = true; 1714 if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) 1715 Features["crc"] = true; 1716 if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) 1717 Features["crypto"] = true; 1718 1719 return true; 1720 } 1721 #else 1722 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; } 1723 #endif 1724 1725 std::string sys::getProcessTriple() { 1726 std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE); 1727 Triple PT(Triple::normalize(TargetTripleString)); 1728 1729 if (sizeof(void *) == 8 && PT.isArch32Bit()) 1730 PT = PT.get64BitArchVariant(); 1731 if (sizeof(void *) == 4 && PT.isArch64Bit()) 1732 PT = PT.get32BitArchVariant(); 1733 1734 return PT.str(); 1735 } 1736