1 //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the operating system Host concept. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Support/Host.h" 14 #include "llvm/ADT/SmallVector.h" 15 #include "llvm/ADT/StringMap.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/ADT/StringSwitch.h" 18 #include "llvm/ADT/Triple.h" 19 #include "llvm/Config/llvm-config.h" 20 #include "llvm/Support/MemoryBuffer.h" 21 #include "llvm/Support/X86TargetParser.h" 22 #include "llvm/Support/raw_ostream.h" 23 #include <string.h> 24 25 // Include the platform-specific parts of this class. 
#ifdef LLVM_ON_UNIX
#include "Unix/Host.inc"
#include <sched.h>
#endif
#ifdef _WIN32
#include "Windows/Host.inc"
#endif
#ifdef _MSC_VER
#include <intrin.h>
#endif
#if defined(__APPLE__) && (!defined(__x86_64__))
#include <mach/host_info.h>
#include <mach/mach.h>
#include <mach/mach_host.h>
#include <mach/machine.h>
#endif
#ifdef _AIX
#include <sys/systemcfg.h>
#endif

#define DEBUG_TYPE "host-detection"

//===----------------------------------------------------------------------===//
//
//  Implementations of the CPU detection routines
//
//===----------------------------------------------------------------------===//

using namespace llvm;

/// Read the whole of /proc/cpuinfo into a MemoryBuffer.
///
/// \returns the buffer on success. On failure a "Can't read /proc/cpuinfo"
/// diagnostic (with the error message) is written to errs() and nullptr is
/// returned.
static std::unique_ptr<llvm::MemoryBuffer>
    LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
      llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
  if (std::error_code EC = Text.getError()) {
    llvm::errs() << "Can't read "
                 << "/proc/cpuinfo: " << EC.message() << "\n";
    return nullptr;
  }
  return std::move(*Text);
}

/// Map the value of the first "cpu" line found in \p ProcCpuinfoContent to an
/// LLVM PowerPC CPU name.
///
/// \param ProcCpuinfoContent text in the format of /proc/cpuinfo.
/// \returns the matching CPU name, or "generic" when no "cpu" line is found
/// or its value is not one of the names listed below.
StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
  // Access to the Processor Version Register (PVR) on PowerPC is privileged,
  // and so we must use an operating-system interface to determine the current
  // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
  const char *generic = "generic";

  // The cpu line is second (after the 'processor: 0' line), so if this
  // buffer is too small then something has changed (or is wrong).
  StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
  StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();

  // Scan cursor; every dereference below is guarded by CIP < CPUInfoEnd.
  StringRef::const_iterator CIP = CPUInfoStart;

  // Start and length of the CPU name token; CPUStart stays null until found.
  StringRef::const_iterator CPUStart = nullptr;
  size_t CPULen = 0;

  // We need to find the first line which starts with cpu, spaces, and a colon.
  // After the colon, there may be some additional spaces and then the cpu type.
  while (CIP < CPUInfoEnd && CPUStart == nullptr) {
    // Step over the newline that terminated the previously rejected line.
    if (CIP < CPUInfoEnd && *CIP == '\n')
      ++CIP;

    if (CIP < CPUInfoEnd && *CIP == 'c') {
      ++CIP;
      if (CIP < CPUInfoEnd && *CIP == 'p') {
        ++CIP;
        if (CIP < CPUInfoEnd && *CIP == 'u') {
          ++CIP;
          while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
            ++CIP;

          if (CIP < CPUInfoEnd && *CIP == ':') {
            ++CIP;
            while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
              ++CIP;

            if (CIP < CPUInfoEnd) {
              // The name token ends at the first space, tab, comma or newline.
              CPUStart = CIP;
              while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
                                          *CIP != ',' && *CIP != '\n'))
                ++CIP;
              CPULen = CIP - CPUStart;
            }
          }
        }
      }
    }

    // Not the line we want: advance to the end of the current line.
    if (CPUStart == nullptr)
      while (CIP < CPUInfoEnd && *CIP != '\n')
        ++CIP;
  }

  if (CPUStart == nullptr)
    return generic;

  return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
      .Case("604e", "604e")
      .Case("604", "604")
      .Case("7400", "7400")
      .Case("7410", "7400")
      .Case("7447", "7400")
      .Case("7455", "7450")
      .Case("G4", "g4")
      .Case("POWER4", "970")
      .Case("PPC970FX", "970")
      .Case("PPC970MP", "970")
      .Case("G5", "g5")
      .Case("POWER5", "g5")
      .Case("A2", "a2")
      .Case("POWER6", "pwr6")
      .Case("POWER7", "pwr7")
      .Case("POWER8", "pwr8")
      .Case("POWER8E", "pwr8")
      .Case("POWER8NVL", "pwr8")
      .Case("POWER9", "pwr9")
      .Case("POWER10", "pwr10")
      // FIXME: If we get a simulator or machine with the capabilities of
      // mcpu=future, we should revisit this and add the name reported by the
      // simulator/machine.
      .Default(generic);
}

/// Map the "CPU implementer", "CPU part" and "Hardware" lines of
/// \p ProcCpuinfoContent to an LLVM ARM/AArch64 CPU name.
///
/// \param ProcCpuinfoContent text in the format of /proc/cpuinfo.
/// \returns the matching CPU name, or "generic" when the implementer or part
/// is not recognized. The Samsung (0x53) path never returns "generic": any
/// unrecognized variant/part combination yields "exynos-m3".
StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
  // The cpuid register on arm is not accessible from user space. On Linux,
  // it is exposed through the /proc/cpuinfo file.

  // Split the content into lines. No split limit is passed, so every line is
  // kept; 32 is only the SmallVector's inline capacity.
  SmallVector<StringRef, 32> Lines;
  ProcCpuinfoContent.split(Lines, "\n");

  // Look for the CPU implementer, Hardware and CPU part lines. There is no
  // early exit, so when a key appears more than once the last occurrence wins.
  StringRef Implementer;
  StringRef Hardware;
  StringRef Part;
  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
    if (Lines[I].startswith("CPU implementer"))
      Implementer = Lines[I].substr(15).ltrim("\t :");
    if (Lines[I].startswith("Hardware"))
      Hardware = Lines[I].substr(8).ltrim("\t :");
    if (Lines[I].startswith("CPU part"))
      Part = Lines[I].substr(8).ltrim("\t :");
  }

  if (Implementer == "0x41") { // ARM Ltd.
    // MSM8992/8994 may give cpu part for the core that the kernel is running on,
    // which is nondeterministic and wrong. Always return cortex-a53 for these SoC.
    // NOTE(review): the comment names MSM8992/8994 but the condition tests
    // MSM8994 and MSM8996 -- confirm which pair is intended.
    if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996"))
      return "cortex-a53";


    // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
    // values correspond to the "Part number" in the CP15/c0 register. The
    // contents are specified in the various processor manuals.
    // This corresponds to the Main ID Register in Technical Reference Manuals.
    // and is used in programs like sys-utils
    return StringSwitch<const char *>(Part)
        .Case("0x926", "arm926ej-s")
        .Case("0xb02", "mpcore")
        .Case("0xb36", "arm1136j-s")
        .Case("0xb56", "arm1156t2-s")
        .Case("0xb76", "arm1176jz-s")
        .Case("0xc08", "cortex-a8")
        .Case("0xc09", "cortex-a9")
        .Case("0xc0f", "cortex-a15")
        .Case("0xc20", "cortex-m0")
        .Case("0xc23", "cortex-m3")
        .Case("0xc24", "cortex-m4")
        .Case("0xd22", "cortex-m55")
        .Case("0xd02", "cortex-a34")
        .Case("0xd04", "cortex-a35")
        .Case("0xd03", "cortex-a53")
        .Case("0xd07", "cortex-a57")
        .Case("0xd08", "cortex-a72")
        .Case("0xd09", "cortex-a73")
        .Case("0xd0a", "cortex-a75")
        .Case("0xd0b", "cortex-a76")
        .Case("0xd0d", "cortex-a77")
        .Case("0xd41", "cortex-a78")
        .Case("0xd44", "cortex-x1")
        .Case("0xd4c", "cortex-x1c")
        .Case("0xd0c", "neoverse-n1")
        .Case("0xd49", "neoverse-n2")
        .Case("0xd40", "neoverse-v1")
        .Default("generic");
  }

  if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
    // Each part is listed both with and without a leading zero digit.
    return StringSwitch<const char *>(Part)
      .Case("0x516", "thunderx2t99")
      .Case("0x0516", "thunderx2t99")
      .Case("0xaf", "thunderx2t99")
      .Case("0x0af", "thunderx2t99")
      .Case("0xa1", "thunderxt88")
      .Case("0x0a1", "thunderxt88")
      .Default("generic");
  }

  if (Implementer == "0x46") { // Fujitsu Ltd.
    return StringSwitch<const char *>(Part)
      .Case("0x001", "a64fx")
      .Default("generic");
  }

  if (Implementer == "0x4e") { // NVIDIA Corporation
    return StringSwitch<const char *>(Part)
        .Case("0x004", "carmel")
        .Default("generic");
  }

  if (Implementer == "0x48") // HiSilicon Technologies, Inc.
    // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
    // values correspond to the "Part number" in the CP15/c0 register. The
    // contents are specified in the various processor manuals.
    return StringSwitch<const char *>(Part)
      .Case("0xd01", "tsv110")
      .Default("generic");

  if (Implementer == "0x51") // Qualcomm Technologies, Inc.
    // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
    // values correspond to the "Part number" in the CP15/c0 register. The
    // contents are specified in the various processor manuals.
    return StringSwitch<const char *>(Part)
        .Case("0x06f", "krait") // APQ8064
        .Case("0x201", "kryo")
        .Case("0x205", "kryo")
        .Case("0x211", "kryo")
        .Case("0x800", "cortex-a73") // Kryo 2xx Gold
        .Case("0x801", "cortex-a73") // Kryo 2xx Silver
        .Case("0x802", "cortex-a75") // Kryo 3xx Gold
        .Case("0x803", "cortex-a75") // Kryo 3xx Silver
        .Case("0x804", "cortex-a76") // Kryo 4xx Gold
        .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
        .Case("0xc00", "falkor")
        .Case("0xc01", "saphira")
        .Default("generic");
  if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
    // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
    // any predictive pattern across variants and parts.
    // Note: this numeric Part shadows the StringRef Part declared above.
    unsigned Variant = 0, Part = 0;

    // Look for the CPU variant line, whose value is a 1 digit hexadecimal
    // number, corresponding to the Variant bits in the CP15/C0 register.
    // The getAsInteger result is ignored; Variant keeps its previous value
    // if parsing fails.
    for (auto I : Lines)
      if (I.consume_front("CPU variant"))
        I.ltrim("\t :").getAsInteger(0, Variant);

    // Look for the CPU part line, whose value is a 3 digit hexadecimal
    // number, corresponding to the PartNum bits in the CP15/C0 register.
    for (auto I : Lines)
      if (I.consume_front("CPU part"))
        I.ltrim("\t :").getAsInteger(0, Part);

    // Combine both fields into a single key: variant above bit 12, part below.
    unsigned Exynos = (Variant << 12) | Part;
    switch (Exynos) {
    default:
      // Default by falling through to Exynos M3.
      LLVM_FALLTHROUGH;
    case 0x1002:
      return "exynos-m3";
    case 0x1003:
      return "exynos-m4";
    }
  }

  return "generic";
}

namespace {
/// Translate an s390 machine type number into an LLVM CPU name.
///
/// \param Id machine type number (e.g. 2964).
/// \param HaveVectorSupport whether the vector facility may be used; when
/// false, every machine from 2964 onwards (including unknown ones) is
/// reported as "zEC12".
/// \returns the CPU name; unknown Ids are treated like the newest entry.
StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
  switch (Id) {
    case 2064:  // z900 not supported by LLVM
    case 2066:
    case 2084:  // z990 not supported by LLVM
    case 2086:
    case 2094:  // z9-109 not supported by LLVM
    case 2096:
      return "generic";
    case 2097:
    case 2098:
      return "z10";
    case 2817:
    case 2818:
      return "z196";
    case 2827:
    case 2828:
      return "zEC12";
    case 2964:
    case 2965:
      return HaveVectorSupport? "z13" : "zEC12";
    case 3906:
    case 3907:
      return HaveVectorSupport? "z14" : "zEC12";
    case 8561:
    case 8562:
      return HaveVectorSupport? "z15" : "zEC12";
    case 3931:
    case 3932:
    default:
      return HaveVectorSupport? "arch14" : "zEC12";
  }
}
} // end anonymous namespace

/// Derive the LLVM CPU name for an s390x host from \p ProcCpuinfoContent.
///
/// Uses the "features" line to detect the "vx" (vector) feature and the first
/// "processor " line to obtain the "machine = NNNN" number.
///
/// \returns the name chosen by getCPUNameFromS390Model, or "generic" when no
/// machine number can be parsed.
StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
  // STIDP is a privileged operation, so use /proc/cpuinfo instead.

  // The "processor 0:" line comes after a fair amount of other information,
  // including a cache breakdown, but this should be plenty.
  SmallVector<StringRef, 32> Lines;
  ProcCpuinfoContent.split(Lines, "\n");

  // Look for the CPU features: the space-separated tokens after the colon of
  // the first "features" line that contains a colon.
  SmallVector<StringRef, 32> CPUFeatures;
  for (unsigned I = 0, E = Lines.size(); I != E; ++I)
    if (Lines[I].startswith("features")) {
      size_t Pos = Lines[I].find(':');
      if (Pos != StringRef::npos) {
        Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
        break;
      }
    }

  // We need to check for the presence of vector support independently of
  // the machine type, since we may only use the vector register set when
  // supported by the kernel (and hypervisor).
  bool HaveVectorSupport = false;
  for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
    if (CPUFeatures[I] == "vx")
      HaveVectorSupport = true;
  }

  // Now check the processor machine type. Only the first "processor " line
  // is examined (note the unconditional break).
  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
    if (Lines[I].startswith("processor ")) {
      size_t Pos = Lines[I].find("machine = ");
      if (Pos != StringRef::npos) {
        Pos += sizeof("machine = ") - 1;
        unsigned int Id;
        // getAsInteger returns false on success.
        if (!Lines[I].drop_front(Pos).getAsInteger(10, Id))
          return getCPUNameFromS390Model(Id, HaveVectorSupport);
      }
      break;
    }
  }

  return "generic";
}

/// Determine the BPF CPU version to target on this host.
///
/// On anything other than Linux/x86-64 this returns "generic". Otherwise it
/// tries to load two five-instruction socket-filter programs through the bpf
/// syscall: if the program containing a BPF_JMP32 instruction loads, the
/// result is "v3"; else if the program containing a 64-bit BPF_JLT jump
/// loads, the result is "v2"; otherwise "v1".
StringRef sys::detail::getHostCPUNameForBPF() {
#if !defined(__linux__) || !defined(__x86_64__)
  return "generic";
#else
  // Hand-encoded program: 5 instructions of 8 bytes each.
  uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
      /* BPF_MOV64_IMM(BPF_REG_0, 0) */
    { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_2, 1) */
      0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
      0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_0, 1) */
      0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_EXIT_INSN() */
      0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };

  // Same program, but the jump is the 64-bit BPF_JMP form (0xad vs 0xae).
  uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
      /* BPF_MOV64_IMM(BPF_REG_0, 0) */
    { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_2, 1) */
      0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
      0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_0, 1) */
      0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_EXIT_INSN() */
      0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };

  // Local definition of the argument block passed to the bpf syscall for
  // BPF_PROG_LOAD. NOTE(review): presumably mirrors the leading fields of
  // the kernel's `union bpf_attr` -- confirm against the kernel headers.
  struct bpf_prog_load_attr {
    uint32_t prog_type;
    uint32_t insn_cnt;
    uint64_t insns;
    uint64_t license;
    uint32_t log_level;
    uint32_t log_size;
    uint64_t log_buf;
    uint32_t kern_version;
    uint32_t prog_flags;
  } attr = {};
  attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
  attr.insn_cnt = 5;
  attr.insns = (uint64_t)v3_insns;
  attr.license = (uint64_t)"DUMMY";

  int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
                   sizeof(attr));
  if (fd >= 0) {
    // The program loaded; the descriptor itself is not needed.
    close(fd);
    return "v3";
  }

  /* Clear the whole attr in case its content changed by syscall. */
  memset(&attr, 0, sizeof(attr));
  attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
  attr.insn_cnt = 5;
  attr.insns = (uint64_t)v2_insns;
  attr.license = (uint64_t)"DUMMY";
  fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
  if (fd >= 0) {
    close(fd);
    return "v2";
  }
  return "v1";
#endif
}

#if defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64__) || defined(_M_X64)

// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
// Removal of cpuid.h header motivated by PR30384
// Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp
// or test-suite, but are used in external projects e.g.
libstdcxx 454 static bool isCpuIdSupported() { 455 #if defined(__GNUC__) || defined(__clang__) 456 #if defined(__i386__) 457 int __cpuid_supported; 458 __asm__(" pushfl\n" 459 " popl %%eax\n" 460 " movl %%eax,%%ecx\n" 461 " xorl $0x00200000,%%eax\n" 462 " pushl %%eax\n" 463 " popfl\n" 464 " pushfl\n" 465 " popl %%eax\n" 466 " movl $0,%0\n" 467 " cmpl %%eax,%%ecx\n" 468 " je 1f\n" 469 " movl $1,%0\n" 470 "1:" 471 : "=r"(__cpuid_supported) 472 : 473 : "eax", "ecx"); 474 if (!__cpuid_supported) 475 return false; 476 #endif 477 return true; 478 #endif 479 return true; 480 } 481 482 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in 483 /// the specified arguments. If we can't run cpuid on the host, return true. 484 static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, 485 unsigned *rECX, unsigned *rEDX) { 486 #if defined(__GNUC__) || defined(__clang__) 487 #if defined(__x86_64__) 488 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 489 // FIXME: should we save this for Clang? 490 __asm__("movq\t%%rbx, %%rsi\n\t" 491 "cpuid\n\t" 492 "xchgq\t%%rbx, %%rsi\n\t" 493 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 494 : "a"(value)); 495 return false; 496 #elif defined(__i386__) 497 __asm__("movl\t%%ebx, %%esi\n\t" 498 "cpuid\n\t" 499 "xchgl\t%%ebx, %%esi\n\t" 500 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 501 : "a"(value)); 502 return false; 503 #else 504 return true; 505 #endif 506 #elif defined(_MSC_VER) 507 // The MSVC intrinsic is portable across x86 and x64. 
508 int registers[4]; 509 __cpuid(registers, value); 510 *rEAX = registers[0]; 511 *rEBX = registers[1]; 512 *rECX = registers[2]; 513 *rEDX = registers[3]; 514 return false; 515 #else 516 return true; 517 #endif 518 } 519 520 namespace llvm { 521 namespace sys { 522 namespace detail { 523 namespace x86 { 524 525 VendorSignatures getVendorSignature(unsigned *MaxLeaf) { 526 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 527 if (MaxLeaf == nullptr) 528 MaxLeaf = &EAX; 529 else 530 *MaxLeaf = 0; 531 532 if (!isCpuIdSupported()) 533 return VendorSignatures::UNKNOWN; 534 535 if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1) 536 return VendorSignatures::UNKNOWN; 537 538 // "Genu ineI ntel" 539 if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e) 540 return VendorSignatures::GENUINE_INTEL; 541 542 // "Auth enti cAMD" 543 if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163) 544 return VendorSignatures::AUTHENTIC_AMD; 545 546 return VendorSignatures::UNKNOWN; 547 } 548 549 } // namespace x86 550 } // namespace detail 551 } // namespace sys 552 } // namespace llvm 553 554 using namespace llvm::sys::detail::x86; 555 556 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return 557 /// the 4 values in the specified arguments. If we can't run cpuid on the host, 558 /// return true. 559 static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, 560 unsigned *rEAX, unsigned *rEBX, unsigned *rECX, 561 unsigned *rEDX) { 562 #if defined(__GNUC__) || defined(__clang__) 563 #if defined(__x86_64__) 564 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 565 // FIXME: should we save this for Clang? 
566 __asm__("movq\t%%rbx, %%rsi\n\t" 567 "cpuid\n\t" 568 "xchgq\t%%rbx, %%rsi\n\t" 569 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 570 : "a"(value), "c"(subleaf)); 571 return false; 572 #elif defined(__i386__) 573 __asm__("movl\t%%ebx, %%esi\n\t" 574 "cpuid\n\t" 575 "xchgl\t%%ebx, %%esi\n\t" 576 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 577 : "a"(value), "c"(subleaf)); 578 return false; 579 #else 580 return true; 581 #endif 582 #elif defined(_MSC_VER) 583 int registers[4]; 584 __cpuidex(registers, value, subleaf); 585 *rEAX = registers[0]; 586 *rEBX = registers[1]; 587 *rECX = registers[2]; 588 *rEDX = registers[3]; 589 return false; 590 #else 591 return true; 592 #endif 593 } 594 595 // Read control register 0 (XCR0). Used to detect features such as AVX. 596 static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { 597 #if defined(__GNUC__) || defined(__clang__) 598 // Check xgetbv; this uses a .byte sequence instead of the instruction 599 // directly because older assemblers do not include support for xgetbv and 600 // there is no easy way to conditionally compile based on the assembler used. 601 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); 602 return false; 603 #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) 604 unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); 605 *rEAX = Result; 606 *rEDX = Result >> 32; 607 return false; 608 #else 609 return true; 610 #endif 611 } 612 613 static void detectX86FamilyModel(unsigned EAX, unsigned *Family, 614 unsigned *Model) { 615 *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 616 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 617 if (*Family == 6 || *Family == 0xf) { 618 if (*Family == 0xf) 619 // Examine extended family ID if family ID is F. 620 *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 621 // Examine extended model ID if family ID is 6 or F. 
622 *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 623 } 624 } 625 626 static StringRef 627 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, 628 const unsigned *Features, 629 unsigned *Type, unsigned *Subtype) { 630 auto testFeature = [&](unsigned F) { 631 return (Features[F / 32] & (1U << (F % 32))) != 0; 632 }; 633 634 StringRef CPU; 635 636 switch (Family) { 637 case 3: 638 CPU = "i386"; 639 break; 640 case 4: 641 CPU = "i486"; 642 break; 643 case 5: 644 if (testFeature(X86::FEATURE_MMX)) { 645 CPU = "pentium-mmx"; 646 break; 647 } 648 CPU = "pentium"; 649 break; 650 case 6: 651 switch (Model) { 652 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile 653 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad 654 // mobile processor, Intel Core 2 Extreme processor, Intel 655 // Pentium Dual-Core processor, Intel Xeon processor, model 656 // 0Fh. All processors are manufactured using the 65 nm process. 657 case 0x16: // Intel Celeron processor model 16h. All processors are 658 // manufactured using the 65 nm process 659 CPU = "core2"; 660 *Type = X86::INTEL_CORE2; 661 break; 662 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model 663 // 17h. All processors are manufactured using the 45 nm process. 664 // 665 // 45nm: Penryn , Wolfdale, Yorkfield (XE) 666 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using 667 // the 45 nm process. 668 CPU = "penryn"; 669 *Type = X86::INTEL_CORE2; 670 break; 671 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All 672 // processors are manufactured using the 45 nm process. 673 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. 674 // As found in a Summer 2010 model iMac. 675 case 0x1f: 676 case 0x2e: // Nehalem EX 677 CPU = "nehalem"; 678 *Type = X86::INTEL_COREI7; 679 *Subtype = X86::INTEL_COREI7_NEHALEM; 680 break; 681 case 0x25: // Intel Core i7, laptop version. 
682 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All 683 // processors are manufactured using the 32 nm process. 684 case 0x2f: // Westmere EX 685 CPU = "westmere"; 686 *Type = X86::INTEL_COREI7; 687 *Subtype = X86::INTEL_COREI7_WESTMERE; 688 break; 689 case 0x2a: // Intel Core i7 processor. All processors are manufactured 690 // using the 32 nm process. 691 case 0x2d: 692 CPU = "sandybridge"; 693 *Type = X86::INTEL_COREI7; 694 *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; 695 break; 696 case 0x3a: 697 case 0x3e: // Ivy Bridge EP 698 CPU = "ivybridge"; 699 *Type = X86::INTEL_COREI7; 700 *Subtype = X86::INTEL_COREI7_IVYBRIDGE; 701 break; 702 703 // Haswell: 704 case 0x3c: 705 case 0x3f: 706 case 0x45: 707 case 0x46: 708 CPU = "haswell"; 709 *Type = X86::INTEL_COREI7; 710 *Subtype = X86::INTEL_COREI7_HASWELL; 711 break; 712 713 // Broadwell: 714 case 0x3d: 715 case 0x47: 716 case 0x4f: 717 case 0x56: 718 CPU = "broadwell"; 719 *Type = X86::INTEL_COREI7; 720 *Subtype = X86::INTEL_COREI7_BROADWELL; 721 break; 722 723 // Skylake: 724 case 0x4e: // Skylake mobile 725 case 0x5e: // Skylake desktop 726 case 0x8e: // Kaby Lake mobile 727 case 0x9e: // Kaby Lake desktop 728 case 0xa5: // Comet Lake-H/S 729 case 0xa6: // Comet Lake-U 730 CPU = "skylake"; 731 *Type = X86::INTEL_COREI7; 732 *Subtype = X86::INTEL_COREI7_SKYLAKE; 733 break; 734 735 // Rocketlake: 736 case 0xa7: 737 CPU = "rocketlake"; 738 *Type = X86::INTEL_COREI7; 739 *Subtype = X86::INTEL_COREI7_ROCKETLAKE; 740 break; 741 742 // Skylake Xeon: 743 case 0x55: 744 *Type = X86::INTEL_COREI7; 745 if (testFeature(X86::FEATURE_AVX512BF16)) { 746 CPU = "cooperlake"; 747 *Subtype = X86::INTEL_COREI7_COOPERLAKE; 748 } else if (testFeature(X86::FEATURE_AVX512VNNI)) { 749 CPU = "cascadelake"; 750 *Subtype = X86::INTEL_COREI7_CASCADELAKE; 751 } else { 752 CPU = "skylake-avx512"; 753 *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; 754 } 755 break; 756 757 // Cannonlake: 758 case 0x66: 759 CPU = "cannonlake"; 760 
*Type = X86::INTEL_COREI7; 761 *Subtype = X86::INTEL_COREI7_CANNONLAKE; 762 break; 763 764 // Icelake: 765 case 0x7d: 766 case 0x7e: 767 CPU = "icelake-client"; 768 *Type = X86::INTEL_COREI7; 769 *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; 770 break; 771 772 // Tigerlake: 773 case 0x8c: 774 case 0x8d: 775 CPU = "tigerlake"; 776 *Type = X86::INTEL_COREI7; 777 *Subtype = X86::INTEL_COREI7_TIGERLAKE; 778 break; 779 780 // Alderlake: 781 case 0x97: 782 case 0x9a: 783 CPU = "alderlake"; 784 *Type = X86::INTEL_COREI7; 785 *Subtype = X86::INTEL_COREI7_ALDERLAKE; 786 break; 787 788 // Icelake Xeon: 789 case 0x6a: 790 case 0x6c: 791 CPU = "icelake-server"; 792 *Type = X86::INTEL_COREI7; 793 *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER; 794 break; 795 796 // Sapphire Rapids: 797 case 0x8f: 798 CPU = "sapphirerapids"; 799 *Type = X86::INTEL_COREI7; 800 *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS; 801 break; 802 803 case 0x1c: // Most 45 nm Intel Atom processors 804 case 0x26: // 45 nm Atom Lincroft 805 case 0x27: // 32 nm Atom Medfield 806 case 0x35: // 32 nm Atom Midview 807 case 0x36: // 32 nm Atom Midview 808 CPU = "bonnell"; 809 *Type = X86::INTEL_BONNELL; 810 break; 811 812 // Atom Silvermont codes from the Intel software optimization guide. 813 case 0x37: 814 case 0x4a: 815 case 0x4d: 816 case 0x5a: 817 case 0x5d: 818 case 0x4c: // really airmont 819 CPU = "silvermont"; 820 *Type = X86::INTEL_SILVERMONT; 821 break; 822 // Goldmont: 823 case 0x5c: // Apollo Lake 824 case 0x5f: // Denverton 825 CPU = "goldmont"; 826 *Type = X86::INTEL_GOLDMONT; 827 break; 828 case 0x7a: 829 CPU = "goldmont-plus"; 830 *Type = X86::INTEL_GOLDMONT_PLUS; 831 break; 832 case 0x86: 833 CPU = "tremont"; 834 *Type = X86::INTEL_TREMONT; 835 break; 836 837 // Xeon Phi (Knights Landing + Knights Mill): 838 case 0x57: 839 CPU = "knl"; 840 *Type = X86::INTEL_KNL; 841 break; 842 case 0x85: 843 CPU = "knm"; 844 *Type = X86::INTEL_KNM; 845 break; 846 847 default: // Unknown family 6 CPU, try to guess. 
848 // Don't both with Type/Subtype here, they aren't used by the caller. 849 // They're used above to keep the code in sync with compiler-rt. 850 // TODO detect tigerlake host from model 851 if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) { 852 CPU = "tigerlake"; 853 } else if (testFeature(X86::FEATURE_AVX512VBMI2)) { 854 CPU = "icelake-client"; 855 } else if (testFeature(X86::FEATURE_AVX512VBMI)) { 856 CPU = "cannonlake"; 857 } else if (testFeature(X86::FEATURE_AVX512BF16)) { 858 CPU = "cooperlake"; 859 } else if (testFeature(X86::FEATURE_AVX512VNNI)) { 860 CPU = "cascadelake"; 861 } else if (testFeature(X86::FEATURE_AVX512VL)) { 862 CPU = "skylake-avx512"; 863 } else if (testFeature(X86::FEATURE_AVX512ER)) { 864 CPU = "knl"; 865 } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) { 866 if (testFeature(X86::FEATURE_SHA)) 867 CPU = "goldmont"; 868 else 869 CPU = "skylake"; 870 } else if (testFeature(X86::FEATURE_ADX)) { 871 CPU = "broadwell"; 872 } else if (testFeature(X86::FEATURE_AVX2)) { 873 CPU = "haswell"; 874 } else if (testFeature(X86::FEATURE_AVX)) { 875 CPU = "sandybridge"; 876 } else if (testFeature(X86::FEATURE_SSE4_2)) { 877 if (testFeature(X86::FEATURE_MOVBE)) 878 CPU = "silvermont"; 879 else 880 CPU = "nehalem"; 881 } else if (testFeature(X86::FEATURE_SSE4_1)) { 882 CPU = "penryn"; 883 } else if (testFeature(X86::FEATURE_SSSE3)) { 884 if (testFeature(X86::FEATURE_MOVBE)) 885 CPU = "bonnell"; 886 else 887 CPU = "core2"; 888 } else if (testFeature(X86::FEATURE_64BIT)) { 889 CPU = "core2"; 890 } else if (testFeature(X86::FEATURE_SSE3)) { 891 CPU = "yonah"; 892 } else if (testFeature(X86::FEATURE_SSE2)) { 893 CPU = "pentium-m"; 894 } else if (testFeature(X86::FEATURE_SSE)) { 895 CPU = "pentium3"; 896 } else if (testFeature(X86::FEATURE_MMX)) { 897 CPU = "pentium2"; 898 } else { 899 CPU = "pentiumpro"; 900 } 901 break; 902 } 903 break; 904 case 15: { 905 if (testFeature(X86::FEATURE_64BIT)) { 906 CPU = "nocona"; 907 break; 908 } 909 if 
(testFeature(X86::FEATURE_SSE3)) { 910 CPU = "prescott"; 911 break; 912 } 913 CPU = "pentium4"; 914 break; 915 } 916 default: 917 break; // Unknown. 918 } 919 920 return CPU; 921 } 922 923 static StringRef 924 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, 925 const unsigned *Features, 926 unsigned *Type, unsigned *Subtype) { 927 auto testFeature = [&](unsigned F) { 928 return (Features[F / 32] & (1U << (F % 32))) != 0; 929 }; 930 931 StringRef CPU; 932 933 switch (Family) { 934 case 4: 935 CPU = "i486"; 936 break; 937 case 5: 938 CPU = "pentium"; 939 switch (Model) { 940 case 6: 941 case 7: 942 CPU = "k6"; 943 break; 944 case 8: 945 CPU = "k6-2"; 946 break; 947 case 9: 948 case 13: 949 CPU = "k6-3"; 950 break; 951 case 10: 952 CPU = "geode"; 953 break; 954 } 955 break; 956 case 6: 957 if (testFeature(X86::FEATURE_SSE)) { 958 CPU = "athlon-xp"; 959 break; 960 } 961 CPU = "athlon"; 962 break; 963 case 15: 964 if (testFeature(X86::FEATURE_SSE3)) { 965 CPU = "k8-sse3"; 966 break; 967 } 968 CPU = "k8"; 969 break; 970 case 16: 971 CPU = "amdfam10"; 972 *Type = X86::AMDFAM10H; // "amdfam10" 973 switch (Model) { 974 case 2: 975 *Subtype = X86::AMDFAM10H_BARCELONA; 976 break; 977 case 4: 978 *Subtype = X86::AMDFAM10H_SHANGHAI; 979 break; 980 case 8: 981 *Subtype = X86::AMDFAM10H_ISTANBUL; 982 break; 983 } 984 break; 985 case 20: 986 CPU = "btver1"; 987 *Type = X86::AMD_BTVER1; 988 break; 989 case 21: 990 CPU = "bdver1"; 991 *Type = X86::AMDFAM15H; 992 if (Model >= 0x60 && Model <= 0x7f) { 993 CPU = "bdver4"; 994 *Subtype = X86::AMDFAM15H_BDVER4; 995 break; // 60h-7Fh: Excavator 996 } 997 if (Model >= 0x30 && Model <= 0x3f) { 998 CPU = "bdver3"; 999 *Subtype = X86::AMDFAM15H_BDVER3; 1000 break; // 30h-3Fh: Steamroller 1001 } 1002 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { 1003 CPU = "bdver2"; 1004 *Subtype = X86::AMDFAM15H_BDVER2; 1005 break; // 02h, 10h-1Fh: Piledriver 1006 } 1007 if (Model <= 0x0f) { 1008 *Subtype = X86::AMDFAM15H_BDVER1; 
1009 break; // 00h-0Fh: Bulldozer 1010 } 1011 break; 1012 case 22: 1013 CPU = "btver2"; 1014 *Type = X86::AMD_BTVER2; 1015 break; 1016 case 23: 1017 CPU = "znver1"; 1018 *Type = X86::AMDFAM17H; 1019 if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) { 1020 CPU = "znver2"; 1021 *Subtype = X86::AMDFAM17H_ZNVER2; 1022 break; // 30h-3fh, 71h: Zen2 1023 } 1024 if (Model <= 0x0f) { 1025 *Subtype = X86::AMDFAM17H_ZNVER1; 1026 break; // 00h-0Fh: Zen1 1027 } 1028 break; 1029 case 25: 1030 CPU = "znver3"; 1031 *Type = X86::AMDFAM19H; 1032 if (Model <= 0x0f) { 1033 *Subtype = X86::AMDFAM19H_ZNVER3; 1034 break; // 00h-0Fh: Zen3 1035 } 1036 break; 1037 default: 1038 break; // Unknown AMD CPU. 1039 } 1040 1041 return CPU; 1042 } 1043 1044 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, 1045 unsigned *Features) { 1046 unsigned EAX, EBX; 1047 1048 auto setFeature = [&](unsigned F) { 1049 Features[F / 32] |= 1U << (F % 32); 1050 }; 1051 1052 if ((EDX >> 15) & 1) 1053 setFeature(X86::FEATURE_CMOV); 1054 if ((EDX >> 23) & 1) 1055 setFeature(X86::FEATURE_MMX); 1056 if ((EDX >> 25) & 1) 1057 setFeature(X86::FEATURE_SSE); 1058 if ((EDX >> 26) & 1) 1059 setFeature(X86::FEATURE_SSE2); 1060 1061 if ((ECX >> 0) & 1) 1062 setFeature(X86::FEATURE_SSE3); 1063 if ((ECX >> 1) & 1) 1064 setFeature(X86::FEATURE_PCLMUL); 1065 if ((ECX >> 9) & 1) 1066 setFeature(X86::FEATURE_SSSE3); 1067 if ((ECX >> 12) & 1) 1068 setFeature(X86::FEATURE_FMA); 1069 if ((ECX >> 19) & 1) 1070 setFeature(X86::FEATURE_SSE4_1); 1071 if ((ECX >> 20) & 1) { 1072 setFeature(X86::FEATURE_SSE4_2); 1073 setFeature(X86::FEATURE_CRC32); 1074 } 1075 if ((ECX >> 23) & 1) 1076 setFeature(X86::FEATURE_POPCNT); 1077 if ((ECX >> 25) & 1) 1078 setFeature(X86::FEATURE_AES); 1079 1080 if ((ECX >> 22) & 1) 1081 setFeature(X86::FEATURE_MOVBE); 1082 1083 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 1084 // indicates that the AVX registers will be saved and restored on context 1085 
// switch, then we have full AVX support. 1086 const unsigned AVXBits = (1 << 27) | (1 << 28); 1087 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && 1088 ((EAX & 0x6) == 0x6); 1089 #if defined(__APPLE__) 1090 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 1091 // save the AVX512 context if we use AVX512 instructions, even the bit is not 1092 // set right now. 1093 bool HasAVX512Save = true; 1094 #else 1095 // AVX512 requires additional context to be saved by the OS. 1096 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); 1097 #endif 1098 1099 if (HasAVX) 1100 setFeature(X86::FEATURE_AVX); 1101 1102 bool HasLeaf7 = 1103 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1104 1105 if (HasLeaf7 && ((EBX >> 3) & 1)) 1106 setFeature(X86::FEATURE_BMI); 1107 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) 1108 setFeature(X86::FEATURE_AVX2); 1109 if (HasLeaf7 && ((EBX >> 8) & 1)) 1110 setFeature(X86::FEATURE_BMI2); 1111 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) 1112 setFeature(X86::FEATURE_AVX512F); 1113 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) 1114 setFeature(X86::FEATURE_AVX512DQ); 1115 if (HasLeaf7 && ((EBX >> 19) & 1)) 1116 setFeature(X86::FEATURE_ADX); 1117 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) 1118 setFeature(X86::FEATURE_AVX512IFMA); 1119 if (HasLeaf7 && ((EBX >> 23) & 1)) 1120 setFeature(X86::FEATURE_CLFLUSHOPT); 1121 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) 1122 setFeature(X86::FEATURE_AVX512PF); 1123 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) 1124 setFeature(X86::FEATURE_AVX512ER); 1125 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) 1126 setFeature(X86::FEATURE_AVX512CD); 1127 if (HasLeaf7 && ((EBX >> 29) & 1)) 1128 setFeature(X86::FEATURE_SHA); 1129 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) 1130 setFeature(X86::FEATURE_AVX512BW); 1131 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) 1132 
setFeature(X86::FEATURE_AVX512VL); 1133 1134 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) 1135 setFeature(X86::FEATURE_AVX512VBMI); 1136 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) 1137 setFeature(X86::FEATURE_AVX512VBMI2); 1138 if (HasLeaf7 && ((ECX >> 8) & 1)) 1139 setFeature(X86::FEATURE_GFNI); 1140 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) 1141 setFeature(X86::FEATURE_VPCLMULQDQ); 1142 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) 1143 setFeature(X86::FEATURE_AVX512VNNI); 1144 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save) 1145 setFeature(X86::FEATURE_AVX512BITALG); 1146 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) 1147 setFeature(X86::FEATURE_AVX512VPOPCNTDQ); 1148 1149 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) 1150 setFeature(X86::FEATURE_AVX5124VNNIW); 1151 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) 1152 setFeature(X86::FEATURE_AVX5124FMAPS); 1153 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) 1154 setFeature(X86::FEATURE_AVX512VP2INTERSECT); 1155 1156 bool HasLeaf7Subleaf1 = 1157 MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 1158 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) 1159 setFeature(X86::FEATURE_AVX512BF16); 1160 1161 unsigned MaxExtLevel; 1162 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1163 1164 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1165 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1166 if (HasExtLeaf1 && ((ECX >> 6) & 1)) 1167 setFeature(X86::FEATURE_SSE4_A); 1168 if (HasExtLeaf1 && ((ECX >> 11) & 1)) 1169 setFeature(X86::FEATURE_XOP); 1170 if (HasExtLeaf1 && ((ECX >> 16) & 1)) 1171 setFeature(X86::FEATURE_FMA4); 1172 1173 if (HasExtLeaf1 && ((EDX >> 29) & 1)) 1174 setFeature(X86::FEATURE_64BIT); 1175 } 1176 1177 StringRef sys::getHostCPUName() { 1178 unsigned MaxLeaf = 0; 1179 const VendorSignatures Vendor = getVendorSignature(&MaxLeaf); 1180 if (Vendor == VendorSignatures::UNKNOWN) 1181 return "generic"; 1182 
1183 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1184 getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); 1185 1186 unsigned Family = 0, Model = 0; 1187 unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0}; 1188 detectX86FamilyModel(EAX, &Family, &Model); 1189 getAvailableFeatures(ECX, EDX, MaxLeaf, Features); 1190 1191 // These aren't consumed in this file, but we try to keep some source code the 1192 // same or similar to compiler-rt. 1193 unsigned Type = 0; 1194 unsigned Subtype = 0; 1195 1196 StringRef CPU; 1197 1198 if (Vendor == VendorSignatures::GENUINE_INTEL) { 1199 CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type, 1200 &Subtype); 1201 } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) { 1202 CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, 1203 &Subtype); 1204 } 1205 1206 if (!CPU.empty()) 1207 return CPU; 1208 1209 return "generic"; 1210 } 1211 1212 #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) 1213 StringRef sys::getHostCPUName() { 1214 host_basic_info_data_t hostInfo; 1215 mach_msg_type_number_t infoCount; 1216 1217 infoCount = HOST_BASIC_INFO_COUNT; 1218 mach_port_t hostPort = mach_host_self(); 1219 host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, 1220 &infoCount); 1221 mach_port_deallocate(mach_task_self(), hostPort); 1222 1223 if (hostInfo.cpu_type != CPU_TYPE_POWERPC) 1224 return "generic"; 1225 1226 switch (hostInfo.cpu_subtype) { 1227 case CPU_SUBTYPE_POWERPC_601: 1228 return "601"; 1229 case CPU_SUBTYPE_POWERPC_602: 1230 return "602"; 1231 case CPU_SUBTYPE_POWERPC_603: 1232 return "603"; 1233 case CPU_SUBTYPE_POWERPC_603e: 1234 return "603e"; 1235 case CPU_SUBTYPE_POWERPC_603ev: 1236 return "603ev"; 1237 case CPU_SUBTYPE_POWERPC_604: 1238 return "604"; 1239 case CPU_SUBTYPE_POWERPC_604e: 1240 return "604e"; 1241 case CPU_SUBTYPE_POWERPC_620: 1242 return "620"; 1243 case CPU_SUBTYPE_POWERPC_750: 1244 return "750"; 1245 case CPU_SUBTYPE_POWERPC_7400: 1246 return 
"7400"; 1247 case CPU_SUBTYPE_POWERPC_7450: 1248 return "7450"; 1249 case CPU_SUBTYPE_POWERPC_970: 1250 return "970"; 1251 default:; 1252 } 1253 1254 return "generic"; 1255 } 1256 #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__)) 1257 StringRef sys::getHostCPUName() { 1258 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1259 StringRef Content = P ? P->getBuffer() : ""; 1260 return detail::getHostCPUNameForPowerPC(Content); 1261 } 1262 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1263 StringRef sys::getHostCPUName() { 1264 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1265 StringRef Content = P ? P->getBuffer() : ""; 1266 return detail::getHostCPUNameForARM(Content); 1267 } 1268 #elif defined(__linux__) && defined(__s390x__) 1269 StringRef sys::getHostCPUName() { 1270 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1271 StringRef Content = P ? P->getBuffer() : ""; 1272 return detail::getHostCPUNameForS390x(Content); 1273 } 1274 #elif defined(__MVS__) 1275 StringRef sys::getHostCPUName() { 1276 // Get pointer to Communications Vector Table (CVT). 1277 // The pointer is located at offset 16 of the Prefixed Save Area (PSA). 1278 // It is stored as 31 bit pointer and will be zero-extended to 64 bit. 1279 int *StartToCVTOffset = reinterpret_cast<int *>(0x10); 1280 // Since its stored as a 31-bit pointer, get the 4 bytes from the start 1281 // of address. 1282 int ReadValue = *StartToCVTOffset; 1283 // Explicitly clear the high order bit. 1284 ReadValue = (ReadValue & 0x7FFFFFFF); 1285 char *CVT = reinterpret_cast<char *>(ReadValue); 1286 // The model number is located in the CVT prefix at offset -6 and stored as 1287 // signless packed decimal. 1288 uint16_t Id = *(uint16_t *)&CVT[-6]; 1289 // Convert number to integer. 1290 Id = decodePackedBCD<uint16_t>(Id, false); 1291 // Check for vector support. It's stored in field CVTFLAG5 (offset 244), 1292 // bit CVTVEF (X'80'). 
The facilities list is part of the PSA but the vector 1293 // extension can only be used if bit CVTVEF is on. 1294 bool HaveVectorSupport = CVT[244] & 0x80; 1295 return getCPUNameFromS390Model(Id, HaveVectorSupport); 1296 } 1297 #elif defined(__APPLE__) && defined(__aarch64__) 1298 StringRef sys::getHostCPUName() { 1299 return "cyclone"; 1300 } 1301 #elif defined(__APPLE__) && defined(__arm__) 1302 StringRef sys::getHostCPUName() { 1303 host_basic_info_data_t hostInfo; 1304 mach_msg_type_number_t infoCount; 1305 1306 infoCount = HOST_BASIC_INFO_COUNT; 1307 mach_port_t hostPort = mach_host_self(); 1308 host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, 1309 &infoCount); 1310 mach_port_deallocate(mach_task_self(), hostPort); 1311 1312 if (hostInfo.cpu_type != CPU_TYPE_ARM) { 1313 assert(false && "CPUType not equal to ARM should not be possible on ARM"); 1314 return "generic"; 1315 } 1316 switch (hostInfo.cpu_subtype) { 1317 case CPU_SUBTYPE_ARM_V7S: 1318 return "swift"; 1319 default:; 1320 } 1321 1322 return "generic"; 1323 } 1324 #elif defined(_AIX) 1325 StringRef sys::getHostCPUName() { 1326 switch (_system_configuration.implementation) { 1327 case POWER_4: 1328 if (_system_configuration.version == PV_4_3) 1329 return "970"; 1330 return "pwr4"; 1331 case POWER_5: 1332 if (_system_configuration.version == PV_5) 1333 return "pwr5"; 1334 return "pwr5x"; 1335 case POWER_6: 1336 if (_system_configuration.version == PV_6_Compat) 1337 return "pwr6"; 1338 return "pwr6x"; 1339 case POWER_7: 1340 return "pwr7"; 1341 case POWER_8: 1342 return "pwr8"; 1343 case POWER_9: 1344 return "pwr9"; 1345 // TODO: simplify this once the macro is available in all OS levels. 
1346 #ifdef POWER_10 1347 case POWER_10: 1348 #else 1349 case 0x40000: 1350 #endif 1351 return "pwr10"; 1352 default: 1353 return "generic"; 1354 } 1355 } 1356 #elif defined(__riscv) 1357 StringRef sys::getHostCPUName() { 1358 #if __riscv_xlen == 64 1359 return "generic-rv64"; 1360 #elif __riscv_xlen == 32 1361 return "generic-rv32"; 1362 #else 1363 #error "Unhandled value of __riscv_xlen" 1364 #endif 1365 } 1366 #else 1367 StringRef sys::getHostCPUName() { return "generic"; } 1368 namespace llvm { 1369 namespace sys { 1370 namespace detail { 1371 namespace x86 { 1372 1373 VendorSignatures getVendorSignature(unsigned *MaxLeaf) { 1374 return VendorSignatures::UNKNOWN; 1375 } 1376 1377 } // namespace x86 1378 } // namespace detail 1379 } // namespace sys 1380 } // namespace llvm 1381 #endif 1382 1383 #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__)) 1384 // On Linux, the number of physical cores can be computed from /proc/cpuinfo, 1385 // using the number of unique physical/core id pairs. The following 1386 // implementation reads the /proc/cpuinfo format on an x86_64 system. 1387 int computeHostNumPhysicalCores() { 1388 // Enabled represents the number of physical id/core id pairs with at least 1389 // one processor id enabled by the CPU affinity mask. 1390 cpu_set_t Affinity, Enabled; 1391 if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0) 1392 return -1; 1393 CPU_ZERO(&Enabled); 1394 1395 // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be 1396 // mmapped because it appears to have 0 size. 
1397 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 1398 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); 1399 if (std::error_code EC = Text.getError()) { 1400 llvm::errs() << "Can't read " 1401 << "/proc/cpuinfo: " << EC.message() << "\n"; 1402 return -1; 1403 } 1404 SmallVector<StringRef, 8> strs; 1405 (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1, 1406 /*KeepEmpty=*/false); 1407 int CurProcessor = -1; 1408 int CurPhysicalId = -1; 1409 int CurSiblings = -1; 1410 int CurCoreId = -1; 1411 for (StringRef Line : strs) { 1412 std::pair<StringRef, StringRef> Data = Line.split(':'); 1413 auto Name = Data.first.trim(); 1414 auto Val = Data.second.trim(); 1415 // These fields are available if the kernel is configured with CONFIG_SMP. 1416 if (Name == "processor") 1417 Val.getAsInteger(10, CurProcessor); 1418 else if (Name == "physical id") 1419 Val.getAsInteger(10, CurPhysicalId); 1420 else if (Name == "siblings") 1421 Val.getAsInteger(10, CurSiblings); 1422 else if (Name == "core id") { 1423 Val.getAsInteger(10, CurCoreId); 1424 // The processor id corresponds to an index into cpu_set_t. 1425 if (CPU_ISSET(CurProcessor, &Affinity)) 1426 CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled); 1427 } 1428 } 1429 return CPU_COUNT(&Enabled); 1430 } 1431 #elif defined(__linux__) && defined(__powerpc__) 1432 int computeHostNumPhysicalCores() { 1433 cpu_set_t Affinity; 1434 if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0) 1435 return CPU_COUNT(&Affinity); 1436 1437 // The call to sched_getaffinity() may have failed because the Affinity 1438 // mask is too small for the number of CPU's on the system (i.e. the 1439 // system has more than 1024 CPUs). Allocate a mask large enough for 1440 // twice as many CPUs. 
1441 cpu_set_t *DynAffinity; 1442 DynAffinity = CPU_ALLOC(2048); 1443 if (sched_getaffinity(0, CPU_ALLOC_SIZE(2048), DynAffinity) == 0) { 1444 int NumCPUs = CPU_COUNT(DynAffinity); 1445 CPU_FREE(DynAffinity); 1446 return NumCPUs; 1447 } 1448 return -1; 1449 } 1450 #elif defined(__linux__) && defined(__s390x__) 1451 int computeHostNumPhysicalCores() { return sysconf(_SC_NPROCESSORS_ONLN); } 1452 #elif defined(__APPLE__) 1453 #include <sys/param.h> 1454 #include <sys/sysctl.h> 1455 1456 // Gets the number of *physical cores* on the machine. 1457 int computeHostNumPhysicalCores() { 1458 uint32_t count; 1459 size_t len = sizeof(count); 1460 sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0); 1461 if (count < 1) { 1462 int nm[2]; 1463 nm[0] = CTL_HW; 1464 nm[1] = HW_AVAILCPU; 1465 sysctl(nm, 2, &count, &len, NULL, 0); 1466 if (count < 1) 1467 return -1; 1468 } 1469 return count; 1470 } 1471 #elif defined(__MVS__) 1472 int computeHostNumPhysicalCores() { 1473 enum { 1474 // Byte offset of the pointer to the Communications Vector Table (CVT) in 1475 // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and 1476 // will be zero-extended to uintptr_t. 1477 FLCCVT = 16, 1478 // Byte offset of the pointer to the Common System Data Area (CSD) in the 1479 // CVT. The table entry is a 31-bit pointer and will be zero-extended to 1480 // uintptr_t. 1481 CVTCSD = 660, 1482 // Byte offset to the number of live CPs in the LPAR, stored as a signed 1483 // 32-bit value in the table. 
1484 CSD_NUMBER_ONLINE_STANDARD_CPS = 264, 1485 }; 1486 char *PSA = 0; 1487 char *CVT = reinterpret_cast<char *>( 1488 static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT]))); 1489 char *CSD = reinterpret_cast<char *>( 1490 static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD]))); 1491 return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]); 1492 } 1493 #elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0 1494 // Defined in llvm/lib/Support/Windows/Threading.inc 1495 int computeHostNumPhysicalCores(); 1496 #else 1497 // On other systems, return -1 to indicate unknown. 1498 static int computeHostNumPhysicalCores() { return -1; } 1499 #endif 1500 1501 int sys::getHostNumPhysicalCores() { 1502 static int NumCores = computeHostNumPhysicalCores(); 1503 return NumCores; 1504 } 1505 1506 #if defined(__i386__) || defined(_M_IX86) || \ 1507 defined(__x86_64__) || defined(_M_X64) 1508 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1509 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1510 unsigned MaxLevel; 1511 1512 if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1) 1513 return false; 1514 1515 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); 1516 1517 Features["cx8"] = (EDX >> 8) & 1; 1518 Features["cmov"] = (EDX >> 15) & 1; 1519 Features["mmx"] = (EDX >> 23) & 1; 1520 Features["fxsr"] = (EDX >> 24) & 1; 1521 Features["sse"] = (EDX >> 25) & 1; 1522 Features["sse2"] = (EDX >> 26) & 1; 1523 1524 Features["sse3"] = (ECX >> 0) & 1; 1525 Features["pclmul"] = (ECX >> 1) & 1; 1526 Features["ssse3"] = (ECX >> 9) & 1; 1527 Features["cx16"] = (ECX >> 13) & 1; 1528 Features["sse4.1"] = (ECX >> 19) & 1; 1529 Features["sse4.2"] = (ECX >> 20) & 1; 1530 Features["crc32"] = Features["sse4.2"]; 1531 Features["movbe"] = (ECX >> 22) & 1; 1532 Features["popcnt"] = (ECX >> 23) & 1; 1533 Features["aes"] = (ECX >> 25) & 1; 1534 Features["rdrnd"] = (ECX >> 30) & 1; 1535 1536 // If CPUID indicates support for XSAVE, XRESTORE and AVX, 
and XGETBV 1537 // indicates that the AVX registers will be saved and restored on context 1538 // switch, then we have full AVX support. 1539 bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX); 1540 bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6); 1541 #if defined(__APPLE__) 1542 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 1543 // save the AVX512 context if we use AVX512 instructions, even the bit is not 1544 // set right now. 1545 bool HasAVX512Save = true; 1546 #else 1547 // AVX512 requires additional context to be saved by the OS. 1548 bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); 1549 #endif 1550 // AMX requires additional context to be saved by the OS. 1551 const unsigned AMXBits = (1 << 17) | (1 << 18); 1552 bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits); 1553 1554 Features["avx"] = HasAVXSave; 1555 Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave; 1556 // Only enable XSAVE if OS has enabled support for saving YMM state. 
1557 Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave; 1558 Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave; 1559 1560 unsigned MaxExtLevel; 1561 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1562 1563 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1564 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1565 Features["sahf"] = HasExtLeaf1 && ((ECX >> 0) & 1); 1566 Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); 1567 Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); 1568 Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); 1569 Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; 1570 Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1); 1571 Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; 1572 Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); 1573 Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1); 1574 1575 Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1); 1576 1577 // Miscellaneous memory related features, detected by 1578 // using the 0x80000008 leaf of the CPUID instruction 1579 bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 && 1580 !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX); 1581 Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1); 1582 Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1); 1583 1584 bool HasLeaf7 = 1585 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1586 1587 Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); 1588 Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1); 1589 Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); 1590 // AVX2 is only supported if we have the OS save support from AVX. 1591 Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave; 1592 Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); 1593 Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1); 1594 Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); 1595 // AVX512 is only supported if the OS supports the context save for it. 
1596 Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; 1597 Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; 1598 Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); 1599 Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); 1600 Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save; 1601 Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1); 1602 Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1); 1603 Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; 1604 Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; 1605 Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; 1606 Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); 1607 Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; 1608 Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; 1609 1610 Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1); 1611 Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; 1612 Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); 1613 Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1); 1614 Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save; 1615 Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1); 1616 Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1); 1617 Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave; 1618 Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave; 1619 Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save; 1620 Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save; 1621 Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; 1622 Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1); 1623 Features["kl"] = HasLeaf7 && ((ECX >> 23) & 1); // key locker 1624 Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1); 1625 Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1); 1626 Features["movdir64b"] = HasLeaf7 && 
((ECX >> 28) & 1); 1627 Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1); 1628 1629 Features["uintr"] = HasLeaf7 && ((EDX >> 5) & 1); 1630 Features["avx512vp2intersect"] = 1631 HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save; 1632 Features["serialize"] = HasLeaf7 && ((EDX >> 14) & 1); 1633 Features["tsxldtrk"] = HasLeaf7 && ((EDX >> 16) & 1); 1634 // There are two CPUID leafs which information associated with the pconfig 1635 // instruction: 1636 // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th 1637 // bit of EDX), while the EAX=0x1b leaf returns information on the 1638 // availability of specific pconfig leafs. 1639 // The target feature here only refers to the the first of these two. 1640 // Users might need to check for the availability of specific pconfig 1641 // leaves using cpuid, since that information is ignored while 1642 // detecting features using the "-march=native" flag. 1643 // For more info, see X86 ISA docs. 1644 Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1); 1645 Features["amx-bf16"] = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave; 1646 Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save; 1647 Features["amx-tile"] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave; 1648 Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave; 1649 bool HasLeaf7Subleaf1 = 1650 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 1651 Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave; 1652 Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save; 1653 Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1); 1654 1655 bool HasLeafD = MaxLevel >= 0xd && 1656 !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); 1657 1658 // Only enable XSAVE if OS has enabled support for saving YMM state. 
1659 Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave; 1660 Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave; 1661 Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave; 1662 1663 bool HasLeaf14 = MaxLevel >= 0x14 && 1664 !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX); 1665 1666 Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1); 1667 1668 bool HasLeaf19 = 1669 MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX); 1670 Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1); 1671 1672 return true; 1673 } 1674 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1675 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1676 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1677 if (!P) 1678 return false; 1679 1680 SmallVector<StringRef, 32> Lines; 1681 P->getBuffer().split(Lines, "\n"); 1682 1683 SmallVector<StringRef, 32> CPUFeatures; 1684 1685 // Look for the CPU features. 
1686 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 1687 if (Lines[I].startswith("Features")) { 1688 Lines[I].split(CPUFeatures, ' '); 1689 break; 1690 } 1691 1692 #if defined(__aarch64__) 1693 // Keep track of which crypto features we have seen 1694 enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 }; 1695 uint32_t crypto = 0; 1696 #endif 1697 1698 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 1699 StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I]) 1700 #if defined(__aarch64__) 1701 .Case("asimd", "neon") 1702 .Case("fp", "fp-armv8") 1703 .Case("crc32", "crc") 1704 .Case("atomics", "lse") 1705 .Case("sve", "sve") 1706 .Case("sve2", "sve2") 1707 #else 1708 .Case("half", "fp16") 1709 .Case("neon", "neon") 1710 .Case("vfpv3", "vfp3") 1711 .Case("vfpv3d16", "d16") 1712 .Case("vfpv4", "vfp4") 1713 .Case("idiva", "hwdiv-arm") 1714 .Case("idivt", "hwdiv") 1715 #endif 1716 .Default(""); 1717 1718 #if defined(__aarch64__) 1719 // We need to check crypto separately since we need all of the crypto 1720 // extensions to enable the subtarget feature 1721 if (CPUFeatures[I] == "aes") 1722 crypto |= CAP_AES; 1723 else if (CPUFeatures[I] == "pmull") 1724 crypto |= CAP_PMULL; 1725 else if (CPUFeatures[I] == "sha1") 1726 crypto |= CAP_SHA1; 1727 else if (CPUFeatures[I] == "sha2") 1728 crypto |= CAP_SHA2; 1729 #endif 1730 1731 if (LLVMFeatureStr != "") 1732 Features[LLVMFeatureStr] = true; 1733 } 1734 1735 #if defined(__aarch64__) 1736 // If we have all crypto bits we can add the feature 1737 if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2)) 1738 Features["crypto"] = true; 1739 #endif 1740 1741 return true; 1742 } 1743 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64)) 1744 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1745 if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) 1746 Features["neon"] = true; 1747 if 
(IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) 1748 Features["crc"] = true; 1749 if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) 1750 Features["crypto"] = true; 1751 1752 return true; 1753 } 1754 #else 1755 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; } 1756 #endif 1757 1758 std::string sys::getProcessTriple() { 1759 std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE); 1760 Triple PT(Triple::normalize(TargetTripleString)); 1761 1762 if (sizeof(void *) == 8 && PT.isArch32Bit()) 1763 PT = PT.get64BitArchVariant(); 1764 if (sizeof(void *) == 4 && PT.isArch64Bit()) 1765 PT = PT.get32BitArchVariant(); 1766 1767 return PT.str(); 1768 } 1769