1 //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the operating system Host concept. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Support/Host.h" 15 #include "llvm/Support/TargetParser.h" 16 #include "llvm/ADT/SmallSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/ADT/Triple.h" 21 #include "llvm/Config/config.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/FileSystem.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include <assert.h> 27 #include <string.h> 28 29 // Include the platform-specific parts of this class. 
30 #ifdef LLVM_ON_UNIX 31 #include "Unix/Host.inc" 32 #endif 33 #ifdef LLVM_ON_WIN32 34 #include "Windows/Host.inc" 35 #endif 36 #ifdef _MSC_VER 37 #include <intrin.h> 38 #endif 39 #if defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) 40 #include <mach/host_info.h> 41 #include <mach/mach.h> 42 #include <mach/mach_host.h> 43 #include <mach/machine.h> 44 #endif 45 46 #define DEBUG_TYPE "host-detection" 47 48 //===----------------------------------------------------------------------===// 49 // 50 // Implementations of the CPU detection routines 51 // 52 //===----------------------------------------------------------------------===// 53 54 using namespace llvm; 55 56 static std::unique_ptr<llvm::MemoryBuffer> 57 LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() { 58 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 59 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); 60 if (std::error_code EC = Text.getError()) { 61 llvm::errs() << "Can't read " 62 << "/proc/cpuinfo: " << EC.message() << "\n"; 63 return nullptr; 64 } 65 return std::move(*Text); 66 } 67 68 StringRef sys::detail::getHostCPUNameForPowerPC( 69 const StringRef &ProcCpuinfoContent) { 70 // Access to the Processor Version Register (PVR) on PowerPC is privileged, 71 // and so we must use an operating-system interface to determine the current 72 // processor type. On Linux, this is exposed through the /proc/cpuinfo file. 73 const char *generic = "generic"; 74 75 // The cpu line is second (after the 'processor: 0' line), so if this 76 // buffer is too small then something has changed (or is wrong). 77 StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin(); 78 StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end(); 79 80 StringRef::const_iterator CIP = CPUInfoStart; 81 82 StringRef::const_iterator CPUStart = 0; 83 size_t CPULen = 0; 84 85 // We need to find the first line which starts with cpu, spaces, and a colon. 
86 // After the colon, there may be some additional spaces and then the cpu type. 87 while (CIP < CPUInfoEnd && CPUStart == 0) { 88 if (CIP < CPUInfoEnd && *CIP == '\n') 89 ++CIP; 90 91 if (CIP < CPUInfoEnd && *CIP == 'c') { 92 ++CIP; 93 if (CIP < CPUInfoEnd && *CIP == 'p') { 94 ++CIP; 95 if (CIP < CPUInfoEnd && *CIP == 'u') { 96 ++CIP; 97 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 98 ++CIP; 99 100 if (CIP < CPUInfoEnd && *CIP == ':') { 101 ++CIP; 102 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 103 ++CIP; 104 105 if (CIP < CPUInfoEnd) { 106 CPUStart = CIP; 107 while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' && 108 *CIP != ',' && *CIP != '\n')) 109 ++CIP; 110 CPULen = CIP - CPUStart; 111 } 112 } 113 } 114 } 115 } 116 117 if (CPUStart == 0) 118 while (CIP < CPUInfoEnd && *CIP != '\n') 119 ++CIP; 120 } 121 122 if (CPUStart == 0) 123 return generic; 124 125 return StringSwitch<const char *>(StringRef(CPUStart, CPULen)) 126 .Case("604e", "604e") 127 .Case("604", "604") 128 .Case("7400", "7400") 129 .Case("7410", "7400") 130 .Case("7447", "7400") 131 .Case("7455", "7450") 132 .Case("G4", "g4") 133 .Case("POWER4", "970") 134 .Case("PPC970FX", "970") 135 .Case("PPC970MP", "970") 136 .Case("G5", "g5") 137 .Case("POWER5", "g5") 138 .Case("A2", "a2") 139 .Case("POWER6", "pwr6") 140 .Case("POWER7", "pwr7") 141 .Case("POWER8", "pwr8") 142 .Case("POWER8E", "pwr8") 143 .Case("POWER8NVL", "pwr8") 144 .Case("POWER9", "pwr9") 145 .Default(generic); 146 } 147 148 StringRef sys::detail::getHostCPUNameForARM( 149 const StringRef &ProcCpuinfoContent) { 150 // The cpuid register on arm is not accessible from user space. On Linux, 151 // it is exposed through the /proc/cpuinfo file. 152 153 // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line 154 // in all cases. 155 SmallVector<StringRef, 32> Lines; 156 ProcCpuinfoContent.split(Lines, "\n"); 157 158 // Look for the CPU implementer line. 
159 StringRef Implementer; 160 StringRef Hardware; 161 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 162 if (Lines[I].startswith("CPU implementer")) 163 Implementer = Lines[I].substr(15).ltrim("\t :"); 164 if (Lines[I].startswith("Hardware")) 165 Hardware = Lines[I].substr(8).ltrim("\t :"); 166 } 167 168 if (Implementer == "0x41") { // ARM Ltd. 169 // MSM8992/8994 may give cpu part for the core that the kernel is running on, 170 // which is undeterministic and wrong. Always return cortex-a53 for these SoC. 171 if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996")) 172 return "cortex-a53"; 173 174 175 // Look for the CPU part line. 176 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 177 if (Lines[I].startswith("CPU part")) 178 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 179 // values correspond to the "Part number" in the CP15/c0 register. The 180 // contents are specified in the various processor manuals. 181 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 182 .Case("0x926", "arm926ej-s") 183 .Case("0xb02", "mpcore") 184 .Case("0xb36", "arm1136j-s") 185 .Case("0xb56", "arm1156t2-s") 186 .Case("0xb76", "arm1176jz-s") 187 .Case("0xc08", "cortex-a8") 188 .Case("0xc09", "cortex-a9") 189 .Case("0xc0f", "cortex-a15") 190 .Case("0xc20", "cortex-m0") 191 .Case("0xc23", "cortex-m3") 192 .Case("0xc24", "cortex-m4") 193 .Case("0xd04", "cortex-a35") 194 .Case("0xd03", "cortex-a53") 195 .Case("0xd07", "cortex-a57") 196 .Case("0xd08", "cortex-a72") 197 .Case("0xd09", "cortex-a73") 198 .Default("generic"); 199 } 200 201 if (Implementer == "0x51") // Qualcomm Technologies, Inc. 202 // Look for the CPU part line. 203 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 204 if (Lines[I].startswith("CPU part")) 205 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 206 // values correspond to the "Part number" in the CP15/c0 register. 
The 207 // contents are specified in the various processor manuals. 208 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 209 .Case("0x06f", "krait") // APQ8064 210 .Case("0x201", "kryo") 211 .Case("0x205", "kryo") 212 .Case("0x211", "kryo") 213 .Case("0x800", "cortex-a73") 214 .Case("0x801", "cortex-a73") 215 .Case("0xc00", "falkor") 216 .Case("0xc01", "saphira") 217 .Default("generic"); 218 219 return "generic"; 220 } 221 222 StringRef sys::detail::getHostCPUNameForS390x( 223 const StringRef &ProcCpuinfoContent) { 224 // STIDP is a privileged operation, so use /proc/cpuinfo instead. 225 226 // The "processor 0:" line comes after a fair amount of other information, 227 // including a cache breakdown, but this should be plenty. 228 SmallVector<StringRef, 32> Lines; 229 ProcCpuinfoContent.split(Lines, "\n"); 230 231 // Look for the CPU features. 232 SmallVector<StringRef, 32> CPUFeatures; 233 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 234 if (Lines[I].startswith("features")) { 235 size_t Pos = Lines[I].find(":"); 236 if (Pos != StringRef::npos) { 237 Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' '); 238 break; 239 } 240 } 241 242 // We need to check for the presence of vector support independently of 243 // the machine type, since we may only use the vector register set when 244 // supported by the kernel (and hypervisor). 245 bool HaveVectorSupport = false; 246 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 247 if (CPUFeatures[I] == "vx") 248 HaveVectorSupport = true; 249 } 250 251 // Now check the processor machine type. 
252 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 253 if (Lines[I].startswith("processor ")) { 254 size_t Pos = Lines[I].find("machine = "); 255 if (Pos != StringRef::npos) { 256 Pos += sizeof("machine = ") - 1; 257 unsigned int Id; 258 if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) { 259 if (Id >= 3906 && HaveVectorSupport) 260 return "z14"; 261 if (Id >= 2964 && HaveVectorSupport) 262 return "z13"; 263 if (Id >= 2827) 264 return "zEC12"; 265 if (Id >= 2817) 266 return "z196"; 267 } 268 } 269 break; 270 } 271 } 272 273 return "generic"; 274 } 275 276 StringRef sys::detail::getHostCPUNameForBPF() { 277 #if !defined(__linux__) || !defined(__x86_64__) 278 return "generic"; 279 #else 280 uint8_t insns[40] __attribute__ ((aligned (8))) = 281 /* BPF_MOV64_IMM(BPF_REG_0, 0) */ 282 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 283 /* BPF_MOV64_IMM(BPF_REG_2, 1) */ 284 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 285 /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ 286 0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 287 /* BPF_MOV64_IMM(BPF_REG_0, 1) */ 288 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 289 /* BPF_EXIT_INSN() */ 290 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 291 292 struct bpf_prog_load_attr { 293 uint32_t prog_type; 294 uint32_t insn_cnt; 295 uint64_t insns; 296 uint64_t license; 297 uint32_t log_level; 298 uint32_t log_size; 299 uint64_t log_buf; 300 uint32_t kern_version; 301 uint32_t prog_flags; 302 } attr = {}; 303 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ 304 attr.insn_cnt = 5; 305 attr.insns = (uint64_t)insns; 306 attr.license = (uint64_t)"DUMMY"; 307 308 int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr)); 309 if (fd >= 0) { 310 close(fd); 311 return "v2"; 312 } 313 return "v1"; 314 #endif 315 } 316 317 #if defined(__i386__) || defined(_M_IX86) || \ 318 defined(__x86_64__) || defined(_M_X64) 319 320 enum VendorSignatures { 321 SIG_INTEL = 0x756e6547 /* Genu */, 322 SIG_AMD = 0x68747541 /* Auth */ 323 }; 324 325 
/// Indices of the CPU features tracked by the host detection code.
///
/// An index below 32 is used as a bit position in the first feature word
/// (1 << FEATURE_X). An index of 32 or above is used as a bit position in the
/// second feature word after subtracting 32 (1 << (FEATURE_X - 32)).
enum ProcessorFeatures {
  FEATURE_CMOV = 0,
  FEATURE_MMX,
  FEATURE_POPCNT,
  FEATURE_SSE,
  FEATURE_SSE2,
  FEATURE_SSE3,
  FEATURE_SSSE3,
  FEATURE_SSE4_1,
  FEATURE_SSE4_2,
  FEATURE_AVX,
  FEATURE_AVX2,
  FEATURE_SSE4_A,
  FEATURE_FMA4,
  FEATURE_XOP,
  FEATURE_FMA,
  FEATURE_AVX512F,
  FEATURE_BMI,
  FEATURE_BMI2,
  FEATURE_AES,
  FEATURE_PCLMUL,
  FEATURE_AVX512VL,
  FEATURE_AVX512BW,
  FEATURE_AVX512DQ,
  FEATURE_AVX512CD,
  FEATURE_AVX512ER,
  FEATURE_AVX512PF,
  FEATURE_AVX512VBMI,
  FEATURE_AVX512IFMA,
  FEATURE_AVX5124VNNIW,
  FEATURE_AVX5124FMAPS,
  FEATURE_AVX512VPOPCNTDQ,
  // Only one bit free left in the first 32 features.
  FEATURE_MOVBE = 32,
  FEATURE_ADX,
  FEATURE_EM64T,
  FEATURE_CLFLUSHOPT,
  FEATURE_SHA,
};

// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
// Removal of cpuid.h header motivated by PR30384
// Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp
// or test-suite, but are used in external projects e.g. libstdcxx
//
/// Returns false only for a 32-bit x86 build with GCC/Clang in which the
/// eflags probe below finds that bit 0x00200000 cannot be toggled; returns
/// true in every other configuration.
static bool isCpuIdSupported() {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__i386__)
  int __cpuid_supported;
  // Read eflags, flip bit 0x00200000, write it back and read it again. The
  // result is 1 if the re-read value differs from the original, else 0.
  __asm__(" pushfl\n"
          " popl %%eax\n"
          " movl %%eax,%%ecx\n"
          " xorl $0x00200000,%%eax\n"
          " pushl %%eax\n"
          " popfl\n"
          " pushfl\n"
          " popl %%eax\n"
          " movl $0,%0\n"
          " cmpl %%eax,%%ecx\n"
          " je 1f\n"
          " movl $1,%0\n"
          "1:"
          : "=r"(__cpuid_supported)
          :
          : "eax", "ecx");
  if (!__cpuid_supported)
    return false;
#endif
  return true;
#endif
  return true;
}

/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments.  If we can't run cpuid on the host, return true.
///
/// \param value the leaf number loaded into EAX before executing cpuid.
/// \param rEAX, rEBX, rECX, rEDX receive the four result registers; they are
///        left unwritten when the function returns true.
/// \returns false when cpuid was executed, true when this build has no way
///          to execute it.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  // rbx is parked in rsi around the instruction; the final xchg restores rbx
  // and leaves the cpuid EBX result in rsi, which is the "=S" output.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  // Same ebx-preserving sequence with 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}

/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments.  If we can't run cpuid on the host,
/// return true.
441 static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, 442 unsigned *rEAX, unsigned *rEBX, unsigned *rECX, 443 unsigned *rEDX) { 444 #if defined(__GNUC__) || defined(__clang__) 445 #if defined(__x86_64__) 446 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 447 // FIXME: should we save this for Clang? 448 __asm__("movq\t%%rbx, %%rsi\n\t" 449 "cpuid\n\t" 450 "xchgq\t%%rbx, %%rsi\n\t" 451 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 452 : "a"(value), "c"(subleaf)); 453 return false; 454 #elif defined(__i386__) 455 __asm__("movl\t%%ebx, %%esi\n\t" 456 "cpuid\n\t" 457 "xchgl\t%%ebx, %%esi\n\t" 458 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 459 : "a"(value), "c"(subleaf)); 460 return false; 461 #else 462 return true; 463 #endif 464 #elif defined(_MSC_VER) 465 int registers[4]; 466 __cpuidex(registers, value, subleaf); 467 *rEAX = registers[0]; 468 *rEBX = registers[1]; 469 *rECX = registers[2]; 470 *rEDX = registers[3]; 471 return false; 472 #else 473 return true; 474 #endif 475 } 476 477 // Read control register 0 (XCR0). Used to detect features such as AVX. 478 static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { 479 #if defined(__GNUC__) || defined(__clang__) 480 // Check xgetbv; this uses a .byte sequence instead of the instruction 481 // directly because older assemblers do not include support for xgetbv and 482 // there is no easy way to conditionally compile based on the assembler used. 
483 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); 484 return false; 485 #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) 486 unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); 487 *rEAX = Result; 488 *rEDX = Result >> 32; 489 return false; 490 #else 491 return true; 492 #endif 493 } 494 495 static void detectX86FamilyModel(unsigned EAX, unsigned *Family, 496 unsigned *Model) { 497 *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 498 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 499 if (*Family == 6 || *Family == 0xf) { 500 if (*Family == 0xf) 501 // Examine extended family ID if family ID is F. 502 *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 503 // Examine extended model ID if family ID is 6 or F. 504 *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 505 } 506 } 507 508 static void 509 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, 510 unsigned Brand_id, unsigned Features, 511 unsigned Features2, unsigned *Type, 512 unsigned *Subtype) { 513 if (Brand_id != 0) 514 return; 515 switch (Family) { 516 case 3: 517 *Type = X86::INTEL_i386; 518 break; 519 case 4: 520 *Type = X86::INTEL_i486; 521 break; 522 case 5: 523 if (Features & (1 << FEATURE_MMX)) { 524 *Type = X86::INTEL_PENTIUM_MMX; 525 break; 526 } 527 *Type = X86::INTEL_PENTIUM; 528 break; 529 case 6: 530 switch (Model) { 531 case 0x01: // Pentium Pro processor 532 *Type = X86::INTEL_PENTIUM_PRO; 533 break; 534 case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor, 535 // model 03 536 case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor, 537 // model 05, and Intel Celeron processor, model 05 538 case 0x06: // Celeron processor, model 06 539 *Type = X86::INTEL_PENTIUM_II; 540 break; 541 case 0x07: // Pentium III processor, model 07, and Pentium III Xeon 542 // processor, model 07 543 case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor, 544 // model 08, and Celeron processor, model 08 545 
case 0x0a: // Pentium III Xeon processor, model 0Ah 546 case 0x0b: // Pentium III processor, model 0Bh 547 *Type = X86::INTEL_PENTIUM_III; 548 break; 549 case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09. 550 case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model 551 // 0Dh. All processors are manufactured using the 90 nm process. 552 case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579 553 // Integrated Processor with Intel QuickAssist Technology 554 *Type = X86::INTEL_PENTIUM_M; 555 break; 556 case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model 557 // 0Eh. All processors are manufactured using the 65 nm process. 558 *Type = X86::INTEL_CORE_DUO; 559 break; // yonah 560 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile 561 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad 562 // mobile processor, Intel Core 2 Extreme processor, Intel 563 // Pentium Dual-Core processor, Intel Xeon processor, model 564 // 0Fh. All processors are manufactured using the 65 nm process. 565 case 0x16: // Intel Celeron processor model 16h. All processors are 566 // manufactured using the 65 nm process 567 *Type = X86::INTEL_CORE2; // "core2" 568 *Subtype = X86::INTEL_CORE2_65; 569 break; 570 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model 571 // 17h. All processors are manufactured using the 45 nm process. 572 // 573 // 45nm: Penryn , Wolfdale, Yorkfield (XE) 574 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using 575 // the 45 nm process. 576 *Type = X86::INTEL_CORE2; // "penryn" 577 *Subtype = X86::INTEL_CORE2_45; 578 break; 579 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All 580 // processors are manufactured using the 45 nm process. 581 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. 582 // As found in a Summer 2010 model iMac. 
583 case 0x1f: 584 case 0x2e: // Nehalem EX 585 *Type = X86::INTEL_COREI7; // "nehalem" 586 *Subtype = X86::INTEL_COREI7_NEHALEM; 587 break; 588 case 0x25: // Intel Core i7, laptop version. 589 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All 590 // processors are manufactured using the 32 nm process. 591 case 0x2f: // Westmere EX 592 *Type = X86::INTEL_COREI7; // "westmere" 593 *Subtype = X86::INTEL_COREI7_WESTMERE; 594 break; 595 case 0x2a: // Intel Core i7 processor. All processors are manufactured 596 // using the 32 nm process. 597 case 0x2d: 598 *Type = X86::INTEL_COREI7; //"sandybridge" 599 *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; 600 break; 601 case 0x3a: 602 case 0x3e: // Ivy Bridge EP 603 *Type = X86::INTEL_COREI7; // "ivybridge" 604 *Subtype = X86::INTEL_COREI7_IVYBRIDGE; 605 break; 606 607 // Haswell: 608 case 0x3c: 609 case 0x3f: 610 case 0x45: 611 case 0x46: 612 *Type = X86::INTEL_COREI7; // "haswell" 613 *Subtype = X86::INTEL_COREI7_HASWELL; 614 break; 615 616 // Broadwell: 617 case 0x3d: 618 case 0x47: 619 case 0x4f: 620 case 0x56: 621 *Type = X86::INTEL_COREI7; // "broadwell" 622 *Subtype = X86::INTEL_COREI7_BROADWELL; 623 break; 624 625 // Skylake: 626 case 0x4e: // Skylake mobile 627 case 0x5e: // Skylake desktop 628 case 0x8e: // Kaby Lake mobile 629 case 0x9e: // Kaby Lake desktop 630 *Type = X86::INTEL_COREI7; // "skylake" 631 *Subtype = X86::INTEL_COREI7_SKYLAKE; 632 break; 633 634 // Skylake Xeon: 635 case 0x55: 636 *Type = X86::INTEL_COREI7; 637 *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" 638 break; 639 640 case 0x1c: // Most 45 nm Intel Atom processors 641 case 0x26: // 45 nm Atom Lincroft 642 case 0x27: // 32 nm Atom Medfield 643 case 0x35: // 32 nm Atom Midview 644 case 0x36: // 32 nm Atom Midview 645 *Type = X86::INTEL_BONNELL; 646 break; // "bonnell" 647 648 // Atom Silvermont codes from the Intel software optimization guide. 
649 case 0x37: 650 case 0x4a: 651 case 0x4d: 652 case 0x5a: 653 case 0x5d: 654 case 0x4c: // really airmont 655 *Type = X86::INTEL_SILVERMONT; 656 break; // "silvermont" 657 // Goldmont: 658 case 0x5c: 659 case 0x5f: 660 *Type = X86::INTEL_GOLDMONT; 661 break; // "goldmont" 662 case 0x57: 663 *Type = X86::INTEL_KNL; // knl 664 break; 665 case 0x85: 666 *Type = X86::INTEL_KNM; // knm 667 break; 668 669 default: // Unknown family 6 CPU, try to guess. 670 if (Features & (1 << FEATURE_AVX512F)) { 671 if (Features & (1 << FEATURE_AVX512VL)) { 672 *Type = X86::INTEL_COREI7; 673 *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; 674 } else { 675 *Type = X86::INTEL_KNL; // knl 676 } 677 break; 678 } 679 if (Features2 & (1 << (FEATURE_CLFLUSHOPT - 32))) { 680 if (Features2 & (1 << (FEATURE_SHA - 32))) { 681 *Type = X86::INTEL_GOLDMONT; 682 } else { 683 *Type = X86::INTEL_COREI7; 684 *Subtype = X86::INTEL_COREI7_SKYLAKE; 685 } 686 break; 687 } 688 if (Features2 & (1 << (FEATURE_ADX - 32))) { 689 *Type = X86::INTEL_COREI7; 690 *Subtype = X86::INTEL_COREI7_BROADWELL; 691 break; 692 } 693 if (Features & (1 << FEATURE_AVX2)) { 694 *Type = X86::INTEL_COREI7; 695 *Subtype = X86::INTEL_COREI7_HASWELL; 696 break; 697 } 698 if (Features & (1 << FEATURE_AVX)) { 699 *Type = X86::INTEL_COREI7; 700 *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; 701 break; 702 } 703 if (Features & (1 << FEATURE_SSE4_2)) { 704 if (Features2 & (1 << (FEATURE_MOVBE - 32))) { 705 *Type = X86::INTEL_SILVERMONT; 706 } else { 707 *Type = X86::INTEL_COREI7; 708 *Subtype = X86::INTEL_COREI7_NEHALEM; 709 } 710 break; 711 } 712 if (Features & (1 << FEATURE_SSE4_1)) { 713 *Type = X86::INTEL_CORE2; // "penryn" 714 *Subtype = X86::INTEL_CORE2_45; 715 break; 716 } 717 if (Features & (1 << FEATURE_SSSE3)) { 718 if (Features2 & (1 << (FEATURE_MOVBE - 32))) { 719 *Type = X86::INTEL_BONNELL; // "bonnell" 720 } else { 721 *Type = X86::INTEL_CORE2; // "core2" 722 *Subtype = X86::INTEL_CORE2_65; 723 } 724 break; 725 } 726 if (Features2 & 
(1 << (FEATURE_EM64T - 32))) { 727 *Type = X86::INTEL_CORE2; // "core2" 728 *Subtype = X86::INTEL_CORE2_65; 729 break; 730 } 731 if (Features & (1 << FEATURE_SSE3)) { 732 *Type = X86::INTEL_CORE_DUO; 733 break; 734 } 735 if (Features & (1 << FEATURE_SSE2)) { 736 *Type = X86::INTEL_PENTIUM_M; 737 break; 738 } 739 if (Features & (1 << FEATURE_SSE)) { 740 *Type = X86::INTEL_PENTIUM_III; 741 break; 742 } 743 if (Features & (1 << FEATURE_MMX)) { 744 *Type = X86::INTEL_PENTIUM_II; 745 break; 746 } 747 *Type = X86::INTEL_PENTIUM_PRO; 748 break; 749 } 750 break; 751 case 15: { 752 if (Features2 & (1 << (FEATURE_EM64T - 32))) { 753 *Type = X86::INTEL_NOCONA; 754 break; 755 } 756 if (Features & (1 << FEATURE_SSE3)) { 757 *Type = X86::INTEL_PRESCOTT; 758 break; 759 } 760 *Type = X86::INTEL_PENTIUM_IV; 761 break; 762 } 763 default: 764 break; /*"generic"*/ 765 } 766 } 767 768 static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, 769 unsigned Features, unsigned *Type, 770 unsigned *Subtype) { 771 // FIXME: this poorly matches the generated SubtargetFeatureKV table. There 772 // appears to be no way to generate the wide variety of AMD-specific targets 773 // from the information returned from CPUID. 
774 switch (Family) { 775 case 4: 776 *Type = X86::AMD_i486; 777 break; 778 case 5: 779 *Type = X86::AMDPENTIUM; 780 switch (Model) { 781 case 6: 782 case 7: 783 *Subtype = X86::AMDPENTIUM_K6; 784 break; // "k6" 785 case 8: 786 *Subtype = X86::AMDPENTIUM_K62; 787 break; // "k6-2" 788 case 9: 789 case 13: 790 *Subtype = X86::AMDPENTIUM_K63; 791 break; // "k6-3" 792 case 10: 793 *Subtype = X86::AMDPENTIUM_GEODE; 794 break; // "geode" 795 } 796 break; 797 case 6: 798 if (Features & (1 << FEATURE_SSE)) { 799 *Type = X86::AMD_ATHLON_XP; 800 break; // "athlon-xp" 801 } 802 *Type = X86::AMD_ATHLON; 803 break; // "athlon" 804 case 15: 805 if (Features & (1 << FEATURE_SSE3)) { 806 *Type = X86::AMD_K8SSE3; 807 break; // "k8-sse3" 808 } 809 *Type = X86::AMD_K8; 810 break; // "k8" 811 case 16: 812 *Type = X86::AMDFAM10H; // "amdfam10" 813 switch (Model) { 814 case 2: 815 *Subtype = X86::AMDFAM10H_BARCELONA; 816 break; 817 case 4: 818 *Subtype = X86::AMDFAM10H_SHANGHAI; 819 break; 820 case 8: 821 *Subtype = X86::AMDFAM10H_ISTANBUL; 822 break; 823 } 824 break; 825 case 20: 826 *Type = X86::AMD_BTVER1; 827 break; // "btver1"; 828 case 21: 829 *Type = X86::AMDFAM15H; 830 if (Model >= 0x60 && Model <= 0x7f) { 831 *Subtype = X86::AMDFAM15H_BDVER4; 832 break; // "bdver4"; 60h-7Fh: Excavator 833 } 834 if (Model >= 0x30 && Model <= 0x3f) { 835 *Subtype = X86::AMDFAM15H_BDVER3; 836 break; // "bdver3"; 30h-3Fh: Steamroller 837 } 838 if (Model >= 0x10 && Model <= 0x1f) { 839 *Subtype = X86::AMDFAM15H_BDVER2; 840 break; // "bdver2"; 10h-1Fh: Piledriver 841 } 842 if (Model <= 0x0f) { 843 *Subtype = X86::AMDFAM15H_BDVER1; 844 break; // "bdver1"; 00h-0Fh: Bulldozer 845 } 846 break; 847 case 22: 848 *Type = X86::AMD_BTVER2; 849 break; // "btver2" 850 case 23: 851 *Type = X86::AMDFAM17H; 852 *Subtype = X86::AMDFAM17H_ZNVER1; 853 break; 854 default: 855 break; // "generic" 856 } 857 } 858 859 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, 860 unsigned 
*FeaturesOut, 861 unsigned *Features2Out) { 862 unsigned Features = 0; 863 unsigned Features2 = 0; 864 unsigned EAX, EBX; 865 866 if ((EDX >> 15) & 1) 867 Features |= 1 << FEATURE_CMOV; 868 if ((EDX >> 23) & 1) 869 Features |= 1 << FEATURE_MMX; 870 if ((EDX >> 25) & 1) 871 Features |= 1 << FEATURE_SSE; 872 if ((EDX >> 26) & 1) 873 Features |= 1 << FEATURE_SSE2; 874 875 if ((ECX >> 0) & 1) 876 Features |= 1 << FEATURE_SSE3; 877 if ((ECX >> 1) & 1) 878 Features |= 1 << FEATURE_PCLMUL; 879 if ((ECX >> 9) & 1) 880 Features |= 1 << FEATURE_SSSE3; 881 if ((ECX >> 12) & 1) 882 Features |= 1 << FEATURE_FMA; 883 if ((ECX >> 19) & 1) 884 Features |= 1 << FEATURE_SSE4_1; 885 if ((ECX >> 20) & 1) 886 Features |= 1 << FEATURE_SSE4_2; 887 if ((ECX >> 23) & 1) 888 Features |= 1 << FEATURE_POPCNT; 889 if ((ECX >> 25) & 1) 890 Features |= 1 << FEATURE_AES; 891 892 if ((ECX >> 22) & 1) 893 Features2 |= 1 << (FEATURE_MOVBE - 32); 894 895 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 896 // indicates that the AVX registers will be saved and restored on context 897 // switch, then we have full AVX support. 
898 const unsigned AVXBits = (1 << 27) | (1 << 28); 899 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && 900 ((EAX & 0x6) == 0x6); 901 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); 902 903 if (HasAVX) 904 Features |= 1 << FEATURE_AVX; 905 906 bool HasLeaf7 = 907 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 908 909 if (HasLeaf7 && ((EBX >> 3) & 1)) 910 Features |= 1 << FEATURE_BMI; 911 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) 912 Features |= 1 << FEATURE_AVX2; 913 if (HasLeaf7 && ((EBX >> 9) & 1)) 914 Features |= 1 << FEATURE_BMI2; 915 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) 916 Features |= 1 << FEATURE_AVX512F; 917 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) 918 Features |= 1 << FEATURE_AVX512DQ; 919 if (HasLeaf7 && ((EBX >> 19) & 1)) 920 Features2 |= 1 << (FEATURE_ADX - 32); 921 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) 922 Features |= 1 << FEATURE_AVX512IFMA; 923 if (HasLeaf7 && ((EBX >> 23) & 1)) 924 Features2 |= 1 << (FEATURE_CLFLUSHOPT - 32); 925 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) 926 Features |= 1 << FEATURE_AVX512PF; 927 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) 928 Features |= 1 << FEATURE_AVX512ER; 929 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) 930 Features |= 1 << FEATURE_AVX512CD; 931 if (HasLeaf7 && ((EBX >> 29) & 1)) 932 Features2 |= 1 << (FEATURE_SHA - 32); 933 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) 934 Features |= 1 << FEATURE_AVX512BW; 935 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) 936 Features |= 1 << FEATURE_AVX512VL; 937 938 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) 939 Features |= 1 << FEATURE_AVX512VBMI; 940 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) 941 Features |= 1 << FEATURE_AVX512VPOPCNTDQ; 942 943 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) 944 Features |= 1 << FEATURE_AVX5124VNNIW; 945 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) 946 Features |= 1 << 
FEATURE_AVX5124FMAPS; 947 948 unsigned MaxExtLevel; 949 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 950 951 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 952 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 953 if (HasExtLeaf1 && ((ECX >> 6) & 1)) 954 Features |= 1 << FEATURE_SSE4_A; 955 if (HasExtLeaf1 && ((ECX >> 11) & 1)) 956 Features |= 1 << FEATURE_XOP; 957 if (HasExtLeaf1 && ((ECX >> 16) & 1)) 958 Features |= 1 << FEATURE_FMA4; 959 960 if (HasExtLeaf1 && ((EDX >> 29) & 1)) 961 Features2 |= 1 << (FEATURE_EM64T - 32); 962 963 *FeaturesOut = Features; 964 *Features2Out = Features2; 965 } 966 967 StringRef sys::getHostCPUName() { 968 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 969 unsigned MaxLeaf, Vendor; 970 971 #if defined(__GNUC__) || defined(__clang__) 972 //FIXME: include cpuid.h from clang or copy __get_cpuid_max here 973 // and simplify it to not invoke __cpuid (like cpu_model.c in 974 // compiler-rt/lib/builtins/cpu_model.c? 975 // Opting for the second option. 976 if(!isCpuIdSupported()) 977 return "generic"; 978 #endif 979 if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) 980 return "generic"; 981 getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); 982 983 unsigned Brand_id = EBX & 0xff; 984 unsigned Family = 0, Model = 0; 985 unsigned Features = 0, Features2 = 0; 986 detectX86FamilyModel(EAX, &Family, &Model); 987 getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2); 988 989 unsigned Type = 0; 990 unsigned Subtype = 0; 991 992 if (Vendor == SIG_INTEL) { 993 getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, 994 Features2, &Type, &Subtype); 995 } else if (Vendor == SIG_AMD) { 996 getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); 997 } 998 999 // Check subtypes first since those are more specific. 
1000 #define X86_CPU_SUBTYPE(ARCHNAME, ENUM) \ 1001 if (Subtype == X86::ENUM) \ 1002 return ARCHNAME; 1003 #include "llvm/Support/X86TargetParser.def" 1004 1005 // Now check types. 1006 #define X86_CPU_SUBTYPE(ARCHNAME, ENUM) \ 1007 if (Type == X86::ENUM) \ 1008 return ARCHNAME; 1009 #include "llvm/Support/X86TargetParser.def" 1010 1011 return "generic"; 1012 } 1013 1014 #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) 1015 StringRef sys::getHostCPUName() { 1016 host_basic_info_data_t hostInfo; 1017 mach_msg_type_number_t infoCount; 1018 1019 infoCount = HOST_BASIC_INFO_COUNT; 1020 host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo, 1021 &infoCount); 1022 1023 if (hostInfo.cpu_type != CPU_TYPE_POWERPC) 1024 return "generic"; 1025 1026 switch (hostInfo.cpu_subtype) { 1027 case CPU_SUBTYPE_POWERPC_601: 1028 return "601"; 1029 case CPU_SUBTYPE_POWERPC_602: 1030 return "602"; 1031 case CPU_SUBTYPE_POWERPC_603: 1032 return "603"; 1033 case CPU_SUBTYPE_POWERPC_603e: 1034 return "603e"; 1035 case CPU_SUBTYPE_POWERPC_603ev: 1036 return "603ev"; 1037 case CPU_SUBTYPE_POWERPC_604: 1038 return "604"; 1039 case CPU_SUBTYPE_POWERPC_604e: 1040 return "604e"; 1041 case CPU_SUBTYPE_POWERPC_620: 1042 return "620"; 1043 case CPU_SUBTYPE_POWERPC_750: 1044 return "750"; 1045 case CPU_SUBTYPE_POWERPC_7400: 1046 return "7400"; 1047 case CPU_SUBTYPE_POWERPC_7450: 1048 return "7450"; 1049 case CPU_SUBTYPE_POWERPC_970: 1050 return "970"; 1051 default:; 1052 } 1053 1054 return "generic"; 1055 } 1056 #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__)) 1057 StringRef sys::getHostCPUName() { 1058 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1059 const StringRef& Content = P ? 
P->getBuffer() : ""; 1060 return detail::getHostCPUNameForPowerPC(Content); 1061 } 1062 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1063 StringRef sys::getHostCPUName() { 1064 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1065 const StringRef& Content = P ? P->getBuffer() : ""; 1066 return detail::getHostCPUNameForARM(Content); 1067 } 1068 #elif defined(__linux__) && defined(__s390x__) 1069 StringRef sys::getHostCPUName() { 1070 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1071 const StringRef& Content = P ? P->getBuffer() : ""; 1072 return detail::getHostCPUNameForS390x(Content); 1073 } 1074 #else 1075 StringRef sys::getHostCPUName() { return "generic"; } 1076 #endif 1077 1078 #if defined(__linux__) && defined(__x86_64__) 1079 // On Linux, the number of physical cores can be computed from /proc/cpuinfo, 1080 // using the number of unique physical/core id pairs. The following 1081 // implementation reads the /proc/cpuinfo format on an x86_64 system. 1082 static int computeHostNumPhysicalCores() { 1083 // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be 1084 // mmapped because it appears to have 0 size. 
1085 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 1086 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); 1087 if (std::error_code EC = Text.getError()) { 1088 llvm::errs() << "Can't read " 1089 << "/proc/cpuinfo: " << EC.message() << "\n"; 1090 return -1; 1091 } 1092 SmallVector<StringRef, 8> strs; 1093 (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1, 1094 /*KeepEmpty=*/false); 1095 int CurPhysicalId = -1; 1096 int CurCoreId = -1; 1097 SmallSet<std::pair<int, int>, 32> UniqueItems; 1098 for (auto &Line : strs) { 1099 Line = Line.trim(); 1100 if (!Line.startswith("physical id") && !Line.startswith("core id")) 1101 continue; 1102 std::pair<StringRef, StringRef> Data = Line.split(':'); 1103 auto Name = Data.first.trim(); 1104 auto Val = Data.second.trim(); 1105 if (Name == "physical id") { 1106 assert(CurPhysicalId == -1 && 1107 "Expected a core id before seeing another physical id"); 1108 Val.getAsInteger(10, CurPhysicalId); 1109 } 1110 if (Name == "core id") { 1111 assert(CurCoreId == -1 && 1112 "Expected a physical id before seeing another core id"); 1113 Val.getAsInteger(10, CurCoreId); 1114 } 1115 if (CurPhysicalId != -1 && CurCoreId != -1) { 1116 UniqueItems.insert(std::make_pair(CurPhysicalId, CurCoreId)); 1117 CurPhysicalId = -1; 1118 CurCoreId = -1; 1119 } 1120 } 1121 return UniqueItems.size(); 1122 } 1123 #elif defined(__APPLE__) && defined(__x86_64__) 1124 #include <sys/param.h> 1125 #include <sys/sysctl.h> 1126 1127 // Gets the number of *physical cores* on the machine. 1128 static int computeHostNumPhysicalCores() { 1129 uint32_t count; 1130 size_t len = sizeof(count); 1131 sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0); 1132 if (count < 1) { 1133 int nm[2]; 1134 nm[0] = CTL_HW; 1135 nm[1] = HW_AVAILCPU; 1136 sysctl(nm, 2, &count, &len, NULL, 0); 1137 if (count < 1) 1138 return -1; 1139 } 1140 return count; 1141 } 1142 #else 1143 // On other systems, return -1 to indicate unknown. 
static int computeHostNumPhysicalCores() { return -1; }
#endif

/// Returns the value produced by computeHostNumPhysicalCores(). The result is
/// held in a function-local static, so the computation runs only on the first
/// call.
int sys::getHostNumPhysicalCores() {
  static int NumCores = computeHostNumPhysicalCores();
  return NumCores;
}

#if defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64__) || defined(_M_X64)
/// Fills \p Features with one boolean entry per x86 feature name, derived from
/// CPUID leaves 1, 7, 0xd, 0x80000001 and 0x80000008 plus the XCR0 value.
///
/// \returns false (leaving \p Features untouched) if CPUID leaf 0 cannot be
/// queried or reports a maximum leaf below 1; true otherwise.
///
/// NOTE: EAX/EBX/ECX/EDX are overwritten by every query below, so each group
/// of assignments must stay directly after the query it decodes.
bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
  unsigned MaxLevel;
  // Receives the three remaining outputs of leaf 0 (stored in the order
  // u[0], u[2], u[1]); presumably the vendor string. It is not read again in
  // this function.
  union {
    unsigned u[3];
    char c[12];
  } text;

  // A true result from getX86CpuIDAndInfo is treated as failure.
  if (getX86CpuIDAndInfo(0, &MaxLevel, text.u + 0, text.u + 2, text.u + 1) ||
      MaxLevel < 1)
    return false;

  // Leaf 1: basic feature bits in EDX and ECX.
  getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);

  Features["cmov"] = (EDX >> 15) & 1;
  Features["mmx"] = (EDX >> 23) & 1;
  Features["sse"] = (EDX >> 25) & 1;
  Features["sse2"] = (EDX >> 26) & 1;
  Features["sse3"] = (ECX >> 0) & 1;
  Features["ssse3"] = (ECX >> 9) & 1;
  Features["sse4.1"] = (ECX >> 19) & 1;
  Features["sse4.2"] = (ECX >> 20) & 1;

  Features["pclmul"] = (ECX >> 1) & 1;
  Features["cx16"] = (ECX >> 13) & 1;
  Features["movbe"] = (ECX >> 22) & 1;
  Features["popcnt"] = (ECX >> 23) & 1;
  Features["aes"] = (ECX >> 25) & 1;
  Features["rdrnd"] = (ECX >> 30) & 1;

  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
  // indicates that the AVX registers will be saved and restored on context
  // switch, then we have full AVX support.
  // getX86XCR0 overwrites EAX/EDX (only reached when ECX bits 27 and 28 are
  // set); ECX still holds the leaf 1 value for the checks that follow.
  bool HasAVXSave = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) &&
                    !getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6);
  Features["avx"] = HasAVXSave;
  Features["fma"] = HasAVXSave && (ECX >> 12) & 1;
  Features["f16c"] = HasAVXSave && (ECX >> 29) & 1;

  // Only enable XSAVE if OS has enabled support for saving YMM state.
  Features["xsave"] = HasAVXSave && (ECX >> 26) & 1;

  // AVX512 requires additional context to be saved by the OS.
  // EAX is the XCR0 value here whenever HasAVXSave is true.
  bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);

  // Leaf 0x80000000 reports the highest supported extended leaf.
  unsigned MaxExtLevel;
  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);

  // Extended leaf 0x80000001: feature bits in ECX.
  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
  Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1);
  Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1);
  Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1);
  Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
  Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1);
  Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
  Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1);
  Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);

  // Extended leaf 0x80000008: only EBX bit 0 is consumed.
  bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
                     !getX86CpuIDAndInfoEx(0x80000008,0x0, &EAX, &EBX, &ECX, &EDX);
  Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1);

  // Leaf 7, subleaf 0: feature bits in EBX and ECX.
  bool HasLeaf7 =
      MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);

  // AVX2 is only supported if we have the OS save support from AVX.
  Features["avx2"] = HasAVXSave && HasLeaf7 && ((EBX >> 5) & 1);

  Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1);
  Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1);
  Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1);
  Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1);
  Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1);
  Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1);
  Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1);
  Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
  Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1);
  Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);

  // AVX512 is only supported if the OS supports the context save for it.
  Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
  Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
  Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
  Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
  Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
  Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
  Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
  Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;

  Features["prefetchwt1"] = HasLeaf7 && (ECX & 1);
  Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
  Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
  // Enable protection keys
  Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);

  // Leaf 0xd, subleaf 1: XSAVE extension bits in EAX.
  bool HasLeafD = MaxLevel >= 0xd &&
                  !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);

  // Only enable XSAVE if OS has enabled support for saving YMM state.
  Features["xsaveopt"] = HasAVXSave && HasLeafD && ((EAX >> 0) & 1);
  Features["xsavec"] = HasAVXSave && HasLeafD && ((EAX >> 1) & 1);
  Features["xsaves"] = HasAVXSave && HasLeafD && ((EAX >> 3) & 1);

  return true;
}
#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
/// Sets to true in \p Features every LLVM feature implied by the first line
/// of /proc/cpuinfo that starts with "Features". Kernel feature names without
/// a mapping below are ignored, and no entry is ever set to false.
///
/// \returns false if /proc/cpuinfo cannot be read; true otherwise, including
/// when no "Features" line is found.
bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
  if (!P)
    return false;

  SmallVector<StringRef, 32> Lines;
  P->getBuffer().split(Lines, "\n");

  SmallVector<StringRef, 32> CPUFeatures;

  // Look for the CPU features.
  // Only the first matching line is used; it is split on single spaces, so
  // the leading "Features" token is among the pieces and simply fails to match
  // anything below.
  for (unsigned I = 0, E = Lines.size(); I != E; ++I)
    if (Lines[I].startswith("Features")) {
      Lines[I].split(CPUFeatures, ' ');
      break;
    }

#if defined(__aarch64__)
  // Keep track of which crypto features we have seen
  enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
  uint32_t crypto = 0;
#endif

  for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
    // Translate the kernel's spelling into LLVM's feature name; "" means
    // there is no corresponding LLVM feature.
    StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
#if defined(__aarch64__)
                                   .Case("asimd", "neon")
                                   .Case("fp", "fp-armv8")
                                   .Case("crc32", "crc")
#else
                                   .Case("half", "fp16")
                                   .Case("neon", "neon")
                                   .Case("vfpv3", "vfp3")
                                   .Case("vfpv3d16", "d16")
                                   .Case("vfpv4", "vfp4")
                                   .Case("idiva", "hwdiv-arm")
                                   .Case("idivt", "hwdiv")
#endif
                                   .Default("");

#if defined(__aarch64__)
    // We need to check crypto separately since we need all of the crypto
    // extensions to enable the subtarget feature
    if (CPUFeatures[I] == "aes")
      crypto |= CAP_AES;
    else if (CPUFeatures[I] == "pmull")
      crypto |= CAP_PMULL;
    else if (CPUFeatures[I] == "sha1")
      crypto |= CAP_SHA1;
    else if (CPUFeatures[I] == "sha2")
      crypto |= CAP_SHA2;
#endif

    if (LLVMFeatureStr != "")
      Features[LLVMFeatureStr] = true;
  }

#if defined(__aarch64__)
  // If we have all crypto bits we can add the feature
  if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
    Features["crypto"] = true;
#endif

  return true;
}
#else
/// Feature detection is not implemented for this platform: \p Features is
/// left untouched and false is returned.
bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
#endif

/// Returns the triple describing the running process: LLVM_HOST_TRIPLE passed
/// through updateTripleOSVersion() and normalized, then switched to the
/// 64-bit or 32-bit variant of its architecture if the triple's bit width
/// disagrees with sizeof(void *) in this build.
std::string sys::getProcessTriple() {
  std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
  Triple PT(Triple::normalize(TargetTripleString));

  if (sizeof(void *) == 8 && PT.isArch32Bit())
    PT = PT.get64BitArchVariant();
  if (sizeof(void *) == 4 && PT.isArch64Bit())
    PT = PT.get32BitArchVariant();

  return PT.str();
}