1 //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the operating system Host concept. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Support/Host.h" 15 #include "llvm/Support/TargetParser.h" 16 #include "llvm/ADT/SmallSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/ADT/Triple.h" 21 #include "llvm/Config/config.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/FileSystem.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include <assert.h> 27 #include <string.h> 28 29 // Include the platform-specific parts of this class. 30 #ifdef LLVM_ON_UNIX 31 #include "Unix/Host.inc" 32 #endif 33 #ifdef LLVM_ON_WIN32 34 #include "Windows/Host.inc" 35 #endif 36 #ifdef _MSC_VER 37 #include <intrin.h> 38 #endif 39 #if defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) 40 #include <mach/host_info.h> 41 #include <mach/mach.h> 42 #include <mach/mach_host.h> 43 #include <mach/machine.h> 44 #endif 45 46 #define DEBUG_TYPE "host-detection" 47 48 //===----------------------------------------------------------------------===// 49 // 50 // Implementations of the CPU detection routines 51 // 52 //===----------------------------------------------------------------------===// 53 54 using namespace llvm; 55 56 static std::unique_ptr<llvm::MemoryBuffer> 57 LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() { 58 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 59 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); 60 if (std::error_code EC = Text.getError()) { 61 llvm::errs() << "Can't read " 62 << "/proc/cpuinfo: " << EC.message() << "\n"; 63 return nullptr; 64 } 65 return std::move(*Text); 66 } 67 68 StringRef sys::detail::getHostCPUNameForPowerPC( 69 const StringRef &ProcCpuinfoContent) { 70 // Access to the Processor Version Register (PVR) on PowerPC is privileged, 71 // and so we must use an operating-system interface to determine the current 72 // processor type. On Linux, this is exposed through the /proc/cpuinfo file. 73 const char *generic = "generic"; 74 75 // The cpu line is second (after the 'processor: 0' line), so if this 76 // buffer is too small then something has changed (or is wrong). 77 StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin(); 78 StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end(); 79 80 StringRef::const_iterator CIP = CPUInfoStart; 81 82 StringRef::const_iterator CPUStart = 0; 83 size_t CPULen = 0; 84 85 // We need to find the first line which starts with cpu, spaces, and a colon. 86 // After the colon, there may be some additional spaces and then the cpu type. 87 while (CIP < CPUInfoEnd && CPUStart == 0) { 88 if (CIP < CPUInfoEnd && *CIP == '\n') 89 ++CIP; 90 91 if (CIP < CPUInfoEnd && *CIP == 'c') { 92 ++CIP; 93 if (CIP < CPUInfoEnd && *CIP == 'p') { 94 ++CIP; 95 if (CIP < CPUInfoEnd && *CIP == 'u') { 96 ++CIP; 97 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 98 ++CIP; 99 100 if (CIP < CPUInfoEnd && *CIP == ':') { 101 ++CIP; 102 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 103 ++CIP; 104 105 if (CIP < CPUInfoEnd) { 106 CPUStart = CIP; 107 while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' && 108 *CIP != ',' && *CIP != '\n')) 109 ++CIP; 110 CPULen = CIP - CPUStart; 111 } 112 } 113 } 114 } 115 } 116 117 if (CPUStart == 0) 118 while (CIP < CPUInfoEnd && *CIP != '\n') 119 ++CIP; 120 } 121 122 if (CPUStart == 0) 123 return generic; 124 125 return StringSwitch<const char *>(StringRef(CPUStart, CPULen)) 126 .Case("604e", "604e") 127 .Case("604", "604") 128 .Case("7400", "7400") 129 .Case("7410", "7400") 130 .Case("7447", "7400") 131 .Case("7455", "7450") 132 .Case("G4", "g4") 133 .Case("POWER4", "970") 134 .Case("PPC970FX", "970") 135 .Case("PPC970MP", "970") 136 .Case("G5", "g5") 137 .Case("POWER5", "g5") 138 .Case("A2", "a2") 139 .Case("POWER6", "pwr6") 140 .Case("POWER7", "pwr7") 141 .Case("POWER8", "pwr8") 142 .Case("POWER8E", "pwr8") 143 .Case("POWER8NVL", "pwr8") 144 .Case("POWER9", "pwr9") 145 .Default(generic); 146 } 147 148 StringRef sys::detail::getHostCPUNameForARM( 149 const StringRef &ProcCpuinfoContent) { 150 // The cpuid register on arm is not accessible from user space. On Linux, 151 // it is exposed through the /proc/cpuinfo file. 152 153 // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line 154 // in all cases. 155 SmallVector<StringRef, 32> Lines; 156 ProcCpuinfoContent.split(Lines, "\n"); 157 158 // Look for the CPU implementer line. 159 StringRef Implementer; 160 StringRef Hardware; 161 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 162 if (Lines[I].startswith("CPU implementer")) 163 Implementer = Lines[I].substr(15).ltrim("\t :"); 164 if (Lines[I].startswith("Hardware")) 165 Hardware = Lines[I].substr(8).ltrim("\t :"); 166 } 167 168 if (Implementer == "0x41") { // ARM Ltd. 169 // MSM8992/8994 may give cpu part for the core that the kernel is running on, 170 // which is undeterministic and wrong. Always return cortex-a53 for these SoC. 171 if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996")) 172 return "cortex-a53"; 173 174 175 // Look for the CPU part line. 176 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 177 if (Lines[I].startswith("CPU part")) 178 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 179 // values correspond to the "Part number" in the CP15/c0 register. The 180 // contents are specified in the various processor manuals. 181 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 182 .Case("0x926", "arm926ej-s") 183 .Case("0xb02", "mpcore") 184 .Case("0xb36", "arm1136j-s") 185 .Case("0xb56", "arm1156t2-s") 186 .Case("0xb76", "arm1176jz-s") 187 .Case("0xc08", "cortex-a8") 188 .Case("0xc09", "cortex-a9") 189 .Case("0xc0f", "cortex-a15") 190 .Case("0xc20", "cortex-m0") 191 .Case("0xc23", "cortex-m3") 192 .Case("0xc24", "cortex-m4") 193 .Case("0xd04", "cortex-a35") 194 .Case("0xd03", "cortex-a53") 195 .Case("0xd07", "cortex-a57") 196 .Case("0xd08", "cortex-a72") 197 .Case("0xd09", "cortex-a73") 198 .Default("generic"); 199 } 200 201 if (Implementer == "0x51") // Qualcomm Technologies, Inc. 202 // Look for the CPU part line. 203 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 204 if (Lines[I].startswith("CPU part")) 205 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 206 // values correspond to the "Part number" in the CP15/c0 register. The 207 // contents are specified in the various processor manuals. 208 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 209 .Case("0x06f", "krait") // APQ8064 210 .Case("0x201", "kryo") 211 .Case("0x205", "kryo") 212 .Case("0x211", "kryo") 213 .Case("0x800", "cortex-a73") 214 .Case("0x801", "cortex-a73") 215 .Case("0xc00", "falkor") 216 .Case("0xc01", "saphira") 217 .Default("generic"); 218 219 return "generic"; 220 } 221 222 StringRef sys::detail::getHostCPUNameForS390x( 223 const StringRef &ProcCpuinfoContent) { 224 // STIDP is a privileged operation, so use /proc/cpuinfo instead. 225 226 // The "processor 0:" line comes after a fair amount of other information, 227 // including a cache breakdown, but this should be plenty. 228 SmallVector<StringRef, 32> Lines; 229 ProcCpuinfoContent.split(Lines, "\n"); 230 231 // Look for the CPU features. 232 SmallVector<StringRef, 32> CPUFeatures; 233 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 234 if (Lines[I].startswith("features")) { 235 size_t Pos = Lines[I].find(":"); 236 if (Pos != StringRef::npos) { 237 Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' '); 238 break; 239 } 240 } 241 242 // We need to check for the presence of vector support independently of 243 // the machine type, since we may only use the vector register set when 244 // supported by the kernel (and hypervisor). 245 bool HaveVectorSupport = false; 246 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 247 if (CPUFeatures[I] == "vx") 248 HaveVectorSupport = true; 249 } 250 251 // Now check the processor machine type. 252 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 253 if (Lines[I].startswith("processor ")) { 254 size_t Pos = Lines[I].find("machine = "); 255 if (Pos != StringRef::npos) { 256 Pos += sizeof("machine = ") - 1; 257 unsigned int Id; 258 if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) { 259 if (Id >= 3906 && HaveVectorSupport) 260 return "z14"; 261 if (Id >= 2964 && HaveVectorSupport) 262 return "z13"; 263 if (Id >= 2827) 264 return "zEC12"; 265 if (Id >= 2817) 266 return "z196"; 267 } 268 } 269 break; 270 } 271 } 272 273 return "generic"; 274 } 275 276 StringRef sys::detail::getHostCPUNameForBPF() { 277 #if !defined(__linux__) || !defined(__x86_64__) 278 return "generic"; 279 #else 280 uint8_t insns[40] __attribute__ ((aligned (8))) = 281 /* BPF_MOV64_IMM(BPF_REG_0, 0) */ 282 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 283 /* BPF_MOV64_IMM(BPF_REG_2, 1) */ 284 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 285 /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ 286 0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 287 /* BPF_MOV64_IMM(BPF_REG_0, 1) */ 288 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 289 /* BPF_EXIT_INSN() */ 290 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 291 292 struct bpf_prog_load_attr { 293 uint32_t prog_type; 294 uint32_t insn_cnt; 295 uint64_t insns; 296 uint64_t license; 297 uint32_t log_level; 298 uint32_t log_size; 299 uint64_t log_buf; 300 uint32_t kern_version; 301 uint32_t prog_flags; 302 } attr = {}; 303 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ 304 attr.insn_cnt = 5; 305 attr.insns = (uint64_t)insns; 306 attr.license = (uint64_t)"DUMMY"; 307 308 int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr)); 309 if (fd >= 0) { 310 close(fd); 311 return "v2"; 312 } 313 return "v1"; 314 #endif 315 } 316 317 #if defined(__i386__) || defined(_M_IX86) || \ 318 defined(__x86_64__) || defined(_M_X64) 319 320 enum VendorSignatures { 321 SIG_INTEL = 0x756e6547 /* Genu */, 322 SIG_AMD = 0x68747541 /* Auth */ 323 }; 324 325 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). 326 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID 327 // support. Consequently, for i386, the presence of CPUID is checked first 328 // via the corresponding eflags bit. 329 // Removal of cpuid.h header motivated by PR30384 330 // Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp 331 // or test-suite, but are used in external projects e.g. libstdcxx 332 static bool isCpuIdSupported() { 333 #if defined(__GNUC__) || defined(__clang__) 334 #if defined(__i386__) 335 int __cpuid_supported; 336 __asm__(" pushfl\n" 337 " popl %%eax\n" 338 " movl %%eax,%%ecx\n" 339 " xorl $0x00200000,%%eax\n" 340 " pushl %%eax\n" 341 " popfl\n" 342 " pushfl\n" 343 " popl %%eax\n" 344 " movl $0,%0\n" 345 " cmpl %%eax,%%ecx\n" 346 " je 1f\n" 347 " movl $1,%0\n" 348 "1:" 349 : "=r"(__cpuid_supported) 350 : 351 : "eax", "ecx"); 352 if (!__cpuid_supported) 353 return false; 354 #endif 355 return true; 356 #endif 357 return true; 358 } 359 360 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in 361 /// the specified arguments. If we can't run cpuid on the host, return true. 362 static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, 363 unsigned *rECX, unsigned *rEDX) { 364 #if defined(__GNUC__) || defined(__clang__) 365 #if defined(__x86_64__) 366 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 367 // FIXME: should we save this for Clang? 368 __asm__("movq\t%%rbx, %%rsi\n\t" 369 "cpuid\n\t" 370 "xchgq\t%%rbx, %%rsi\n\t" 371 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 372 : "a"(value)); 373 return false; 374 #elif defined(__i386__) 375 __asm__("movl\t%%ebx, %%esi\n\t" 376 "cpuid\n\t" 377 "xchgl\t%%ebx, %%esi\n\t" 378 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 379 : "a"(value)); 380 return false; 381 #else 382 return true; 383 #endif 384 #elif defined(_MSC_VER) 385 // The MSVC intrinsic is portable across x86 and x64. 386 int registers[4]; 387 __cpuid(registers, value); 388 *rEAX = registers[0]; 389 *rEBX = registers[1]; 390 *rECX = registers[2]; 391 *rEDX = registers[3]; 392 return false; 393 #else 394 return true; 395 #endif 396 } 397 398 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return 399 /// the 4 values in the specified arguments. If we can't run cpuid on the host, 400 /// return true. 401 static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, 402 unsigned *rEAX, unsigned *rEBX, unsigned *rECX, 403 unsigned *rEDX) { 404 #if defined(__GNUC__) || defined(__clang__) 405 #if defined(__x86_64__) 406 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 407 // FIXME: should we save this for Clang? 408 __asm__("movq\t%%rbx, %%rsi\n\t" 409 "cpuid\n\t" 410 "xchgq\t%%rbx, %%rsi\n\t" 411 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 412 : "a"(value), "c"(subleaf)); 413 return false; 414 #elif defined(__i386__) 415 __asm__("movl\t%%ebx, %%esi\n\t" 416 "cpuid\n\t" 417 "xchgl\t%%ebx, %%esi\n\t" 418 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 419 : "a"(value), "c"(subleaf)); 420 return false; 421 #else 422 return true; 423 #endif 424 #elif defined(_MSC_VER) 425 int registers[4]; 426 __cpuidex(registers, value, subleaf); 427 *rEAX = registers[0]; 428 *rEBX = registers[1]; 429 *rECX = registers[2]; 430 *rEDX = registers[3]; 431 return false; 432 #else 433 return true; 434 #endif 435 } 436 437 // Read control register 0 (XCR0). Used to detect features such as AVX. 438 static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { 439 #if defined(__GNUC__) || defined(__clang__) 440 // Check xgetbv; this uses a .byte sequence instead of the instruction 441 // directly because older assemblers do not include support for xgetbv and 442 // there is no easy way to conditionally compile based on the assembler used. 443 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); 444 return false; 445 #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) 446 unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); 447 *rEAX = Result; 448 *rEDX = Result >> 32; 449 return false; 450 #else 451 return true; 452 #endif 453 } 454 455 static void detectX86FamilyModel(unsigned EAX, unsigned *Family, 456 unsigned *Model) { 457 *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 458 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 459 if (*Family == 6 || *Family == 0xf) { 460 if (*Family == 0xf) 461 // Examine extended family ID if family ID is F. 462 *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 463 // Examine extended model ID if family ID is 6 or F. 464 *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 465 } 466 } 467 468 static void 469 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, 470 unsigned Brand_id, unsigned Features, 471 unsigned Features2, unsigned *Type, 472 unsigned *Subtype) { 473 if (Brand_id != 0) 474 return; 475 switch (Family) { 476 case 3: 477 *Type = X86::INTEL_i386; 478 break; 479 case 4: 480 *Type = X86::INTEL_i486; 481 break; 482 case 5: 483 if (Features & (1 << X86::FEATURE_MMX)) { 484 *Type = X86::INTEL_PENTIUM_MMX; 485 break; 486 } 487 *Type = X86::INTEL_PENTIUM; 488 break; 489 case 6: 490 switch (Model) { 491 case 0x01: // Pentium Pro processor 492 *Type = X86::INTEL_PENTIUM_PRO; 493 break; 494 case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor, 495 // model 03 496 case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor, 497 // model 05, and Intel Celeron processor, model 05 498 case 0x06: // Celeron processor, model 06 499 *Type = X86::INTEL_PENTIUM_II; 500 break; 501 case 0x07: // Pentium III processor, model 07, and Pentium III Xeon 502 // processor, model 07 503 case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor, 504 // model 08, and Celeron processor, model 08 505 case 0x0a: // Pentium III Xeon processor, model 0Ah 506 case 0x0b: // Pentium III processor, model 0Bh 507 *Type = X86::INTEL_PENTIUM_III; 508 break; 509 case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09. 510 case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model 511 // 0Dh. All processors are manufactured using the 90 nm process. 512 case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579 513 // Integrated Processor with Intel QuickAssist Technology 514 *Type = X86::INTEL_PENTIUM_M; 515 break; 516 case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model 517 // 0Eh. All processors are manufactured using the 65 nm process. 518 *Type = X86::INTEL_CORE_DUO; 519 break; // yonah 520 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile 521 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad 522 // mobile processor, Intel Core 2 Extreme processor, Intel 523 // Pentium Dual-Core processor, Intel Xeon processor, model 524 // 0Fh. All processors are manufactured using the 65 nm process. 525 case 0x16: // Intel Celeron processor model 16h. All processors are 526 // manufactured using the 65 nm process 527 *Type = X86::INTEL_CORE2; // "core2" 528 *Subtype = X86::INTEL_CORE2_65; 529 break; 530 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model 531 // 17h. All processors are manufactured using the 45 nm process. 532 // 533 // 45nm: Penryn , Wolfdale, Yorkfield (XE) 534 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using 535 // the 45 nm process. 536 *Type = X86::INTEL_CORE2; // "penryn" 537 *Subtype = X86::INTEL_CORE2_45; 538 break; 539 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All 540 // processors are manufactured using the 45 nm process. 541 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. 542 // As found in a Summer 2010 model iMac. 543 case 0x1f: 544 case 0x2e: // Nehalem EX 545 *Type = X86::INTEL_COREI7; // "nehalem" 546 *Subtype = X86::INTEL_COREI7_NEHALEM; 547 break; 548 case 0x25: // Intel Core i7, laptop version. 549 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All 550 // processors are manufactured using the 32 nm process. 551 case 0x2f: // Westmere EX 552 *Type = X86::INTEL_COREI7; // "westmere" 553 *Subtype = X86::INTEL_COREI7_WESTMERE; 554 break; 555 case 0x2a: // Intel Core i7 processor. All processors are manufactured 556 // using the 32 nm process. 557 case 0x2d: 558 *Type = X86::INTEL_COREI7; //"sandybridge" 559 *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; 560 break; 561 case 0x3a: 562 case 0x3e: // Ivy Bridge EP 563 *Type = X86::INTEL_COREI7; // "ivybridge" 564 *Subtype = X86::INTEL_COREI7_IVYBRIDGE; 565 break; 566 567 // Haswell: 568 case 0x3c: 569 case 0x3f: 570 case 0x45: 571 case 0x46: 572 *Type = X86::INTEL_COREI7; // "haswell" 573 *Subtype = X86::INTEL_COREI7_HASWELL; 574 break; 575 576 // Broadwell: 577 case 0x3d: 578 case 0x47: 579 case 0x4f: 580 case 0x56: 581 *Type = X86::INTEL_COREI7; // "broadwell" 582 *Subtype = X86::INTEL_COREI7_BROADWELL; 583 break; 584 585 // Skylake: 586 case 0x4e: // Skylake mobile 587 case 0x5e: // Skylake desktop 588 case 0x8e: // Kaby Lake mobile 589 case 0x9e: // Kaby Lake desktop 590 *Type = X86::INTEL_COREI7; // "skylake" 591 *Subtype = X86::INTEL_COREI7_SKYLAKE; 592 break; 593 594 // Skylake Xeon: 595 case 0x55: 596 *Type = X86::INTEL_COREI7; 597 *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" 598 break; 599 600 // Cannonlake: 601 case 0x66: 602 *Type = X86::INTEL_COREI7; 603 *Subtype = X86::INTEL_COREI7_CANNONLAKE; // "cannonlake" 604 break; 605 606 case 0x1c: // Most 45 nm Intel Atom processors 607 case 0x26: // 45 nm Atom Lincroft 608 case 0x27: // 32 nm Atom Medfield 609 case 0x35: // 32 nm Atom Midview 610 case 0x36: // 32 nm Atom Midview 611 *Type = X86::INTEL_BONNELL; 612 break; // "bonnell" 613 614 // Atom Silvermont codes from the Intel software optimization guide. 615 case 0x37: 616 case 0x4a: 617 case 0x4d: 618 case 0x5a: 619 case 0x5d: 620 case 0x4c: // really airmont 621 *Type = X86::INTEL_SILVERMONT; 622 break; // "silvermont" 623 // Goldmont: 624 case 0x5c: // Apollo Lake 625 case 0x5f: // Denverton 626 case 0x7a: // Gemini Lake 627 *Type = X86::INTEL_GOLDMONT; 628 break; // "goldmont" 629 case 0x57: 630 *Type = X86::INTEL_KNL; // knl 631 break; 632 case 0x85: 633 *Type = X86::INTEL_KNM; // knm 634 break; 635 636 default: // Unknown family 6 CPU, try to guess. 637 if (Features & (1 << X86::FEATURE_AVX512VBMI)) { 638 *Type = X86::INTEL_COREI7; 639 *Subtype = X86::INTEL_COREI7_CANNONLAKE; 640 break; 641 } 642 643 if (Features & (1 << X86::FEATURE_AVX512VL)) { 644 *Type = X86::INTEL_COREI7; 645 *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; 646 break; 647 } 648 649 if (Features & (1 << X86::FEATURE_AVX512ER)) { 650 *Type = X86::INTEL_KNL; // knl 651 break; 652 } 653 654 if (Features2 & (1 << (X86::FEATURE_CLFLUSHOPT - 32))) { 655 if (Features2 & (1 << (X86::FEATURE_SHA - 32))) { 656 *Type = X86::INTEL_GOLDMONT; 657 } else { 658 *Type = X86::INTEL_COREI7; 659 *Subtype = X86::INTEL_COREI7_SKYLAKE; 660 } 661 break; 662 } 663 if (Features2 & (1 << (X86::FEATURE_ADX - 32))) { 664 *Type = X86::INTEL_COREI7; 665 *Subtype = X86::INTEL_COREI7_BROADWELL; 666 break; 667 } 668 if (Features & (1 << X86::FEATURE_AVX2)) { 669 *Type = X86::INTEL_COREI7; 670 *Subtype = X86::INTEL_COREI7_HASWELL; 671 break; 672 } 673 if (Features & (1 << X86::FEATURE_AVX)) { 674 *Type = X86::INTEL_COREI7; 675 *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; 676 break; 677 } 678 if (Features & (1 << X86::FEATURE_SSE4_2)) { 679 if (Features2 & (1 << (X86::FEATURE_MOVBE - 32))) { 680 *Type = X86::INTEL_SILVERMONT; 681 } else { 682 *Type = X86::INTEL_COREI7; 683 *Subtype = X86::INTEL_COREI7_NEHALEM; 684 } 685 break; 686 } 687 if (Features & (1 << X86::FEATURE_SSE4_1)) { 688 *Type = X86::INTEL_CORE2; // "penryn" 689 *Subtype = X86::INTEL_CORE2_45; 690 break; 691 } 692 if (Features & (1 << X86::FEATURE_SSSE3)) { 693 if (Features2 & (1 << (X86::FEATURE_MOVBE - 32))) { 694 *Type = X86::INTEL_BONNELL; // "bonnell" 695 } else { 696 *Type = X86::INTEL_CORE2; // "core2" 697 *Subtype = X86::INTEL_CORE2_65; 698 } 699 break; 700 } 701 if (Features2 & (1 << (X86::FEATURE_EM64T - 32))) { 702 *Type = X86::INTEL_CORE2; // "core2" 703 *Subtype = X86::INTEL_CORE2_65; 704 break; 705 } 706 if (Features & (1 << X86::FEATURE_SSE3)) { 707 *Type = X86::INTEL_CORE_DUO; 708 break; 709 } 710 if (Features & (1 << X86::FEATURE_SSE2)) { 711 *Type = X86::INTEL_PENTIUM_M; 712 break; 713 } 714 if (Features & (1 << X86::FEATURE_SSE)) { 715 *Type = X86::INTEL_PENTIUM_III; 716 break; 717 } 718 if (Features & (1 << X86::FEATURE_MMX)) { 719 *Type = X86::INTEL_PENTIUM_II; 720 break; 721 } 722 *Type = X86::INTEL_PENTIUM_PRO; 723 break; 724 } 725 break; 726 case 15: { 727 if (Features2 & (1 << (X86::FEATURE_EM64T - 32))) { 728 *Type = X86::INTEL_NOCONA; 729 break; 730 } 731 if (Features & (1 << X86::FEATURE_SSE3)) { 732 *Type = X86::INTEL_PRESCOTT; 733 break; 734 } 735 *Type = X86::INTEL_PENTIUM_IV; 736 break; 737 } 738 default: 739 break; /*"generic"*/ 740 } 741 } 742 743 static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, 744 unsigned Features, unsigned *Type, 745 unsigned *Subtype) { 746 // FIXME: this poorly matches the generated SubtargetFeatureKV table. There 747 // appears to be no way to generate the wide variety of AMD-specific targets 748 // from the information returned from CPUID. 749 switch (Family) { 750 case 4: 751 *Type = X86::AMD_i486; 752 break; 753 case 5: 754 *Type = X86::AMDPENTIUM; 755 switch (Model) { 756 case 6: 757 case 7: 758 *Subtype = X86::AMDPENTIUM_K6; 759 break; // "k6" 760 case 8: 761 *Subtype = X86::AMDPENTIUM_K62; 762 break; // "k6-2" 763 case 9: 764 case 13: 765 *Subtype = X86::AMDPENTIUM_K63; 766 break; // "k6-3" 767 case 10: 768 *Subtype = X86::AMDPENTIUM_GEODE; 769 break; // "geode" 770 } 771 break; 772 case 6: 773 if (Features & (1 << X86::FEATURE_SSE)) { 774 *Type = X86::AMD_ATHLON_XP; 775 break; // "athlon-xp" 776 } 777 *Type = X86::AMD_ATHLON; 778 break; // "athlon" 779 case 15: 780 if (Features & (1 << X86::FEATURE_SSE3)) { 781 *Type = X86::AMD_K8SSE3; 782 break; // "k8-sse3" 783 } 784 *Type = X86::AMD_K8; 785 break; // "k8" 786 case 16: 787 *Type = X86::AMDFAM10H; // "amdfam10" 788 switch (Model) { 789 case 2: 790 *Subtype = X86::AMDFAM10H_BARCELONA; 791 break; 792 case 4: 793 *Subtype = X86::AMDFAM10H_SHANGHAI; 794 break; 795 case 8: 796 *Subtype = X86::AMDFAM10H_ISTANBUL; 797 break; 798 } 799 break; 800 case 20: 801 *Type = X86::AMD_BTVER1; 802 break; // "btver1"; 803 case 21: 804 *Type = X86::AMDFAM15H; 805 if (Model >= 0x60 && Model <= 0x7f) { 806 *Subtype = X86::AMDFAM15H_BDVER4; 807 break; // "bdver4"; 60h-7Fh: Excavator 808 } 809 if (Model >= 0x30 && Model <= 0x3f) { 810 *Subtype = X86::AMDFAM15H_BDVER3; 811 break; // "bdver3"; 30h-3Fh: Steamroller 812 } 813 if (Model >= 0x10 && Model <= 0x1f) { 814 *Subtype = X86::AMDFAM15H_BDVER2; 815 break; // "bdver2"; 10h-1Fh: Piledriver 816 } 817 if (Model <= 0x0f) { 818 *Subtype = X86::AMDFAM15H_BDVER1; 819 break; // "bdver1"; 00h-0Fh: Bulldozer 820 } 821 break; 822 case 22: 823 *Type = X86::AMD_BTVER2; 824 break; // "btver2" 825 case 23: 826 *Type = X86::AMDFAM17H; 827 *Subtype = X86::AMDFAM17H_ZNVER1; 828 break; 829 default: 830 break; // "generic" 831 } 832 } 833 834 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, 835 unsigned *FeaturesOut, 836 unsigned *Features2Out) { 837 unsigned Features = 0; 838 unsigned Features2 = 0; 839 unsigned EAX, EBX; 840 841 if ((EDX >> 15) & 1) 842 Features |= 1 << X86::FEATURE_CMOV; 843 if ((EDX >> 23) & 1) 844 Features |= 1 << X86::FEATURE_MMX; 845 if ((EDX >> 25) & 1) 846 Features |= 1 << X86::FEATURE_SSE; 847 if ((EDX >> 26) & 1) 848 Features |= 1 << X86::FEATURE_SSE2; 849 850 if ((ECX >> 0) & 1) 851 Features |= 1 << X86::FEATURE_SSE3; 852 if ((ECX >> 1) & 1) 853 Features |= 1 << X86::FEATURE_PCLMUL; 854 if ((ECX >> 9) & 1) 855 Features |= 1 << X86::FEATURE_SSSE3; 856 if ((ECX >> 12) & 1) 857 Features |= 1 << X86::FEATURE_FMA; 858 if ((ECX >> 19) & 1) 859 Features |= 1 << X86::FEATURE_SSE4_1; 860 if ((ECX >> 20) & 1) 861 Features |= 1 << X86::FEATURE_SSE4_2; 862 if ((ECX >> 23) & 1) 863 Features |= 1 << X86::FEATURE_POPCNT; 864 if ((ECX >> 25) & 1) 865 Features |= 1 << X86::FEATURE_AES; 866 867 if ((ECX >> 22) & 1) 868 Features2 |= 1 << (X86::FEATURE_MOVBE - 32); 869 870 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 871 // indicates that the AVX registers will be saved and restored on context 872 // switch, then we have full AVX support. 873 const unsigned AVXBits = (1 << 27) | (1 << 28); 874 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && 875 ((EAX & 0x6) == 0x6); 876 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); 877 878 if (HasAVX) 879 Features |= 1 << X86::FEATURE_AVX; 880 881 bool HasLeaf7 = 882 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 883 884 if (HasLeaf7 && ((EBX >> 3) & 1)) 885 Features |= 1 << X86::FEATURE_BMI; 886 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) 887 Features |= 1 << X86::FEATURE_AVX2; 888 if (HasLeaf7 && ((EBX >> 9) & 1)) 889 Features |= 1 << X86::FEATURE_BMI2; 890 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) 891 Features |= 1 << X86::FEATURE_AVX512F; 892 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) 893 Features |= 1 << X86::FEATURE_AVX512DQ; 894 if (HasLeaf7 && ((EBX >> 19) & 1)) 895 Features2 |= 1 << (X86::FEATURE_ADX - 32); 896 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) 897 Features |= 1 << X86::FEATURE_AVX512IFMA; 898 if (HasLeaf7 && ((EBX >> 23) & 1)) 899 Features2 |= 1 << (X86::FEATURE_CLFLUSHOPT - 32); 900 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) 901 Features |= 1 << X86::FEATURE_AVX512PF; 902 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) 903 Features |= 1 << X86::FEATURE_AVX512ER; 904 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) 905 Features |= 1 << X86::FEATURE_AVX512CD; 906 if (HasLeaf7 && ((EBX >> 29) & 1)) 907 Features2 |= 1 << (X86::FEATURE_SHA - 32); 908 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) 909 Features |= 1 << X86::FEATURE_AVX512BW; 910 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) 911 Features |= 1 << X86::FEATURE_AVX512VL; 912 913 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) 914 Features |= 1 << X86::FEATURE_AVX512VBMI; 915 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) 916 Features |= 1 << X86::FEATURE_AVX512VPOPCNTDQ; 917 918 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) 919 Features |= 1 << X86::FEATURE_AVX5124VNNIW; 920 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) 921 Features |= 1 << X86::FEATURE_AVX5124FMAPS; 922 923 unsigned MaxExtLevel; 924 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 925 926 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 927 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 928 if (HasExtLeaf1 && ((ECX >> 6) & 1)) 929 Features |= 1 << X86::FEATURE_SSE4_A; 930 if (HasExtLeaf1 && ((ECX >> 11) & 1)) 931 Features |= 1 << X86::FEATURE_XOP; 932 if (HasExtLeaf1 && ((ECX >> 16) & 1)) 933 Features |= 1 << X86::FEATURE_FMA4; 934 935 if (HasExtLeaf1 && ((EDX >> 29) & 1)) 936 Features2 |= 1 << (X86::FEATURE_EM64T - 32); 937 938 *FeaturesOut = Features; 939 *Features2Out = Features2; 940 } 941 942 StringRef sys::getHostCPUName() { 943 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 944 unsigned MaxLeaf, Vendor; 945 946 #if defined(__GNUC__) || defined(__clang__) 947 //FIXME: include cpuid.h from clang or copy __get_cpuid_max here 948 // and simplify it to not invoke __cpuid (like cpu_model.c in 949 // compiler-rt/lib/builtins/cpu_model.c? 950 // Opting for the second option. 951 if(!isCpuIdSupported()) 952 return "generic"; 953 #endif 954 if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) 955 return "generic"; 956 getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); 957 958 unsigned Brand_id = EBX & 0xff; 959 unsigned Family = 0, Model = 0; 960 unsigned Features = 0, Features2 = 0; 961 detectX86FamilyModel(EAX, &Family, &Model); 962 getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2); 963 964 unsigned Type = 0; 965 unsigned Subtype = 0; 966 967 if (Vendor == SIG_INTEL) { 968 getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, 969 Features2, &Type, &Subtype); 970 } else if (Vendor == SIG_AMD) { 971 getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); 972 } 973 974 // Check subtypes first since those are more specific. 975 #define X86_CPU_SUBTYPE(ARCHNAME, ENUM) \ 976 if (Subtype == X86::ENUM) \ 977 return ARCHNAME; 978 #include "llvm/Support/X86TargetParser.def" 979 980 // Now check types. 981 #define X86_CPU_SUBTYPE(ARCHNAME, ENUM) \ 982 if (Type == X86::ENUM) \ 983 return ARCHNAME; 984 #include "llvm/Support/X86TargetParser.def" 985 986 return "generic"; 987 } 988 989 #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) 990 StringRef sys::getHostCPUName() { 991 host_basic_info_data_t hostInfo; 992 mach_msg_type_number_t infoCount; 993 994 infoCount = HOST_BASIC_INFO_COUNT; 995 host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo, 996 &infoCount); 997 998 if (hostInfo.cpu_type != CPU_TYPE_POWERPC) 999 return "generic"; 1000 1001 switch (hostInfo.cpu_subtype) { 1002 case CPU_SUBTYPE_POWERPC_601: 1003 return "601"; 1004 case CPU_SUBTYPE_POWERPC_602: 1005 return "602"; 1006 case CPU_SUBTYPE_POWERPC_603: 1007 return "603"; 1008 case CPU_SUBTYPE_POWERPC_603e: 1009 return "603e"; 1010 case CPU_SUBTYPE_POWERPC_603ev: 1011 return "603ev"; 1012 case CPU_SUBTYPE_POWERPC_604: 1013 return "604"; 1014 case CPU_SUBTYPE_POWERPC_604e: 1015 return "604e"; 1016 case CPU_SUBTYPE_POWERPC_620: 1017 return "620"; 1018 case CPU_SUBTYPE_POWERPC_750: 1019 return "750"; 1020 case CPU_SUBTYPE_POWERPC_7400: 1021 return "7400"; 1022 case CPU_SUBTYPE_POWERPC_7450: 1023 return "7450"; 1024 case CPU_SUBTYPE_POWERPC_970: 1025 return "970"; 1026 default:; 1027 } 1028 1029 return "generic"; 1030 } 1031 #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__)) 1032 StringRef sys::getHostCPUName() { 1033 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1034 const StringRef& Content = P ? P->getBuffer() : ""; 1035 return detail::getHostCPUNameForPowerPC(Content); 1036 } 1037 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1038 StringRef sys::getHostCPUName() { 1039 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1040 const StringRef& Content = P ? P->getBuffer() : ""; 1041 return detail::getHostCPUNameForARM(Content); 1042 } 1043 #elif defined(__linux__) && defined(__s390x__) 1044 StringRef sys::getHostCPUName() { 1045 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1046 const StringRef& Content = P ? P->getBuffer() : ""; 1047 return detail::getHostCPUNameForS390x(Content); 1048 } 1049 #else 1050 StringRef sys::getHostCPUName() { return "generic"; } 1051 #endif 1052 1053 #if defined(__linux__) && defined(__x86_64__) 1054 // On Linux, the number of physical cores can be computed from /proc/cpuinfo, 1055 // using the number of unique physical/core id pairs. The following 1056 // implementation reads the /proc/cpuinfo format on an x86_64 system. 1057 static int computeHostNumPhysicalCores() { 1058 // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be 1059 // mmapped because it appears to have 0 size. 1060 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 1061 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); 1062 if (std::error_code EC = Text.getError()) { 1063 llvm::errs() << "Can't read " 1064 << "/proc/cpuinfo: " << EC.message() << "\n"; 1065 return -1; 1066 } 1067 SmallVector<StringRef, 8> strs; 1068 (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1, 1069 /*KeepEmpty=*/false); 1070 int CurPhysicalId = -1; 1071 int CurCoreId = -1; 1072 SmallSet<std::pair<int, int>, 32> UniqueItems; 1073 for (auto &Line : strs) { 1074 Line = Line.trim(); 1075 if (!Line.startswith("physical id") && !Line.startswith("core id")) 1076 continue; 1077 std::pair<StringRef, StringRef> Data = Line.split(':'); 1078 auto Name = Data.first.trim(); 1079 auto Val = Data.second.trim(); 1080 if (Name == "physical id") { 1081 assert(CurPhysicalId == -1 && 1082 "Expected a core id before seeing another physical id"); 1083 Val.getAsInteger(10, CurPhysicalId); 1084 } 1085 if (Name == "core id") { 1086 assert(CurCoreId == -1 && 1087 "Expected a physical id before seeing another core id"); 1088 Val.getAsInteger(10, CurCoreId); 1089 } 1090 if (CurPhysicalId != -1 && CurCoreId != -1) { 1091 UniqueItems.insert(std::make_pair(CurPhysicalId, CurCoreId)); 1092 CurPhysicalId = -1; 1093 CurCoreId = -1; 1094 } 1095 } 1096 return UniqueItems.size(); 1097 } 1098 #elif defined(__APPLE__) && defined(__x86_64__) 1099 #include <sys/param.h> 1100 #include <sys/sysctl.h> 1101 1102 // Gets the number of *physical cores* on the machine. 1103 static int computeHostNumPhysicalCores() { 1104 uint32_t count; 1105 size_t len = sizeof(count); 1106 sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0); 1107 if (count < 1) { 1108 int nm[2]; 1109 nm[0] = CTL_HW; 1110 nm[1] = HW_AVAILCPU; 1111 sysctl(nm, 2, &count, &len, NULL, 0); 1112 if (count < 1) 1113 return -1; 1114 } 1115 return count; 1116 } 1117 #else 1118 // On other systems, return -1 to indicate unknown. 1119 static int computeHostNumPhysicalCores() { return -1; } 1120 #endif 1121 1122 int sys::getHostNumPhysicalCores() { 1123 static int NumCores = computeHostNumPhysicalCores(); 1124 return NumCores; 1125 } 1126 1127 #if defined(__i386__) || defined(_M_IX86) || \ 1128 defined(__x86_64__) || defined(_M_X64) 1129 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1130 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1131 unsigned MaxLevel; 1132 union { 1133 unsigned u[3]; 1134 char c[12]; 1135 } text; 1136 1137 if (getX86CpuIDAndInfo(0, &MaxLevel, text.u + 0, text.u + 2, text.u + 1) || 1138 MaxLevel < 1) 1139 return false; 1140 1141 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); 1142 1143 Features["cmov"] = (EDX >> 15) & 1; 1144 Features["mmx"] = (EDX >> 23) & 1; 1145 Features["sse"] = (EDX >> 25) & 1; 1146 Features["sse2"] = (EDX >> 26) & 1; 1147 1148 Features["sse3"] = (ECX >> 0) & 1; 1149 Features["pclmul"] = (ECX >> 1) & 1; 1150 Features["ssse3"] = (ECX >> 9) & 1; 1151 Features["cx16"] = (ECX >> 13) & 1; 1152 Features["sse4.1"] = (ECX >> 19) & 1; 1153 Features["sse4.2"] = (ECX >> 20) & 1; 1154 Features["movbe"] = (ECX >> 22) & 1; 1155 Features["popcnt"] = (ECX >> 23) & 1; 1156 Features["aes"] = (ECX >> 25) & 1; 1157 Features["rdrnd"] = (ECX >> 30) & 1; 1158 1159 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 1160 // indicates that the AVX registers will be saved and restored on context 1161 // switch, then we have full AVX support. 1162 bool HasAVXSave = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) && 1163 !getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6); 1164 // AVX512 requires additional context to be saved by the OS. 1165 bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); 1166 1167 Features["avx"] = HasAVXSave; 1168 Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave; 1169 // Only enable XSAVE if OS has enabled support for saving YMM state. 1170 Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave; 1171 Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave; 1172 1173 unsigned MaxExtLevel; 1174 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1175 1176 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1177 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1178 Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); 1179 Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); 1180 Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); 1181 Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; 1182 Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1); 1183 Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; 1184 Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); 1185 Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1); 1186 1187 bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 && 1188 !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX); 1189 Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1); 1190 1191 bool HasLeaf7 = 1192 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1193 1194 Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); 1195 Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1); 1196 Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); 1197 // AVX2 is only supported if we have the OS save support from AVX. 1198 Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave; 1199 Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); 1200 Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); 1201 // AVX512 is only supported if the OS supports the context save for it. 1202 Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; 1203 Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; 1204 Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); 1205 Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); 1206 Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save; 1207 Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1); 1208 Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1); 1209 Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; 1210 Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; 1211 Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; 1212 Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); 1213 Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; 1214 Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; 1215 1216 Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1); 1217 Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; 1218 Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); 1219 Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save; 1220 Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1); 1221 Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1); 1222 Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave; 1223 Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave; 1224 Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save; 1225 Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save; 1226 Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; 1227 1228 Features["ibt"] = HasLeaf7 && ((EDX >> 20) & 1); 1229 1230 bool HasLeafD = MaxLevel >= 0xd && 1231 !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); 1232 1233 // Only enable XSAVE if OS has enabled support for saving YMM state. 1234 Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave; 1235 Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave; 1236 Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave; 1237 1238 return true; 1239 } 1240 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1241 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1242 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1243 if (!P) 1244 return false; 1245 1246 SmallVector<StringRef, 32> Lines; 1247 P->getBuffer().split(Lines, "\n"); 1248 1249 SmallVector<StringRef, 32> CPUFeatures; 1250 1251 // Look for the CPU features. 1252 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 1253 if (Lines[I].startswith("Features")) { 1254 Lines[I].split(CPUFeatures, ' '); 1255 break; 1256 } 1257 1258 #if defined(__aarch64__) 1259 // Keep track of which crypto features we have seen 1260 enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 }; 1261 uint32_t crypto = 0; 1262 #endif 1263 1264 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 1265 StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I]) 1266 #if defined(__aarch64__) 1267 .Case("asimd", "neon") 1268 .Case("fp", "fp-armv8") 1269 .Case("crc32", "crc") 1270 #else 1271 .Case("half", "fp16") 1272 .Case("neon", "neon") 1273 .Case("vfpv3", "vfp3") 1274 .Case("vfpv3d16", "d16") 1275 .Case("vfpv4", "vfp4") 1276 .Case("idiva", "hwdiv-arm") 1277 .Case("idivt", "hwdiv") 1278 #endif 1279 .Default(""); 1280 1281 #if defined(__aarch64__) 1282 // We need to check crypto separately since we need all of the crypto 1283 // extensions to enable the subtarget feature 1284 if (CPUFeatures[I] == "aes") 1285 crypto |= CAP_AES; 1286 else if (CPUFeatures[I] == "pmull") 1287 crypto |= CAP_PMULL; 1288 else if (CPUFeatures[I] == "sha1") 1289 crypto |= CAP_SHA1; 1290 else if (CPUFeatures[I] == "sha2") 1291 crypto |= CAP_SHA2; 1292 #endif 1293 1294 if (LLVMFeatureStr != "") 1295 Features[LLVMFeatureStr] = true; 1296 } 1297 1298 #if defined(__aarch64__) 1299 // If we have all crypto bits we can add the feature 1300 if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2)) 1301 Features["crypto"] = true; 1302 #endif 1303 1304 return true; 1305 } 1306 #else 1307 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; } 1308 #endif 1309 1310 std::string sys::getProcessTriple() { 1311 std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE); 1312 Triple PT(Triple::normalize(TargetTripleString)); 1313 1314 if (sizeof(void *) == 8 && PT.isArch32Bit()) 1315 PT = PT.get64BitArchVariant(); 1316 if (sizeof(void *) == 4 && PT.isArch64Bit()) 1317 PT = PT.get32BitArchVariant(); 1318 1319 return PT.str(); 1320 } 1321