1 //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the operating system Host concept. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Support/Host.h" 15 #include "llvm/ADT/SmallSet.h" 16 #include "llvm/ADT/SmallVector.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/ADT/StringSwitch.h" 19 #include "llvm/ADT/Triple.h" 20 #include "llvm/Config/config.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/FileSystem.h" 23 #include "llvm/Support/MemoryBuffer.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include <assert.h> 26 #include <string.h> 27 28 // Include the platform-specific parts of this class. 29 #ifdef LLVM_ON_UNIX 30 #include "Unix/Host.inc" 31 #endif 32 #ifdef LLVM_ON_WIN32 33 #include "Windows/Host.inc" 34 #endif 35 #ifdef _MSC_VER 36 #include <intrin.h> 37 #endif 38 #if defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) 39 #include <mach/host_info.h> 40 #include <mach/mach.h> 41 #include <mach/mach_host.h> 42 #include <mach/machine.h> 43 #endif 44 45 #define DEBUG_TYPE "host-detection" 46 47 //===----------------------------------------------------------------------===// 48 // 49 // Implementations of the CPU detection routines 50 // 51 //===----------------------------------------------------------------------===// 52 53 using namespace llvm; 54 55 #if defined(__linux__) 56 static ssize_t LLVM_ATTRIBUTE_UNUSED readCpuInfo(void *Buf, size_t Size) { 57 // Note: We cannot mmap /proc/cpuinfo here and then process the resulting 58 // memory buffer because the 'file' has 0 size (it can be read from only 59 // as a stream). 60 61 int FD; 62 std::error_code EC = sys::fs::openFileForRead("/proc/cpuinfo", FD); 63 if (EC) { 64 DEBUG(dbgs() << "Unable to open /proc/cpuinfo: " << EC.message() << "\n"); 65 return -1; 66 } 67 int Ret = read(FD, Buf, Size); 68 int CloseStatus = close(FD); 69 if (CloseStatus) 70 return -1; 71 return Ret; 72 } 73 #endif 74 75 #if defined(__i386__) || defined(_M_IX86) || \ 76 defined(__x86_64__) || defined(_M_X64) 77 78 enum VendorSignatures { 79 SIG_INTEL = 0x756e6547 /* Genu */, 80 SIG_AMD = 0x68747541 /* Auth */ 81 }; 82 83 enum ProcessorVendors { 84 VENDOR_INTEL = 1, 85 VENDOR_AMD, 86 VENDOR_OTHER, 87 VENDOR_MAX 88 }; 89 90 enum ProcessorTypes { 91 INTEL_ATOM = 1, 92 INTEL_CORE2, 93 INTEL_COREI7, 94 AMDFAM10H, 95 AMDFAM15H, 96 INTEL_i386, 97 INTEL_i486, 98 INTEL_PENTIUM, 99 INTEL_PENTIUM_PRO, 100 INTEL_PENTIUM_II, 101 INTEL_PENTIUM_III, 102 INTEL_PENTIUM_IV, 103 INTEL_PENTIUM_M, 104 INTEL_CORE_DUO, 105 INTEL_XEONPHI, 106 INTEL_X86_64, 107 INTEL_NOCONA, 108 INTEL_PRESCOTT, 109 AMD_i486, 110 AMDPENTIUM, 111 AMDATHLON, 112 AMDFAM14H, 113 AMDFAM16H, 114 AMDFAM17H, 115 CPU_TYPE_MAX 116 }; 117 118 enum ProcessorSubtypes { 119 INTEL_COREI7_NEHALEM = 1, 120 INTEL_COREI7_WESTMERE, 121 INTEL_COREI7_SANDYBRIDGE, 122 AMDFAM10H_BARCELONA, 123 AMDFAM10H_SHANGHAI, 124 AMDFAM10H_ISTANBUL, 125 AMDFAM15H_BDVER1, 126 AMDFAM15H_BDVER2, 127 INTEL_PENTIUM_MMX, 128 INTEL_CORE2_65, 129 INTEL_CORE2_45, 130 INTEL_COREI7_IVYBRIDGE, 131 INTEL_COREI7_HASWELL, 132 INTEL_COREI7_BROADWELL, 133 INTEL_COREI7_SKYLAKE, 134 INTEL_COREI7_SKYLAKE_AVX512, 135 INTEL_ATOM_BONNELL, 136 INTEL_ATOM_SILVERMONT, 137 INTEL_KNIGHTS_LANDING, 138 AMDPENTIUM_K6, 139 AMDPENTIUM_K62, 140 AMDPENTIUM_K63, 141 AMDPENTIUM_GEODE, 142 AMDATHLON_TBIRD, 143 AMDATHLON_MP, 144 AMDATHLON_XP, 145 AMDATHLON_K8SSE3, 146 AMDATHLON_OPTERON, 147 AMDATHLON_FX, 148 AMDATHLON_64, 149 AMD_BTVER1, 150 AMD_BTVER2, 151 AMDFAM15H_BDVER3, 152 AMDFAM15H_BDVER4, 153 AMDFAM17H_ZNVER1, 154 CPU_SUBTYPE_MAX 155 }; 156 157 enum ProcessorFeatures { 158 FEATURE_CMOV = 0, 159 FEATURE_MMX, 160 FEATURE_POPCNT, 161 FEATURE_SSE, 162 FEATURE_SSE2, 163 FEATURE_SSE3, 164 FEATURE_SSSE3, 165 FEATURE_SSE4_1, 166 FEATURE_SSE4_2, 167 FEATURE_AVX, 168 FEATURE_AVX2, 169 FEATURE_AVX512, 170 FEATURE_AVX512SAVE, 171 FEATURE_MOVBE, 172 FEATURE_ADX, 173 FEATURE_EM64T 174 }; 175 176 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). 177 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID 178 // support. Consequently, for i386, the presence of CPUID is checked first 179 // via the corresponding eflags bit. 180 // Removal of cpuid.h header motivated by PR30384 181 // Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp 182 // or test-suite, but are used in external projects e.g. libstdcxx 183 static bool isCpuIdSupported() { 184 #if defined(__GNUC__) || defined(__clang__) 185 #if defined(__i386__) 186 int __cpuid_supported; 187 __asm__(" pushfl\n" 188 " popl %%eax\n" 189 " movl %%eax,%%ecx\n" 190 " xorl $0x00200000,%%eax\n" 191 " pushl %%eax\n" 192 " popfl\n" 193 " pushfl\n" 194 " popl %%eax\n" 195 " movl $0,%0\n" 196 " cmpl %%eax,%%ecx\n" 197 " je 1f\n" 198 " movl $1,%0\n" 199 "1:" 200 : "=r"(__cpuid_supported) 201 : 202 : "eax", "ecx"); 203 if (!__cpuid_supported) 204 return false; 205 #endif 206 return true; 207 #endif 208 return true; 209 } 210 211 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in 212 /// the specified arguments. If we can't run cpuid on the host, return true. 213 static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, 214 unsigned *rECX, unsigned *rEDX) { 215 #if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) 216 #if defined(__GNUC__) || defined(__clang__) 217 #if defined(__x86_64__) 218 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 219 // FIXME: should we save this for Clang? 220 __asm__("movq\t%%rbx, %%rsi\n\t" 221 "cpuid\n\t" 222 "xchgq\t%%rbx, %%rsi\n\t" 223 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 224 : "a"(value)); 225 #elif defined(__i386__) 226 __asm__("movl\t%%ebx, %%esi\n\t" 227 "cpuid\n\t" 228 "xchgl\t%%ebx, %%esi\n\t" 229 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 230 : "a"(value)); 231 #else 232 assert(0 && "This method is defined only for x86."); 233 #endif 234 #elif defined(_MSC_VER) 235 // The MSVC intrinsic is portable across x86 and x64. 236 int registers[4]; 237 __cpuid(registers, value); 238 *rEAX = registers[0]; 239 *rEBX = registers[1]; 240 *rECX = registers[2]; 241 *rEDX = registers[3]; 242 #endif 243 return false; 244 #else 245 return true; 246 #endif 247 } 248 249 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return 250 /// the 4 values in the specified arguments. If we can't run cpuid on the host, 251 /// return true. 252 static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, 253 unsigned *rEAX, unsigned *rEBX, unsigned *rECX, 254 unsigned *rEDX) { 255 #if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) 256 #if defined(__x86_64__) || defined(_M_X64) 257 #if defined(__GNUC__) || defined(__clang__) 258 // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually. 259 // FIXME: should we save this for Clang? 260 __asm__("movq\t%%rbx, %%rsi\n\t" 261 "cpuid\n\t" 262 "xchgq\t%%rbx, %%rsi\n\t" 263 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 264 : "a"(value), "c"(subleaf)); 265 #elif defined(_MSC_VER) 266 int registers[4]; 267 __cpuidex(registers, value, subleaf); 268 *rEAX = registers[0]; 269 *rEBX = registers[1]; 270 *rECX = registers[2]; 271 *rEDX = registers[3]; 272 #endif 273 #elif defined(__i386__) || defined(_M_IX86) 274 #if defined(__GNUC__) || defined(__clang__) 275 __asm__("movl\t%%ebx, %%esi\n\t" 276 "cpuid\n\t" 277 "xchgl\t%%ebx, %%esi\n\t" 278 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 279 : "a"(value), "c"(subleaf)); 280 #elif defined(_MSC_VER) 281 __asm { 282 mov eax,value 283 mov ecx,subleaf 284 cpuid 285 mov esi,rEAX 286 mov dword ptr [esi],eax 287 mov esi,rEBX 288 mov dword ptr [esi],ebx 289 mov esi,rECX 290 mov dword ptr [esi],ecx 291 mov esi,rEDX 292 mov dword ptr [esi],edx 293 } 294 #endif 295 #else 296 assert(0 && "This method is defined only for x86."); 297 #endif 298 return false; 299 #else 300 return true; 301 #endif 302 } 303 304 static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { 305 #if defined(__GNUC__) || defined(__clang__) 306 // Check xgetbv; this uses a .byte sequence instead of the instruction 307 // directly because older assemblers do not include support for xgetbv and 308 // there is no easy way to conditionally compile based on the assembler used. 309 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); 310 return false; 311 #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) 312 unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); 313 *rEAX = Result; 314 *rEDX = Result >> 32; 315 return false; 316 #else 317 return true; 318 #endif 319 } 320 321 static void detectX86FamilyModel(unsigned EAX, unsigned *Family, 322 unsigned *Model) { 323 *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 324 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 325 if (*Family == 6 || *Family == 0xf) { 326 if (*Family == 0xf) 327 // Examine extended family ID if family ID is F. 328 *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 329 // Examine extended model ID if family ID is 6 or F. 330 *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 331 } 332 } 333 334 static void 335 getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, 336 unsigned int Brand_id, unsigned int Features, 337 unsigned *Type, unsigned *Subtype) { 338 if (Brand_id != 0) 339 return; 340 switch (Family) { 341 case 3: 342 *Type = INTEL_i386; 343 break; 344 case 4: 345 switch (Model) { 346 case 0: // Intel486 DX processors 347 case 1: // Intel486 DX processors 348 case 2: // Intel486 SX processors 349 case 3: // Intel487 processors, IntelDX2 OverDrive processors, 350 // IntelDX2 processors 351 case 4: // Intel486 SL processor 352 case 5: // IntelSX2 processors 353 case 7: // Write-Back Enhanced IntelDX2 processors 354 case 8: // IntelDX4 OverDrive processors, IntelDX4 processors 355 default: 356 *Type = INTEL_i486; 357 break; 358 } 359 break; 360 case 5: 361 switch (Model) { 362 case 1: // Pentium OverDrive processor for Pentium processor (60, 66), 363 // Pentium processors (60, 66) 364 case 2: // Pentium OverDrive processor for Pentium processor (75, 90, 365 // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133, 366 // 150, 166, 200) 367 case 3: // Pentium OverDrive processors for Intel486 processor-based 368 // systems 369 *Type = INTEL_PENTIUM; 370 break; 371 case 4: // Pentium OverDrive processor with MMX technology for Pentium 372 // processor (75, 90, 100, 120, 133), Pentium processor with 373 // MMX technology (166, 200) 374 *Type = INTEL_PENTIUM; 375 *Subtype = INTEL_PENTIUM_MMX; 376 break; 377 default: 378 *Type = INTEL_PENTIUM; 379 break; 380 } 381 break; 382 case 6: 383 switch (Model) { 384 case 0x01: // Pentium Pro processor 385 *Type = INTEL_PENTIUM_PRO; 386 break; 387 case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor, 388 // model 03 389 case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor, 390 // model 05, and Intel Celeron processor, model 05 391 case 0x06: // Celeron processor, model 06 392 *Type = INTEL_PENTIUM_II; 393 break; 394 case 0x07: // Pentium III processor, model 07, and Pentium III Xeon 395 // processor, model 07 396 case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor, 397 // model 08, and Celeron processor, model 08 398 case 0x0a: // Pentium III Xeon processor, model 0Ah 399 case 0x0b: // Pentium III processor, model 0Bh 400 *Type = INTEL_PENTIUM_III; 401 break; 402 case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09. 403 case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model 404 // 0Dh. All processors are manufactured using the 90 nm process. 405 case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579 406 // Integrated Processor with Intel QuickAssist Technology 407 *Type = INTEL_PENTIUM_M; 408 break; 409 case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model 410 // 0Eh. All processors are manufactured using the 65 nm process. 411 *Type = INTEL_CORE_DUO; 412 break; // yonah 413 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile 414 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad 415 // mobile processor, Intel Core 2 Extreme processor, Intel 416 // Pentium Dual-Core processor, Intel Xeon processor, model 417 // 0Fh. All processors are manufactured using the 65 nm process. 418 case 0x16: // Intel Celeron processor model 16h. All processors are 419 // manufactured using the 65 nm process 420 *Type = INTEL_CORE2; // "core2" 421 *Subtype = INTEL_CORE2_65; 422 break; 423 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model 424 // 17h. All processors are manufactured using the 45 nm process. 425 // 426 // 45nm: Penryn , Wolfdale, Yorkfield (XE) 427 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using 428 // the 45 nm process. 429 *Type = INTEL_CORE2; // "penryn" 430 *Subtype = INTEL_CORE2_45; 431 break; 432 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All 433 // processors are manufactured using the 45 nm process. 434 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. 435 // As found in a Summer 2010 model iMac. 436 case 0x1f: 437 case 0x2e: // Nehalem EX 438 *Type = INTEL_COREI7; // "nehalem" 439 *Subtype = INTEL_COREI7_NEHALEM; 440 break; 441 case 0x25: // Intel Core i7, laptop version. 442 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All 443 // processors are manufactured using the 32 nm process. 444 case 0x2f: // Westmere EX 445 *Type = INTEL_COREI7; // "westmere" 446 *Subtype = INTEL_COREI7_WESTMERE; 447 break; 448 case 0x2a: // Intel Core i7 processor. All processors are manufactured 449 // using the 32 nm process. 450 case 0x2d: 451 *Type = INTEL_COREI7; //"sandybridge" 452 *Subtype = INTEL_COREI7_SANDYBRIDGE; 453 break; 454 case 0x3a: 455 case 0x3e: // Ivy Bridge EP 456 *Type = INTEL_COREI7; // "ivybridge" 457 *Subtype = INTEL_COREI7_IVYBRIDGE; 458 break; 459 460 // Haswell: 461 case 0x3c: 462 case 0x3f: 463 case 0x45: 464 case 0x46: 465 *Type = INTEL_COREI7; // "haswell" 466 *Subtype = INTEL_COREI7_HASWELL; 467 break; 468 469 // Broadwell: 470 case 0x3d: 471 case 0x47: 472 case 0x4f: 473 case 0x56: 474 *Type = INTEL_COREI7; // "broadwell" 475 *Subtype = INTEL_COREI7_BROADWELL; 476 break; 477 478 // Skylake: 479 case 0x4e: // Skylake mobile 480 case 0x5e: // Skylake desktop 481 case 0x8e: // Kaby Lake mobile 482 case 0x9e: // Kaby Lake desktop 483 *Type = INTEL_COREI7; // "skylake" 484 *Subtype = INTEL_COREI7_SKYLAKE; 485 break; 486 487 // Skylake Xeon: 488 case 0x55: 489 *Type = INTEL_COREI7; 490 // Check that we really have AVX512 491 if (Features & (1 << FEATURE_AVX512)) { 492 *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" 493 } else { 494 *Subtype = INTEL_COREI7_SKYLAKE; // "skylake" 495 } 496 break; 497 498 case 0x1c: // Most 45 nm Intel Atom processors 499 case 0x26: // 45 nm Atom Lincroft 500 case 0x27: // 32 nm Atom Medfield 501 case 0x35: // 32 nm Atom Midview 502 case 0x36: // 32 nm Atom Midview 503 *Type = INTEL_ATOM; 504 *Subtype = INTEL_ATOM_BONNELL; 505 break; // "bonnell" 506 507 // Atom Silvermont codes from the Intel software optimization guide. 508 case 0x37: 509 case 0x4a: 510 case 0x4d: 511 case 0x5a: 512 case 0x5d: 513 case 0x4c: // really airmont 514 *Type = INTEL_ATOM; 515 *Subtype = INTEL_ATOM_SILVERMONT; 516 break; // "silvermont" 517 518 case 0x57: 519 *Type = INTEL_XEONPHI; // knl 520 *Subtype = INTEL_KNIGHTS_LANDING; 521 break; 522 523 default: // Unknown family 6 CPU, try to guess. 524 if (Features & (1 << FEATURE_AVX512)) { 525 *Type = INTEL_XEONPHI; // knl 526 *Subtype = INTEL_KNIGHTS_LANDING; 527 break; 528 } 529 if (Features & (1 << FEATURE_ADX)) { 530 *Type = INTEL_COREI7; 531 *Subtype = INTEL_COREI7_BROADWELL; 532 break; 533 } 534 if (Features & (1 << FEATURE_AVX2)) { 535 *Type = INTEL_COREI7; 536 *Subtype = INTEL_COREI7_HASWELL; 537 break; 538 } 539 if (Features & (1 << FEATURE_AVX)) { 540 *Type = INTEL_COREI7; 541 *Subtype = INTEL_COREI7_SANDYBRIDGE; 542 break; 543 } 544 if (Features & (1 << FEATURE_SSE4_2)) { 545 if (Features & (1 << FEATURE_MOVBE)) { 546 *Type = INTEL_ATOM; 547 *Subtype = INTEL_ATOM_SILVERMONT; 548 } else { 549 *Type = INTEL_COREI7; 550 *Subtype = INTEL_COREI7_NEHALEM; 551 } 552 break; 553 } 554 if (Features & (1 << FEATURE_SSE4_1)) { 555 *Type = INTEL_CORE2; // "penryn" 556 *Subtype = INTEL_CORE2_45; 557 break; 558 } 559 if (Features & (1 << FEATURE_SSSE3)) { 560 if (Features & (1 << FEATURE_MOVBE)) { 561 *Type = INTEL_ATOM; 562 *Subtype = INTEL_ATOM_BONNELL; // "bonnell" 563 } else { 564 *Type = INTEL_CORE2; // "core2" 565 *Subtype = INTEL_CORE2_65; 566 } 567 break; 568 } 569 if (Features & (1 << FEATURE_EM64T)) { 570 *Type = INTEL_X86_64; 571 break; // x86-64 572 } 573 if (Features & (1 << FEATURE_SSE2)) { 574 *Type = INTEL_PENTIUM_M; 575 break; 576 } 577 if (Features & (1 << FEATURE_SSE)) { 578 *Type = INTEL_PENTIUM_III; 579 break; 580 } 581 if (Features & (1 << FEATURE_MMX)) { 582 *Type = INTEL_PENTIUM_II; 583 break; 584 } 585 *Type = INTEL_PENTIUM_PRO; 586 break; 587 } 588 break; 589 case 15: { 590 switch (Model) { 591 case 0: // Pentium 4 processor, Intel Xeon processor. All processors are 592 // model 00h and manufactured using the 0.18 micron process. 593 case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon 594 // processor MP, and Intel Celeron processor. All processors are 595 // model 01h and manufactured using the 0.18 micron process. 596 case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor - M, 597 // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron 598 // processor, and Mobile Intel Celeron processor. All processors 599 // are model 02h and manufactured using the 0.13 micron process. 600 *Type = 601 ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV); 602 break; 603 604 case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D 605 // processor. All processors are model 03h and manufactured using 606 // the 90 nm process. 607 case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition, 608 // Pentium D processor, Intel Xeon processor, Intel Xeon 609 // processor MP, Intel Celeron D processor. All processors are 610 // model 04h and manufactured using the 90 nm process. 611 case 6: // Pentium 4 processor, Pentium D processor, Pentium processor 612 // Extreme Edition, Intel Xeon processor, Intel Xeon processor 613 // MP, Intel Celeron D processor. All processors are model 06h 614 // and manufactured using the 65 nm process. 615 *Type = 616 ((Features & (1 << FEATURE_EM64T)) ? INTEL_NOCONA : INTEL_PRESCOTT); 617 break; 618 619 default: 620 *Type = 621 ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV); 622 break; 623 } 624 break; 625 } 626 default: 627 break; /*"generic"*/ 628 } 629 } 630 631 static void getAMDProcessorTypeAndSubtype(unsigned int Family, 632 unsigned int Model, 633 unsigned int Features, 634 unsigned *Type, 635 unsigned *Subtype) { 636 // FIXME: this poorly matches the generated SubtargetFeatureKV table. There 637 // appears to be no way to generate the wide variety of AMD-specific targets 638 // from the information returned from CPUID. 639 switch (Family) { 640 case 4: 641 *Type = AMD_i486; 642 break; 643 case 5: 644 *Type = AMDPENTIUM; 645 switch (Model) { 646 case 6: 647 case 7: 648 *Subtype = AMDPENTIUM_K6; 649 break; // "k6" 650 case 8: 651 *Subtype = AMDPENTIUM_K62; 652 break; // "k6-2" 653 case 9: 654 case 13: 655 *Subtype = AMDPENTIUM_K63; 656 break; // "k6-3" 657 case 10: 658 *Subtype = AMDPENTIUM_GEODE; 659 break; // "geode" 660 } 661 break; 662 case 6: 663 *Type = AMDATHLON; 664 switch (Model) { 665 case 4: 666 *Subtype = AMDATHLON_TBIRD; 667 break; // "athlon-tbird" 668 case 6: 669 case 7: 670 case 8: 671 *Subtype = AMDATHLON_MP; 672 break; // "athlon-mp" 673 case 10: 674 *Subtype = AMDATHLON_XP; 675 break; // "athlon-xp" 676 } 677 break; 678 case 15: 679 *Type = AMDATHLON; 680 if (Features & (1 << FEATURE_SSE3)) { 681 *Subtype = AMDATHLON_K8SSE3; 682 break; // "k8-sse3" 683 } 684 switch (Model) { 685 case 1: 686 *Subtype = AMDATHLON_OPTERON; 687 break; // "opteron" 688 case 5: 689 *Subtype = AMDATHLON_FX; 690 break; // "athlon-fx"; also opteron 691 default: 692 *Subtype = AMDATHLON_64; 693 break; // "athlon64" 694 } 695 break; 696 case 16: 697 *Type = AMDFAM10H; // "amdfam10" 698 switch (Model) { 699 case 2: 700 *Subtype = AMDFAM10H_BARCELONA; 701 break; 702 case 4: 703 *Subtype = AMDFAM10H_SHANGHAI; 704 break; 705 case 8: 706 *Subtype = AMDFAM10H_ISTANBUL; 707 break; 708 } 709 break; 710 case 20: 711 *Type = AMDFAM14H; 712 *Subtype = AMD_BTVER1; 713 break; // "btver1"; 714 case 21: 715 *Type = AMDFAM15H; 716 if (!(Features & 717 (1 << FEATURE_AVX))) { // If no AVX support, provide a sane fallback. 718 *Subtype = AMD_BTVER1; 719 break; // "btver1" 720 } 721 if (Model >= 0x50 && Model <= 0x6f) { 722 *Subtype = AMDFAM15H_BDVER4; 723 break; // "bdver4"; 50h-6Fh: Excavator 724 } 725 if (Model >= 0x30 && Model <= 0x3f) { 726 *Subtype = AMDFAM15H_BDVER3; 727 break; // "bdver3"; 30h-3Fh: Steamroller 728 } 729 if (Model >= 0x10 && Model <= 0x1f) { 730 *Subtype = AMDFAM15H_BDVER2; 731 break; // "bdver2"; 10h-1Fh: Piledriver 732 } 733 if (Model <= 0x0f) { 734 *Subtype = AMDFAM15H_BDVER1; 735 break; // "bdver1"; 00h-0Fh: Bulldozer 736 } 737 break; 738 case 22: 739 *Type = AMDFAM16H; 740 if (!(Features & 741 (1 << FEATURE_AVX))) { // If no AVX support provide a sane fallback. 742 *Subtype = AMD_BTVER1; 743 break; // "btver1"; 744 } 745 *Subtype = AMD_BTVER2; 746 break; // "btver2" 747 case 23: 748 *Type = AMDFAM17H; 749 if (Features & (1 << FEATURE_ADX)) { 750 *Subtype = AMDFAM17H_ZNVER1; 751 break; // "znver1" 752 } 753 *Subtype = AMD_BTVER1; 754 break; 755 default: 756 break; // "generic" 757 } 758 } 759 760 static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX, 761 unsigned MaxLeaf) { 762 unsigned Features = 0; 763 unsigned int EAX, EBX; 764 Features |= (((EDX >> 23) & 1) << FEATURE_MMX); 765 Features |= (((EDX >> 25) & 1) << FEATURE_SSE); 766 Features |= (((EDX >> 26) & 1) << FEATURE_SSE2); 767 Features |= (((ECX >> 0) & 1) << FEATURE_SSE3); 768 Features |= (((ECX >> 9) & 1) << FEATURE_SSSE3); 769 Features |= (((ECX >> 19) & 1) << FEATURE_SSE4_1); 770 Features |= (((ECX >> 20) & 1) << FEATURE_SSE4_2); 771 Features |= (((ECX >> 22) & 1) << FEATURE_MOVBE); 772 773 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 774 // indicates that the AVX registers will be saved and restored on context 775 // switch, then we have full AVX support. 776 const unsigned AVXBits = (1 << 27) | (1 << 28); 777 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && 778 ((EAX & 0x6) == 0x6); 779 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); 780 bool HasLeaf7 = 781 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 782 bool HasADX = HasLeaf7 && ((EBX >> 19) & 1); 783 bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20); 784 bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1); 785 Features |= (HasAVX << FEATURE_AVX); 786 Features |= (HasAVX2 << FEATURE_AVX2); 787 Features |= (HasAVX512 << FEATURE_AVX512); 788 Features |= (HasAVX512Save << FEATURE_AVX512SAVE); 789 Features |= (HasADX << FEATURE_ADX); 790 791 getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 792 Features |= (((EDX >> 29) & 0x1) << FEATURE_EM64T); 793 return Features; 794 } 795 796 StringRef sys::getHostCPUName() { 797 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 798 unsigned MaxLeaf, Vendor; 799 800 #if defined(__GNUC__) || defined(__clang__) 801 //FIXME: include cpuid.h from clang or copy __get_cpuid_max here 802 // and simplify it to not invoke __cpuid (like cpu_model.c in 803 // compiler-rt/lib/builtins/cpu_model.c? 804 // Opting for the second option. 805 if(!isCpuIdSupported()) 806 return "generic"; 807 #endif 808 if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX)) 809 return "generic"; 810 if (getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) 811 return "generic"; 812 813 unsigned Brand_id = EBX & 0xff; 814 unsigned Family = 0, Model = 0; 815 unsigned Features = 0; 816 detectX86FamilyModel(EAX, &Family, &Model); 817 Features = getAvailableFeatures(ECX, EDX, MaxLeaf); 818 819 unsigned Type; 820 unsigned Subtype; 821 822 if (Vendor == SIG_INTEL) { 823 getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, &Type, 824 &Subtype); 825 switch (Type) { 826 case INTEL_i386: 827 return "i386"; 828 case INTEL_i486: 829 return "i486"; 830 case INTEL_PENTIUM: 831 if (Subtype == INTEL_PENTIUM_MMX) 832 return "pentium-mmx"; 833 return "pentium"; 834 case INTEL_PENTIUM_PRO: 835 return "pentiumpro"; 836 case INTEL_PENTIUM_II: 837 return "pentium2"; 838 case INTEL_PENTIUM_III: 839 return "pentium3"; 840 case INTEL_PENTIUM_IV: 841 return "pentium4"; 842 case INTEL_PENTIUM_M: 843 return "pentium-m"; 844 case INTEL_CORE_DUO: 845 return "yonah"; 846 case INTEL_CORE2: 847 switch (Subtype) { 848 case INTEL_CORE2_65: 849 return "core2"; 850 case INTEL_CORE2_45: 851 return "penryn"; 852 default: 853 return "core2"; 854 } 855 case INTEL_COREI7: 856 switch (Subtype) { 857 case INTEL_COREI7_NEHALEM: 858 return "nehalem"; 859 case INTEL_COREI7_WESTMERE: 860 return "westmere"; 861 case INTEL_COREI7_SANDYBRIDGE: 862 return "sandybridge"; 863 case INTEL_COREI7_IVYBRIDGE: 864 return "ivybridge"; 865 case INTEL_COREI7_HASWELL: 866 return "haswell"; 867 case INTEL_COREI7_BROADWELL: 868 return "broadwell"; 869 case INTEL_COREI7_SKYLAKE: 870 return "skylake"; 871 case INTEL_COREI7_SKYLAKE_AVX512: 872 return "skylake-avx512"; 873 default: 874 return "corei7"; 875 } 876 case INTEL_ATOM: 877 switch (Subtype) { 878 case INTEL_ATOM_BONNELL: 879 return "bonnell"; 880 case INTEL_ATOM_SILVERMONT: 881 return "silvermont"; 882 default: 883 return "atom"; 884 } 885 case INTEL_XEONPHI: 886 return "knl"; /*update for more variants added*/ 887 case INTEL_X86_64: 888 return "x86-64"; 889 case INTEL_NOCONA: 890 return "nocona"; 891 case INTEL_PRESCOTT: 892 return "prescott"; 893 default: 894 return "generic"; 895 } 896 } else if (Vendor == SIG_AMD) { 897 getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); 898 switch (Type) { 899 case AMD_i486: 900 return "i486"; 901 case AMDPENTIUM: 902 switch (Subtype) { 903 case AMDPENTIUM_K6: 904 return "k6"; 905 case AMDPENTIUM_K62: 906 return "k6-2"; 907 case AMDPENTIUM_K63: 908 return "k6-3"; 909 case AMDPENTIUM_GEODE: 910 return "geode"; 911 default: 912 return "pentium"; 913 } 914 case AMDATHLON: 915 switch (Subtype) { 916 case AMDATHLON_TBIRD: 917 return "athlon-tbird"; 918 case AMDATHLON_MP: 919 return "athlon-mp"; 920 case AMDATHLON_XP: 921 return "athlon-xp"; 922 case AMDATHLON_K8SSE3: 923 return "k8-sse3"; 924 case AMDATHLON_OPTERON: 925 return "opteron"; 926 case AMDATHLON_FX: 927 return "athlon-fx"; 928 case AMDATHLON_64: 929 return "athlon64"; 930 default: 931 return "athlon"; 932 } 933 case AMDFAM10H: 934 if(Subtype == AMDFAM10H_BARCELONA) 935 return "barcelona"; 936 return "amdfam10"; 937 case AMDFAM14H: 938 return "btver1"; 939 case AMDFAM15H: 940 switch (Subtype) { 941 case AMDFAM15H_BDVER1: 942 return "bdver1"; 943 case AMDFAM15H_BDVER2: 944 return "bdver2"; 945 case AMDFAM15H_BDVER3: 946 return "bdver3"; 947 case AMDFAM15H_BDVER4: 948 return "bdver4"; 949 case AMD_BTVER1: 950 return "btver1"; 951 default: 952 return "amdfam15"; 953 } 954 case AMDFAM16H: 955 switch (Subtype) { 956 case AMD_BTVER1: 957 return "btver1"; 958 case AMD_BTVER2: 959 return "btver2"; 960 default: 961 return "amdfam16"; 962 } 963 case AMDFAM17H: 964 switch (Subtype) { 965 case AMD_BTVER1: 966 return "btver1"; 967 case AMDFAM17H_ZNVER1: 968 return "znver1"; 969 default: 970 return "amdfam17"; 971 } 972 default: 973 return "generic"; 974 } 975 } 976 return "generic"; 977 } 978 979 #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) 980 StringRef sys::getHostCPUName() { 981 host_basic_info_data_t hostInfo; 982 mach_msg_type_number_t infoCount; 983 984 infoCount = HOST_BASIC_INFO_COUNT; 985 host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo, 986 &infoCount); 987 988 if (hostInfo.cpu_type != CPU_TYPE_POWERPC) 989 return "generic"; 990 991 switch (hostInfo.cpu_subtype) { 992 case CPU_SUBTYPE_POWERPC_601: 993 return "601"; 994 case CPU_SUBTYPE_POWERPC_602: 995 return "602"; 996 case CPU_SUBTYPE_POWERPC_603: 997 return "603"; 998 case CPU_SUBTYPE_POWERPC_603e: 999 return "603e"; 1000 case CPU_SUBTYPE_POWERPC_603ev: 1001 return "603ev"; 1002 case CPU_SUBTYPE_POWERPC_604: 1003 return "604"; 1004 case CPU_SUBTYPE_POWERPC_604e: 1005 return "604e"; 1006 case CPU_SUBTYPE_POWERPC_620: 1007 return "620"; 1008 case CPU_SUBTYPE_POWERPC_750: 1009 return "750"; 1010 case CPU_SUBTYPE_POWERPC_7400: 1011 return "7400"; 1012 case CPU_SUBTYPE_POWERPC_7450: 1013 return "7450"; 1014 case CPU_SUBTYPE_POWERPC_970: 1015 return "970"; 1016 default:; 1017 } 1018 1019 return "generic"; 1020 } 1021 #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__)) 1022 StringRef sys::getHostCPUName() { 1023 // Access to the Processor Version Register (PVR) on PowerPC is privileged, 1024 // and so we must use an operating-system interface to determine the current 1025 // processor type. On Linux, this is exposed through the /proc/cpuinfo file. 1026 const char *generic = "generic"; 1027 1028 // The cpu line is second (after the 'processor: 0' line), so if this 1029 // buffer is too small then something has changed (or is wrong). 1030 char buffer[1024]; 1031 ssize_t CPUInfoSize = readCpuInfo(buffer, sizeof(buffer)); 1032 if (CPUInfoSize == -1) 1033 return generic; 1034 1035 const char *CPUInfoStart = buffer; 1036 const char *CPUInfoEnd = buffer + CPUInfoSize; 1037 1038 const char *CIP = CPUInfoStart; 1039 1040 const char *CPUStart = 0; 1041 size_t CPULen = 0; 1042 1043 // We need to find the first line which starts with cpu, spaces, and a colon. 1044 // After the colon, there may be some additional spaces and then the cpu type. 1045 while (CIP < CPUInfoEnd && CPUStart == 0) { 1046 if (CIP < CPUInfoEnd && *CIP == '\n') 1047 ++CIP; 1048 1049 if (CIP < CPUInfoEnd && *CIP == 'c') { 1050 ++CIP; 1051 if (CIP < CPUInfoEnd && *CIP == 'p') { 1052 ++CIP; 1053 if (CIP < CPUInfoEnd && *CIP == 'u') { 1054 ++CIP; 1055 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 1056 ++CIP; 1057 1058 if (CIP < CPUInfoEnd && *CIP == ':') { 1059 ++CIP; 1060 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 1061 ++CIP; 1062 1063 if (CIP < CPUInfoEnd) { 1064 CPUStart = CIP; 1065 while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' && 1066 *CIP != ',' && *CIP != '\n')) 1067 ++CIP; 1068 CPULen = CIP - CPUStart; 1069 } 1070 } 1071 } 1072 } 1073 } 1074 1075 if (CPUStart == 0) 1076 while (CIP < CPUInfoEnd && *CIP != '\n') 1077 ++CIP; 1078 } 1079 1080 if (CPUStart == 0) 1081 return generic; 1082 1083 return StringSwitch<const char *>(StringRef(CPUStart, CPULen)) 1084 .Case("604e", "604e") 1085 .Case("604", "604") 1086 .Case("7400", "7400") 1087 .Case("7410", "7400") 1088 .Case("7447", "7400") 1089 .Case("7455", "7450") 1090 .Case("G4", "g4") 1091 .Case("POWER4", "970") 1092 .Case("PPC970FX", "970") 1093 .Case("PPC970MP", "970") 1094 .Case("G5", "g5") 1095 .Case("POWER5", "g5") 1096 .Case("A2", "a2") 1097 .Case("POWER6", "pwr6") 1098 .Case("POWER7", "pwr7") 1099 .Case("POWER8", "pwr8") 1100 .Case("POWER8E", "pwr8") 1101 .Case("POWER8NVL", "pwr8") 1102 .Case("POWER9", "pwr9") 1103 .Default(generic); 1104 } 1105 #elif defined(__linux__) && defined(__arm__) 1106 StringRef sys::getHostCPUName() { 1107 // The cpuid register on arm is not accessible from user space. On Linux, 1108 // it is exposed through the /proc/cpuinfo file. 1109 1110 // Read 1024 bytes from /proc/cpuinfo, which should contain the CPU part line 1111 // in all cases. 1112 char buffer[1024]; 1113 ssize_t CPUInfoSize = readCpuInfo(buffer, sizeof(buffer)); 1114 if (CPUInfoSize == -1) 1115 return "generic"; 1116 1117 StringRef Str(buffer, CPUInfoSize); 1118 1119 SmallVector<StringRef, 32> Lines; 1120 Str.split(Lines, "\n"); 1121 1122 // Look for the CPU implementer line. 1123 StringRef Implementer; 1124 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 1125 if (Lines[I].startswith("CPU implementer")) 1126 Implementer = Lines[I].substr(15).ltrim("\t :"); 1127 1128 if (Implementer == "0x41") // ARM Ltd. 1129 // Look for the CPU part line. 1130 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 1131 if (Lines[I].startswith("CPU part")) 1132 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 1133 // values correspond to the "Part number" in the CP15/c0 register. The 1134 // contents are specified in the various processor manuals. 1135 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 1136 .Case("0x926", "arm926ej-s") 1137 .Case("0xb02", "mpcore") 1138 .Case("0xb36", "arm1136j-s") 1139 .Case("0xb56", "arm1156t2-s") 1140 .Case("0xb76", "arm1176jz-s") 1141 .Case("0xc08", "cortex-a8") 1142 .Case("0xc09", "cortex-a9") 1143 .Case("0xc0f", "cortex-a15") 1144 .Case("0xc20", "cortex-m0") 1145 .Case("0xc23", "cortex-m3") 1146 .Case("0xc24", "cortex-m4") 1147 .Default("generic"); 1148 1149 if (Implementer == "0x51") // Qualcomm Technologies, Inc. 1150 // Look for the CPU part line. 1151 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 1152 if (Lines[I].startswith("CPU part")) 1153 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 1154 // values correspond to the "Part number" in the CP15/c0 register. The 1155 // contents are specified in the various processor manuals. 1156 return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) 1157 .Case("0x06f", "krait") // APQ8064 1158 .Default("generic"); 1159 1160 return "generic"; 1161 } 1162 #elif defined(__linux__) && defined(__s390x__) 1163 StringRef sys::getHostCPUName() { 1164 // STIDP is a privileged operation, so use /proc/cpuinfo instead. 1165 1166 // The "processor 0:" line comes after a fair amount of other information, 1167 // including a cache breakdown, but this should be plenty. 1168 char buffer[2048]; 1169 ssize_t CPUInfoSize = readCpuInfo(buffer, sizeof(buffer)); 1170 if (CPUInfoSize == -1) 1171 return "generic"; 1172 1173 StringRef Str(buffer, CPUInfoSize); 1174 SmallVector<StringRef, 32> Lines; 1175 Str.split(Lines, "\n"); 1176 1177 // Look for the CPU features. 1178 SmallVector<StringRef, 32> CPUFeatures; 1179 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 1180 if (Lines[I].startswith("features")) { 1181 size_t Pos = Lines[I].find(":"); 1182 if (Pos != StringRef::npos) { 1183 Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' '); 1184 break; 1185 } 1186 } 1187 1188 // We need to check for the presence of vector support independently of 1189 // the machine type, since we may only use the vector register set when 1190 // supported by the kernel (and hypervisor). 1191 bool HaveVectorSupport = false; 1192 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 1193 if (CPUFeatures[I] == "vx") 1194 HaveVectorSupport = true; 1195 } 1196 1197 // Now check the processor machine type. 1198 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 1199 if (Lines[I].startswith("processor ")) { 1200 size_t Pos = Lines[I].find("machine = "); 1201 if (Pos != StringRef::npos) { 1202 Pos += sizeof("machine = ") - 1; 1203 unsigned int Id; 1204 if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) { 1205 if (Id >= 2964 && HaveVectorSupport) 1206 return "z13"; 1207 if (Id >= 2827) 1208 return "zEC12"; 1209 if (Id >= 2817) 1210 return "z196"; 1211 } 1212 } 1213 break; 1214 } 1215 } 1216 1217 return "generic"; 1218 } 1219 #else 1220 StringRef sys::getHostCPUName() { return "generic"; } 1221 #endif 1222 1223 #if defined(__linux__) && defined(__x86_64__) 1224 // On Linux, the number of physical cores can be computed from /proc/cpuinfo, 1225 // using the number of unique physical/core id pairs. The following 1226 // implementation reads the /proc/cpuinfo format on an x86_64 system. 1227 static int computeHostNumPhysicalCores() { 1228 // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be 1229 // mmapped because it appears to have 0 size. 1230 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 1231 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); 1232 if (std::error_code EC = Text.getError()) { 1233 llvm::errs() << "Can't read " 1234 << "/proc/cpuinfo: " << EC.message() << "\n"; 1235 } 1236 SmallVector<StringRef, 8> strs; 1237 (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1, 1238 /*KeepEmpty=*/false); 1239 int CurPhysicalId = -1; 1240 int CurCoreId = -1; 1241 SmallSet<std::pair<int, int>, 32> UniqueItems; 1242 for (auto &Line : strs) { 1243 Line = Line.trim(); 1244 if (!Line.startswith("physical id") && !Line.startswith("core id")) 1245 continue; 1246 std::pair<StringRef, StringRef> Data = Line.split(':'); 1247 auto Name = Data.first.trim(); 1248 auto Val = Data.second.trim(); 1249 if (Name == "physical id") { 1250 assert(CurPhysicalId == -1 && 1251 "Expected a core id before seeing another physical id"); 1252 Val.getAsInteger(10, CurPhysicalId); 1253 } 1254 if (Name == "core id") { 1255 assert(CurCoreId == -1 && 1256 "Expected a physical id before seeing another core id"); 1257 Val.getAsInteger(10, CurCoreId); 1258 } 1259 if (CurPhysicalId != -1 && CurCoreId != -1) { 1260 UniqueItems.insert(std::make_pair(CurPhysicalId, CurCoreId)); 1261 CurPhysicalId = -1; 1262 CurCoreId = -1; 1263 } 1264 } 1265 return UniqueItems.size(); 1266 } 1267 #elif defined(__APPLE__) && defined(__x86_64__) 1268 #include <sys/param.h> 1269 #include <sys/sysctl.h> 1270 1271 // Gets the number of *physical cores* on the machine. 1272 static int computeHostNumPhysicalCores() { 1273 uint32_t count; 1274 size_t len = sizeof(count); 1275 sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0); 1276 if (count < 1) { 1277 int nm[2]; 1278 nm[0] = CTL_HW; 1279 nm[1] = HW_AVAILCPU; 1280 sysctl(nm, 2, &count, &len, NULL, 0); 1281 if (count < 1) 1282 return -1; 1283 } 1284 return count; 1285 } 1286 #else 1287 // On other systems, return -1 to indicate unknown. 1288 static int computeHostNumPhysicalCores() { return -1; } 1289 #endif 1290 1291 int sys::getHostNumPhysicalCores() { 1292 static int NumCores = computeHostNumPhysicalCores(); 1293 return NumCores; 1294 } 1295 1296 #if defined(__i386__) || defined(_M_IX86) || \ 1297 defined(__x86_64__) || defined(_M_X64) 1298 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1299 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1300 unsigned MaxLevel; 1301 union { 1302 unsigned u[3]; 1303 char c[12]; 1304 } text; 1305 1306 if (getX86CpuIDAndInfo(0, &MaxLevel, text.u + 0, text.u + 2, text.u + 1) || 1307 MaxLevel < 1) 1308 return false; 1309 1310 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); 1311 1312 Features["cmov"] = (EDX >> 15) & 1; 1313 Features["mmx"] = (EDX >> 23) & 1; 1314 Features["sse"] = (EDX >> 25) & 1; 1315 Features["sse2"] = (EDX >> 26) & 1; 1316 Features["sse3"] = (ECX >> 0) & 1; 1317 Features["ssse3"] = (ECX >> 9) & 1; 1318 Features["sse4.1"] = (ECX >> 19) & 1; 1319 Features["sse4.2"] = (ECX >> 20) & 1; 1320 1321 Features["pclmul"] = (ECX >> 1) & 1; 1322 Features["cx16"] = (ECX >> 13) & 1; 1323 Features["movbe"] = (ECX >> 22) & 1; 1324 Features["popcnt"] = (ECX >> 23) & 1; 1325 Features["aes"] = (ECX >> 25) & 1; 1326 Features["rdrnd"] = (ECX >> 30) & 1; 1327 1328 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 1329 // indicates that the AVX registers will be saved and restored on context 1330 // switch, then we have full AVX support. 1331 bool HasAVXSave = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) && 1332 !getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6); 1333 Features["avx"] = HasAVXSave; 1334 Features["fma"] = HasAVXSave && (ECX >> 12) & 1; 1335 Features["f16c"] = HasAVXSave && (ECX >> 29) & 1; 1336 1337 // Only enable XSAVE if OS has enabled support for saving YMM state. 1338 Features["xsave"] = HasAVXSave && (ECX >> 26) & 1; 1339 1340 // AVX512 requires additional context to be saved by the OS. 1341 bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); 1342 1343 unsigned MaxExtLevel; 1344 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1345 1346 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1347 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1348 Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); 1349 Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); 1350 Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); 1351 Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; 1352 Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; 1353 Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); 1354 Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1); 1355 1356 bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 && 1357 !getX86CpuIDAndInfoEx(0x80000008,0x0, &EAX, &EBX, &ECX, &EDX); 1358 Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1); 1359 1360 bool HasLeaf7 = 1361 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1362 1363 // AVX2 is only supported if we have the OS save support from AVX. 1364 Features["avx2"] = HasAVXSave && HasLeaf7 && ((EBX >> 5) & 1); 1365 1366 Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); 1367 Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1); 1368 Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); 1369 Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); 1370 Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); 1371 Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); 1372 Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); 1373 Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1); 1374 Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1); 1375 Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); 1376 1377 // AVX512 is only supported if the OS supports the context save for it. 1378 Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; 1379 Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; 1380 Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save; 1381 Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; 1382 Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; 1383 Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; 1384 Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; 1385 Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; 1386 1387 Features["prefetchwt1"] = HasLeaf7 && (ECX & 1); 1388 Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; 1389 // Enable protection keys 1390 Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); 1391 1392 bool HasLeafD = MaxLevel >= 0xd && 1393 !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); 1394 1395 // Only enable XSAVE if OS has enabled support for saving YMM state. 1396 Features["xsaveopt"] = HasAVXSave && HasLeafD && ((EAX >> 0) & 1); 1397 Features["xsavec"] = HasAVXSave && HasLeafD && ((EAX >> 1) & 1); 1398 Features["xsaves"] = HasAVXSave && HasLeafD && ((EAX >> 3) & 1); 1399 1400 return true; 1401 } 1402 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1403 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1404 // Read 1024 bytes from /proc/cpuinfo, which should contain the Features line 1405 // in all cases. 1406 char buffer[1024]; 1407 ssize_t CPUInfoSize = readCpuInfo(buffer, sizeof(buffer)); 1408 if (CPUInfoSize == -1) 1409 return false; 1410 1411 StringRef Str(buffer, CPUInfoSize); 1412 1413 SmallVector<StringRef, 32> Lines; 1414 Str.split(Lines, "\n"); 1415 1416 SmallVector<StringRef, 32> CPUFeatures; 1417 1418 // Look for the CPU features. 1419 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 1420 if (Lines[I].startswith("Features")) { 1421 Lines[I].split(CPUFeatures, ' '); 1422 break; 1423 } 1424 1425 #if defined(__aarch64__) 1426 // Keep track of which crypto features we have seen 1427 enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 }; 1428 uint32_t crypto = 0; 1429 #endif 1430 1431 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 1432 StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I]) 1433 #if defined(__aarch64__) 1434 .Case("asimd", "neon") 1435 .Case("fp", "fp-armv8") 1436 .Case("crc32", "crc") 1437 #else 1438 .Case("half", "fp16") 1439 .Case("neon", "neon") 1440 .Case("vfpv3", "vfp3") 1441 .Case("vfpv3d16", "d16") 1442 .Case("vfpv4", "vfp4") 1443 .Case("idiva", "hwdiv-arm") 1444 .Case("idivt", "hwdiv") 1445 #endif 1446 .Default(""); 1447 1448 #if defined(__aarch64__) 1449 // We need to check crypto separately since we need all of the crypto 1450 // extensions to enable the subtarget feature 1451 if (CPUFeatures[I] == "aes") 1452 crypto |= CAP_AES; 1453 else if (CPUFeatures[I] == "pmull") 1454 crypto |= CAP_PMULL; 1455 else if (CPUFeatures[I] == "sha1") 1456 crypto |= CAP_SHA1; 1457 else if (CPUFeatures[I] == "sha2") 1458 crypto |= CAP_SHA2; 1459 #endif 1460 1461 if (LLVMFeatureStr != "") 1462 Features[LLVMFeatureStr] = true; 1463 } 1464 1465 #if defined(__aarch64__) 1466 // If we have all crypto bits we can add the feature 1467 if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2)) 1468 Features["crypto"] = true; 1469 #endif 1470 1471 return true; 1472 } 1473 #else 1474 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; } 1475 #endif 1476 1477 std::string sys::getProcessTriple() { 1478 Triple PT(Triple::normalize(LLVM_HOST_TRIPLE)); 1479 1480 if (sizeof(void *) == 8 && PT.isArch32Bit()) 1481 PT = PT.get64BitArchVariant(); 1482 if (sizeof(void *) == 4 && PT.isArch64Bit()) 1483 PT = PT.get32BitArchVariant(); 1484 1485 return PT.str(); 1486 } 1487