//===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9db17bf38SDimitry Andric
10db17bf38SDimitry Andric #include "llvm/BinaryFormat/Magic.h"
11db17bf38SDimitry Andric
12db17bf38SDimitry Andric #include "llvm/BinaryFormat/COFF.h"
13db17bf38SDimitry Andric #include "llvm/BinaryFormat/ELF.h"
14db17bf38SDimitry Andric #include "llvm/BinaryFormat/MachO.h"
15db17bf38SDimitry Andric #include "llvm/Support/Endian.h"
16db17bf38SDimitry Andric #include "llvm/Support/FileSystem.h"
174ba319b5SDimitry Andric #include "llvm/Support/MemoryBuffer.h"
18db17bf38SDimitry Andric
19db17bf38SDimitry Andric #if !defined(_MSC_VER) && !defined(__MINGW32__)
20db17bf38SDimitry Andric #include <unistd.h>
21db17bf38SDimitry Andric #else
22db17bf38SDimitry Andric #include <io.h>
23db17bf38SDimitry Andric #endif
24db17bf38SDimitry Andric
25db17bf38SDimitry Andric using namespace llvm;
26db17bf38SDimitry Andric using namespace llvm::support::endian;
27db17bf38SDimitry Andric using namespace llvm::sys::fs;
28db17bf38SDimitry Andric
29db17bf38SDimitry Andric template <size_t N>
startswith(StringRef Magic,const char (& S)[N])30db17bf38SDimitry Andric static bool startswith(StringRef Magic, const char (&S)[N]) {
31db17bf38SDimitry Andric return Magic.startswith(StringRef(S, N - 1));
32db17bf38SDimitry Andric }
33db17bf38SDimitry Andric
344ba319b5SDimitry Andric /// Identify the magic in magic.
identify_magic(StringRef Magic)35db17bf38SDimitry Andric file_magic llvm::identify_magic(StringRef Magic) {
36db17bf38SDimitry Andric if (Magic.size() < 4)
37db17bf38SDimitry Andric return file_magic::unknown;
38db17bf38SDimitry Andric switch ((unsigned char)Magic[0]) {
39db17bf38SDimitry Andric case 0x00: {
40db17bf38SDimitry Andric // COFF bigobj, CL.exe's LTO object file, or short import library file
41db17bf38SDimitry Andric if (startswith(Magic, "\0\0\xFF\xFF")) {
42db17bf38SDimitry Andric size_t MinSize =
43db17bf38SDimitry Andric offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic);
44db17bf38SDimitry Andric if (Magic.size() < MinSize)
45db17bf38SDimitry Andric return file_magic::coff_import_library;
46db17bf38SDimitry Andric
47db17bf38SDimitry Andric const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID);
48db17bf38SDimitry Andric if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0)
49db17bf38SDimitry Andric return file_magic::coff_object;
50db17bf38SDimitry Andric if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0)
51db17bf38SDimitry Andric return file_magic::coff_cl_gl_object;
52db17bf38SDimitry Andric return file_magic::coff_import_library;
53db17bf38SDimitry Andric }
54db17bf38SDimitry Andric // Windows resource file
55edd7eaddSDimitry Andric if (Magic.size() >= sizeof(COFF::WinResMagic) &&
56edd7eaddSDimitry Andric memcmp(Magic.data(), COFF::WinResMagic, sizeof(COFF::WinResMagic)) == 0)
57db17bf38SDimitry Andric return file_magic::windows_resource;
58db17bf38SDimitry Andric // 0x0000 = COFF unknown machine type
59db17bf38SDimitry Andric if (Magic[1] == 0)
60db17bf38SDimitry Andric return file_magic::coff_object;
61db17bf38SDimitry Andric if (startswith(Magic, "\0asm"))
62db17bf38SDimitry Andric return file_magic::wasm_object;
63db17bf38SDimitry Andric break;
64db17bf38SDimitry Andric }
65db17bf38SDimitry Andric case 0xDE: // 0x0B17C0DE = BC wraper
66db17bf38SDimitry Andric if (startswith(Magic, "\xDE\xC0\x17\x0B"))
67db17bf38SDimitry Andric return file_magic::bitcode;
68db17bf38SDimitry Andric break;
69db17bf38SDimitry Andric case 'B':
70db17bf38SDimitry Andric if (startswith(Magic, "BC\xC0\xDE"))
71db17bf38SDimitry Andric return file_magic::bitcode;
72db17bf38SDimitry Andric break;
73db17bf38SDimitry Andric case '!':
74db17bf38SDimitry Andric if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
75db17bf38SDimitry Andric return file_magic::archive;
76db17bf38SDimitry Andric break;
77db17bf38SDimitry Andric
78db17bf38SDimitry Andric case '\177':
79db17bf38SDimitry Andric if (startswith(Magic, "\177ELF") && Magic.size() >= 18) {
80db17bf38SDimitry Andric bool Data2MSB = Magic[5] == 2;
81db17bf38SDimitry Andric unsigned high = Data2MSB ? 16 : 17;
82db17bf38SDimitry Andric unsigned low = Data2MSB ? 17 : 16;
83db17bf38SDimitry Andric if (Magic[high] == 0) {
84db17bf38SDimitry Andric switch (Magic[low]) {
85db17bf38SDimitry Andric default:
86db17bf38SDimitry Andric return file_magic::elf;
87db17bf38SDimitry Andric case 1:
88db17bf38SDimitry Andric return file_magic::elf_relocatable;
89db17bf38SDimitry Andric case 2:
90db17bf38SDimitry Andric return file_magic::elf_executable;
91db17bf38SDimitry Andric case 3:
92db17bf38SDimitry Andric return file_magic::elf_shared_object;
93db17bf38SDimitry Andric case 4:
94db17bf38SDimitry Andric return file_magic::elf_core;
95db17bf38SDimitry Andric }
96db17bf38SDimitry Andric }
97db17bf38SDimitry Andric // It's still some type of ELF file.
98db17bf38SDimitry Andric return file_magic::elf;
99db17bf38SDimitry Andric }
100db17bf38SDimitry Andric break;
101db17bf38SDimitry Andric
102db17bf38SDimitry Andric case 0xCA:
103db17bf38SDimitry Andric if (startswith(Magic, "\xCA\xFE\xBA\xBE") ||
104db17bf38SDimitry Andric startswith(Magic, "\xCA\xFE\xBA\xBF")) {
105db17bf38SDimitry Andric // This is complicated by an overlap with Java class files.
106db17bf38SDimitry Andric // See the Mach-O section in /usr/share/file/magic for details.
107db17bf38SDimitry Andric if (Magic.size() >= 8 && Magic[7] < 43)
108db17bf38SDimitry Andric return file_magic::macho_universal_binary;
109db17bf38SDimitry Andric }
110db17bf38SDimitry Andric break;
111db17bf38SDimitry Andric
112db17bf38SDimitry Andric // The two magic numbers for mach-o are:
113db17bf38SDimitry Andric // 0xfeedface - 32-bit mach-o
114db17bf38SDimitry Andric // 0xfeedfacf - 64-bit mach-o
115db17bf38SDimitry Andric case 0xFE:
116db17bf38SDimitry Andric case 0xCE:
117db17bf38SDimitry Andric case 0xCF: {
118db17bf38SDimitry Andric uint16_t type = 0;
119db17bf38SDimitry Andric if (startswith(Magic, "\xFE\xED\xFA\xCE") ||
120db17bf38SDimitry Andric startswith(Magic, "\xFE\xED\xFA\xCF")) {
121db17bf38SDimitry Andric /* Native endian */
122db17bf38SDimitry Andric size_t MinSize;
123db17bf38SDimitry Andric if (Magic[3] == char(0xCE))
124db17bf38SDimitry Andric MinSize = sizeof(MachO::mach_header);
125db17bf38SDimitry Andric else
126db17bf38SDimitry Andric MinSize = sizeof(MachO::mach_header_64);
127db17bf38SDimitry Andric if (Magic.size() >= MinSize)
128db17bf38SDimitry Andric type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15];
129db17bf38SDimitry Andric } else if (startswith(Magic, "\xCE\xFA\xED\xFE") ||
130db17bf38SDimitry Andric startswith(Magic, "\xCF\xFA\xED\xFE")) {
131db17bf38SDimitry Andric /* Reverse endian */
132db17bf38SDimitry Andric size_t MinSize;
133db17bf38SDimitry Andric if (Magic[0] == char(0xCE))
134db17bf38SDimitry Andric MinSize = sizeof(MachO::mach_header);
135db17bf38SDimitry Andric else
136db17bf38SDimitry Andric MinSize = sizeof(MachO::mach_header_64);
137db17bf38SDimitry Andric if (Magic.size() >= MinSize)
138db17bf38SDimitry Andric type = Magic[15] << 24 | Magic[14] << 12 | Magic[13] << 8 | Magic[12];
139db17bf38SDimitry Andric }
140db17bf38SDimitry Andric switch (type) {
141db17bf38SDimitry Andric default:
142db17bf38SDimitry Andric break;
143db17bf38SDimitry Andric case 1:
144db17bf38SDimitry Andric return file_magic::macho_object;
145db17bf38SDimitry Andric case 2:
146db17bf38SDimitry Andric return file_magic::macho_executable;
147db17bf38SDimitry Andric case 3:
148db17bf38SDimitry Andric return file_magic::macho_fixed_virtual_memory_shared_lib;
149db17bf38SDimitry Andric case 4:
150db17bf38SDimitry Andric return file_magic::macho_core;
151db17bf38SDimitry Andric case 5:
152db17bf38SDimitry Andric return file_magic::macho_preload_executable;
153db17bf38SDimitry Andric case 6:
154db17bf38SDimitry Andric return file_magic::macho_dynamically_linked_shared_lib;
155db17bf38SDimitry Andric case 7:
156db17bf38SDimitry Andric return file_magic::macho_dynamic_linker;
157db17bf38SDimitry Andric case 8:
158db17bf38SDimitry Andric return file_magic::macho_bundle;
159db17bf38SDimitry Andric case 9:
160db17bf38SDimitry Andric return file_magic::macho_dynamically_linked_shared_lib_stub;
161db17bf38SDimitry Andric case 10:
162db17bf38SDimitry Andric return file_magic::macho_dsym_companion;
163db17bf38SDimitry Andric case 11:
164db17bf38SDimitry Andric return file_magic::macho_kext_bundle;
165db17bf38SDimitry Andric }
166db17bf38SDimitry Andric break;
167db17bf38SDimitry Andric }
168db17bf38SDimitry Andric case 0xF0: // PowerPC Windows
169db17bf38SDimitry Andric case 0x83: // Alpha 32-bit
170db17bf38SDimitry Andric case 0x84: // Alpha 64-bit
171db17bf38SDimitry Andric case 0x66: // MPS R4000 Windows
172db17bf38SDimitry Andric case 0x50: // mc68K
173db17bf38SDimitry Andric case 0x4c: // 80386 Windows
174db17bf38SDimitry Andric case 0xc4: // ARMNT Windows
175db17bf38SDimitry Andric if (Magic[1] == 0x01)
176db17bf38SDimitry Andric return file_magic::coff_object;
177db17bf38SDimitry Andric LLVM_FALLTHROUGH;
178db17bf38SDimitry Andric
179db17bf38SDimitry Andric case 0x90: // PA-RISC Windows
180db17bf38SDimitry Andric case 0x68: // mc68K Windows
181db17bf38SDimitry Andric if (Magic[1] == 0x02)
182db17bf38SDimitry Andric return file_magic::coff_object;
183db17bf38SDimitry Andric break;
184db17bf38SDimitry Andric
1854ba319b5SDimitry Andric case 'M': // Possible MS-DOS stub on Windows PE file or MSF/PDB file.
1862cab237bSDimitry Andric if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) {
187db17bf38SDimitry Andric uint32_t off = read32le(Magic.data() + 0x3c);
188db17bf38SDimitry Andric // PE/COFF file, either EXE or DLL.
1892cab237bSDimitry Andric if (Magic.substr(off).startswith(
1902cab237bSDimitry Andric StringRef(COFF::PEMagic, sizeof(COFF::PEMagic))))
191db17bf38SDimitry Andric return file_magic::pecoff_executable;
192db17bf38SDimitry Andric }
1934ba319b5SDimitry Andric if (Magic.startswith("Microsoft C/C++ MSF 7.00\r\n"))
1944ba319b5SDimitry Andric return file_magic::pdb;
195db17bf38SDimitry Andric break;
196db17bf38SDimitry Andric
197a580b014SDimitry Andric case 0x64: // x86-64 or ARM64 Windows.
198a580b014SDimitry Andric if (Magic[1] == char(0x86) || Magic[1] == char(0xaa))
199db17bf38SDimitry Andric return file_magic::coff_object;
200db17bf38SDimitry Andric break;
201db17bf38SDimitry Andric
202db17bf38SDimitry Andric default:
203db17bf38SDimitry Andric break;
204db17bf38SDimitry Andric }
205db17bf38SDimitry Andric return file_magic::unknown;
206db17bf38SDimitry Andric }
207db17bf38SDimitry Andric
identify_magic(const Twine & Path,file_magic & Result)208db17bf38SDimitry Andric std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) {
209*b5893f02SDimitry Andric auto FileOrError = MemoryBuffer::getFile(Path, -1LL, false);
2104ba319b5SDimitry Andric if (!FileOrError)
2114ba319b5SDimitry Andric return FileOrError.getError();
212db17bf38SDimitry Andric
2134ba319b5SDimitry Andric std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
2144ba319b5SDimitry Andric Result = identify_magic(FileBuffer->getBuffer());
215db17bf38SDimitry Andric
216db17bf38SDimitry Andric return std::error_code();
217db17bf38SDimitry Andric }
218