1 //===- Archive.cpp - ar File Format implementation --------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the ArchiveObjectFile class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Object/Archive.h"
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Support/Endian.h"
19 #include "llvm/Support/MemoryBuffer.h"
20 #include "llvm/Support/Path.h"
21 
22 using namespace llvm;
23 using namespace object;
24 using namespace llvm::support::endian;
25 
// Leading bytes of a buffer that the Archive constructor accepts as a regular
// (non-thin) archive.
static const char *const Magic = "!<arch>\n";
// Leading bytes of a buffer that the Archive constructor accepts as a thin
// archive. Both magic strings are eight characters long.
static const char *const ThinMagic = "!<thin>\n";
28 
// Out-of-line definition with an intentionally empty body.
// NOTE(review): presumably exists only to anchor the class to this
// translation unit -- confirm against the declaration in Archive.h.
void Archive::anchor() { }
30 
31 StringRef ArchiveMemberHeader::getName() const {
32   char EndCond;
33   if (Name[0] == '/' || Name[0] == '#')
34     EndCond = ' ';
35   else
36     EndCond = '/';
37   llvm::StringRef::size_type end =
38       llvm::StringRef(Name, sizeof(Name)).find(EndCond);
39   if (end == llvm::StringRef::npos)
40     end = sizeof(Name);
41   assert(end <= sizeof(Name) && end > 0);
42   // Don't include the EndCond if there is one.
43   return llvm::StringRef(Name, end);
44 }
45 
46 ErrorOr<uint32_t> ArchiveMemberHeader::getSize() const {
47   uint32_t Ret;
48   if (llvm::StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, Ret))
49     return object_error::parse_failed; // Size is not a decimal number.
50   return Ret;
51 }
52 
53 sys::fs::perms ArchiveMemberHeader::getAccessMode() const {
54   unsigned Ret;
55   if (StringRef(AccessMode, sizeof(AccessMode)).rtrim(' ').getAsInteger(8, Ret))
56     llvm_unreachable("Access mode is not an octal number.");
57   return static_cast<sys::fs::perms>(Ret);
58 }
59 
60 sys::TimeValue ArchiveMemberHeader::getLastModified() const {
61   unsigned Seconds;
62   if (StringRef(LastModified, sizeof(LastModified)).rtrim(' ')
63           .getAsInteger(10, Seconds))
64     llvm_unreachable("Last modified time not a decimal number.");
65 
66   sys::TimeValue Ret;
67   Ret.fromEpochTime(Seconds);
68   return Ret;
69 }
70 
71 unsigned ArchiveMemberHeader::getUID() const {
72   unsigned Ret;
73   if (StringRef(UID, sizeof(UID)).rtrim(' ').getAsInteger(10, Ret))
74     llvm_unreachable("UID time not a decimal number.");
75   return Ret;
76 }
77 
78 unsigned ArchiveMemberHeader::getGID() const {
79   unsigned Ret;
80   if (StringRef(GID, sizeof(GID)).rtrim(' ').getAsInteger(10, Ret))
81     llvm_unreachable("GID time not a decimal number.");
82   return Ret;
83 }
84 
// Construct a child directly from already-known parts: the owning archive,
// the bytes that make up the member, and the value to store as StartOfFile.
// No parsing or validation is performed here.
Archive::Child::Child(const Archive *Parent, StringRef Data,
                      uint16_t StartOfFile)
    : Parent(Parent), Data(Data), StartOfFile(StartOfFile) {}
88 
89 Archive::Child::Child(const Archive *Parent, const char *Start,
90                       std::error_code *EC)
91     : Parent(Parent) {
92   if (!Start)
93     return;
94 
95   uint64_t Size = sizeof(ArchiveMemberHeader);
96   Data = StringRef(Start, Size);
97   if (!isThinMember()) {
98     ErrorOr<uint64_t> MemberSize = getRawSize();
99     if ((*EC = MemberSize.getError()))
100       return;
101     Size += MemberSize.get();
102     Data = StringRef(Start, Size);
103   }
104 
105   // Setup StartOfFile and PaddingBytes.
106   StartOfFile = sizeof(ArchiveMemberHeader);
107   // Don't include attached name.
108   StringRef Name = getRawName();
109   if (Name.startswith("#1/")) {
110     uint64_t NameSize;
111     if (Name.substr(3).rtrim(' ').getAsInteger(10, NameSize))
112       llvm_unreachable("Long name length is not an integer");
113     StartOfFile += NameSize;
114   }
115 }
116 
117 ErrorOr<uint64_t> Archive::Child::getSize() const {
118   if (Parent->IsThin) {
119     ErrorOr<uint32_t> Size = getHeader()->getSize();
120     if (std::error_code EC = Size.getError())
121       return EC;
122     return Size.get();
123   }
124   return Data.size() - StartOfFile;
125 }
126 
127 ErrorOr<uint64_t> Archive::Child::getRawSize() const {
128   ErrorOr<uint32_t> Size = getHeader()->getSize();
129   if (std::error_code EC = Size.getError())
130     return EC;
131   return Size.get();
132 }
133 
134 bool Archive::Child::isThinMember() const {
135   StringRef Name = getHeader()->getName();
136   return Parent->IsThin && Name != "/" && Name != "//";
137 }
138 
139 ErrorOr<StringRef> Archive::Child::getBuffer() const {
140   if (!isThinMember()) {
141     ErrorOr<uint32_t> Size = getSize();
142     if (std::error_code EC = Size.getError())
143       return EC;
144     return StringRef(Data.data() + StartOfFile, Size.get());
145   }
146   ErrorOr<StringRef> Name = getName();
147   if (std::error_code EC = Name.getError())
148     return EC;
149   SmallString<128> FullName;
150   if (sys::path::is_absolute(*Name))
151     FullName = *Name;
152   else {
153     FullName = sys::path::parent_path(
154         Parent->getMemoryBufferRef().getBufferIdentifier());
155     sys::path::append(FullName, *Name);
156   }
157   ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
158   if (std::error_code EC = Buf.getError())
159     return EC;
160   Parent->ThinBuffers.push_back(std::move(*Buf));
161   return Parent->ThinBuffers.back()->getBuffer();
162 }
163 
164 ErrorOr<Archive::Child> Archive::Child::getNext() const {
165   size_t SpaceToSkip = Data.size();
166   // If it's odd, add 1 to make it even.
167   if (SpaceToSkip & 1)
168     ++SpaceToSkip;
169 
170   const char *NextLoc = Data.data() + SpaceToSkip;
171 
172   // Check to see if this is at the end of the archive.
173   if (NextLoc == Parent->Data.getBufferEnd())
174     return Child(Parent, nullptr, nullptr);
175 
176   // Check to see if this is past the end of the archive.
177   if (NextLoc > Parent->Data.getBufferEnd())
178     return object_error::parse_failed;
179 
180   std::error_code EC;
181   Child Ret(Parent, NextLoc, &EC);
182   if (EC)
183     return EC;
184   return Ret;
185 }
186 
187 uint64_t Archive::Child::getChildOffset() const {
188   const char *a = Parent->Data.getBuffer().data();
189   const char *c = Data.data();
190   uint64_t offset = c - a;
191   return offset;
192 }
193 
194 ErrorOr<StringRef> Archive::Child::getName() const {
195   StringRef name = getRawName();
196   // Check if it's a special name.
197   if (name[0] == '/') {
198     if (name.size() == 1) // Linker member.
199       return name;
200     if (name.size() == 2 && name[1] == '/') // String table.
201       return name;
202     // It's a long name.
203     // Get the offset.
204     std::size_t offset;
205     if (name.substr(1).rtrim(' ').getAsInteger(10, offset))
206       llvm_unreachable("Long name offset is not an integer");
207 
208     // Verify it.
209     if (offset >= Parent->StringTable.size())
210       return object_error::parse_failed;
211     const char *addr = Parent->StringTable.begin() + offset;
212 
213     // GNU long file names end with a "/\n".
214     if (Parent->kind() == K_GNU || Parent->kind() == K_MIPS64) {
215       StringRef::size_type End = StringRef(addr).find('\n');
216       return StringRef(addr, End - 1);
217     }
218     return StringRef(addr);
219   } else if (name.startswith("#1/")) {
220     uint64_t name_size;
221     if (name.substr(3).rtrim(' ').getAsInteger(10, name_size))
222       llvm_unreachable("Long name length is not an ingeter");
223     return Data.substr(sizeof(ArchiveMemberHeader), name_size).rtrim('\0');
224   }
225   // It's a simple name.
226   if (name[name.size() - 1] == '/')
227     return name.substr(0, name.size() - 1);
228   return name;
229 }
230 
231 ErrorOr<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
232   ErrorOr<StringRef> NameOrErr = getName();
233   if (std::error_code EC = NameOrErr.getError())
234     return EC;
235   StringRef Name = NameOrErr.get();
236   ErrorOr<StringRef> Buf = getBuffer();
237   if (std::error_code EC = Buf.getError())
238     return EC;
239   return MemoryBufferRef(*Buf, Name);
240 }
241 
242 ErrorOr<std::unique_ptr<Binary>>
243 Archive::Child::getAsBinary(LLVMContext *Context) const {
244   ErrorOr<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
245   if (std::error_code EC = BuffOrErr.getError())
246     return EC;
247 
248   auto BinaryOrErr = createBinary(BuffOrErr.get(), Context);
249   if (BinaryOrErr)
250     return std::move(*BinaryOrErr);
251   return errorToErrorCode(BinaryOrErr.takeError());
252 }
253 
254 ErrorOr<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
255   std::error_code EC;
256   std::unique_ptr<Archive> Ret(new Archive(Source, EC));
257   if (EC)
258     return EC;
259   return std::move(Ret);
260 }
261 
262 void Archive::setFirstRegular(const Child &C) {
263   FirstRegularData = C.Data;
264   FirstRegularStartOfFile = C.StartOfFile;
265 }
266 
267 Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
268     : Binary(Binary::ID_Archive, Source) {
269   StringRef Buffer = Data.getBuffer();
270   // Check for sufficient magic.
271   if (Buffer.startswith(ThinMagic)) {
272     IsThin = true;
273   } else if (Buffer.startswith(Magic)) {
274     IsThin = false;
275   } else {
276     ec = object_error::invalid_file_type;
277     return;
278   }
279 
280   // Get the special members.
281   child_iterator I = child_begin(false);
282   if ((ec = I->getError()))
283     return;
284   child_iterator E = child_end();
285 
286   if (I == E) {
287     ec = std::error_code();
288     return;
289   }
290   const Child *C = &**I;
291 
292   auto Increment = [&]() {
293     ++I;
294     if ((ec = I->getError()))
295       return true;
296     C = &**I;
297     return false;
298   };
299 
300   StringRef Name = C->getRawName();
301 
302   // Below is the pattern that is used to figure out the archive format
303   // GNU archive format
304   //  First member : / (may exist, if it exists, points to the symbol table )
305   //  Second member : // (may exist, if it exists, points to the string table)
306   //  Note : The string table is used if the filename exceeds 15 characters
307   // BSD archive format
308   //  First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
309   //  There is no string table, if the filename exceeds 15 characters or has a
310   //  embedded space, the filename has #1/<size>, The size represents the size
311   //  of the filename that needs to be read after the archive header
312   // COFF archive format
313   //  First member : /
314   //  Second member : / (provides a directory of symbols)
315   //  Third member : // (may exist, if it exists, contains the string table)
316   //  Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
317   //  even if the string table is empty. However, lib.exe does not in fact
318   //  seem to create the third member if there's no member whose filename
319   //  exceeds 15 characters. So the third member is optional.
320 
321   if (Name == "__.SYMDEF") {
322     Format = K_BSD;
323     // We know that the symbol table is not an external file, so we just assert
324     // there is no error.
325     SymbolTable = *C->getBuffer();
326     if (Increment())
327       return;
328     setFirstRegular(*C);
329 
330     ec = std::error_code();
331     return;
332   }
333 
334   if (Name.startswith("#1/")) {
335     Format = K_BSD;
336     // We know this is BSD, so getName will work since there is no string table.
337     ErrorOr<StringRef> NameOrErr = C->getName();
338     ec = NameOrErr.getError();
339     if (ec)
340       return;
341     Name = NameOrErr.get();
342     if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
343       // We know that the symbol table is not an external file, so we just
344       // assert there is no error.
345       SymbolTable = *C->getBuffer();
346       if (Increment())
347         return;
348     }
349     setFirstRegular(*C);
350     return;
351   }
352 
353   // MIPS 64-bit ELF archives use a special format of a symbol table.
354   // This format is marked by `ar_name` field equals to "/SYM64/".
355   // For detailed description see page 96 in the following document:
356   // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf
357 
358   bool has64SymTable = false;
359   if (Name == "/" || Name == "/SYM64/") {
360     // We know that the symbol table is not an external file, so we just assert
361     // there is no error.
362     SymbolTable = *C->getBuffer();
363     if (Name == "/SYM64/")
364       has64SymTable = true;
365 
366     if (Increment())
367       return;
368     if (I == E) {
369       ec = std::error_code();
370       return;
371     }
372     Name = C->getRawName();
373   }
374 
375   if (Name == "//") {
376     Format = has64SymTable ? K_MIPS64 : K_GNU;
377     // The string table is never an external member, so we just assert on the
378     // ErrorOr.
379     StringTable = *C->getBuffer();
380     if (Increment())
381       return;
382     setFirstRegular(*C);
383     ec = std::error_code();
384     return;
385   }
386 
387   if (Name[0] != '/') {
388     Format = has64SymTable ? K_MIPS64 : K_GNU;
389     setFirstRegular(*C);
390     ec = std::error_code();
391     return;
392   }
393 
394   if (Name != "/") {
395     ec = object_error::parse_failed;
396     return;
397   }
398 
399   Format = K_COFF;
400   // We know that the symbol table is not an external file, so we just assert
401   // there is no error.
402   SymbolTable = *C->getBuffer();
403 
404   if (Increment())
405     return;
406 
407   if (I == E) {
408     setFirstRegular(*C);
409     ec = std::error_code();
410     return;
411   }
412 
413   Name = C->getRawName();
414 
415   if (Name == "//") {
416     // The string table is never an external member, so we just assert on the
417     // ErrorOr.
418     StringTable = *C->getBuffer();
419     if (Increment())
420       return;
421   }
422 
423   setFirstRegular(*C);
424   ec = std::error_code();
425 }
426 
427 Archive::child_iterator Archive::child_begin(bool SkipInternal) const {
428   if (Data.getBufferSize() == 8) // empty archive.
429     return child_end();
430 
431   if (SkipInternal)
432     return Child(this, FirstRegularData, FirstRegularStartOfFile);
433 
434   const char *Loc = Data.getBufferStart() + strlen(Magic);
435   std::error_code EC;
436   Child c(this, Loc, &EC);
437   if (EC)
438     return child_iterator(EC);
439   return child_iterator(c);
440 }
441 
442 Archive::child_iterator Archive::child_end() const {
443   return Child(this, nullptr, nullptr);
444 }
445 
446 StringRef Archive::Symbol::getName() const {
447   return Parent->getSymbolTable().begin() + StringIndex;
448 }
449 
450 ErrorOr<Archive::Child> Archive::Symbol::getMember() const {
451   const char *Buf = Parent->getSymbolTable().begin();
452   const char *Offsets = Buf;
453   if (Parent->kind() == K_MIPS64)
454     Offsets += sizeof(uint64_t);
455   else
456     Offsets += sizeof(uint32_t);
457   uint32_t Offset = 0;
458   if (Parent->kind() == K_GNU) {
459     Offset = read32be(Offsets + SymbolIndex * 4);
460   } else if (Parent->kind() == K_MIPS64) {
461     Offset = read64be(Offsets + SymbolIndex * 8);
462   } else if (Parent->kind() == K_BSD) {
463     // The SymbolIndex is an index into the ranlib structs that start at
464     // Offsets (the first uint32_t is the number of bytes of the ranlib
465     // structs).  The ranlib structs are a pair of uint32_t's the first
466     // being a string table offset and the second being the offset into
467     // the archive of the member that defines the symbol.  Which is what
468     // is needed here.
469     Offset = read32le(Offsets + SymbolIndex * 8 + 4);
470   } else {
471     // Skip offsets.
472     uint32_t MemberCount = read32le(Buf);
473     Buf += MemberCount * 4 + 4;
474 
475     uint32_t SymbolCount = read32le(Buf);
476     if (SymbolIndex >= SymbolCount)
477       return object_error::parse_failed;
478 
479     // Skip SymbolCount to get to the indices table.
480     const char *Indices = Buf + 4;
481 
482     // Get the index of the offset in the file member offset table for this
483     // symbol.
484     uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
485     // Subtract 1 since OffsetIndex is 1 based.
486     --OffsetIndex;
487 
488     if (OffsetIndex >= MemberCount)
489       return object_error::parse_failed;
490 
491     Offset = read32le(Offsets + OffsetIndex * 4);
492   }
493 
494   const char *Loc = Parent->getData().begin() + Offset;
495   std::error_code EC;
496   Child C(Parent, Loc, &EC);
497   if (EC)
498     return EC;
499   return C;
500 }
501 
502 Archive::Symbol Archive::Symbol::getNext() const {
503   Symbol t(*this);
504   if (Parent->kind() == K_BSD) {
505     // t.StringIndex is an offset from the start of the __.SYMDEF or
506     // "__.SYMDEF SORTED" member into the string table for the ranlib
507     // struct indexed by t.SymbolIndex .  To change t.StringIndex to the
508     // offset in the string table for t.SymbolIndex+1 we subtract the
509     // its offset from the start of the string table for t.SymbolIndex
510     // and add the offset of the string table for t.SymbolIndex+1.
511 
512     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
513     // which is the number of bytes of ranlib structs that follow.  The ranlib
514     // structs are a pair of uint32_t's the first being a string table offset
515     // and the second being the offset into the archive of the member that
516     // define the symbol. After that the next uint32_t is the byte count of
517     // the string table followed by the string table.
518     const char *Buf = Parent->getSymbolTable().begin();
519     uint32_t RanlibCount = 0;
520     RanlibCount = read32le(Buf) / 8;
521     // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
522     // don't change the t.StringIndex as we don't want to reference a ranlib
523     // past RanlibCount.
524     if (t.SymbolIndex + 1 < RanlibCount) {
525       const char *Ranlibs = Buf + 4;
526       uint32_t CurRanStrx = 0;
527       uint32_t NextRanStrx = 0;
528       CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8);
529       NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8);
530       t.StringIndex -= CurRanStrx;
531       t.StringIndex += NextRanStrx;
532     }
533   } else {
534     // Go to one past next null.
535     t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1;
536   }
537   ++t.SymbolIndex;
538   return t;
539 }
540 
541 Archive::symbol_iterator Archive::symbol_begin() const {
542   if (!hasSymbolTable())
543     return symbol_iterator(Symbol(this, 0, 0));
544 
545   const char *buf = getSymbolTable().begin();
546   if (kind() == K_GNU) {
547     uint32_t symbol_count = 0;
548     symbol_count = read32be(buf);
549     buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
550   } else if (kind() == K_MIPS64) {
551     uint64_t symbol_count = read64be(buf);
552     buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t)));
553   } else if (kind() == K_BSD) {
554     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
555     // which is the number of bytes of ranlib structs that follow.  The ranlib
556     // structs are a pair of uint32_t's the first being a string table offset
557     // and the second being the offset into the archive of the member that
558     // define the symbol. After that the next uint32_t is the byte count of
559     // the string table followed by the string table.
560     uint32_t ranlib_count = 0;
561     ranlib_count = read32le(buf) / 8;
562     const char *ranlibs = buf + 4;
563     uint32_t ran_strx = 0;
564     ran_strx = read32le(ranlibs);
565     buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
566     // Skip the byte count of the string table.
567     buf += sizeof(uint32_t);
568     buf += ran_strx;
569   } else {
570     uint32_t member_count = 0;
571     uint32_t symbol_count = 0;
572     member_count = read32le(buf);
573     buf += 4 + (member_count * 4); // Skip offsets.
574     symbol_count = read32le(buf);
575     buf += 4 + (symbol_count * 2); // Skip indices.
576   }
577   uint32_t string_start_offset = buf - getSymbolTable().begin();
578   return symbol_iterator(Symbol(this, 0, string_start_offset));
579 }
580 
581 Archive::symbol_iterator Archive::symbol_end() const {
582   return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0));
583 }
584 
585 uint32_t Archive::getNumberOfSymbols() const {
586   if (!hasSymbolTable())
587     return 0;
588   const char *buf = getSymbolTable().begin();
589   if (kind() == K_GNU)
590     return read32be(buf);
591   if (kind() == K_MIPS64)
592     return read64be(buf);
593   if (kind() == K_BSD)
594     return read32le(buf) / 8;
595   uint32_t member_count = 0;
596   member_count = read32le(buf);
597   buf += 4 + (member_count * 4); // Skip offsets.
598   return read32le(buf);
599 }
600 
601 Archive::child_iterator Archive::findSym(StringRef name) const {
602   Archive::symbol_iterator bs = symbol_begin();
603   Archive::symbol_iterator es = symbol_end();
604 
605   for (; bs != es; ++bs) {
606     StringRef SymName = bs->getName();
607     if (SymName == name) {
608       ErrorOr<Archive::child_iterator> ResultOrErr = bs->getMember();
609       // FIXME: Should we really eat the error?
610       if (ResultOrErr.getError())
611         return child_end();
612       return ResultOrErr.get();
613     }
614   }
615   return child_end();
616 }
617 
618 bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); }
619