1 //===- Archive.cpp - ar File Format implementation ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file defines the Archive class and its archive member header helpers.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Object/Archive.h"
14 #include "llvm/ADT/Optional.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Object/Binary.h"
19 #include "llvm/Object/Error.h"
20 #include "llvm/Support/Chrono.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/ErrorOr.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/MemoryBuffer.h"
27 #include "llvm/Support/Path.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <algorithm>
30 #include <cassert>
31 #include <cstddef>
32 #include <cstdint>
33 #include <memory>
34 #include <string>
35 #include <system_error>
36 
37 using namespace llvm;
38 using namespace object;
39 using namespace llvm::support::endian;
40 
// Out-of-line definition of Archive::anchor(); the body is intentionally empty.
// NOTE(review): presumably the usual LLVM "anchor" idiom that pins the class's
// vtable to this translation unit -- confirm against the declaration in
// Archive.h.
void Archive::anchor() {}
42 
43 static Error malformedError(Twine Msg) {
44   std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")";
45   return make_error<GenericBinaryError>(std::move(StringMsg),
46                                         object_error::parse_failed);
47 }
48 
49 static Error
50 createMemberHeaderParseError(const AbstractArchiveMemberHeader *ArMemHeader,
51                              const char *RawHeaderPtr, uint64_t Size) {
52   StringRef Msg("remaining size of archive too small for next archive "
53                 "member header ");
54 
55   Expected<StringRef> NameOrErr = ArMemHeader->getName(Size);
56   if (NameOrErr)
57     return malformedError(Msg + "for " + *NameOrErr);
58 
59   consumeError(NameOrErr.takeError());
60   uint64_t Offset = RawHeaderPtr - ArMemHeader->Parent->getData().data();
61   return malformedError(Msg + "at offset " + Twine(Offset));
62 }
63 
64 template <class T, std::size_t N>
65 StringRef getFieldRawString(const T (&Field)[N]) {
66   return StringRef(Field, N).rtrim(" ");
67 }
68 
69 template <class T>
70 StringRef CommonArchiveMemberHeader<T>::getRawAccessMode() const {
71   return getFieldRawString(ArMemHdr->AccessMode);
72 }
73 
74 template <class T>
75 StringRef CommonArchiveMemberHeader<T>::getRawLastModified() const {
76   return getFieldRawString(ArMemHdr->LastModified);
77 }
78 
79 template <class T> StringRef CommonArchiveMemberHeader<T>::getRawUID() const {
80   return getFieldRawString(ArMemHdr->UID);
81 }
82 
83 template <class T> StringRef CommonArchiveMemberHeader<T>::getRawGID() const {
84   return getFieldRawString(ArMemHdr->GID);
85 }
86 
87 template <class T> uint64_t CommonArchiveMemberHeader<T>::getOffset() const {
88   return reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
89 }
90 
// Explicitly instantiate the member-header template for the two raw header
// layouts used in this file, so the member functions defined above are emitted
// here.
template class object::CommonArchiveMemberHeader<UnixArMemHdrType>;
template class object::CommonArchiveMemberHeader<BigArMemHdrType>;
93 
94 ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent,
95                                          const char *RawHeaderPtr,
96                                          uint64_t Size, Error *Err)
97     : CommonArchiveMemberHeader<UnixArMemHdrType>(
98           Parent, reinterpret_cast<const UnixArMemHdrType *>(RawHeaderPtr)) {
99   if (RawHeaderPtr == nullptr)
100     return;
101   ErrorAsOutParameter ErrAsOutParam(Err);
102 
103   if (Size < getSizeOf()) {
104     *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size);
105     return;
106   }
107   if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') {
108     if (Err) {
109       std::string Buf;
110       raw_string_ostream OS(Buf);
111       OS.write_escaped(
112           StringRef(ArMemHdr->Terminator, sizeof(ArMemHdr->Terminator)));
113       OS.flush();
114       std::string Msg("terminator characters in archive member \"" + Buf +
115                       "\" not the correct \"`\\n\" values for the archive "
116                       "member header ");
117       Expected<StringRef> NameOrErr = getName(Size);
118       if (!NameOrErr) {
119         consumeError(NameOrErr.takeError());
120         uint64_t Offset = RawHeaderPtr - Parent->getData().data();
121         *Err = malformedError(Msg + "at offset " + Twine(Offset));
122       } else
123         *Err = malformedError(Msg + "for " + NameOrErr.get());
124     }
125     return;
126   }
127 }
128 
129 BigArchiveMemberHeader::BigArchiveMemberHeader(const Archive *Parent,
130                                                const char *RawHeaderPtr,
131                                                uint64_t Size, Error *Err)
132     : CommonArchiveMemberHeader<BigArMemHdrType>(
133           Parent, reinterpret_cast<const BigArMemHdrType *>(RawHeaderPtr)) {
134   if (RawHeaderPtr == nullptr)
135     return;
136   ErrorAsOutParameter ErrAsOutParam(Err);
137 
138   if (Size < getSizeOf())
139     *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size);
140 }
141 
142 // This gets the raw name from the ArMemHdr->Name field and checks that it is
143 // valid for the kind of archive.  If it is not valid it returns an Error.
144 Expected<StringRef> ArchiveMemberHeader::getRawName() const {
145   char EndCond;
146   auto Kind = Parent->kind();
147   if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) {
148     if (ArMemHdr->Name[0] == ' ') {
149       uint64_t Offset =
150           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
151       return malformedError("name contains a leading space for archive member "
152                             "header at offset " +
153                             Twine(Offset));
154     }
155     EndCond = ' ';
156   } else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#')
157     EndCond = ' ';
158   else
159     EndCond = '/';
160   StringRef::size_type end =
161       StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond);
162   if (end == StringRef::npos)
163     end = sizeof(ArMemHdr->Name);
164   assert(end <= sizeof(ArMemHdr->Name) && end > 0);
165   // Don't include the EndCond if there is one.
166   return StringRef(ArMemHdr->Name, end);
167 }
168 
169 Expected<uint64_t>
170 getArchiveMemberDecField(Twine FieldName, const StringRef RawField,
171                          const Archive *Parent,
172                          const AbstractArchiveMemberHeader *MemHeader) {
173   uint64_t Value;
174   if (RawField.getAsInteger(10, Value)) {
175     uint64_t Offset = MemHeader->getOffset();
176     return malformedError("characters in " + FieldName +
177                           " field in archive member header are not "
178                           "all decimal numbers: '" +
179                           RawField +
180                           "' for the archive "
181                           "member header at offset " +
182                           Twine(Offset));
183   }
184   return Value;
185 }
186 
187 Expected<uint64_t>
188 getArchiveMemberOctField(Twine FieldName, const StringRef RawField,
189                          const Archive *Parent,
190                          const AbstractArchiveMemberHeader *MemHeader) {
191   uint64_t Value;
192   if (RawField.getAsInteger(8, Value)) {
193     uint64_t Offset = MemHeader->getOffset();
194     return malformedError("characters in " + FieldName +
195                           " field in archive member header are not "
196                           "all octal numbers: '" +
197                           RawField +
198                           "' for the archive "
199                           "member header at offset " +
200                           Twine(Offset));
201   }
202   return Value;
203 }
204 
205 Expected<StringRef> BigArchiveMemberHeader::getRawName() const {
206   Expected<uint64_t> NameLenOrErr = getArchiveMemberDecField(
207       "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this);
208   if (!NameLenOrErr)
209     // TODO: Out-of-line.
210     return NameLenOrErr.takeError();
211   uint64_t NameLen = NameLenOrErr.get();
212 
213   // If the name length is odd, pad with '\0' to get an even length. After
214   // padding, there is the name terminator "`\n".
215   uint64_t NameLenWithPadding = alignTo(NameLen, 2);
216   StringRef NameTerminator = "`\n";
217   StringRef NameStringWithNameTerminator =
218       StringRef(ArMemHdr->Name, NameLenWithPadding + NameTerminator.size());
219   if (!NameStringWithNameTerminator.endswith(NameTerminator)) {
220     uint64_t Offset =
221         reinterpret_cast<const char *>(ArMemHdr->Name + NameLenWithPadding) -
222         Parent->getData().data();
223     // TODO: Out-of-line.
224     return malformedError(
225         "name does not have name terminator \"`\\n\" for archive member"
226         "header at offset " +
227         Twine(Offset));
228   }
229   return StringRef(ArMemHdr->Name, NameLen);
230 }
231 
232 // member including the header, so the size of any name following the header
233 // is checked to make sure it does not overflow.
234 Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {
235 
236   // This can be called from the ArchiveMemberHeader constructor when the
237   // archive header is truncated to produce an error message with the name.
238   // Make sure the name field is not truncated.
239   if (Size < offsetof(UnixArMemHdrType, Name) + sizeof(ArMemHdr->Name)) {
240     uint64_t ArchiveOffset =
241         reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
242     return malformedError("archive header truncated before the name field "
243                           "for archive member header at offset " +
244                           Twine(ArchiveOffset));
245   }
246 
247   // The raw name itself can be invalid.
248   Expected<StringRef> NameOrErr = getRawName();
249   if (!NameOrErr)
250     return NameOrErr.takeError();
251   StringRef Name = NameOrErr.get();
252 
253   // Check if it's a special name.
254   if (Name[0] == '/') {
255     if (Name.size() == 1) // Linker member.
256       return Name;
257     if (Name.size() == 2 && Name[1] == '/') // String table.
258       return Name;
259     // It's a long name.
260     // Get the string table offset.
261     std::size_t StringOffset;
262     if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) {
263       std::string Buf;
264       raw_string_ostream OS(Buf);
265       OS.write_escaped(Name.substr(1).rtrim(' '));
266       OS.flush();
267       uint64_t ArchiveOffset =
268           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
269       return malformedError("long name offset characters after the '/' are "
270                             "not all decimal numbers: '" +
271                             Buf + "' for archive member header at offset " +
272                             Twine(ArchiveOffset));
273     }
274 
275     // Verify it.
276     if (StringOffset >= Parent->getStringTable().size()) {
277       uint64_t ArchiveOffset =
278           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
279       return malformedError("long name offset " + Twine(StringOffset) +
280                             " past the end of the string table for archive "
281                             "member header at offset " +
282                             Twine(ArchiveOffset));
283     }
284 
285     // GNU long file names end with a "/\n".
286     if (Parent->kind() == Archive::K_GNU ||
287         Parent->kind() == Archive::K_GNU64) {
288       size_t End = Parent->getStringTable().find('\n', /*From=*/StringOffset);
289       if (End == StringRef::npos || End < 1 ||
290           Parent->getStringTable()[End - 1] != '/') {
291         return malformedError("string table at long name offset " +
292                               Twine(StringOffset) + "not terminated");
293       }
294       return Parent->getStringTable().slice(StringOffset, End - 1);
295     }
296     return Parent->getStringTable().begin() + StringOffset;
297   }
298 
299   if (Name.startswith("#1/")) {
300     uint64_t NameLength;
301     if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) {
302       std::string Buf;
303       raw_string_ostream OS(Buf);
304       OS.write_escaped(Name.substr(3).rtrim(' '));
305       OS.flush();
306       uint64_t ArchiveOffset =
307           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
308       return malformedError("long name length characters after the #1/ are "
309                             "not all decimal numbers: '" +
310                             Buf + "' for archive member header at offset " +
311                             Twine(ArchiveOffset));
312     }
313     if (getSizeOf() + NameLength > Size) {
314       uint64_t ArchiveOffset =
315           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
316       return malformedError("long name length: " + Twine(NameLength) +
317                             " extends past the end of the member or archive "
318                             "for archive member header at offset " +
319                             Twine(ArchiveOffset));
320     }
321     return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(),
322                      NameLength)
323         .rtrim('\0');
324   }
325 
326   // It is not a long name so trim the blanks at the end of the name.
327   if (Name[Name.size() - 1] != '/')
328     return Name.rtrim(' ');
329 
330   // It's a simple name.
331   return Name.drop_back(1);
332 }
333 
334 Expected<StringRef> BigArchiveMemberHeader::getName(uint64_t Size) const {
335   return getRawName();
336 }
337 
338 Expected<uint64_t> ArchiveMemberHeader::getSize() const {
339   return getArchiveMemberDecField("size", getFieldRawString(ArMemHdr->Size),
340                                   Parent, this);
341 }
342 
343 Expected<uint64_t> BigArchiveMemberHeader::getSize() const {
344   Expected<uint64_t> SizeOrErr = getArchiveMemberDecField(
345       "size", getFieldRawString(ArMemHdr->Size), Parent, this);
346   if (!SizeOrErr)
347     return SizeOrErr.takeError();
348 
349   Expected<uint64_t> NameLenOrErr = getRawNameSize();
350   if (!NameLenOrErr)
351     return NameLenOrErr.takeError();
352 
353   return *SizeOrErr + alignTo(*NameLenOrErr, 2);
354 }
355 
356 Expected<uint64_t> BigArchiveMemberHeader::getRawNameSize() const {
357   return getArchiveMemberDecField(
358       "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this);
359 }
360 
361 Expected<uint64_t> BigArchiveMemberHeader::getNextOffset() const {
362   return getArchiveMemberDecField(
363       "NextOffset", getFieldRawString(ArMemHdr->NextOffset), Parent, this);
364 }
365 
366 Expected<sys::fs::perms> AbstractArchiveMemberHeader::getAccessMode() const {
367   Expected<uint64_t> AccessModeOrErr =
368       getArchiveMemberOctField("AccessMode", getRawAccessMode(), Parent, this);
369   if (!AccessModeOrErr)
370     return AccessModeOrErr.takeError();
371   return static_cast<sys::fs::perms>(*AccessModeOrErr);
372 }
373 
374 Expected<sys::TimePoint<std::chrono::seconds>>
375 AbstractArchiveMemberHeader::getLastModified() const {
376   Expected<uint64_t> SecondsOrErr = getArchiveMemberDecField(
377       "LastModified", getRawLastModified(), Parent, this);
378 
379   if (!SecondsOrErr)
380     return SecondsOrErr.takeError();
381 
382   return sys::toTimePoint(*SecondsOrErr);
383 }
384 
385 Expected<unsigned> AbstractArchiveMemberHeader::getUID() const {
386   StringRef User = getRawUID();
387   if (User.empty())
388     return 0;
389   return getArchiveMemberDecField("UID", User, Parent, this);
390 }
391 
392 Expected<unsigned> AbstractArchiveMemberHeader::getGID() const {
393   StringRef Group = getRawGID();
394   if (Group.empty())
395     return 0;
396   return getArchiveMemberDecField("GID", Group, Parent, this);
397 }
398 
399 Expected<bool> ArchiveMemberHeader::isThin() const {
400   Expected<StringRef> NameOrErr = getRawName();
401   if (!NameOrErr)
402     return NameOrErr.takeError();
403   StringRef Name = NameOrErr.get();
404   return Parent->isThin() && Name != "/" && Name != "//" && Name != "/SYM64/";
405 }
406 
407 Expected<const char *> ArchiveMemberHeader::getNextChildLoc() const {
408   uint64_t Size = getSizeOf();
409   Expected<bool> isThinOrErr = isThin();
410   if (!isThinOrErr)
411     return isThinOrErr.takeError();
412 
413   bool isThin = isThinOrErr.get();
414   if (!isThin) {
415     Expected<uint64_t> MemberSize = getSize();
416     if (!MemberSize)
417       return MemberSize.takeError();
418 
419     Size += MemberSize.get();
420   }
421 
422   // If Size is odd, add 1 to make it even.
423   const char *NextLoc =
424       reinterpret_cast<const char *>(ArMemHdr) + alignTo(Size, 2);
425 
426   if (NextLoc == Parent->getMemoryBufferRef().getBufferEnd())
427     return nullptr;
428 
429   return NextLoc;
430 }
431 
432 Expected<const char *> BigArchiveMemberHeader::getNextChildLoc() const {
433   if (getOffset() ==
434       static_cast<const BigArchive *>(Parent)->getLastChildOffset())
435     return nullptr;
436 
437   Expected<uint64_t> NextOffsetOrErr = getNextOffset();
438   if (!NextOffsetOrErr)
439     return NextOffsetOrErr.takeError();
440   return Parent->getData().data() + NextOffsetOrErr.get();
441 }
442 
443 Archive::Child::Child(const Archive *Parent, StringRef Data,
444                       uint16_t StartOfFile)
445     : Parent(Parent), Data(Data), StartOfFile(StartOfFile) {
446   Header = Parent->createArchiveMemberHeader(Data.data(), Data.size(), nullptr);
447 }
448 
449 Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
450     : Parent(Parent) {
451   if (!Start) {
452     Header = nullptr;
453     return;
454   }
455 
456   Header = Parent->createArchiveMemberHeader(
457       Start,
458       Parent ? Parent->getData().size() - (Start - Parent->getData().data())
459              : 0,
460       Err);
461 
462   // If we are pointed to real data, Start is not a nullptr, then there must be
463   // a non-null Err pointer available to report malformed data on.  Only in
464   // the case sentinel value is being constructed is Err is permitted to be a
465   // nullptr.
466   assert(Err && "Err can't be nullptr if Start is not a nullptr");
467 
468   ErrorAsOutParameter ErrAsOutParam(Err);
469 
470   // If there was an error in the construction of the Header
471   // then just return with the error now set.
472   if (*Err)
473     return;
474 
475   uint64_t Size = Header->getSizeOf();
476   Data = StringRef(Start, Size);
477   Expected<bool> isThinOrErr = isThinMember();
478   if (!isThinOrErr) {
479     *Err = isThinOrErr.takeError();
480     return;
481   }
482   bool isThin = isThinOrErr.get();
483   if (!isThin) {
484     Expected<uint64_t> MemberSize = getRawSize();
485     if (!MemberSize) {
486       *Err = MemberSize.takeError();
487       return;
488     }
489     Size += MemberSize.get();
490     Data = StringRef(Start, Size);
491   }
492 
493   // Setup StartOfFile and PaddingBytes.
494   StartOfFile = Header->getSizeOf();
495   // Don't include attached name.
496   Expected<StringRef> NameOrErr = getRawName();
497   if (!NameOrErr) {
498     *Err = NameOrErr.takeError();
499     return;
500   }
501   StringRef Name = NameOrErr.get();
502 
503   if (Parent->kind() == Archive::K_AIXBIG) {
504     // The actual start of the file is after the name and any necessary
505     // even-alignment padding.
506     StartOfFile += ((Name.size() + 1) >> 1) << 1;
507   } else if (Name.startswith("#1/")) {
508     uint64_t NameSize;
509     StringRef RawNameSize = Name.substr(3).rtrim(' ');
510     if (RawNameSize.getAsInteger(10, NameSize)) {
511       uint64_t Offset = Start - Parent->getData().data();
512       *Err = malformedError("long name length characters after the #1/ are "
513                             "not all decimal numbers: '" +
514                             RawNameSize +
515                             "' for archive member header at offset " +
516                             Twine(Offset));
517       return;
518     }
519     StartOfFile += NameSize;
520   }
521 }
522 
523 Expected<uint64_t> Archive::Child::getSize() const {
524   if (Parent->IsThin)
525     return Header->getSize();
526   return Data.size() - StartOfFile;
527 }
528 
529 Expected<uint64_t> Archive::Child::getRawSize() const {
530   return Header->getSize();
531 }
532 
533 Expected<bool> Archive::Child::isThinMember() const { return Header->isThin(); }
534 
535 Expected<std::string> Archive::Child::getFullName() const {
536   Expected<bool> isThin = isThinMember();
537   if (!isThin)
538     return isThin.takeError();
539   assert(isThin.get());
540   Expected<StringRef> NameOrErr = getName();
541   if (!NameOrErr)
542     return NameOrErr.takeError();
543   StringRef Name = *NameOrErr;
544   if (sys::path::is_absolute(Name))
545     return std::string(Name);
546 
547   SmallString<128> FullName = sys::path::parent_path(
548       Parent->getMemoryBufferRef().getBufferIdentifier());
549   sys::path::append(FullName, Name);
550   return std::string(FullName.str());
551 }
552 
553 Expected<StringRef> Archive::Child::getBuffer() const {
554   Expected<bool> isThinOrErr = isThinMember();
555   if (!isThinOrErr)
556     return isThinOrErr.takeError();
557   bool isThin = isThinOrErr.get();
558   if (!isThin) {
559     Expected<uint64_t> Size = getSize();
560     if (!Size)
561       return Size.takeError();
562     return StringRef(Data.data() + StartOfFile, Size.get());
563   }
564   Expected<std::string> FullNameOrErr = getFullName();
565   if (!FullNameOrErr)
566     return FullNameOrErr.takeError();
567   const std::string &FullName = *FullNameOrErr;
568   ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
569   if (std::error_code EC = Buf.getError())
570     return errorCodeToError(EC);
571   Parent->ThinBuffers.push_back(std::move(*Buf));
572   return Parent->ThinBuffers.back()->getBuffer();
573 }
574 
575 Expected<Archive::Child> Archive::Child::getNext() const {
576   Expected<const char *> NextLocOrErr = Header->getNextChildLoc();
577   if (!NextLocOrErr)
578     return NextLocOrErr.takeError();
579 
580   const char *NextLoc = *NextLocOrErr;
581 
582   // Check to see if this is at the end of the archive.
583   if (NextLoc == nullptr)
584     return Child(nullptr, nullptr, nullptr);
585 
586   // Check to see if this is past the end of the archive.
587   if (NextLoc > Parent->Data.getBufferEnd()) {
588     std::string Msg("offset to next archive member past the end of the archive "
589                     "after member ");
590     Expected<StringRef> NameOrErr = getName();
591     if (!NameOrErr) {
592       consumeError(NameOrErr.takeError());
593       uint64_t Offset = Data.data() - Parent->getData().data();
594       return malformedError(Msg + "at offset " + Twine(Offset));
595     } else
596       return malformedError(Msg + NameOrErr.get());
597   }
598 
599   Error Err = Error::success();
600   Child Ret(Parent, NextLoc, &Err);
601   if (Err)
602     return std::move(Err);
603   return Ret;
604 }
605 
606 uint64_t Archive::Child::getChildOffset() const {
607   const char *a = Parent->Data.getBuffer().data();
608   const char *c = Data.data();
609   uint64_t offset = c - a;
610   return offset;
611 }
612 
613 Expected<StringRef> Archive::Child::getName() const {
614   Expected<uint64_t> RawSizeOrErr = getRawSize();
615   if (!RawSizeOrErr)
616     return RawSizeOrErr.takeError();
617   uint64_t RawSize = RawSizeOrErr.get();
618   Expected<StringRef> NameOrErr =
619       Header->getName(Header->getSizeOf() + RawSize);
620   if (!NameOrErr)
621     return NameOrErr.takeError();
622   StringRef Name = NameOrErr.get();
623   return Name;
624 }
625 
626 Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
627   Expected<StringRef> NameOrErr = getName();
628   if (!NameOrErr)
629     return NameOrErr.takeError();
630   StringRef Name = NameOrErr.get();
631   Expected<StringRef> Buf = getBuffer();
632   if (!Buf)
633     return createFileError(Name, Buf.takeError());
634   return MemoryBufferRef(*Buf, Name);
635 }
636 
637 Expected<std::unique_ptr<Binary>>
638 Archive::Child::getAsBinary(LLVMContext *Context) const {
639   Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
640   if (!BuffOrErr)
641     return BuffOrErr.takeError();
642 
643   auto BinaryOrErr = createBinary(BuffOrErr.get(), Context);
644   if (BinaryOrErr)
645     return std::move(*BinaryOrErr);
646   return BinaryOrErr.takeError();
647 }
648 
649 Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
650   Error Err = Error::success();
651   std::unique_ptr<Archive> Ret;
652   StringRef Buffer = Source.getBuffer();
653 
654   if (Buffer.startswith(BigArchiveMagic))
655     Ret = std::make_unique<BigArchive>(Source, Err);
656   else
657     Ret = std::make_unique<Archive>(Source, Err);
658 
659   if (Err)
660     return std::move(Err);
661   return std::move(Ret);
662 }
663 
664 std::unique_ptr<AbstractArchiveMemberHeader>
665 Archive::createArchiveMemberHeader(const char *RawHeaderPtr, uint64_t Size,
666                                    Error *Err) const {
667   ErrorAsOutParameter ErrAsOutParam(Err);
668   if (kind() != K_AIXBIG)
669     return std::make_unique<ArchiveMemberHeader>(this, RawHeaderPtr, Size, Err);
670   return std::make_unique<BigArchiveMemberHeader>(this, RawHeaderPtr, Size,
671                                                   Err);
672 }
673 
674 uint64_t Archive::getArchiveMagicLen() const {
675   if (isThin())
676     return sizeof(ThinArchiveMagic) - 1;
677 
678   if (Kind() == K_AIXBIG)
679     return sizeof(BigArchiveMagic) - 1;
680 
681   return sizeof(ArchiveMagic) - 1;
682 }
683 
684 void Archive::setFirstRegular(const Child &C) {
685   FirstRegularData = C.Data;
686   FirstRegularStartOfFile = C.StartOfFile;
687 }
688 
689 Archive::Archive(MemoryBufferRef Source, Error &Err)
690     : Binary(Binary::ID_Archive, Source) {
691   ErrorAsOutParameter ErrAsOutParam(&Err);
692   StringRef Buffer = Data.getBuffer();
693   // Check for sufficient magic.
694   if (Buffer.startswith(ThinArchiveMagic)) {
695     IsThin = true;
696   } else if (Buffer.startswith(ArchiveMagic)) {
697     IsThin = false;
698   } else if (Buffer.startswith(BigArchiveMagic)) {
699     Format = K_AIXBIG;
700     IsThin = false;
701     return;
702   } else {
703     Err = make_error<GenericBinaryError>("file too small to be an archive",
704                                          object_error::invalid_file_type);
705     return;
706   }
707 
708   // Make sure Format is initialized before any call to
709   // ArchiveMemberHeader::getName() is made.  This could be a valid empty
710   // archive which is the same in all formats.  So claiming it to be gnu to is
711   // fine if not totally correct before we look for a string table or table of
712   // contents.
713   Format = K_GNU;
714 
715   // Get the special members.
716   child_iterator I = child_begin(Err, false);
717   if (Err)
718     return;
719   child_iterator E = child_end();
720 
721   // See if this is a valid empty archive and if so return.
722   if (I == E) {
723     Err = Error::success();
724     return;
725   }
726   const Child *C = &*I;
727 
728   auto Increment = [&]() {
729     ++I;
730     if (Err)
731       return true;
732     C = &*I;
733     return false;
734   };
735 
736   Expected<StringRef> NameOrErr = C->getRawName();
737   if (!NameOrErr) {
738     Err = NameOrErr.takeError();
739     return;
740   }
741   StringRef Name = NameOrErr.get();
742 
743   // Below is the pattern that is used to figure out the archive format
744   // GNU archive format
745   //  First member : / (may exist, if it exists, points to the symbol table )
746   //  Second member : // (may exist, if it exists, points to the string table)
747   //  Note : The string table is used if the filename exceeds 15 characters
748   // BSD archive format
749   //  First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
750   //  There is no string table, if the filename exceeds 15 characters or has a
751   //  embedded space, the filename has #1/<size>, The size represents the size
752   //  of the filename that needs to be read after the archive header
753   // COFF archive format
754   //  First member : /
755   //  Second member : / (provides a directory of symbols)
756   //  Third member : // (may exist, if it exists, contains the string table)
757   //  Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
758   //  even if the string table is empty. However, lib.exe does not in fact
759   //  seem to create the third member if there's no member whose filename
760   //  exceeds 15 characters. So the third member is optional.
761 
762   if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") {
763     if (Name == "__.SYMDEF")
764       Format = K_BSD;
765     else // Name == "__.SYMDEF_64"
766       Format = K_DARWIN64;
767     // We know that the symbol table is not an external file, but we still must
768     // check any Expected<> return value.
769     Expected<StringRef> BufOrErr = C->getBuffer();
770     if (!BufOrErr) {
771       Err = BufOrErr.takeError();
772       return;
773     }
774     SymbolTable = BufOrErr.get();
775     if (Increment())
776       return;
777     setFirstRegular(*C);
778 
779     Err = Error::success();
780     return;
781   }
782 
783   if (Name.startswith("#1/")) {
784     Format = K_BSD;
785     // We know this is BSD, so getName will work since there is no string table.
786     Expected<StringRef> NameOrErr = C->getName();
787     if (!NameOrErr) {
788       Err = NameOrErr.takeError();
789       return;
790     }
791     Name = NameOrErr.get();
792     if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
793       // We know that the symbol table is not an external file, but we still
794       // must check any Expected<> return value.
795       Expected<StringRef> BufOrErr = C->getBuffer();
796       if (!BufOrErr) {
797         Err = BufOrErr.takeError();
798         return;
799       }
800       SymbolTable = BufOrErr.get();
801       if (Increment())
802         return;
803     } else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") {
804       Format = K_DARWIN64;
805       // We know that the symbol table is not an external file, but we still
806       // must check any Expected<> return value.
807       Expected<StringRef> BufOrErr = C->getBuffer();
808       if (!BufOrErr) {
809         Err = BufOrErr.takeError();
810         return;
811       }
812       SymbolTable = BufOrErr.get();
813       if (Increment())
814         return;
815     }
816     setFirstRegular(*C);
817     return;
818   }
819 
820   // MIPS 64-bit ELF archives use a special format of a symbol table.
821   // This format is marked by `ar_name` field equals to "/SYM64/".
822   // For detailed description see page 96 in the following document:
823   // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf
824 
825   bool has64SymTable = false;
826   if (Name == "/" || Name == "/SYM64/") {
827     // We know that the symbol table is not an external file, but we still
828     // must check any Expected<> return value.
829     Expected<StringRef> BufOrErr = C->getBuffer();
830     if (!BufOrErr) {
831       Err = BufOrErr.takeError();
832       return;
833     }
834     SymbolTable = BufOrErr.get();
835     if (Name == "/SYM64/")
836       has64SymTable = true;
837 
838     if (Increment())
839       return;
840     if (I == E) {
841       Err = Error::success();
842       return;
843     }
844     Expected<StringRef> NameOrErr = C->getRawName();
845     if (!NameOrErr) {
846       Err = NameOrErr.takeError();
847       return;
848     }
849     Name = NameOrErr.get();
850   }
851 
852   if (Name == "//") {
853     Format = has64SymTable ? K_GNU64 : K_GNU;
854     // The string table is never an external member, but we still
855     // must check any Expected<> return value.
856     Expected<StringRef> BufOrErr = C->getBuffer();
857     if (!BufOrErr) {
858       Err = BufOrErr.takeError();
859       return;
860     }
861     StringTable = BufOrErr.get();
862     if (Increment())
863       return;
864     setFirstRegular(*C);
865     Err = Error::success();
866     return;
867   }
868 
869   if (Name[0] != '/') {
870     Format = has64SymTable ? K_GNU64 : K_GNU;
871     setFirstRegular(*C);
872     Err = Error::success();
873     return;
874   }
875 
876   if (Name != "/") {
877     Err = errorCodeToError(object_error::parse_failed);
878     return;
879   }
880 
881   Format = K_COFF;
882   // We know that the symbol table is not an external file, but we still
883   // must check any Expected<> return value.
884   Expected<StringRef> BufOrErr = C->getBuffer();
885   if (!BufOrErr) {
886     Err = BufOrErr.takeError();
887     return;
888   }
889   SymbolTable = BufOrErr.get();
890 
891   if (Increment())
892     return;
893 
894   if (I == E) {
895     setFirstRegular(*C);
896     Err = Error::success();
897     return;
898   }
899 
900   NameOrErr = C->getRawName();
901   if (!NameOrErr) {
902     Err = NameOrErr.takeError();
903     return;
904   }
905   Name = NameOrErr.get();
906 
907   if (Name == "//") {
908     // The string table is never an external member, but we still
909     // must check any Expected<> return value.
910     Expected<StringRef> BufOrErr = C->getBuffer();
911     if (!BufOrErr) {
912       Err = BufOrErr.takeError();
913       return;
914     }
915     StringTable = BufOrErr.get();
916     if (Increment())
917       return;
918   }
919 
920   setFirstRegular(*C);
921   Err = Error::success();
922 }
923 
924 Archive::child_iterator Archive::child_begin(Error &Err,
925                                              bool SkipInternal) const {
926   if (isEmpty())
927     return child_end();
928 
929   if (SkipInternal)
930     return child_iterator::itr(
931         Child(this, FirstRegularData, FirstRegularStartOfFile), Err);
932 
933   const char *Loc = Data.getBufferStart() + getFirstChildOffset();
934   Child C(this, Loc, &Err);
935   if (Err)
936     return child_end();
937   return child_iterator::itr(C, Err);
938 }
939 
940 Archive::child_iterator Archive::child_end() const {
941   return child_iterator::end(Child(nullptr, nullptr, nullptr));
942 }
943 
944 StringRef Archive::Symbol::getName() const {
945   return Parent->getSymbolTable().begin() + StringIndex;
946 }
947 
948 Expected<Archive::Child> Archive::Symbol::getMember() const {
949   const char *Buf = Parent->getSymbolTable().begin();
950   const char *Offsets = Buf;
951   if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64)
952     Offsets += sizeof(uint64_t);
953   else
954     Offsets += sizeof(uint32_t);
955   uint64_t Offset = 0;
956   if (Parent->kind() == K_GNU) {
957     Offset = read32be(Offsets + SymbolIndex * 4);
958   } else if (Parent->kind() == K_GNU64) {
959     Offset = read64be(Offsets + SymbolIndex * 8);
960   } else if (Parent->kind() == K_BSD) {
961     // The SymbolIndex is an index into the ranlib structs that start at
962     // Offsets (the first uint32_t is the number of bytes of the ranlib
963     // structs).  The ranlib structs are a pair of uint32_t's the first
964     // being a string table offset and the second being the offset into
965     // the archive of the member that defines the symbol.  Which is what
966     // is needed here.
967     Offset = read32le(Offsets + SymbolIndex * 8 + 4);
968   } else if (Parent->kind() == K_DARWIN64) {
969     // The SymbolIndex is an index into the ranlib_64 structs that start at
970     // Offsets (the first uint64_t is the number of bytes of the ranlib_64
971     // structs).  The ranlib_64 structs are a pair of uint64_t's the first
972     // being a string table offset and the second being the offset into
973     // the archive of the member that defines the symbol.  Which is what
974     // is needed here.
975     Offset = read64le(Offsets + SymbolIndex * 16 + 8);
976   } else {
977     // Skip offsets.
978     uint32_t MemberCount = read32le(Buf);
979     Buf += MemberCount * 4 + 4;
980 
981     uint32_t SymbolCount = read32le(Buf);
982     if (SymbolIndex >= SymbolCount)
983       return errorCodeToError(object_error::parse_failed);
984 
985     // Skip SymbolCount to get to the indices table.
986     const char *Indices = Buf + 4;
987 
988     // Get the index of the offset in the file member offset table for this
989     // symbol.
990     uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
991     // Subtract 1 since OffsetIndex is 1 based.
992     --OffsetIndex;
993 
994     if (OffsetIndex >= MemberCount)
995       return errorCodeToError(object_error::parse_failed);
996 
997     Offset = read32le(Offsets + OffsetIndex * 4);
998   }
999 
1000   const char *Loc = Parent->getData().begin() + Offset;
1001   Error Err = Error::success();
1002   Child C(Parent, Loc, &Err);
1003   if (Err)
1004     return std::move(Err);
1005   return C;
1006 }
1007 
1008 Archive::Symbol Archive::Symbol::getNext() const {
1009   Symbol t(*this);
1010   if (Parent->kind() == K_BSD) {
1011     // t.StringIndex is an offset from the start of the __.SYMDEF or
1012     // "__.SYMDEF SORTED" member into the string table for the ranlib
1013     // struct indexed by t.SymbolIndex .  To change t.StringIndex to the
1014     // offset in the string table for t.SymbolIndex+1 we subtract the
1015     // its offset from the start of the string table for t.SymbolIndex
1016     // and add the offset of the string table for t.SymbolIndex+1.
1017 
1018     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
1019     // which is the number of bytes of ranlib structs that follow.  The ranlib
1020     // structs are a pair of uint32_t's the first being a string table offset
1021     // and the second being the offset into the archive of the member that
1022     // define the symbol. After that the next uint32_t is the byte count of
1023     // the string table followed by the string table.
1024     const char *Buf = Parent->getSymbolTable().begin();
1025     uint32_t RanlibCount = 0;
1026     RanlibCount = read32le(Buf) / 8;
1027     // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
1028     // don't change the t.StringIndex as we don't want to reference a ranlib
1029     // past RanlibCount.
1030     if (t.SymbolIndex + 1 < RanlibCount) {
1031       const char *Ranlibs = Buf + 4;
1032       uint32_t CurRanStrx = 0;
1033       uint32_t NextRanStrx = 0;
1034       CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8);
1035       NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8);
1036       t.StringIndex -= CurRanStrx;
1037       t.StringIndex += NextRanStrx;
1038     }
1039   } else {
1040     // Go to one past next null.
1041     t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1;
1042   }
1043   ++t.SymbolIndex;
1044   return t;
1045 }
1046 
1047 Archive::symbol_iterator Archive::symbol_begin() const {
1048   if (!hasSymbolTable())
1049     return symbol_iterator(Symbol(this, 0, 0));
1050 
1051   const char *buf = getSymbolTable().begin();
1052   if (kind() == K_GNU) {
1053     uint32_t symbol_count = 0;
1054     symbol_count = read32be(buf);
1055     buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
1056   } else if (kind() == K_GNU64) {
1057     uint64_t symbol_count = read64be(buf);
1058     buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t)));
1059   } else if (kind() == K_BSD) {
1060     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
1061     // which is the number of bytes of ranlib structs that follow.  The ranlib
1062     // structs are a pair of uint32_t's the first being a string table offset
1063     // and the second being the offset into the archive of the member that
1064     // define the symbol. After that the next uint32_t is the byte count of
1065     // the string table followed by the string table.
1066     uint32_t ranlib_count = 0;
1067     ranlib_count = read32le(buf) / 8;
1068     const char *ranlibs = buf + 4;
1069     uint32_t ran_strx = 0;
1070     ran_strx = read32le(ranlibs);
1071     buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
1072     // Skip the byte count of the string table.
1073     buf += sizeof(uint32_t);
1074     buf += ran_strx;
1075   } else if (kind() == K_DARWIN64) {
1076     // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t
1077     // which is the number of bytes of ranlib_64 structs that follow.  The
1078     // ranlib_64 structs are a pair of uint64_t's the first being a string
1079     // table offset and the second being the offset into the archive of the
1080     // member that define the symbol. After that the next uint64_t is the byte
1081     // count of the string table followed by the string table.
1082     uint64_t ranlib_count = 0;
1083     ranlib_count = read64le(buf) / 16;
1084     const char *ranlibs = buf + 8;
1085     uint64_t ran_strx = 0;
1086     ran_strx = read64le(ranlibs);
1087     buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t))));
1088     // Skip the byte count of the string table.
1089     buf += sizeof(uint64_t);
1090     buf += ran_strx;
1091   } else {
1092     uint32_t member_count = 0;
1093     uint32_t symbol_count = 0;
1094     member_count = read32le(buf);
1095     buf += 4 + (member_count * 4); // Skip offsets.
1096     symbol_count = read32le(buf);
1097     buf += 4 + (symbol_count * 2); // Skip indices.
1098   }
1099   uint32_t string_start_offset = buf - getSymbolTable().begin();
1100   return symbol_iterator(Symbol(this, 0, string_start_offset));
1101 }
1102 
1103 Archive::symbol_iterator Archive::symbol_end() const {
1104   return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0));
1105 }
1106 
1107 uint32_t Archive::getNumberOfSymbols() const {
1108   if (!hasSymbolTable())
1109     return 0;
1110   const char *buf = getSymbolTable().begin();
1111   if (kind() == K_GNU)
1112     return read32be(buf);
1113   if (kind() == K_GNU64)
1114     return read64be(buf);
1115   if (kind() == K_BSD)
1116     return read32le(buf) / 8;
1117   if (kind() == K_DARWIN64)
1118     return read64le(buf) / 16;
1119   uint32_t member_count = 0;
1120   member_count = read32le(buf);
1121   buf += 4 + (member_count * 4); // Skip offsets.
1122   return read32le(buf);
1123 }
1124 
1125 Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const {
1126   Archive::symbol_iterator bs = symbol_begin();
1127   Archive::symbol_iterator es = symbol_end();
1128 
1129   for (; bs != es; ++bs) {
1130     StringRef SymName = bs->getName();
1131     if (SymName == name) {
1132       if (auto MemberOrErr = bs->getMember())
1133         return Child(*MemberOrErr);
1134       else
1135         return MemberOrErr.takeError();
1136     }
1137   }
1138   return Optional<Child>();
1139 }
1140 
1141 // Returns true if archive file contains no member file.
1142 bool Archive::isEmpty() const {
1143   return Data.getBufferSize() == getArchiveMagicLen();
1144 }
1145 
1146 bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); }
1147 
1148 BigArchive::BigArchive(MemoryBufferRef Source, Error &Err)
1149     : Archive(Source, Err) {
1150   ErrorAsOutParameter ErrAsOutParam(&Err);
1151   StringRef Buffer = Data.getBuffer();
1152   ArFixLenHdr = reinterpret_cast<const FixLenHdr *>(Buffer.data());
1153 
1154   StringRef RawOffset = getFieldRawString(ArFixLenHdr->FirstChildOffset);
1155   if (RawOffset.getAsInteger(10, FirstChildOffset))
1156     // TODO: Out-of-line.
1157     Err = malformedError("malformed AIX big archive: first member offset \"" +
1158                          RawOffset + "\" is not a number");
1159 
1160   RawOffset = getFieldRawString(ArFixLenHdr->LastChildOffset);
1161   if (RawOffset.getAsInteger(10, LastChildOffset))
1162     // TODO: Out-of-line.
1163     Err = malformedError("malformed AIX big archive: last member offset \"" +
1164                          RawOffset + "\" is not a number");
1165 
1166   child_iterator I = child_begin(Err, false);
1167   if (Err)
1168     return;
1169   child_iterator E = child_end();
1170   if (I == E) {
1171     Err = Error::success();
1172     return;
1173   }
1174   setFirstRegular(*I);
1175   Err = Error::success();
1176 }
1177