1 //===- Archive.cpp - ar File Format implementation ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file implements the Archive class and its archive member header helpers.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Object/Archive.h"
14 #include "llvm/ADT/Optional.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Object/Binary.h"
19 #include "llvm/Object/Error.h"
20 #include "llvm/Support/Chrono.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/ErrorOr.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/Host.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 #include "llvm/Support/Path.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cassert>
32 #include <cstddef>
33 #include <cstdint>
34 #include <memory>
35 #include <string>
36 #include <system_error>
37 
38 using namespace llvm;
39 using namespace object;
40 using namespace llvm::support::endian;
41 
42 void Archive::anchor() {}
43 
44 static Error malformedError(Twine Msg) {
45   std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")";
46   return make_error<GenericBinaryError>(std::move(StringMsg),
47                                         object_error::parse_failed);
48 }
49 
50 static Error
51 createMemberHeaderParseError(const AbstractArchiveMemberHeader *ArMemHeader,
52                              const char *RawHeaderPtr, uint64_t Size) {
53   StringRef Msg("remaining size of archive too small for next archive "
54                 "member header ");
55 
56   Expected<StringRef> NameOrErr = ArMemHeader->getName(Size);
57   if (NameOrErr)
58     return malformedError(Msg + "for " + *NameOrErr);
59 
60   consumeError(NameOrErr.takeError());
61   uint64_t Offset = RawHeaderPtr - ArMemHeader->Parent->getData().data();
62   return malformedError(Msg + "at offset " + Twine(Offset));
63 }
64 
65 template <class T, std::size_t N>
66 StringRef getFieldRawString(const T (&Field)[N]) {
67   return StringRef(Field, N).rtrim(" ");
68 }
69 
70 template <class T>
71 StringRef CommonArchiveMemberHeader<T>::getRawAccessMode() const {
72   return getFieldRawString(ArMemHdr->AccessMode);
73 }
74 
75 template <class T>
76 StringRef CommonArchiveMemberHeader<T>::getRawLastModified() const {
77   return getFieldRawString(ArMemHdr->LastModified);
78 }
79 
80 template <class T> StringRef CommonArchiveMemberHeader<T>::getRawUID() const {
81   return getFieldRawString(ArMemHdr->UID);
82 }
83 
84 template <class T> StringRef CommonArchiveMemberHeader<T>::getRawGID() const {
85   return getFieldRawString(ArMemHdr->GID);
86 }
87 
88 template <class T> uint64_t CommonArchiveMemberHeader<T>::getOffset() const {
89   return reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
90 }
91 
92 template class object::CommonArchiveMemberHeader<UnixArMemHdrType>;
93 template class object::CommonArchiveMemberHeader<BigArMemHdrType>;
94 
95 ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent,
96                                          const char *RawHeaderPtr,
97                                          uint64_t Size, Error *Err)
98     : CommonArchiveMemberHeader<UnixArMemHdrType>(
99           Parent, reinterpret_cast<const UnixArMemHdrType *>(RawHeaderPtr)) {
100   if (RawHeaderPtr == nullptr)
101     return;
102   ErrorAsOutParameter ErrAsOutParam(Err);
103 
104   if (Size < getSizeOf()) {
105     *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size);
106     return;
107   }
108   if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') {
109     if (Err) {
110       std::string Buf;
111       raw_string_ostream OS(Buf);
112       OS.write_escaped(
113           StringRef(ArMemHdr->Terminator, sizeof(ArMemHdr->Terminator)));
114       OS.flush();
115       std::string Msg("terminator characters in archive member \"" + Buf +
116                       "\" not the correct \"`\\n\" values for the archive "
117                       "member header ");
118       Expected<StringRef> NameOrErr = getName(Size);
119       if (!NameOrErr) {
120         consumeError(NameOrErr.takeError());
121         uint64_t Offset = RawHeaderPtr - Parent->getData().data();
122         *Err = malformedError(Msg + "at offset " + Twine(Offset));
123       } else
124         *Err = malformedError(Msg + "for " + NameOrErr.get());
125     }
126     return;
127   }
128 }
129 
130 BigArchiveMemberHeader::BigArchiveMemberHeader(const Archive *Parent,
131                                                const char *RawHeaderPtr,
132                                                uint64_t Size, Error *Err)
133     : CommonArchiveMemberHeader<BigArMemHdrType>(
134           Parent, reinterpret_cast<const BigArMemHdrType *>(RawHeaderPtr)) {
135   if (RawHeaderPtr == nullptr)
136     return;
137   ErrorAsOutParameter ErrAsOutParam(Err);
138 
139   if (Size < getSizeOf())
140     *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size);
141 }
142 
143 // This gets the raw name from the ArMemHdr->Name field and checks that it is
144 // valid for the kind of archive.  If it is not valid it returns an Error.
145 Expected<StringRef> ArchiveMemberHeader::getRawName() const {
146   char EndCond;
147   auto Kind = Parent->kind();
148   if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) {
149     if (ArMemHdr->Name[0] == ' ') {
150       uint64_t Offset =
151           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
152       return malformedError("name contains a leading space for archive member "
153                             "header at offset " +
154                             Twine(Offset));
155     }
156     EndCond = ' ';
157   } else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#')
158     EndCond = ' ';
159   else
160     EndCond = '/';
161   StringRef::size_type end =
162       StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond);
163   if (end == StringRef::npos)
164     end = sizeof(ArMemHdr->Name);
165   assert(end <= sizeof(ArMemHdr->Name) && end > 0);
166   // Don't include the EndCond if there is one.
167   return StringRef(ArMemHdr->Name, end);
168 }
169 
170 Expected<uint64_t>
171 getArchiveMemberDecField(Twine FieldName, const StringRef RawField,
172                          const Archive *Parent,
173                          const AbstractArchiveMemberHeader *MemHeader) {
174   uint64_t Value;
175   if (RawField.getAsInteger(10, Value)) {
176     uint64_t Offset = MemHeader->getOffset();
177     return malformedError("characters in " + FieldName +
178                           " field in archive member header are not "
179                           "all decimal numbers: '" +
180                           RawField +
181                           "' for the archive "
182                           "member header at offset " +
183                           Twine(Offset));
184   }
185   return Value;
186 }
187 
188 Expected<uint64_t>
189 getArchiveMemberOctField(Twine FieldName, const StringRef RawField,
190                          const Archive *Parent,
191                          const AbstractArchiveMemberHeader *MemHeader) {
192   uint64_t Value;
193   if (RawField.getAsInteger(8, Value)) {
194     uint64_t Offset = MemHeader->getOffset();
195     return malformedError("characters in " + FieldName +
196                           " field in archive member header are not "
197                           "all octal numbers: '" +
198                           RawField +
199                           "' for the archive "
200                           "member header at offset " +
201                           Twine(Offset));
202   }
203   return Value;
204 }
205 
206 Expected<StringRef> BigArchiveMemberHeader::getRawName() const {
207   Expected<uint64_t> NameLenOrErr = getArchiveMemberDecField(
208       "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this);
209   if (!NameLenOrErr)
210     // TODO: Out-of-line.
211     return NameLenOrErr.takeError();
212   uint64_t NameLen = NameLenOrErr.get();
213 
214   // If the name length is odd, pad with '\0' to get an even length. After
215   // padding, there is the name terminator "`\n".
216   uint64_t NameLenWithPadding = alignTo(NameLen, 2);
217   StringRef NameTerminator = "`\n";
218   StringRef NameStringWithNameTerminator =
219       StringRef(ArMemHdr->Name, NameLenWithPadding + NameTerminator.size());
220   if (!NameStringWithNameTerminator.endswith(NameTerminator)) {
221     uint64_t Offset =
222         reinterpret_cast<const char *>(ArMemHdr->Name + NameLenWithPadding) -
223         Parent->getData().data();
224     // TODO: Out-of-line.
225     return malformedError(
226         "name does not have name terminator \"`\\n\" for archive member"
227         "header at offset " +
228         Twine(Offset));
229   }
230   return StringRef(ArMemHdr->Name, NameLen);
231 }
232 
233 // member including the header, so the size of any name following the header
234 // is checked to make sure it does not overflow.
235 Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {
236 
237   // This can be called from the ArchiveMemberHeader constructor when the
238   // archive header is truncated to produce an error message with the name.
239   // Make sure the name field is not truncated.
240   if (Size < offsetof(UnixArMemHdrType, Name) + sizeof(ArMemHdr->Name)) {
241     uint64_t ArchiveOffset =
242         reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
243     return malformedError("archive header truncated before the name field "
244                           "for archive member header at offset " +
245                           Twine(ArchiveOffset));
246   }
247 
248   // The raw name itself can be invalid.
249   Expected<StringRef> NameOrErr = getRawName();
250   if (!NameOrErr)
251     return NameOrErr.takeError();
252   StringRef Name = NameOrErr.get();
253 
254   // Check if it's a special name.
255   if (Name[0] == '/') {
256     if (Name.size() == 1) // Linker member.
257       return Name;
258     if (Name.size() == 2 && Name[1] == '/') // String table.
259       return Name;
260     // System libraries from the Windows SDK for Windows 11 contain this symbol.
261     // It looks like a CFG guard: we just skip it for now.
262     if (Name.equals("/<XFGHASHMAP>/"))
263       return Name;
264     // It's a long name.
265     // Get the string table offset.
266     std::size_t StringOffset;
267     if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) {
268       std::string Buf;
269       raw_string_ostream OS(Buf);
270       OS.write_escaped(Name.substr(1).rtrim(' '));
271       OS.flush();
272       uint64_t ArchiveOffset =
273           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
274       return malformedError("long name offset characters after the '/' are "
275                             "not all decimal numbers: '" +
276                             Buf + "' for archive member header at offset " +
277                             Twine(ArchiveOffset));
278     }
279 
280     // Verify it.
281     if (StringOffset >= Parent->getStringTable().size()) {
282       uint64_t ArchiveOffset =
283           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
284       return malformedError("long name offset " + Twine(StringOffset) +
285                             " past the end of the string table for archive "
286                             "member header at offset " +
287                             Twine(ArchiveOffset));
288     }
289 
290     // GNU long file names end with a "/\n".
291     if (Parent->kind() == Archive::K_GNU ||
292         Parent->kind() == Archive::K_GNU64) {
293       size_t End = Parent->getStringTable().find('\n', /*From=*/StringOffset);
294       if (End == StringRef::npos || End < 1 ||
295           Parent->getStringTable()[End - 1] != '/') {
296         return malformedError("string table at long name offset " +
297                               Twine(StringOffset) + "not terminated");
298       }
299       return Parent->getStringTable().slice(StringOffset, End - 1);
300     }
301     return Parent->getStringTable().begin() + StringOffset;
302   }
303 
304   if (Name.startswith("#1/")) {
305     uint64_t NameLength;
306     if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) {
307       std::string Buf;
308       raw_string_ostream OS(Buf);
309       OS.write_escaped(Name.substr(3).rtrim(' '));
310       OS.flush();
311       uint64_t ArchiveOffset =
312           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
313       return malformedError("long name length characters after the #1/ are "
314                             "not all decimal numbers: '" +
315                             Buf + "' for archive member header at offset " +
316                             Twine(ArchiveOffset));
317     }
318     if (getSizeOf() + NameLength > Size) {
319       uint64_t ArchiveOffset =
320           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
321       return malformedError("long name length: " + Twine(NameLength) +
322                             " extends past the end of the member or archive "
323                             "for archive member header at offset " +
324                             Twine(ArchiveOffset));
325     }
326     return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(),
327                      NameLength)
328         .rtrim('\0');
329   }
330 
331   // It is not a long name so trim the blanks at the end of the name.
332   if (Name[Name.size() - 1] != '/')
333     return Name.rtrim(' ');
334 
335   // It's a simple name.
336   return Name.drop_back(1);
337 }
338 
339 Expected<StringRef> BigArchiveMemberHeader::getName(uint64_t Size) const {
340   return getRawName();
341 }
342 
343 Expected<uint64_t> ArchiveMemberHeader::getSize() const {
344   return getArchiveMemberDecField("size", getFieldRawString(ArMemHdr->Size),
345                                   Parent, this);
346 }
347 
348 Expected<uint64_t> BigArchiveMemberHeader::getSize() const {
349   Expected<uint64_t> SizeOrErr = getArchiveMemberDecField(
350       "size", getFieldRawString(ArMemHdr->Size), Parent, this);
351   if (!SizeOrErr)
352     return SizeOrErr.takeError();
353 
354   Expected<uint64_t> NameLenOrErr = getRawNameSize();
355   if (!NameLenOrErr)
356     return NameLenOrErr.takeError();
357 
358   return *SizeOrErr + alignTo(*NameLenOrErr, 2);
359 }
360 
361 Expected<uint64_t> BigArchiveMemberHeader::getRawNameSize() const {
362   return getArchiveMemberDecField(
363       "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this);
364 }
365 
366 Expected<uint64_t> BigArchiveMemberHeader::getNextOffset() const {
367   return getArchiveMemberDecField(
368       "NextOffset", getFieldRawString(ArMemHdr->NextOffset), Parent, this);
369 }
370 
371 Expected<sys::fs::perms> AbstractArchiveMemberHeader::getAccessMode() const {
372   Expected<uint64_t> AccessModeOrErr =
373       getArchiveMemberOctField("AccessMode", getRawAccessMode(), Parent, this);
374   if (!AccessModeOrErr)
375     return AccessModeOrErr.takeError();
376   return static_cast<sys::fs::perms>(*AccessModeOrErr);
377 }
378 
379 Expected<sys::TimePoint<std::chrono::seconds>>
380 AbstractArchiveMemberHeader::getLastModified() const {
381   Expected<uint64_t> SecondsOrErr = getArchiveMemberDecField(
382       "LastModified", getRawLastModified(), Parent, this);
383 
384   if (!SecondsOrErr)
385     return SecondsOrErr.takeError();
386 
387   return sys::toTimePoint(*SecondsOrErr);
388 }
389 
390 Expected<unsigned> AbstractArchiveMemberHeader::getUID() const {
391   StringRef User = getRawUID();
392   if (User.empty())
393     return 0;
394   return getArchiveMemberDecField("UID", User, Parent, this);
395 }
396 
397 Expected<unsigned> AbstractArchiveMemberHeader::getGID() const {
398   StringRef Group = getRawGID();
399   if (Group.empty())
400     return 0;
401   return getArchiveMemberDecField("GID", Group, Parent, this);
402 }
403 
404 Expected<bool> ArchiveMemberHeader::isThin() const {
405   Expected<StringRef> NameOrErr = getRawName();
406   if (!NameOrErr)
407     return NameOrErr.takeError();
408   StringRef Name = NameOrErr.get();
409   return Parent->isThin() && Name != "/" && Name != "//" && Name != "/SYM64/";
410 }
411 
412 Expected<const char *> ArchiveMemberHeader::getNextChildLoc() const {
413   uint64_t Size = getSizeOf();
414   Expected<bool> isThinOrErr = isThin();
415   if (!isThinOrErr)
416     return isThinOrErr.takeError();
417 
418   bool isThin = isThinOrErr.get();
419   if (!isThin) {
420     Expected<uint64_t> MemberSize = getSize();
421     if (!MemberSize)
422       return MemberSize.takeError();
423 
424     Size += MemberSize.get();
425   }
426 
427   // If Size is odd, add 1 to make it even.
428   const char *NextLoc =
429       reinterpret_cast<const char *>(ArMemHdr) + alignTo(Size, 2);
430 
431   if (NextLoc == Parent->getMemoryBufferRef().getBufferEnd())
432     return nullptr;
433 
434   return NextLoc;
435 }
436 
437 Expected<const char *> BigArchiveMemberHeader::getNextChildLoc() const {
438   if (getOffset() ==
439       static_cast<const BigArchive *>(Parent)->getLastChildOffset())
440     return nullptr;
441 
442   Expected<uint64_t> NextOffsetOrErr = getNextOffset();
443   if (!NextOffsetOrErr)
444     return NextOffsetOrErr.takeError();
445   return Parent->getData().data() + NextOffsetOrErr.get();
446 }
447 
448 Archive::Child::Child(const Archive *Parent, StringRef Data,
449                       uint16_t StartOfFile)
450     : Parent(Parent), Data(Data), StartOfFile(StartOfFile) {
451   Header = Parent->createArchiveMemberHeader(Data.data(), Data.size(), nullptr);
452 }
453 
454 Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
455     : Parent(Parent) {
456   if (!Start) {
457     Header = nullptr;
458     return;
459   }
460 
461   Header = Parent->createArchiveMemberHeader(
462       Start,
463       Parent ? Parent->getData().size() - (Start - Parent->getData().data())
464              : 0,
465       Err);
466 
467   // If we are pointed to real data, Start is not a nullptr, then there must be
468   // a non-null Err pointer available to report malformed data on.  Only in
469   // the case sentinel value is being constructed is Err is permitted to be a
470   // nullptr.
471   assert(Err && "Err can't be nullptr if Start is not a nullptr");
472 
473   ErrorAsOutParameter ErrAsOutParam(Err);
474 
475   // If there was an error in the construction of the Header
476   // then just return with the error now set.
477   if (*Err)
478     return;
479 
480   uint64_t Size = Header->getSizeOf();
481   Data = StringRef(Start, Size);
482   Expected<bool> isThinOrErr = isThinMember();
483   if (!isThinOrErr) {
484     *Err = isThinOrErr.takeError();
485     return;
486   }
487   bool isThin = isThinOrErr.get();
488   if (!isThin) {
489     Expected<uint64_t> MemberSize = getRawSize();
490     if (!MemberSize) {
491       *Err = MemberSize.takeError();
492       return;
493     }
494     Size += MemberSize.get();
495     Data = StringRef(Start, Size);
496   }
497 
498   // Setup StartOfFile and PaddingBytes.
499   StartOfFile = Header->getSizeOf();
500   // Don't include attached name.
501   Expected<StringRef> NameOrErr = getRawName();
502   if (!NameOrErr) {
503     *Err = NameOrErr.takeError();
504     return;
505   }
506   StringRef Name = NameOrErr.get();
507 
508   if (Parent->kind() == Archive::K_AIXBIG) {
509     // The actual start of the file is after the name and any necessary
510     // even-alignment padding.
511     StartOfFile += ((Name.size() + 1) >> 1) << 1;
512   } else if (Name.startswith("#1/")) {
513     uint64_t NameSize;
514     StringRef RawNameSize = Name.substr(3).rtrim(' ');
515     if (RawNameSize.getAsInteger(10, NameSize)) {
516       uint64_t Offset = Start - Parent->getData().data();
517       *Err = malformedError("long name length characters after the #1/ are "
518                             "not all decimal numbers: '" +
519                             RawNameSize +
520                             "' for archive member header at offset " +
521                             Twine(Offset));
522       return;
523     }
524     StartOfFile += NameSize;
525   }
526 }
527 
528 Expected<uint64_t> Archive::Child::getSize() const {
529   if (Parent->IsThin)
530     return Header->getSize();
531   return Data.size() - StartOfFile;
532 }
533 
534 Expected<uint64_t> Archive::Child::getRawSize() const {
535   return Header->getSize();
536 }
537 
538 Expected<bool> Archive::Child::isThinMember() const { return Header->isThin(); }
539 
540 Expected<std::string> Archive::Child::getFullName() const {
541   Expected<bool> isThin = isThinMember();
542   if (!isThin)
543     return isThin.takeError();
544   assert(isThin.get());
545   Expected<StringRef> NameOrErr = getName();
546   if (!NameOrErr)
547     return NameOrErr.takeError();
548   StringRef Name = *NameOrErr;
549   if (sys::path::is_absolute(Name))
550     return std::string(Name);
551 
552   SmallString<128> FullName = sys::path::parent_path(
553       Parent->getMemoryBufferRef().getBufferIdentifier());
554   sys::path::append(FullName, Name);
555   return std::string(FullName.str());
556 }
557 
558 Expected<StringRef> Archive::Child::getBuffer() const {
559   Expected<bool> isThinOrErr = isThinMember();
560   if (!isThinOrErr)
561     return isThinOrErr.takeError();
562   bool isThin = isThinOrErr.get();
563   if (!isThin) {
564     Expected<uint64_t> Size = getSize();
565     if (!Size)
566       return Size.takeError();
567     return StringRef(Data.data() + StartOfFile, Size.get());
568   }
569   Expected<std::string> FullNameOrErr = getFullName();
570   if (!FullNameOrErr)
571     return FullNameOrErr.takeError();
572   const std::string &FullName = *FullNameOrErr;
573   ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
574   if (std::error_code EC = Buf.getError())
575     return errorCodeToError(EC);
576   Parent->ThinBuffers.push_back(std::move(*Buf));
577   return Parent->ThinBuffers.back()->getBuffer();
578 }
579 
580 Expected<Archive::Child> Archive::Child::getNext() const {
581   Expected<const char *> NextLocOrErr = Header->getNextChildLoc();
582   if (!NextLocOrErr)
583     return NextLocOrErr.takeError();
584 
585   const char *NextLoc = *NextLocOrErr;
586 
587   // Check to see if this is at the end of the archive.
588   if (NextLoc == nullptr)
589     return Child(nullptr, nullptr, nullptr);
590 
591   // Check to see if this is past the end of the archive.
592   if (NextLoc > Parent->Data.getBufferEnd()) {
593     std::string Msg("offset to next archive member past the end of the archive "
594                     "after member ");
595     Expected<StringRef> NameOrErr = getName();
596     if (!NameOrErr) {
597       consumeError(NameOrErr.takeError());
598       uint64_t Offset = Data.data() - Parent->getData().data();
599       return malformedError(Msg + "at offset " + Twine(Offset));
600     } else
601       return malformedError(Msg + NameOrErr.get());
602   }
603 
604   Error Err = Error::success();
605   Child Ret(Parent, NextLoc, &Err);
606   if (Err)
607     return std::move(Err);
608   return Ret;
609 }
610 
611 uint64_t Archive::Child::getChildOffset() const {
612   const char *a = Parent->Data.getBuffer().data();
613   const char *c = Data.data();
614   uint64_t offset = c - a;
615   return offset;
616 }
617 
618 Expected<StringRef> Archive::Child::getName() const {
619   Expected<uint64_t> RawSizeOrErr = getRawSize();
620   if (!RawSizeOrErr)
621     return RawSizeOrErr.takeError();
622   uint64_t RawSize = RawSizeOrErr.get();
623   Expected<StringRef> NameOrErr =
624       Header->getName(Header->getSizeOf() + RawSize);
625   if (!NameOrErr)
626     return NameOrErr.takeError();
627   StringRef Name = NameOrErr.get();
628   return Name;
629 }
630 
631 Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
632   Expected<StringRef> NameOrErr = getName();
633   if (!NameOrErr)
634     return NameOrErr.takeError();
635   StringRef Name = NameOrErr.get();
636   Expected<StringRef> Buf = getBuffer();
637   if (!Buf)
638     return createFileError(Name, Buf.takeError());
639   return MemoryBufferRef(*Buf, Name);
640 }
641 
642 Expected<std::unique_ptr<Binary>>
643 Archive::Child::getAsBinary(LLVMContext *Context) const {
644   Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
645   if (!BuffOrErr)
646     return BuffOrErr.takeError();
647 
648   auto BinaryOrErr = createBinary(BuffOrErr.get(), Context);
649   if (BinaryOrErr)
650     return std::move(*BinaryOrErr);
651   return BinaryOrErr.takeError();
652 }
653 
654 Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
655   Error Err = Error::success();
656   std::unique_ptr<Archive> Ret;
657   StringRef Buffer = Source.getBuffer();
658 
659   if (Buffer.startswith(BigArchiveMagic))
660     Ret = std::make_unique<BigArchive>(Source, Err);
661   else
662     Ret = std::make_unique<Archive>(Source, Err);
663 
664   if (Err)
665     return std::move(Err);
666   return std::move(Ret);
667 }
668 
669 std::unique_ptr<AbstractArchiveMemberHeader>
670 Archive::createArchiveMemberHeader(const char *RawHeaderPtr, uint64_t Size,
671                                    Error *Err) const {
672   ErrorAsOutParameter ErrAsOutParam(Err);
673   if (kind() != K_AIXBIG)
674     return std::make_unique<ArchiveMemberHeader>(this, RawHeaderPtr, Size, Err);
675   return std::make_unique<BigArchiveMemberHeader>(this, RawHeaderPtr, Size,
676                                                   Err);
677 }
678 
679 uint64_t Archive::getArchiveMagicLen() const {
680   if (isThin())
681     return sizeof(ThinArchiveMagic) - 1;
682 
683   if (Kind() == K_AIXBIG)
684     return sizeof(BigArchiveMagic) - 1;
685 
686   return sizeof(ArchiveMagic) - 1;
687 }
688 
689 void Archive::setFirstRegular(const Child &C) {
690   FirstRegularData = C.Data;
691   FirstRegularStartOfFile = C.StartOfFile;
692 }
693 
694 Archive::Archive(MemoryBufferRef Source, Error &Err)
695     : Binary(Binary::ID_Archive, Source) {
696   ErrorAsOutParameter ErrAsOutParam(&Err);
697   StringRef Buffer = Data.getBuffer();
698   // Check for sufficient magic.
699   if (Buffer.startswith(ThinArchiveMagic)) {
700     IsThin = true;
701   } else if (Buffer.startswith(ArchiveMagic)) {
702     IsThin = false;
703   } else if (Buffer.startswith(BigArchiveMagic)) {
704     Format = K_AIXBIG;
705     IsThin = false;
706     return;
707   } else {
708     Err = make_error<GenericBinaryError>("file too small to be an archive",
709                                          object_error::invalid_file_type);
710     return;
711   }
712 
713   // Make sure Format is initialized before any call to
714   // ArchiveMemberHeader::getName() is made.  This could be a valid empty
715   // archive which is the same in all formats.  So claiming it to be gnu to is
716   // fine if not totally correct before we look for a string table or table of
717   // contents.
718   Format = K_GNU;
719 
720   // Get the special members.
721   child_iterator I = child_begin(Err, false);
722   if (Err)
723     return;
724   child_iterator E = child_end();
725 
726   // See if this is a valid empty archive and if so return.
727   if (I == E) {
728     Err = Error::success();
729     return;
730   }
731   const Child *C = &*I;
732 
733   auto Increment = [&]() {
734     ++I;
735     if (Err)
736       return true;
737     C = &*I;
738     return false;
739   };
740 
741   Expected<StringRef> NameOrErr = C->getRawName();
742   if (!NameOrErr) {
743     Err = NameOrErr.takeError();
744     return;
745   }
746   StringRef Name = NameOrErr.get();
747 
748   // Below is the pattern that is used to figure out the archive format
749   // GNU archive format
750   //  First member : / (may exist, if it exists, points to the symbol table )
751   //  Second member : // (may exist, if it exists, points to the string table)
752   //  Note : The string table is used if the filename exceeds 15 characters
753   // BSD archive format
754   //  First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
755   //  There is no string table, if the filename exceeds 15 characters or has a
756   //  embedded space, the filename has #1/<size>, The size represents the size
757   //  of the filename that needs to be read after the archive header
758   // COFF archive format
759   //  First member : /
760   //  Second member : / (provides a directory of symbols)
761   //  Third member : // (may exist, if it exists, contains the string table)
762   //  Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
763   //  even if the string table is empty. However, lib.exe does not in fact
764   //  seem to create the third member if there's no member whose filename
765   //  exceeds 15 characters. So the third member is optional.
766 
767   if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") {
768     if (Name == "__.SYMDEF")
769       Format = K_BSD;
770     else // Name == "__.SYMDEF_64"
771       Format = K_DARWIN64;
772     // We know that the symbol table is not an external file, but we still must
773     // check any Expected<> return value.
774     Expected<StringRef> BufOrErr = C->getBuffer();
775     if (!BufOrErr) {
776       Err = BufOrErr.takeError();
777       return;
778     }
779     SymbolTable = BufOrErr.get();
780     if (Increment())
781       return;
782     setFirstRegular(*C);
783 
784     Err = Error::success();
785     return;
786   }
787 
788   if (Name.startswith("#1/")) {
789     Format = K_BSD;
790     // We know this is BSD, so getName will work since there is no string table.
791     Expected<StringRef> NameOrErr = C->getName();
792     if (!NameOrErr) {
793       Err = NameOrErr.takeError();
794       return;
795     }
796     Name = NameOrErr.get();
797     if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
798       // We know that the symbol table is not an external file, but we still
799       // must check any Expected<> return value.
800       Expected<StringRef> BufOrErr = C->getBuffer();
801       if (!BufOrErr) {
802         Err = BufOrErr.takeError();
803         return;
804       }
805       SymbolTable = BufOrErr.get();
806       if (Increment())
807         return;
808     } else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") {
809       Format = K_DARWIN64;
810       // We know that the symbol table is not an external file, but we still
811       // must check any Expected<> return value.
812       Expected<StringRef> BufOrErr = C->getBuffer();
813       if (!BufOrErr) {
814         Err = BufOrErr.takeError();
815         return;
816       }
817       SymbolTable = BufOrErr.get();
818       if (Increment())
819         return;
820     }
821     setFirstRegular(*C);
822     return;
823   }
824 
825   // MIPS 64-bit ELF archives use a special format of a symbol table.
826   // This format is marked by `ar_name` field equals to "/SYM64/".
827   // For detailed description see page 96 in the following document:
828   // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf
829 
830   bool has64SymTable = false;
831   if (Name == "/" || Name == "/SYM64/") {
832     // We know that the symbol table is not an external file, but we still
833     // must check any Expected<> return value.
834     Expected<StringRef> BufOrErr = C->getBuffer();
835     if (!BufOrErr) {
836       Err = BufOrErr.takeError();
837       return;
838     }
839     SymbolTable = BufOrErr.get();
840     if (Name == "/SYM64/")
841       has64SymTable = true;
842 
843     if (Increment())
844       return;
845     if (I == E) {
846       Err = Error::success();
847       return;
848     }
849     Expected<StringRef> NameOrErr = C->getRawName();
850     if (!NameOrErr) {
851       Err = NameOrErr.takeError();
852       return;
853     }
854     Name = NameOrErr.get();
855   }
856 
857   if (Name == "//") {
858     Format = has64SymTable ? K_GNU64 : K_GNU;
859     // The string table is never an external member, but we still
860     // must check any Expected<> return value.
861     Expected<StringRef> BufOrErr = C->getBuffer();
862     if (!BufOrErr) {
863       Err = BufOrErr.takeError();
864       return;
865     }
866     StringTable = BufOrErr.get();
867     if (Increment())
868       return;
869     setFirstRegular(*C);
870     Err = Error::success();
871     return;
872   }
873 
874   if (Name[0] != '/') {
875     Format = has64SymTable ? K_GNU64 : K_GNU;
876     setFirstRegular(*C);
877     Err = Error::success();
878     return;
879   }
880 
881   if (Name != "/") {
882     Err = errorCodeToError(object_error::parse_failed);
883     return;
884   }
885 
886   Format = K_COFF;
887   // We know that the symbol table is not an external file, but we still
888   // must check any Expected<> return value.
889   Expected<StringRef> BufOrErr = C->getBuffer();
890   if (!BufOrErr) {
891     Err = BufOrErr.takeError();
892     return;
893   }
894   SymbolTable = BufOrErr.get();
895 
896   if (Increment())
897     return;
898 
899   if (I == E) {
900     setFirstRegular(*C);
901     Err = Error::success();
902     return;
903   }
904 
905   NameOrErr = C->getRawName();
906   if (!NameOrErr) {
907     Err = NameOrErr.takeError();
908     return;
909   }
910   Name = NameOrErr.get();
911 
912   if (Name == "//") {
913     // The string table is never an external member, but we still
914     // must check any Expected<> return value.
915     Expected<StringRef> BufOrErr = C->getBuffer();
916     if (!BufOrErr) {
917       Err = BufOrErr.takeError();
918       return;
919     }
920     StringTable = BufOrErr.get();
921     if (Increment())
922       return;
923   }
924 
925   setFirstRegular(*C);
926   Err = Error::success();
927 }
928 
929 object::Archive::Kind Archive::getDefaultKindForHost() {
930   Triple HostTriple(sys::getProcessTriple());
931   return HostTriple.isOSDarwin()
932              ? object::Archive::K_DARWIN
933              : (HostTriple.isOSAIX() ? object::Archive::K_AIXBIG
934                                      : object::Archive::K_GNU);
935 }
936 
937 Archive::child_iterator Archive::child_begin(Error &Err,
938                                              bool SkipInternal) const {
939   if (isEmpty())
940     return child_end();
941 
942   if (SkipInternal)
943     return child_iterator::itr(
944         Child(this, FirstRegularData, FirstRegularStartOfFile), Err);
945 
946   const char *Loc = Data.getBufferStart() + getFirstChildOffset();
947   Child C(this, Loc, &Err);
948   if (Err)
949     return child_end();
950   return child_iterator::itr(C, Err);
951 }
952 
953 Archive::child_iterator Archive::child_end() const {
954   return child_iterator::end(Child(nullptr, nullptr, nullptr));
955 }
956 
957 StringRef Archive::Symbol::getName() const {
958   return Parent->getSymbolTable().begin() + StringIndex;
959 }
960 
961 Expected<Archive::Child> Archive::Symbol::getMember() const {
962   const char *Buf = Parent->getSymbolTable().begin();
963   const char *Offsets = Buf;
964   if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64)
965     Offsets += sizeof(uint64_t);
966   else
967     Offsets += sizeof(uint32_t);
968   uint64_t Offset = 0;
969   if (Parent->kind() == K_GNU) {
970     Offset = read32be(Offsets + SymbolIndex * 4);
971   } else if (Parent->kind() == K_GNU64) {
972     Offset = read64be(Offsets + SymbolIndex * 8);
973   } else if (Parent->kind() == K_BSD) {
974     // The SymbolIndex is an index into the ranlib structs that start at
975     // Offsets (the first uint32_t is the number of bytes of the ranlib
976     // structs).  The ranlib structs are a pair of uint32_t's the first
977     // being a string table offset and the second being the offset into
978     // the archive of the member that defines the symbol.  Which is what
979     // is needed here.
980     Offset = read32le(Offsets + SymbolIndex * 8 + 4);
981   } else if (Parent->kind() == K_DARWIN64) {
982     // The SymbolIndex is an index into the ranlib_64 structs that start at
983     // Offsets (the first uint64_t is the number of bytes of the ranlib_64
984     // structs).  The ranlib_64 structs are a pair of uint64_t's the first
985     // being a string table offset and the second being the offset into
986     // the archive of the member that defines the symbol.  Which is what
987     // is needed here.
988     Offset = read64le(Offsets + SymbolIndex * 16 + 8);
989   } else {
990     // Skip offsets.
991     uint32_t MemberCount = read32le(Buf);
992     Buf += MemberCount * 4 + 4;
993 
994     uint32_t SymbolCount = read32le(Buf);
995     if (SymbolIndex >= SymbolCount)
996       return errorCodeToError(object_error::parse_failed);
997 
998     // Skip SymbolCount to get to the indices table.
999     const char *Indices = Buf + 4;
1000 
1001     // Get the index of the offset in the file member offset table for this
1002     // symbol.
1003     uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
1004     // Subtract 1 since OffsetIndex is 1 based.
1005     --OffsetIndex;
1006 
1007     if (OffsetIndex >= MemberCount)
1008       return errorCodeToError(object_error::parse_failed);
1009 
1010     Offset = read32le(Offsets + OffsetIndex * 4);
1011   }
1012 
1013   const char *Loc = Parent->getData().begin() + Offset;
1014   Error Err = Error::success();
1015   Child C(Parent, Loc, &Err);
1016   if (Err)
1017     return std::move(Err);
1018   return C;
1019 }
1020 
1021 Archive::Symbol Archive::Symbol::getNext() const {
1022   Symbol t(*this);
1023   if (Parent->kind() == K_BSD) {
1024     // t.StringIndex is an offset from the start of the __.SYMDEF or
1025     // "__.SYMDEF SORTED" member into the string table for the ranlib
1026     // struct indexed by t.SymbolIndex .  To change t.StringIndex to the
1027     // offset in the string table for t.SymbolIndex+1 we subtract the
1028     // its offset from the start of the string table for t.SymbolIndex
1029     // and add the offset of the string table for t.SymbolIndex+1.
1030 
1031     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
1032     // which is the number of bytes of ranlib structs that follow.  The ranlib
1033     // structs are a pair of uint32_t's the first being a string table offset
1034     // and the second being the offset into the archive of the member that
1035     // define the symbol. After that the next uint32_t is the byte count of
1036     // the string table followed by the string table.
1037     const char *Buf = Parent->getSymbolTable().begin();
1038     uint32_t RanlibCount = 0;
1039     RanlibCount = read32le(Buf) / 8;
1040     // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
1041     // don't change the t.StringIndex as we don't want to reference a ranlib
1042     // past RanlibCount.
1043     if (t.SymbolIndex + 1 < RanlibCount) {
1044       const char *Ranlibs = Buf + 4;
1045       uint32_t CurRanStrx = 0;
1046       uint32_t NextRanStrx = 0;
1047       CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8);
1048       NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8);
1049       t.StringIndex -= CurRanStrx;
1050       t.StringIndex += NextRanStrx;
1051     }
1052   } else {
1053     // Go to one past next null.
1054     t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1;
1055   }
1056   ++t.SymbolIndex;
1057   return t;
1058 }
1059 
1060 Archive::symbol_iterator Archive::symbol_begin() const {
1061   if (!hasSymbolTable())
1062     return symbol_iterator(Symbol(this, 0, 0));
1063 
1064   const char *buf = getSymbolTable().begin();
1065   if (kind() == K_GNU) {
1066     uint32_t symbol_count = 0;
1067     symbol_count = read32be(buf);
1068     buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
1069   } else if (kind() == K_GNU64) {
1070     uint64_t symbol_count = read64be(buf);
1071     buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t)));
1072   } else if (kind() == K_BSD) {
1073     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
1074     // which is the number of bytes of ranlib structs that follow.  The ranlib
1075     // structs are a pair of uint32_t's the first being a string table offset
1076     // and the second being the offset into the archive of the member that
1077     // define the symbol. After that the next uint32_t is the byte count of
1078     // the string table followed by the string table.
1079     uint32_t ranlib_count = 0;
1080     ranlib_count = read32le(buf) / 8;
1081     const char *ranlibs = buf + 4;
1082     uint32_t ran_strx = 0;
1083     ran_strx = read32le(ranlibs);
1084     buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
1085     // Skip the byte count of the string table.
1086     buf += sizeof(uint32_t);
1087     buf += ran_strx;
1088   } else if (kind() == K_DARWIN64) {
1089     // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t
1090     // which is the number of bytes of ranlib_64 structs that follow.  The
1091     // ranlib_64 structs are a pair of uint64_t's the first being a string
1092     // table offset and the second being the offset into the archive of the
1093     // member that define the symbol. After that the next uint64_t is the byte
1094     // count of the string table followed by the string table.
1095     uint64_t ranlib_count = 0;
1096     ranlib_count = read64le(buf) / 16;
1097     const char *ranlibs = buf + 8;
1098     uint64_t ran_strx = 0;
1099     ran_strx = read64le(ranlibs);
1100     buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t))));
1101     // Skip the byte count of the string table.
1102     buf += sizeof(uint64_t);
1103     buf += ran_strx;
1104   } else {
1105     uint32_t member_count = 0;
1106     uint32_t symbol_count = 0;
1107     member_count = read32le(buf);
1108     buf += 4 + (member_count * 4); // Skip offsets.
1109     symbol_count = read32le(buf);
1110     buf += 4 + (symbol_count * 2); // Skip indices.
1111   }
1112   uint32_t string_start_offset = buf - getSymbolTable().begin();
1113   return symbol_iterator(Symbol(this, 0, string_start_offset));
1114 }
1115 
1116 Archive::symbol_iterator Archive::symbol_end() const {
1117   return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0));
1118 }
1119 
1120 uint32_t Archive::getNumberOfSymbols() const {
1121   if (!hasSymbolTable())
1122     return 0;
1123   const char *buf = getSymbolTable().begin();
1124   if (kind() == K_GNU)
1125     return read32be(buf);
1126   if (kind() == K_GNU64)
1127     return read64be(buf);
1128   if (kind() == K_BSD)
1129     return read32le(buf) / 8;
1130   if (kind() == K_DARWIN64)
1131     return read64le(buf) / 16;
1132   uint32_t member_count = 0;
1133   member_count = read32le(buf);
1134   buf += 4 + (member_count * 4); // Skip offsets.
1135   return read32le(buf);
1136 }
1137 
1138 Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const {
1139   Archive::symbol_iterator bs = symbol_begin();
1140   Archive::symbol_iterator es = symbol_end();
1141 
1142   for (; bs != es; ++bs) {
1143     StringRef SymName = bs->getName();
1144     if (SymName == name) {
1145       if (auto MemberOrErr = bs->getMember())
1146         return Child(*MemberOrErr);
1147       else
1148         return MemberOrErr.takeError();
1149     }
1150   }
1151   return Optional<Child>();
1152 }
1153 
1154 // Returns true if archive file contains no member file.
1155 bool Archive::isEmpty() const {
1156   return Data.getBufferSize() == getArchiveMagicLen();
1157 }
1158 
1159 bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); }
1160 
1161 BigArchive::BigArchive(MemoryBufferRef Source, Error &Err)
1162     : Archive(Source, Err) {
1163   ErrorAsOutParameter ErrAsOutParam(&Err);
1164   StringRef Buffer = Data.getBuffer();
1165   ArFixLenHdr = reinterpret_cast<const FixLenHdr *>(Buffer.data());
1166 
1167   StringRef RawOffset = getFieldRawString(ArFixLenHdr->FirstChildOffset);
1168   if (RawOffset.getAsInteger(10, FirstChildOffset))
1169     // TODO: Out-of-line.
1170     Err = malformedError("malformed AIX big archive: first member offset \"" +
1171                          RawOffset + "\" is not a number");
1172 
1173   RawOffset = getFieldRawString(ArFixLenHdr->LastChildOffset);
1174   if (RawOffset.getAsInteger(10, LastChildOffset))
1175     // TODO: Out-of-line.
1176     Err = malformedError("malformed AIX big archive: last member offset \"" +
1177                          RawOffset + "\" is not a number");
1178 
1179   child_iterator I = child_begin(Err, false);
1180   if (Err)
1181     return;
1182   child_iterator E = child_end();
1183   if (I == E) {
1184     Err = Error::success();
1185     return;
1186   }
1187   setFirstRegular(*I);
1188   Err = Error::success();
1189 }
1190