1 //===- Archive.cpp - ar File Format implementation ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file implements the Archive class together with its member-header and
// Child helpers.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Object/Archive.h"
14 #include "llvm/ADT/Optional.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Object/Binary.h"
19 #include "llvm/Object/Error.h"
20 #include "llvm/Support/Chrono.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/ErrorOr.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/MemoryBuffer.h"
27 #include "llvm/Support/Path.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <algorithm>
30 #include <cassert>
31 #include <cstddef>
32 #include <cstdint>
33 #include <memory>
34 #include <string>
35 #include <system_error>
36 
37 using namespace llvm;
38 using namespace object;
39 using namespace llvm::support::endian;
40 
// Out-of-line definition of Archive::anchor(); the body is intentionally
// empty. NOTE(review): presumably this exists only to give the class an
// out-of-line virtual member defined in this file — confirm against the header.
void Archive::anchor() {}
42 
43 static Error malformedError(Twine Msg) {
44   std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")";
45   return make_error<GenericBinaryError>(std::move(StringMsg),
46                                         object_error::parse_failed);
47 }
48 
49 static Error
50 createMemberHeaderParseError(const AbstractArchiveMemberHeader *ArMemHeader,
51                              const char *RawHeaderPtr, uint64_t Size) {
52   StringRef Msg("remaining size of archive too small for next archive "
53                 "member header ");
54 
55   Expected<StringRef> NameOrErr = ArMemHeader->getName(Size);
56   if (NameOrErr)
57     return malformedError(Msg + "for " + *NameOrErr);
58 
59   consumeError(NameOrErr.takeError());
60   uint64_t Offset = RawHeaderPtr - ArMemHeader->Parent->getData().data();
61   return malformedError(Msg + "at offset " + Twine(Offset));
62 }
63 
64 template <class T, std::size_t N>
65 StringRef getFieldRawString(const T (&Field)[N]) {
66   return StringRef(Field, N).rtrim(" ");
67 }
68 
69 template <class T>
70 StringRef CommonArchiveMemberHeader<T>::getRawAccessMode() const {
71   return getFieldRawString(ArMemHdr->AccessMode);
72 }
73 
74 template <class T>
75 StringRef CommonArchiveMemberHeader<T>::getRawLastModified() const {
76   return getFieldRawString(ArMemHdr->LastModified);
77 }
78 
79 template <class T> StringRef CommonArchiveMemberHeader<T>::getRawUID() const {
80   return getFieldRawString(ArMemHdr->UID);
81 }
82 
83 template <class T> StringRef CommonArchiveMemberHeader<T>::getRawGID() const {
84   return getFieldRawString(ArMemHdr->GID);
85 }
86 
87 template <class T> uint64_t CommonArchiveMemberHeader<T>::getOffset() const {
88   return reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
89 }
90 
// Explicitly instantiate the common header template for the two header
// layouts used in this file (UnixArMemHdrType and BigArMemHdrType), so the
// member definitions above are emitted from this translation unit.
template class object::CommonArchiveMemberHeader<UnixArMemHdrType>;
template class object::CommonArchiveMemberHeader<BigArMemHdrType>;
93 
94 ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent,
95                                          const char *RawHeaderPtr,
96                                          uint64_t Size, Error *Err)
97     : CommonArchiveMemberHeader<UnixArMemHdrType>(
98           Parent, reinterpret_cast<const UnixArMemHdrType *>(RawHeaderPtr)) {
99   if (RawHeaderPtr == nullptr)
100     return;
101   ErrorAsOutParameter ErrAsOutParam(Err);
102 
103   if (Size < getSizeOf()) {
104     *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size);
105     return;
106   }
107   if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') {
108     if (Err) {
109       std::string Buf;
110       raw_string_ostream OS(Buf);
111       OS.write_escaped(
112           StringRef(ArMemHdr->Terminator, sizeof(ArMemHdr->Terminator)));
113       OS.flush();
114       std::string Msg("terminator characters in archive member \"" + Buf +
115                       "\" not the correct \"`\\n\" values for the archive "
116                       "member header ");
117       Expected<StringRef> NameOrErr = getName(Size);
118       if (!NameOrErr) {
119         consumeError(NameOrErr.takeError());
120         uint64_t Offset = RawHeaderPtr - Parent->getData().data();
121         *Err = malformedError(Msg + "at offset " + Twine(Offset));
122       } else
123         *Err = malformedError(Msg + "for " + NameOrErr.get());
124     }
125     return;
126   }
127 }
128 
129 BigArchiveMemberHeader::BigArchiveMemberHeader(const Archive *Parent,
130                                                const char *RawHeaderPtr,
131                                                uint64_t Size, Error *Err)
132     : CommonArchiveMemberHeader<BigArMemHdrType>(
133           Parent, reinterpret_cast<const BigArMemHdrType *>(RawHeaderPtr)) {
134   if (RawHeaderPtr == nullptr)
135     return;
136   ErrorAsOutParameter ErrAsOutParam(Err);
137 
138   if (Size < getSizeOf())
139     *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size);
140 }
141 
142 // This gets the raw name from the ArMemHdr->Name field and checks that it is
143 // valid for the kind of archive.  If it is not valid it returns an Error.
144 Expected<StringRef> ArchiveMemberHeader::getRawName() const {
145   char EndCond;
146   auto Kind = Parent->kind();
147   if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) {
148     if (ArMemHdr->Name[0] == ' ') {
149       uint64_t Offset =
150           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
151       return malformedError("name contains a leading space for archive member "
152                             "header at offset " +
153                             Twine(Offset));
154     }
155     EndCond = ' ';
156   } else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#')
157     EndCond = ' ';
158   else
159     EndCond = '/';
160   StringRef::size_type end =
161       StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond);
162   if (end == StringRef::npos)
163     end = sizeof(ArMemHdr->Name);
164   assert(end <= sizeof(ArMemHdr->Name) && end > 0);
165   // Don't include the EndCond if there is one.
166   return StringRef(ArMemHdr->Name, end);
167 }
168 
169 Expected<uint64_t>
170 getArchiveMemberDecField(Twine FieldName, const StringRef RawField,
171                          const Archive *Parent,
172                          const AbstractArchiveMemberHeader *MemHeader) {
173   uint64_t Value;
174   if (RawField.getAsInteger(10, Value)) {
175     uint64_t Offset = MemHeader->getOffset();
176     return malformedError("characters in " + FieldName +
177                           " field in archive member header are not "
178                           "all decimal numbers: '" +
179                           RawField +
180                           "' for the archive "
181                           "member header at offset " +
182                           Twine(Offset));
183   }
184   return Value;
185 }
186 
187 Expected<uint64_t>
188 getArchiveMemberOctField(Twine FieldName, const StringRef RawField,
189                          const Archive *Parent,
190                          const AbstractArchiveMemberHeader *MemHeader) {
191   uint64_t Value;
192   if (RawField.getAsInteger(8, Value)) {
193     uint64_t Offset = MemHeader->getOffset();
194     return malformedError("characters in " + FieldName +
195                           " field in archive member header are not "
196                           "all octal numbers: '" +
197                           RawField +
198                           "' for the archive "
199                           "member header at offset " +
200                           Twine(Offset));
201   }
202   return Value;
203 }
204 
205 Expected<StringRef> BigArchiveMemberHeader::getRawName() const {
206   Expected<uint64_t> NameLenOrErr = getArchiveMemberDecField(
207       "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this);
208   if (!NameLenOrErr)
209     // TODO: Out-of-line.
210     return NameLenOrErr.takeError();
211   uint64_t NameLen = NameLenOrErr.get();
212 
213   // If the name length is odd, pad with '\0' to get an even length. After
214   // padding, there is the name terminator "`\n".
215   uint64_t NameLenWithPadding = alignTo(NameLen, 2);
216   StringRef NameTerminator = "`\n";
217   StringRef NameStringWithNameTerminator =
218       StringRef(ArMemHdr->Name, NameLenWithPadding + NameTerminator.size());
219   if (!NameStringWithNameTerminator.endswith(NameTerminator)) {
220     uint64_t Offset =
221         reinterpret_cast<const char *>(ArMemHdr->Name + NameLenWithPadding) -
222         Parent->getData().data();
223     // TODO: Out-of-line.
224     return malformedError(
225         "name does not have name terminator \"`\\n\" for archive member"
226         "header at offset " +
227         Twine(Offset));
228   }
229   return StringRef(ArMemHdr->Name, NameLen);
230 }
231 
232 // member including the header, so the size of any name following the header
233 // is checked to make sure it does not overflow.
234 Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {
235 
236   // This can be called from the ArchiveMemberHeader constructor when the
237   // archive header is truncated to produce an error message with the name.
238   // Make sure the name field is not truncated.
239   if (Size < offsetof(UnixArMemHdrType, Name) + sizeof(ArMemHdr->Name)) {
240     uint64_t ArchiveOffset =
241         reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
242     return malformedError("archive header truncated before the name field "
243                           "for archive member header at offset " +
244                           Twine(ArchiveOffset));
245   }
246 
247   // The raw name itself can be invalid.
248   Expected<StringRef> NameOrErr = getRawName();
249   if (!NameOrErr)
250     return NameOrErr.takeError();
251   StringRef Name = NameOrErr.get();
252 
253   // Check if it's a special name.
254   if (Name[0] == '/') {
255     if (Name.size() == 1) // Linker member.
256       return Name;
257     if (Name.size() == 2 && Name[1] == '/') // String table.
258       return Name;
259     // System libraries from the Windows SDK for Windows 11 contain this symbol.
260     // It looks like a CFG guard: we just skip it for now.
261     if (Name.equals("/<XFGHASHMAP>/"))
262       return Name;
263     // It's a long name.
264     // Get the string table offset.
265     std::size_t StringOffset;
266     if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) {
267       std::string Buf;
268       raw_string_ostream OS(Buf);
269       OS.write_escaped(Name.substr(1).rtrim(' '));
270       OS.flush();
271       uint64_t ArchiveOffset =
272           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
273       return malformedError("long name offset characters after the '/' are "
274                             "not all decimal numbers: '" +
275                             Buf + "' for archive member header at offset " +
276                             Twine(ArchiveOffset));
277     }
278 
279     // Verify it.
280     if (StringOffset >= Parent->getStringTable().size()) {
281       uint64_t ArchiveOffset =
282           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
283       return malformedError("long name offset " + Twine(StringOffset) +
284                             " past the end of the string table for archive "
285                             "member header at offset " +
286                             Twine(ArchiveOffset));
287     }
288 
289     // GNU long file names end with a "/\n".
290     if (Parent->kind() == Archive::K_GNU ||
291         Parent->kind() == Archive::K_GNU64) {
292       size_t End = Parent->getStringTable().find('\n', /*From=*/StringOffset);
293       if (End == StringRef::npos || End < 1 ||
294           Parent->getStringTable()[End - 1] != '/') {
295         return malformedError("string table at long name offset " +
296                               Twine(StringOffset) + "not terminated");
297       }
298       return Parent->getStringTable().slice(StringOffset, End - 1);
299     }
300     return Parent->getStringTable().begin() + StringOffset;
301   }
302 
303   if (Name.startswith("#1/")) {
304     uint64_t NameLength;
305     if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) {
306       std::string Buf;
307       raw_string_ostream OS(Buf);
308       OS.write_escaped(Name.substr(3).rtrim(' '));
309       OS.flush();
310       uint64_t ArchiveOffset =
311           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
312       return malformedError("long name length characters after the #1/ are "
313                             "not all decimal numbers: '" +
314                             Buf + "' for archive member header at offset " +
315                             Twine(ArchiveOffset));
316     }
317     if (getSizeOf() + NameLength > Size) {
318       uint64_t ArchiveOffset =
319           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
320       return malformedError("long name length: " + Twine(NameLength) +
321                             " extends past the end of the member or archive "
322                             "for archive member header at offset " +
323                             Twine(ArchiveOffset));
324     }
325     return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(),
326                      NameLength)
327         .rtrim('\0');
328   }
329 
330   // It is not a long name so trim the blanks at the end of the name.
331   if (Name[Name.size() - 1] != '/')
332     return Name.rtrim(' ');
333 
334   // It's a simple name.
335   return Name.drop_back(1);
336 }
337 
338 Expected<StringRef> BigArchiveMemberHeader::getName(uint64_t Size) const {
339   return getRawName();
340 }
341 
342 Expected<uint64_t> ArchiveMemberHeader::getSize() const {
343   return getArchiveMemberDecField("size", getFieldRawString(ArMemHdr->Size),
344                                   Parent, this);
345 }
346 
347 Expected<uint64_t> BigArchiveMemberHeader::getSize() const {
348   Expected<uint64_t> SizeOrErr = getArchiveMemberDecField(
349       "size", getFieldRawString(ArMemHdr->Size), Parent, this);
350   if (!SizeOrErr)
351     return SizeOrErr.takeError();
352 
353   Expected<uint64_t> NameLenOrErr = getRawNameSize();
354   if (!NameLenOrErr)
355     return NameLenOrErr.takeError();
356 
357   return *SizeOrErr + alignTo(*NameLenOrErr, 2);
358 }
359 
360 Expected<uint64_t> BigArchiveMemberHeader::getRawNameSize() const {
361   return getArchiveMemberDecField(
362       "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this);
363 }
364 
365 Expected<uint64_t> BigArchiveMemberHeader::getNextOffset() const {
366   return getArchiveMemberDecField(
367       "NextOffset", getFieldRawString(ArMemHdr->NextOffset), Parent, this);
368 }
369 
370 Expected<sys::fs::perms> AbstractArchiveMemberHeader::getAccessMode() const {
371   Expected<uint64_t> AccessModeOrErr =
372       getArchiveMemberOctField("AccessMode", getRawAccessMode(), Parent, this);
373   if (!AccessModeOrErr)
374     return AccessModeOrErr.takeError();
375   return static_cast<sys::fs::perms>(*AccessModeOrErr);
376 }
377 
378 Expected<sys::TimePoint<std::chrono::seconds>>
379 AbstractArchiveMemberHeader::getLastModified() const {
380   Expected<uint64_t> SecondsOrErr = getArchiveMemberDecField(
381       "LastModified", getRawLastModified(), Parent, this);
382 
383   if (!SecondsOrErr)
384     return SecondsOrErr.takeError();
385 
386   return sys::toTimePoint(*SecondsOrErr);
387 }
388 
389 Expected<unsigned> AbstractArchiveMemberHeader::getUID() const {
390   StringRef User = getRawUID();
391   if (User.empty())
392     return 0;
393   return getArchiveMemberDecField("UID", User, Parent, this);
394 }
395 
396 Expected<unsigned> AbstractArchiveMemberHeader::getGID() const {
397   StringRef Group = getRawGID();
398   if (Group.empty())
399     return 0;
400   return getArchiveMemberDecField("GID", Group, Parent, this);
401 }
402 
403 Expected<bool> ArchiveMemberHeader::isThin() const {
404   Expected<StringRef> NameOrErr = getRawName();
405   if (!NameOrErr)
406     return NameOrErr.takeError();
407   StringRef Name = NameOrErr.get();
408   return Parent->isThin() && Name != "/" && Name != "//" && Name != "/SYM64/";
409 }
410 
411 Expected<const char *> ArchiveMemberHeader::getNextChildLoc() const {
412   uint64_t Size = getSizeOf();
413   Expected<bool> isThinOrErr = isThin();
414   if (!isThinOrErr)
415     return isThinOrErr.takeError();
416 
417   bool isThin = isThinOrErr.get();
418   if (!isThin) {
419     Expected<uint64_t> MemberSize = getSize();
420     if (!MemberSize)
421       return MemberSize.takeError();
422 
423     Size += MemberSize.get();
424   }
425 
426   // If Size is odd, add 1 to make it even.
427   const char *NextLoc =
428       reinterpret_cast<const char *>(ArMemHdr) + alignTo(Size, 2);
429 
430   if (NextLoc == Parent->getMemoryBufferRef().getBufferEnd())
431     return nullptr;
432 
433   return NextLoc;
434 }
435 
436 Expected<const char *> BigArchiveMemberHeader::getNextChildLoc() const {
437   if (getOffset() ==
438       static_cast<const BigArchive *>(Parent)->getLastChildOffset())
439     return nullptr;
440 
441   Expected<uint64_t> NextOffsetOrErr = getNextOffset();
442   if (!NextOffsetOrErr)
443     return NextOffsetOrErr.takeError();
444   return Parent->getData().data() + NextOffsetOrErr.get();
445 }
446 
// Builds a Child from an already-delimited member: Data holds the member's
// bytes and StartOfFile is the offset inside Data at which the file contents
// begin. The header object is created over the start of Data; no Error
// pointer is supplied, so header validation problems are not reported here.
Archive::Child::Child(const Archive *Parent, StringRef Data,
                      uint16_t StartOfFile)
    : Parent(Parent), Data(Data), StartOfFile(StartOfFile) {
  Header = Parent->createArchiveMemberHeader(Data.data(), Data.size(), nullptr);
}
452 
453 Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
454     : Parent(Parent) {
455   if (!Start) {
456     Header = nullptr;
457     return;
458   }
459 
460   Header = Parent->createArchiveMemberHeader(
461       Start,
462       Parent ? Parent->getData().size() - (Start - Parent->getData().data())
463              : 0,
464       Err);
465 
466   // If we are pointed to real data, Start is not a nullptr, then there must be
467   // a non-null Err pointer available to report malformed data on.  Only in
468   // the case sentinel value is being constructed is Err is permitted to be a
469   // nullptr.
470   assert(Err && "Err can't be nullptr if Start is not a nullptr");
471 
472   ErrorAsOutParameter ErrAsOutParam(Err);
473 
474   // If there was an error in the construction of the Header
475   // then just return with the error now set.
476   if (*Err)
477     return;
478 
479   uint64_t Size = Header->getSizeOf();
480   Data = StringRef(Start, Size);
481   Expected<bool> isThinOrErr = isThinMember();
482   if (!isThinOrErr) {
483     *Err = isThinOrErr.takeError();
484     return;
485   }
486   bool isThin = isThinOrErr.get();
487   if (!isThin) {
488     Expected<uint64_t> MemberSize = getRawSize();
489     if (!MemberSize) {
490       *Err = MemberSize.takeError();
491       return;
492     }
493     Size += MemberSize.get();
494     Data = StringRef(Start, Size);
495   }
496 
497   // Setup StartOfFile and PaddingBytes.
498   StartOfFile = Header->getSizeOf();
499   // Don't include attached name.
500   Expected<StringRef> NameOrErr = getRawName();
501   if (!NameOrErr) {
502     *Err = NameOrErr.takeError();
503     return;
504   }
505   StringRef Name = NameOrErr.get();
506 
507   if (Parent->kind() == Archive::K_AIXBIG) {
508     // The actual start of the file is after the name and any necessary
509     // even-alignment padding.
510     StartOfFile += ((Name.size() + 1) >> 1) << 1;
511   } else if (Name.startswith("#1/")) {
512     uint64_t NameSize;
513     StringRef RawNameSize = Name.substr(3).rtrim(' ');
514     if (RawNameSize.getAsInteger(10, NameSize)) {
515       uint64_t Offset = Start - Parent->getData().data();
516       *Err = malformedError("long name length characters after the #1/ are "
517                             "not all decimal numbers: '" +
518                             RawNameSize +
519                             "' for archive member header at offset " +
520                             Twine(Offset));
521       return;
522     }
523     StartOfFile += NameSize;
524   }
525 }
526 
527 Expected<uint64_t> Archive::Child::getSize() const {
528   if (Parent->IsThin)
529     return Header->getSize();
530   return Data.size() - StartOfFile;
531 }
532 
533 Expected<uint64_t> Archive::Child::getRawSize() const {
534   return Header->getSize();
535 }
536 
537 Expected<bool> Archive::Child::isThinMember() const { return Header->isThin(); }
538 
539 Expected<std::string> Archive::Child::getFullName() const {
540   Expected<bool> isThin = isThinMember();
541   if (!isThin)
542     return isThin.takeError();
543   assert(isThin.get());
544   Expected<StringRef> NameOrErr = getName();
545   if (!NameOrErr)
546     return NameOrErr.takeError();
547   StringRef Name = *NameOrErr;
548   if (sys::path::is_absolute(Name))
549     return std::string(Name);
550 
551   SmallString<128> FullName = sys::path::parent_path(
552       Parent->getMemoryBufferRef().getBufferIdentifier());
553   sys::path::append(FullName, Name);
554   return std::string(FullName.str());
555 }
556 
557 Expected<StringRef> Archive::Child::getBuffer() const {
558   Expected<bool> isThinOrErr = isThinMember();
559   if (!isThinOrErr)
560     return isThinOrErr.takeError();
561   bool isThin = isThinOrErr.get();
562   if (!isThin) {
563     Expected<uint64_t> Size = getSize();
564     if (!Size)
565       return Size.takeError();
566     return StringRef(Data.data() + StartOfFile, Size.get());
567   }
568   Expected<std::string> FullNameOrErr = getFullName();
569   if (!FullNameOrErr)
570     return FullNameOrErr.takeError();
571   const std::string &FullName = *FullNameOrErr;
572   ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
573   if (std::error_code EC = Buf.getError())
574     return errorCodeToError(EC);
575   Parent->ThinBuffers.push_back(std::move(*Buf));
576   return Parent->ThinBuffers.back()->getBuffer();
577 }
578 
579 Expected<Archive::Child> Archive::Child::getNext() const {
580   Expected<const char *> NextLocOrErr = Header->getNextChildLoc();
581   if (!NextLocOrErr)
582     return NextLocOrErr.takeError();
583 
584   const char *NextLoc = *NextLocOrErr;
585 
586   // Check to see if this is at the end of the archive.
587   if (NextLoc == nullptr)
588     return Child(nullptr, nullptr, nullptr);
589 
590   // Check to see if this is past the end of the archive.
591   if (NextLoc > Parent->Data.getBufferEnd()) {
592     std::string Msg("offset to next archive member past the end of the archive "
593                     "after member ");
594     Expected<StringRef> NameOrErr = getName();
595     if (!NameOrErr) {
596       consumeError(NameOrErr.takeError());
597       uint64_t Offset = Data.data() - Parent->getData().data();
598       return malformedError(Msg + "at offset " + Twine(Offset));
599     } else
600       return malformedError(Msg + NameOrErr.get());
601   }
602 
603   Error Err = Error::success();
604   Child Ret(Parent, NextLoc, &Err);
605   if (Err)
606     return std::move(Err);
607   return Ret;
608 }
609 
610 uint64_t Archive::Child::getChildOffset() const {
611   const char *a = Parent->Data.getBuffer().data();
612   const char *c = Data.data();
613   uint64_t offset = c - a;
614   return offset;
615 }
616 
617 Expected<StringRef> Archive::Child::getName() const {
618   Expected<uint64_t> RawSizeOrErr = getRawSize();
619   if (!RawSizeOrErr)
620     return RawSizeOrErr.takeError();
621   uint64_t RawSize = RawSizeOrErr.get();
622   Expected<StringRef> NameOrErr =
623       Header->getName(Header->getSizeOf() + RawSize);
624   if (!NameOrErr)
625     return NameOrErr.takeError();
626   StringRef Name = NameOrErr.get();
627   return Name;
628 }
629 
630 Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
631   Expected<StringRef> NameOrErr = getName();
632   if (!NameOrErr)
633     return NameOrErr.takeError();
634   StringRef Name = NameOrErr.get();
635   Expected<StringRef> Buf = getBuffer();
636   if (!Buf)
637     return createFileError(Name, Buf.takeError());
638   return MemoryBufferRef(*Buf, Name);
639 }
640 
641 Expected<std::unique_ptr<Binary>>
642 Archive::Child::getAsBinary(LLVMContext *Context) const {
643   Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
644   if (!BuffOrErr)
645     return BuffOrErr.takeError();
646 
647   auto BinaryOrErr = createBinary(BuffOrErr.get(), Context);
648   if (BinaryOrErr)
649     return std::move(*BinaryOrErr);
650   return BinaryOrErr.takeError();
651 }
652 
653 Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
654   Error Err = Error::success();
655   std::unique_ptr<Archive> Ret;
656   StringRef Buffer = Source.getBuffer();
657 
658   if (Buffer.startswith(BigArchiveMagic))
659     Ret = std::make_unique<BigArchive>(Source, Err);
660   else
661     Ret = std::make_unique<Archive>(Source, Err);
662 
663   if (Err)
664     return std::move(Err);
665   return std::move(Ret);
666 }
667 
668 std::unique_ptr<AbstractArchiveMemberHeader>
669 Archive::createArchiveMemberHeader(const char *RawHeaderPtr, uint64_t Size,
670                                    Error *Err) const {
671   ErrorAsOutParameter ErrAsOutParam(Err);
672   if (kind() != K_AIXBIG)
673     return std::make_unique<ArchiveMemberHeader>(this, RawHeaderPtr, Size, Err);
674   return std::make_unique<BigArchiveMemberHeader>(this, RawHeaderPtr, Size,
675                                                   Err);
676 }
677 
678 uint64_t Archive::getArchiveMagicLen() const {
679   if (isThin())
680     return sizeof(ThinArchiveMagic) - 1;
681 
682   if (Kind() == K_AIXBIG)
683     return sizeof(BigArchiveMagic) - 1;
684 
685   return sizeof(ArchiveMagic) - 1;
686 }
687 
688 void Archive::setFirstRegular(const Child &C) {
689   FirstRegularData = C.Data;
690   FirstRegularStartOfFile = C.StartOfFile;
691 }
692 
693 Archive::Archive(MemoryBufferRef Source, Error &Err)
694     : Binary(Binary::ID_Archive, Source) {
695   ErrorAsOutParameter ErrAsOutParam(&Err);
696   StringRef Buffer = Data.getBuffer();
697   // Check for sufficient magic.
698   if (Buffer.startswith(ThinArchiveMagic)) {
699     IsThin = true;
700   } else if (Buffer.startswith(ArchiveMagic)) {
701     IsThin = false;
702   } else if (Buffer.startswith(BigArchiveMagic)) {
703     Format = K_AIXBIG;
704     IsThin = false;
705     return;
706   } else {
707     Err = make_error<GenericBinaryError>("file too small to be an archive",
708                                          object_error::invalid_file_type);
709     return;
710   }
711 
712   // Make sure Format is initialized before any call to
713   // ArchiveMemberHeader::getName() is made.  This could be a valid empty
714   // archive which is the same in all formats.  So claiming it to be gnu to is
715   // fine if not totally correct before we look for a string table or table of
716   // contents.
717   Format = K_GNU;
718 
719   // Get the special members.
720   child_iterator I = child_begin(Err, false);
721   if (Err)
722     return;
723   child_iterator E = child_end();
724 
725   // See if this is a valid empty archive and if so return.
726   if (I == E) {
727     Err = Error::success();
728     return;
729   }
730   const Child *C = &*I;
731 
732   auto Increment = [&]() {
733     ++I;
734     if (Err)
735       return true;
736     C = &*I;
737     return false;
738   };
739 
740   Expected<StringRef> NameOrErr = C->getRawName();
741   if (!NameOrErr) {
742     Err = NameOrErr.takeError();
743     return;
744   }
745   StringRef Name = NameOrErr.get();
746 
747   // Below is the pattern that is used to figure out the archive format
748   // GNU archive format
749   //  First member : / (may exist, if it exists, points to the symbol table )
750   //  Second member : // (may exist, if it exists, points to the string table)
751   //  Note : The string table is used if the filename exceeds 15 characters
752   // BSD archive format
753   //  First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
754   //  There is no string table, if the filename exceeds 15 characters or has a
755   //  embedded space, the filename has #1/<size>, The size represents the size
756   //  of the filename that needs to be read after the archive header
757   // COFF archive format
758   //  First member : /
759   //  Second member : / (provides a directory of symbols)
760   //  Third member : // (may exist, if it exists, contains the string table)
761   //  Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
762   //  even if the string table is empty. However, lib.exe does not in fact
763   //  seem to create the third member if there's no member whose filename
764   //  exceeds 15 characters. So the third member is optional.
765 
766   if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") {
767     if (Name == "__.SYMDEF")
768       Format = K_BSD;
769     else // Name == "__.SYMDEF_64"
770       Format = K_DARWIN64;
771     // We know that the symbol table is not an external file, but we still must
772     // check any Expected<> return value.
773     Expected<StringRef> BufOrErr = C->getBuffer();
774     if (!BufOrErr) {
775       Err = BufOrErr.takeError();
776       return;
777     }
778     SymbolTable = BufOrErr.get();
779     if (Increment())
780       return;
781     setFirstRegular(*C);
782 
783     Err = Error::success();
784     return;
785   }
786 
787   if (Name.startswith("#1/")) {
788     Format = K_BSD;
789     // We know this is BSD, so getName will work since there is no string table.
790     Expected<StringRef> NameOrErr = C->getName();
791     if (!NameOrErr) {
792       Err = NameOrErr.takeError();
793       return;
794     }
795     Name = NameOrErr.get();
796     if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
797       // We know that the symbol table is not an external file, but we still
798       // must check any Expected<> return value.
799       Expected<StringRef> BufOrErr = C->getBuffer();
800       if (!BufOrErr) {
801         Err = BufOrErr.takeError();
802         return;
803       }
804       SymbolTable = BufOrErr.get();
805       if (Increment())
806         return;
807     } else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") {
808       Format = K_DARWIN64;
809       // We know that the symbol table is not an external file, but we still
810       // must check any Expected<> return value.
811       Expected<StringRef> BufOrErr = C->getBuffer();
812       if (!BufOrErr) {
813         Err = BufOrErr.takeError();
814         return;
815       }
816       SymbolTable = BufOrErr.get();
817       if (Increment())
818         return;
819     }
820     setFirstRegular(*C);
821     return;
822   }
823 
824   // MIPS 64-bit ELF archives use a special format of a symbol table.
825   // This format is marked by `ar_name` field equals to "/SYM64/".
826   // For detailed description see page 96 in the following document:
827   // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf
828 
829   bool has64SymTable = false;
830   if (Name == "/" || Name == "/SYM64/") {
831     // We know that the symbol table is not an external file, but we still
832     // must check any Expected<> return value.
833     Expected<StringRef> BufOrErr = C->getBuffer();
834     if (!BufOrErr) {
835       Err = BufOrErr.takeError();
836       return;
837     }
838     SymbolTable = BufOrErr.get();
839     if (Name == "/SYM64/")
840       has64SymTable = true;
841 
842     if (Increment())
843       return;
844     if (I == E) {
845       Err = Error::success();
846       return;
847     }
848     Expected<StringRef> NameOrErr = C->getRawName();
849     if (!NameOrErr) {
850       Err = NameOrErr.takeError();
851       return;
852     }
853     Name = NameOrErr.get();
854   }
855 
856   if (Name == "//") {
857     Format = has64SymTable ? K_GNU64 : K_GNU;
858     // The string table is never an external member, but we still
859     // must check any Expected<> return value.
860     Expected<StringRef> BufOrErr = C->getBuffer();
861     if (!BufOrErr) {
862       Err = BufOrErr.takeError();
863       return;
864     }
865     StringTable = BufOrErr.get();
866     if (Increment())
867       return;
868     setFirstRegular(*C);
869     Err = Error::success();
870     return;
871   }
872 
873   if (Name[0] != '/') {
874     Format = has64SymTable ? K_GNU64 : K_GNU;
875     setFirstRegular(*C);
876     Err = Error::success();
877     return;
878   }
879 
880   if (Name != "/") {
881     Err = errorCodeToError(object_error::parse_failed);
882     return;
883   }
884 
885   Format = K_COFF;
886   // We know that the symbol table is not an external file, but we still
887   // must check any Expected<> return value.
888   Expected<StringRef> BufOrErr = C->getBuffer();
889   if (!BufOrErr) {
890     Err = BufOrErr.takeError();
891     return;
892   }
893   SymbolTable = BufOrErr.get();
894 
895   if (Increment())
896     return;
897 
898   if (I == E) {
899     setFirstRegular(*C);
900     Err = Error::success();
901     return;
902   }
903 
904   NameOrErr = C->getRawName();
905   if (!NameOrErr) {
906     Err = NameOrErr.takeError();
907     return;
908   }
909   Name = NameOrErr.get();
910 
911   if (Name == "//") {
912     // The string table is never an external member, but we still
913     // must check any Expected<> return value.
914     Expected<StringRef> BufOrErr = C->getBuffer();
915     if (!BufOrErr) {
916       Err = BufOrErr.takeError();
917       return;
918     }
919     StringTable = BufOrErr.get();
920     if (Increment())
921       return;
922   }
923 
924   setFirstRegular(*C);
925   Err = Error::success();
926 }
927 
928 Archive::child_iterator Archive::child_begin(Error &Err,
929                                              bool SkipInternal) const {
930   if (isEmpty())
931     return child_end();
932 
933   if (SkipInternal)
934     return child_iterator::itr(
935         Child(this, FirstRegularData, FirstRegularStartOfFile), Err);
936 
937   const char *Loc = Data.getBufferStart() + getFirstChildOffset();
938   Child C(this, Loc, &Err);
939   if (Err)
940     return child_end();
941   return child_iterator::itr(C, Err);
942 }
943 
944 Archive::child_iterator Archive::child_end() const {
945   return child_iterator::end(Child(nullptr, nullptr, nullptr));
946 }
947 
948 StringRef Archive::Symbol::getName() const {
949   return Parent->getSymbolTable().begin() + StringIndex;
950 }
951 
952 Expected<Archive::Child> Archive::Symbol::getMember() const {
953   const char *Buf = Parent->getSymbolTable().begin();
954   const char *Offsets = Buf;
955   if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64)
956     Offsets += sizeof(uint64_t);
957   else
958     Offsets += sizeof(uint32_t);
959   uint64_t Offset = 0;
960   if (Parent->kind() == K_GNU) {
961     Offset = read32be(Offsets + SymbolIndex * 4);
962   } else if (Parent->kind() == K_GNU64) {
963     Offset = read64be(Offsets + SymbolIndex * 8);
964   } else if (Parent->kind() == K_BSD) {
965     // The SymbolIndex is an index into the ranlib structs that start at
966     // Offsets (the first uint32_t is the number of bytes of the ranlib
967     // structs).  The ranlib structs are a pair of uint32_t's the first
968     // being a string table offset and the second being the offset into
969     // the archive of the member that defines the symbol.  Which is what
970     // is needed here.
971     Offset = read32le(Offsets + SymbolIndex * 8 + 4);
972   } else if (Parent->kind() == K_DARWIN64) {
973     // The SymbolIndex is an index into the ranlib_64 structs that start at
974     // Offsets (the first uint64_t is the number of bytes of the ranlib_64
975     // structs).  The ranlib_64 structs are a pair of uint64_t's the first
976     // being a string table offset and the second being the offset into
977     // the archive of the member that defines the symbol.  Which is what
978     // is needed here.
979     Offset = read64le(Offsets + SymbolIndex * 16 + 8);
980   } else {
981     // Skip offsets.
982     uint32_t MemberCount = read32le(Buf);
983     Buf += MemberCount * 4 + 4;
984 
985     uint32_t SymbolCount = read32le(Buf);
986     if (SymbolIndex >= SymbolCount)
987       return errorCodeToError(object_error::parse_failed);
988 
989     // Skip SymbolCount to get to the indices table.
990     const char *Indices = Buf + 4;
991 
992     // Get the index of the offset in the file member offset table for this
993     // symbol.
994     uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
995     // Subtract 1 since OffsetIndex is 1 based.
996     --OffsetIndex;
997 
998     if (OffsetIndex >= MemberCount)
999       return errorCodeToError(object_error::parse_failed);
1000 
1001     Offset = read32le(Offsets + OffsetIndex * 4);
1002   }
1003 
1004   const char *Loc = Parent->getData().begin() + Offset;
1005   Error Err = Error::success();
1006   Child C(Parent, Loc, &Err);
1007   if (Err)
1008     return std::move(Err);
1009   return C;
1010 }
1011 
1012 Archive::Symbol Archive::Symbol::getNext() const {
1013   Symbol t(*this);
1014   if (Parent->kind() == K_BSD) {
1015     // t.StringIndex is an offset from the start of the __.SYMDEF or
1016     // "__.SYMDEF SORTED" member into the string table for the ranlib
1017     // struct indexed by t.SymbolIndex .  To change t.StringIndex to the
1018     // offset in the string table for t.SymbolIndex+1 we subtract the
1019     // its offset from the start of the string table for t.SymbolIndex
1020     // and add the offset of the string table for t.SymbolIndex+1.
1021 
1022     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
1023     // which is the number of bytes of ranlib structs that follow.  The ranlib
1024     // structs are a pair of uint32_t's the first being a string table offset
1025     // and the second being the offset into the archive of the member that
1026     // define the symbol. After that the next uint32_t is the byte count of
1027     // the string table followed by the string table.
1028     const char *Buf = Parent->getSymbolTable().begin();
1029     uint32_t RanlibCount = 0;
1030     RanlibCount = read32le(Buf) / 8;
1031     // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
1032     // don't change the t.StringIndex as we don't want to reference a ranlib
1033     // past RanlibCount.
1034     if (t.SymbolIndex + 1 < RanlibCount) {
1035       const char *Ranlibs = Buf + 4;
1036       uint32_t CurRanStrx = 0;
1037       uint32_t NextRanStrx = 0;
1038       CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8);
1039       NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8);
1040       t.StringIndex -= CurRanStrx;
1041       t.StringIndex += NextRanStrx;
1042     }
1043   } else {
1044     // Go to one past next null.
1045     t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1;
1046   }
1047   ++t.SymbolIndex;
1048   return t;
1049 }
1050 
1051 Archive::symbol_iterator Archive::symbol_begin() const {
1052   if (!hasSymbolTable())
1053     return symbol_iterator(Symbol(this, 0, 0));
1054 
1055   const char *buf = getSymbolTable().begin();
1056   if (kind() == K_GNU) {
1057     uint32_t symbol_count = 0;
1058     symbol_count = read32be(buf);
1059     buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
1060   } else if (kind() == K_GNU64) {
1061     uint64_t symbol_count = read64be(buf);
1062     buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t)));
1063   } else if (kind() == K_BSD) {
1064     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
1065     // which is the number of bytes of ranlib structs that follow.  The ranlib
1066     // structs are a pair of uint32_t's the first being a string table offset
1067     // and the second being the offset into the archive of the member that
1068     // define the symbol. After that the next uint32_t is the byte count of
1069     // the string table followed by the string table.
1070     uint32_t ranlib_count = 0;
1071     ranlib_count = read32le(buf) / 8;
1072     const char *ranlibs = buf + 4;
1073     uint32_t ran_strx = 0;
1074     ran_strx = read32le(ranlibs);
1075     buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
1076     // Skip the byte count of the string table.
1077     buf += sizeof(uint32_t);
1078     buf += ran_strx;
1079   } else if (kind() == K_DARWIN64) {
1080     // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t
1081     // which is the number of bytes of ranlib_64 structs that follow.  The
1082     // ranlib_64 structs are a pair of uint64_t's the first being a string
1083     // table offset and the second being the offset into the archive of the
1084     // member that define the symbol. After that the next uint64_t is the byte
1085     // count of the string table followed by the string table.
1086     uint64_t ranlib_count = 0;
1087     ranlib_count = read64le(buf) / 16;
1088     const char *ranlibs = buf + 8;
1089     uint64_t ran_strx = 0;
1090     ran_strx = read64le(ranlibs);
1091     buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t))));
1092     // Skip the byte count of the string table.
1093     buf += sizeof(uint64_t);
1094     buf += ran_strx;
1095   } else {
1096     uint32_t member_count = 0;
1097     uint32_t symbol_count = 0;
1098     member_count = read32le(buf);
1099     buf += 4 + (member_count * 4); // Skip offsets.
1100     symbol_count = read32le(buf);
1101     buf += 4 + (symbol_count * 2); // Skip indices.
1102   }
1103   uint32_t string_start_offset = buf - getSymbolTable().begin();
1104   return symbol_iterator(Symbol(this, 0, string_start_offset));
1105 }
1106 
1107 Archive::symbol_iterator Archive::symbol_end() const {
1108   return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0));
1109 }
1110 
1111 uint32_t Archive::getNumberOfSymbols() const {
1112   if (!hasSymbolTable())
1113     return 0;
1114   const char *buf = getSymbolTable().begin();
1115   if (kind() == K_GNU)
1116     return read32be(buf);
1117   if (kind() == K_GNU64)
1118     return read64be(buf);
1119   if (kind() == K_BSD)
1120     return read32le(buf) / 8;
1121   if (kind() == K_DARWIN64)
1122     return read64le(buf) / 16;
1123   uint32_t member_count = 0;
1124   member_count = read32le(buf);
1125   buf += 4 + (member_count * 4); // Skip offsets.
1126   return read32le(buf);
1127 }
1128 
1129 Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const {
1130   Archive::symbol_iterator bs = symbol_begin();
1131   Archive::symbol_iterator es = symbol_end();
1132 
1133   for (; bs != es; ++bs) {
1134     StringRef SymName = bs->getName();
1135     if (SymName == name) {
1136       if (auto MemberOrErr = bs->getMember())
1137         return Child(*MemberOrErr);
1138       else
1139         return MemberOrErr.takeError();
1140     }
1141   }
1142   return Optional<Child>();
1143 }
1144 
1145 // Returns true if archive file contains no member file.
1146 bool Archive::isEmpty() const {
1147   return Data.getBufferSize() == getArchiveMagicLen();
1148 }
1149 
1150 bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); }
1151 
1152 BigArchive::BigArchive(MemoryBufferRef Source, Error &Err)
1153     : Archive(Source, Err) {
1154   ErrorAsOutParameter ErrAsOutParam(&Err);
1155   StringRef Buffer = Data.getBuffer();
1156   ArFixLenHdr = reinterpret_cast<const FixLenHdr *>(Buffer.data());
1157 
1158   StringRef RawOffset = getFieldRawString(ArFixLenHdr->FirstChildOffset);
1159   if (RawOffset.getAsInteger(10, FirstChildOffset))
1160     // TODO: Out-of-line.
1161     Err = malformedError("malformed AIX big archive: first member offset \"" +
1162                          RawOffset + "\" is not a number");
1163 
1164   RawOffset = getFieldRawString(ArFixLenHdr->LastChildOffset);
1165   if (RawOffset.getAsInteger(10, LastChildOffset))
1166     // TODO: Out-of-line.
1167     Err = malformedError("malformed AIX big archive: last member offset \"" +
1168                          RawOffset + "\" is not a number");
1169 
1170   child_iterator I = child_begin(Err, false);
1171   if (Err)
1172     return;
1173   child_iterator E = child_end();
1174   if (I == E) {
1175     Err = Error::success();
1176     return;
1177   }
1178   setFirstRegular(*I);
1179   Err = Error::success();
1180 }
1181