1 //===- Archive.cpp - ar File Format implementation ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the ArchiveObjectFile class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Object/Archive.h" 14 #include "llvm/ADT/Optional.h" 15 #include "llvm/ADT/SmallString.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/ADT/Twine.h" 18 #include "llvm/Object/Binary.h" 19 #include "llvm/Object/Error.h" 20 #include "llvm/Support/Chrono.h" 21 #include "llvm/Support/Endian.h" 22 #include "llvm/Support/Error.h" 23 #include "llvm/Support/ErrorOr.h" 24 #include "llvm/Support/FileSystem.h" 25 #include "llvm/Support/MathExtras.h" 26 #include "llvm/Support/MemoryBuffer.h" 27 #include "llvm/Support/Path.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <algorithm> 30 #include <cassert> 31 #include <cstddef> 32 #include <cstdint> 33 #include <memory> 34 #include <string> 35 #include <system_error> 36 37 using namespace llvm; 38 using namespace object; 39 using namespace llvm::support::endian; 40 41 void Archive::anchor() {} 42 43 static Error malformedError(Twine Msg) { 44 std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")"; 45 return make_error<GenericBinaryError>(std::move(StringMsg), 46 object_error::parse_failed); 47 } 48 49 static Error 50 createMemberHeaderParseError(const AbstractArchiveMemberHeader *ArMemHeader, 51 const char *RawHeaderPtr, uint64_t Size) { 52 StringRef Msg("remaining size of archive too small for next archive " 53 "member header "); 54 55 Expected<StringRef> NameOrErr = ArMemHeader->getName(Size); 56 if (NameOrErr) 57 return malformedError(Msg + "for " + 
*NameOrErr); 58 59 consumeError(NameOrErr.takeError()); 60 uint64_t Offset = RawHeaderPtr - ArMemHeader->Parent->getData().data(); 61 return malformedError(Msg + "at offset " + Twine(Offset)); 62 } 63 64 template <class T, std::size_t N> 65 StringRef getFieldRawString(const T (&Field)[N]) { 66 return StringRef(Field, N).rtrim(" "); 67 } 68 69 template <class T> 70 StringRef CommonArchiveMemberHeader<T>::getRawAccessMode() const { 71 return getFieldRawString(ArMemHdr->AccessMode); 72 } 73 74 template <class T> 75 StringRef CommonArchiveMemberHeader<T>::getRawLastModified() const { 76 return getFieldRawString(ArMemHdr->LastModified); 77 } 78 79 template <class T> StringRef CommonArchiveMemberHeader<T>::getRawUID() const { 80 return getFieldRawString(ArMemHdr->UID); 81 } 82 83 template <class T> StringRef CommonArchiveMemberHeader<T>::getRawGID() const { 84 return getFieldRawString(ArMemHdr->GID); 85 } 86 87 template <class T> uint64_t CommonArchiveMemberHeader<T>::getOffset() const { 88 return reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 89 } 90 91 template class object::CommonArchiveMemberHeader<UnixArMemHdrType>; 92 template class object::CommonArchiveMemberHeader<BigArMemHdrType>; 93 94 ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent, 95 const char *RawHeaderPtr, 96 uint64_t Size, Error *Err) 97 : CommonArchiveMemberHeader<UnixArMemHdrType>( 98 Parent, reinterpret_cast<const UnixArMemHdrType *>(RawHeaderPtr)) { 99 if (RawHeaderPtr == nullptr) 100 return; 101 ErrorAsOutParameter ErrAsOutParam(Err); 102 103 if (Size < getSizeOf()) { 104 *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size); 105 return; 106 } 107 if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') { 108 if (Err) { 109 std::string Buf; 110 raw_string_ostream OS(Buf); 111 OS.write_escaped( 112 StringRef(ArMemHdr->Terminator, sizeof(ArMemHdr->Terminator))); 113 OS.flush(); 114 std::string Msg("terminator characters in archive member \"" + 
Buf + 115 "\" not the correct \"`\\n\" values for the archive " 116 "member header "); 117 Expected<StringRef> NameOrErr = getName(Size); 118 if (!NameOrErr) { 119 consumeError(NameOrErr.takeError()); 120 uint64_t Offset = RawHeaderPtr - Parent->getData().data(); 121 *Err = malformedError(Msg + "at offset " + Twine(Offset)); 122 } else 123 *Err = malformedError(Msg + "for " + NameOrErr.get()); 124 } 125 return; 126 } 127 } 128 129 BigArchiveMemberHeader::BigArchiveMemberHeader(const Archive *Parent, 130 const char *RawHeaderPtr, 131 uint64_t Size, Error *Err) 132 : CommonArchiveMemberHeader<BigArMemHdrType>( 133 Parent, reinterpret_cast<const BigArMemHdrType *>(RawHeaderPtr)) { 134 if (RawHeaderPtr == nullptr) 135 return; 136 ErrorAsOutParameter ErrAsOutParam(Err); 137 138 if (Size < getSizeOf()) 139 *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size); 140 } 141 142 // This gets the raw name from the ArMemHdr->Name field and checks that it is 143 // valid for the kind of archive. If it is not valid it returns an Error. 144 Expected<StringRef> ArchiveMemberHeader::getRawName() const { 145 char EndCond; 146 auto Kind = Parent->kind(); 147 if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) { 148 if (ArMemHdr->Name[0] == ' ') { 149 uint64_t Offset = 150 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 151 return malformedError("name contains a leading space for archive member " 152 "header at offset " + 153 Twine(Offset)); 154 } 155 EndCond = ' '; 156 } else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#') 157 EndCond = ' '; 158 else 159 EndCond = '/'; 160 StringRef::size_type end = 161 StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond); 162 if (end == StringRef::npos) 163 end = sizeof(ArMemHdr->Name); 164 assert(end <= sizeof(ArMemHdr->Name) && end > 0); 165 // Don't include the EndCond if there is one. 
166 return StringRef(ArMemHdr->Name, end); 167 } 168 169 Expected<uint64_t> 170 getArchiveMemberDecField(Twine FieldName, const StringRef RawField, 171 const Archive *Parent, 172 const AbstractArchiveMemberHeader *MemHeader) { 173 uint64_t Value; 174 if (RawField.getAsInteger(10, Value)) { 175 uint64_t Offset = MemHeader->getOffset(); 176 return malformedError("characters in " + FieldName + 177 " field in archive member header are not " 178 "all decimal numbers: '" + 179 RawField + 180 "' for the archive " 181 "member header at offset " + 182 Twine(Offset)); 183 } 184 return Value; 185 } 186 187 Expected<uint64_t> 188 getArchiveMemberOctField(Twine FieldName, const StringRef RawField, 189 const Archive *Parent, 190 const AbstractArchiveMemberHeader *MemHeader) { 191 uint64_t Value; 192 if (RawField.getAsInteger(8, Value)) { 193 uint64_t Offset = MemHeader->getOffset(); 194 return malformedError("characters in " + FieldName + 195 " field in archive member header are not " 196 "all octal numbers: '" + 197 RawField + 198 "' for the archive " 199 "member header at offset " + 200 Twine(Offset)); 201 } 202 return Value; 203 } 204 205 Expected<StringRef> BigArchiveMemberHeader::getRawName() const { 206 Expected<uint64_t> NameLenOrErr = getArchiveMemberDecField( 207 "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this); 208 if (!NameLenOrErr) 209 // TODO: Out-of-line. 210 return NameLenOrErr.takeError(); 211 uint64_t NameLen = NameLenOrErr.get(); 212 213 // If the name length is odd, pad with '\0' to get an even length. After 214 // padding, there is the name terminator "`\n". 
215 uint64_t NameLenWithPadding = alignTo(NameLen, 2); 216 StringRef NameTerminator = "`\n"; 217 StringRef NameStringWithNameTerminator = 218 StringRef(ArMemHdr->Name, NameLenWithPadding + NameTerminator.size()); 219 if (!NameStringWithNameTerminator.endswith(NameTerminator)) { 220 uint64_t Offset = 221 reinterpret_cast<const char *>(ArMemHdr->Name + NameLenWithPadding) - 222 Parent->getData().data(); 223 // TODO: Out-of-line. 224 return malformedError( 225 "name does not have name terminator \"`\\n\" for archive member" 226 "header at offset " + 227 Twine(Offset)); 228 } 229 return StringRef(ArMemHdr->Name, NameLen); 230 } 231 232 // member including the header, so the size of any name following the header 233 // is checked to make sure it does not overflow. 234 Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const { 235 236 // This can be called from the ArchiveMemberHeader constructor when the 237 // archive header is truncated to produce an error message with the name. 238 // Make sure the name field is not truncated. 239 if (Size < offsetof(UnixArMemHdrType, Name) + sizeof(ArMemHdr->Name)) { 240 uint64_t ArchiveOffset = 241 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 242 return malformedError("archive header truncated before the name field " 243 "for archive member header at offset " + 244 Twine(ArchiveOffset)); 245 } 246 247 // The raw name itself can be invalid. 248 Expected<StringRef> NameOrErr = getRawName(); 249 if (!NameOrErr) 250 return NameOrErr.takeError(); 251 StringRef Name = NameOrErr.get(); 252 253 // Check if it's a special name. 254 if (Name[0] == '/') { 255 if (Name.size() == 1) // Linker member. 256 return Name; 257 if (Name.size() == 2 && Name[1] == '/') // String table. 258 return Name; 259 // System libraries from the Windows SDK for Windows 11 contain this symbol. 260 // It looks like a CFG guard: we just skip it for now. 
261 if (Name.equals("/<XFGHASHMAP>/")) 262 return Name; 263 // It's a long name. 264 // Get the string table offset. 265 std::size_t StringOffset; 266 if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) { 267 std::string Buf; 268 raw_string_ostream OS(Buf); 269 OS.write_escaped(Name.substr(1).rtrim(' ')); 270 OS.flush(); 271 uint64_t ArchiveOffset = 272 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 273 return malformedError("long name offset characters after the '/' are " 274 "not all decimal numbers: '" + 275 Buf + "' for archive member header at offset " + 276 Twine(ArchiveOffset)); 277 } 278 279 // Verify it. 280 if (StringOffset >= Parent->getStringTable().size()) { 281 uint64_t ArchiveOffset = 282 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 283 return malformedError("long name offset " + Twine(StringOffset) + 284 " past the end of the string table for archive " 285 "member header at offset " + 286 Twine(ArchiveOffset)); 287 } 288 289 // GNU long file names end with a "/\n". 
290 if (Parent->kind() == Archive::K_GNU || 291 Parent->kind() == Archive::K_GNU64) { 292 size_t End = Parent->getStringTable().find('\n', /*From=*/StringOffset); 293 if (End == StringRef::npos || End < 1 || 294 Parent->getStringTable()[End - 1] != '/') { 295 return malformedError("string table at long name offset " + 296 Twine(StringOffset) + "not terminated"); 297 } 298 return Parent->getStringTable().slice(StringOffset, End - 1); 299 } 300 return Parent->getStringTable().begin() + StringOffset; 301 } 302 303 if (Name.startswith("#1/")) { 304 uint64_t NameLength; 305 if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) { 306 std::string Buf; 307 raw_string_ostream OS(Buf); 308 OS.write_escaped(Name.substr(3).rtrim(' ')); 309 OS.flush(); 310 uint64_t ArchiveOffset = 311 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 312 return malformedError("long name length characters after the #1/ are " 313 "not all decimal numbers: '" + 314 Buf + "' for archive member header at offset " + 315 Twine(ArchiveOffset)); 316 } 317 if (getSizeOf() + NameLength > Size) { 318 uint64_t ArchiveOffset = 319 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 320 return malformedError("long name length: " + Twine(NameLength) + 321 " extends past the end of the member or archive " 322 "for archive member header at offset " + 323 Twine(ArchiveOffset)); 324 } 325 return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(), 326 NameLength) 327 .rtrim('\0'); 328 } 329 330 // It is not a long name so trim the blanks at the end of the name. 331 if (Name[Name.size() - 1] != '/') 332 return Name.rtrim(' '); 333 334 // It's a simple name. 
335 return Name.drop_back(1); 336 } 337 338 Expected<StringRef> BigArchiveMemberHeader::getName(uint64_t Size) const { 339 return getRawName(); 340 } 341 342 Expected<uint64_t> ArchiveMemberHeader::getSize() const { 343 return getArchiveMemberDecField("size", getFieldRawString(ArMemHdr->Size), 344 Parent, this); 345 } 346 347 Expected<uint64_t> BigArchiveMemberHeader::getSize() const { 348 Expected<uint64_t> SizeOrErr = getArchiveMemberDecField( 349 "size", getFieldRawString(ArMemHdr->Size), Parent, this); 350 if (!SizeOrErr) 351 return SizeOrErr.takeError(); 352 353 Expected<uint64_t> NameLenOrErr = getRawNameSize(); 354 if (!NameLenOrErr) 355 return NameLenOrErr.takeError(); 356 357 return *SizeOrErr + alignTo(*NameLenOrErr, 2); 358 } 359 360 Expected<uint64_t> BigArchiveMemberHeader::getRawNameSize() const { 361 return getArchiveMemberDecField( 362 "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this); 363 } 364 365 Expected<uint64_t> BigArchiveMemberHeader::getNextOffset() const { 366 return getArchiveMemberDecField( 367 "NextOffset", getFieldRawString(ArMemHdr->NextOffset), Parent, this); 368 } 369 370 Expected<sys::fs::perms> AbstractArchiveMemberHeader::getAccessMode() const { 371 Expected<uint64_t> AccessModeOrErr = 372 getArchiveMemberOctField("AccessMode", getRawAccessMode(), Parent, this); 373 if (!AccessModeOrErr) 374 return AccessModeOrErr.takeError(); 375 return static_cast<sys::fs::perms>(*AccessModeOrErr); 376 } 377 378 Expected<sys::TimePoint<std::chrono::seconds>> 379 AbstractArchiveMemberHeader::getLastModified() const { 380 Expected<uint64_t> SecondsOrErr = getArchiveMemberDecField( 381 "LastModified", getRawLastModified(), Parent, this); 382 383 if (!SecondsOrErr) 384 return SecondsOrErr.takeError(); 385 386 return sys::toTimePoint(*SecondsOrErr); 387 } 388 389 Expected<unsigned> AbstractArchiveMemberHeader::getUID() const { 390 StringRef User = getRawUID(); 391 if (User.empty()) 392 return 0; 393 return getArchiveMemberDecField("UID", 
User, Parent, this);
}

/// Returns the GID field parsed as decimal; a field that is empty after
/// trimming is reported as 0.
Expected<unsigned> AbstractArchiveMemberHeader::getGID() const {
  StringRef Group = getRawGID();
  if (Group.empty())
    return 0;
  return getArchiveMemberDecField("GID", Group, Parent, this);
}

/// Returns true when the parent archive is thin and this member is not one of
/// the special members "/", "//" or "/SYM64/". Fails if the raw name is
/// invalid.
Expected<bool> ArchiveMemberHeader::isThin() const {
  Expected<StringRef> NameOrErr = getRawName();
  if (!NameOrErr)
    return NameOrErr.takeError();
  StringRef Name = NameOrErr.get();
  return Parent->isThin() && Name != "/" && Name != "//" && Name != "/SYM64/";
}

/// Computes the address of the header that follows this member, or nullptr
/// when that address is exactly the end of the archive buffer. For thin
/// members only the header size is skipped; otherwise the member size from the
/// header is added as well.
Expected<const char *> ArchiveMemberHeader::getNextChildLoc() const {
  uint64_t Size = getSizeOf();
  Expected<bool> isThinOrErr = isThin();
  if (!isThinOrErr)
    return isThinOrErr.takeError();

  bool isThin = isThinOrErr.get();
  if (!isThin) {
    Expected<uint64_t> MemberSize = getSize();
    if (!MemberSize)
      return MemberSize.takeError();

    Size += MemberSize.get();
  }

  // If Size is odd, add 1 to make it even.
  const char *NextLoc =
      reinterpret_cast<const char *>(ArMemHdr) + alignTo(Size, 2);

  if (NextLoc == Parent->getMemoryBufferRef().getBufferEnd())
    return nullptr;

  return NextLoc;
}

/// Returns nullptr when this header sits at the archive's last-child offset;
/// otherwise returns the archive data start plus the NextOffset field.
Expected<const char *> BigArchiveMemberHeader::getNextChildLoc() const {
  if (getOffset() ==
      static_cast<const BigArchive *>(Parent)->getLastChildOffset())
    return nullptr;

  Expected<uint64_t> NextOffsetOrErr = getNextOffset();
  if (!NextOffsetOrErr)
    return NextOffsetOrErr.takeError();
  return Parent->getData().data() + NextOffsetOrErr.get();
}

/// Builds a Child from an already known data range and payload offset. The
/// header is created with a null error pointer, so no error is reported from
/// here.
Archive::Child::Child(const Archive *Parent, StringRef Data,
                      uint16_t StartOfFile)
    : Parent(Parent), Data(Data), StartOfFile(StartOfFile) {
  Header = Parent->createArchiveMemberHeader(Data.data(), Data.size(), nullptr);
}

/// Builds a Child by parsing the member header at \p Start. A null \p Start
/// yields a Child with a null Header (used as the end/sentinel value).
/// Otherwise \p Err must be non-null; it receives any parse error, and on
/// error the Child is left only partially initialised.
Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
    : Parent(Parent) {
  if (!Start) {
    Header = nullptr;
    return;
  }

  // The size handed to the header is the number of bytes from Start to the end
  // of the archive data.
  Header = Parent->createArchiveMemberHeader(
      Start,
      Parent ? Parent->getData().size() - (Start - Parent->getData().data())
             : 0,
      Err);

  // If we are pointed to real data (Start is not a nullptr), then there must
  // be a non-null Err pointer available to report malformed data on. Only
  // when the sentinel value is being constructed is Err permitted to be a
  // nullptr.
  assert(Err && "Err can't be nullptr if Start is not a nullptr");

  ErrorAsOutParameter ErrAsOutParam(Err);

  // If there was an error in the construction of the Header
  // then just return with the error now set.
  if (*Err)
    return;

  // Start with just the header; the payload is added below for members whose
  // data is stored in the archive itself (non-thin members).
  uint64_t Size = Header->getSizeOf();
  Data = StringRef(Start, Size);
  Expected<bool> isThinOrErr = isThinMember();
  if (!isThinOrErr) {
    *Err = isThinOrErr.takeError();
    return;
  }
  bool isThin = isThinOrErr.get();
  if (!isThin) {
    Expected<uint64_t> MemberSize = getRawSize();
    if (!MemberSize) {
      *Err = MemberSize.takeError();
      return;
    }
    Size += MemberSize.get();
    Data = StringRef(Start, Size);
  }

  // Setup StartOfFile and PaddingBytes.
  StartOfFile = Header->getSizeOf();
  // Don't include attached name.
  Expected<StringRef> NameOrErr = getRawName();
  if (!NameOrErr) {
    *Err = NameOrErr.takeError();
    return;
  }
  StringRef Name = NameOrErr.get();

  if (Parent->kind() == Archive::K_AIXBIG) {
    // The actual start of the file is after the name and any necessary
    // even-alignment padding.
    StartOfFile += ((Name.size() + 1) >> 1) << 1;
  } else if (Name.startswith("#1/")) {
    // The name is stored right after the header; its length follows "#1/".
    uint64_t NameSize;
    StringRef RawNameSize = Name.substr(3).rtrim(' ');
    if (RawNameSize.getAsInteger(10, NameSize)) {
      uint64_t Offset = Start - Parent->getData().data();
      *Err = malformedError("long name length characters after the #1/ are "
                            "not all decimal numbers: '" +
                            RawNameSize +
                            "' for archive member header at offset " +
                            Twine(Offset));
      return;
    }
    // NOTE(review): StartOfFile is assigned from a uint16_t constructor
    // parameter elsewhere; if the member is that narrow, a large NameSize
    // would wrap here — confirm against the declaration in Archive.h.
    StartOfFile += NameSize;
  }
}

/// Returns the payload size: the header's size field when the parent archive
/// is thin, otherwise the bytes of Data that follow StartOfFile.
Expected<uint64_t> Archive::Child::getSize() const {
  if (Parent->IsThin)
    return Header->getSize();
  return Data.size() - StartOfFile;
}

/// Returns the size exactly as reported by the member header.
Expected<uint64_t> Archive::Child::getRawSize() const {
  return Header->getSize();
}

/// Forwards to the header's isThin().
Expected<bool> Archive::Child::isThinMember() const { return Header->isThin(); }

/// Returns the path of a thin member's file. An absolute member name is
/// returned as is; otherwise the name is appended to the parent directory of
/// the archive's buffer identifier. Asserts that the member is thin.
Expected<std::string> Archive::Child::getFullName() const {
  Expected<bool> isThin = isThinMember();
  if (!isThin)
    return isThin.takeError();
  assert(isThin.get());
  Expected<StringRef> NameOrErr = getName();
  if (!NameOrErr)
    return NameOrErr.takeError();
  StringRef Name = *NameOrErr;
  if (sys::path::is_absolute(Name))
    return std::string(Name);

  SmallString<128> FullName = sys::path::parent_path(
      Parent->getMemoryBufferRef().getBufferIdentifier());
  sys::path::append(FullName, Name);
  return std::string(FullName.str());
}

/// Returns the member's contents. Non-thin members are a view into the
/// archive data starting at StartOfFile; thin members are read from the file
/// named by getFullName().
Expected<StringRef> Archive::Child::getBuffer() const {
  Expected<bool> isThinOrErr = isThinMember();
  if (!isThinOrErr)
    return isThinOrErr.takeError();
  bool isThin = isThinOrErr.get();
  if (!isThin) {
    Expected<uint64_t> Size = getSize();
    if (!Size)
      return Size.takeError();
    return StringRef(Data.data() + StartOfFile, Size.get());
  }
  Expected<std::string> FullNameOrErr = getFullName();
  if (!FullNameOrErr)
    return FullNameOrErr.takeError();
  const std::string &FullName =
*FullNameOrErr; 572 ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName); 573 if (std::error_code EC = Buf.getError()) 574 return errorCodeToError(EC); 575 Parent->ThinBuffers.push_back(std::move(*Buf)); 576 return Parent->ThinBuffers.back()->getBuffer(); 577 } 578 579 Expected<Archive::Child> Archive::Child::getNext() const { 580 Expected<const char *> NextLocOrErr = Header->getNextChildLoc(); 581 if (!NextLocOrErr) 582 return NextLocOrErr.takeError(); 583 584 const char *NextLoc = *NextLocOrErr; 585 586 // Check to see if this is at the end of the archive. 587 if (NextLoc == nullptr) 588 return Child(nullptr, nullptr, nullptr); 589 590 // Check to see if this is past the end of the archive. 591 if (NextLoc > Parent->Data.getBufferEnd()) { 592 std::string Msg("offset to next archive member past the end of the archive " 593 "after member "); 594 Expected<StringRef> NameOrErr = getName(); 595 if (!NameOrErr) { 596 consumeError(NameOrErr.takeError()); 597 uint64_t Offset = Data.data() - Parent->getData().data(); 598 return malformedError(Msg + "at offset " + Twine(Offset)); 599 } else 600 return malformedError(Msg + NameOrErr.get()); 601 } 602 603 Error Err = Error::success(); 604 Child Ret(Parent, NextLoc, &Err); 605 if (Err) 606 return std::move(Err); 607 return Ret; 608 } 609 610 uint64_t Archive::Child::getChildOffset() const { 611 const char *a = Parent->Data.getBuffer().data(); 612 const char *c = Data.data(); 613 uint64_t offset = c - a; 614 return offset; 615 } 616 617 Expected<StringRef> Archive::Child::getName() const { 618 Expected<uint64_t> RawSizeOrErr = getRawSize(); 619 if (!RawSizeOrErr) 620 return RawSizeOrErr.takeError(); 621 uint64_t RawSize = RawSizeOrErr.get(); 622 Expected<StringRef> NameOrErr = 623 Header->getName(Header->getSizeOf() + RawSize); 624 if (!NameOrErr) 625 return NameOrErr.takeError(); 626 StringRef Name = NameOrErr.get(); 627 return Name; 628 } 629 630 Expected<MemoryBufferRef> 
Archive::Child::getMemoryBufferRef() const { 631 Expected<StringRef> NameOrErr = getName(); 632 if (!NameOrErr) 633 return NameOrErr.takeError(); 634 StringRef Name = NameOrErr.get(); 635 Expected<StringRef> Buf = getBuffer(); 636 if (!Buf) 637 return createFileError(Name, Buf.takeError()); 638 return MemoryBufferRef(*Buf, Name); 639 } 640 641 Expected<std::unique_ptr<Binary>> 642 Archive::Child::getAsBinary(LLVMContext *Context) const { 643 Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef(); 644 if (!BuffOrErr) 645 return BuffOrErr.takeError(); 646 647 auto BinaryOrErr = createBinary(BuffOrErr.get(), Context); 648 if (BinaryOrErr) 649 return std::move(*BinaryOrErr); 650 return BinaryOrErr.takeError(); 651 } 652 653 Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) { 654 Error Err = Error::success(); 655 std::unique_ptr<Archive> Ret; 656 StringRef Buffer = Source.getBuffer(); 657 658 if (Buffer.startswith(BigArchiveMagic)) 659 Ret = std::make_unique<BigArchive>(Source, Err); 660 else 661 Ret = std::make_unique<Archive>(Source, Err); 662 663 if (Err) 664 return std::move(Err); 665 return std::move(Ret); 666 } 667 668 std::unique_ptr<AbstractArchiveMemberHeader> 669 Archive::createArchiveMemberHeader(const char *RawHeaderPtr, uint64_t Size, 670 Error *Err) const { 671 ErrorAsOutParameter ErrAsOutParam(Err); 672 if (kind() != K_AIXBIG) 673 return std::make_unique<ArchiveMemberHeader>(this, RawHeaderPtr, Size, Err); 674 return std::make_unique<BigArchiveMemberHeader>(this, RawHeaderPtr, Size, 675 Err); 676 } 677 678 uint64_t Archive::getArchiveMagicLen() const { 679 if (isThin()) 680 return sizeof(ThinArchiveMagic) - 1; 681 682 if (Kind() == K_AIXBIG) 683 return sizeof(BigArchiveMagic) - 1; 684 685 return sizeof(ArchiveMagic) - 1; 686 } 687 688 void Archive::setFirstRegular(const Child &C) { 689 FirstRegularData = C.Data; 690 FirstRegularStartOfFile = C.StartOfFile; 691 } 692 693 Archive::Archive(MemoryBufferRef Source, Error &Err) 694 
: Binary(Binary::ID_Archive, Source) {
  ErrorAsOutParameter ErrAsOutParam(&Err);
  StringRef Buffer = Data.getBuffer();
  // Check for sufficient magic.
  if (Buffer.startswith(ThinArchiveMagic)) {
    IsThin = true;
  } else if (Buffer.startswith(ArchiveMagic)) {
    IsThin = false;
  } else if (Buffer.startswith(BigArchiveMagic)) {
    // Big archives do no further parsing in this constructor.
    Format = K_AIXBIG;
    IsThin = false;
    return;
  } else {
    // Reached for any buffer that starts with none of the known magics.
    Err = make_error<GenericBinaryError>("file too small to be an archive",
                                         object_error::invalid_file_type);
    return;
  }

  // Make sure Format is initialized before any call to
  // ArchiveMemberHeader::getName() is made. This could be a valid empty
  // archive which is the same in all formats. So claiming it to be gnu is
  // fine if not totally correct before we look for a string table or table of
  // contents.
  Format = K_GNU;

  // Get the special members.
  child_iterator I = child_begin(Err, false);
  if (Err)
    return;
  child_iterator E = child_end();

  // See if this is a valid empty archive and if so return.
  if (I == E) {
    Err = Error::success();
    return;
  }
  const Child *C = &*I;

  // Advances I to the next member. Returns true if advancing set Err;
  // otherwise repoints C at the new current member and returns false.
  auto Increment = [&]() {
    ++I;
    if (Err)
      return true;
    C = &*I;
    return false;
  };

  Expected<StringRef> NameOrErr = C->getRawName();
  if (!NameOrErr) {
    Err = NameOrErr.takeError();
    return;
  }
  StringRef Name = NameOrErr.get();

  // Below is the pattern that is used to figure out the archive format
  // GNU archive format
  //  First member : / (may exist, if it exists, points to the symbol table )
  //  Second member : // (may exist, if it exists, points to the string table)
  //  Note : The string table is used if the filename exceeds 15 characters
  // BSD archive format
  //  First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
  //  There is no string table, if the filename exceeds 15 characters or has a
  //  embedded space, the filename has #1/<size>, The size represents the size
  //  of the filename that needs to be read after the archive header
  // COFF archive format
  //  First member : /
  //  Second member : / (provides a directory of symbols)
  //  Third member : // (may exist, if it exists, contains the string table)
  //  Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
  //  even if the string table is empty. However, lib.exe does not in fact
  //  seem to create the third member if there's no member whose filename
  //  exceeds 15 characters. So the third member is optional.

  // Short-name BSD / Darwin64 symbol table as the first member.
  if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") {
    if (Name == "__.SYMDEF")
      Format = K_BSD;
    else // Name == "__.SYMDEF_64"
      Format = K_DARWIN64;
    // We know that the symbol table is not an external file, but we still must
    // check any Expected<> return value.
    Expected<StringRef> BufOrErr = C->getBuffer();
    if (!BufOrErr) {
      Err = BufOrErr.takeError();
      return;
    }
    SymbolTable = BufOrErr.get();
    if (Increment())
      return;
    setFirstRegular(*C);

    Err = Error::success();
    return;
  }

  // A "#1/<size>" first member: BSD-style long name, which may or may not be
  // a symbol table.
  if (Name.startswith("#1/")) {
    Format = K_BSD;
    // We know this is BSD, so getName will work since there is no string table.
    Expected<StringRef> NameOrErr = C->getName();
    if (!NameOrErr) {
      Err = NameOrErr.takeError();
      return;
    }
    Name = NameOrErr.get();
    if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
      // We know that the symbol table is not an external file, but we still
      // must check any Expected<> return value.
      Expected<StringRef> BufOrErr = C->getBuffer();
      if (!BufOrErr) {
        Err = BufOrErr.takeError();
        return;
      }
      SymbolTable = BufOrErr.get();
      if (Increment())
        return;
    } else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") {
      Format = K_DARWIN64;
      // We know that the symbol table is not an external file, but we still
      // must check any Expected<> return value.
      Expected<StringRef> BufOrErr = C->getBuffer();
      if (!BufOrErr) {
        Err = BufOrErr.takeError();
        return;
      }
      SymbolTable = BufOrErr.get();
      if (Increment())
        return;
    }
    // Whatever C points at now (the member after the symbol table, or the
    // first member itself if it was not a symbol table) is the first regular
    // member.
    setFirstRegular(*C);
    return;
  }

  // MIPS 64-bit ELF archives use a special format of a symbol table.
  // This format is marked by `ar_name` field equals to "/SYM64/".
  // For detailed description see page 96 in the following document:
  // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf

  bool has64SymTable = false;
  if (Name == "/" || Name == "/SYM64/") {
    // We know that the symbol table is not an external file, but we still
    // must check any Expected<> return value.
    Expected<StringRef> BufOrErr = C->getBuffer();
    if (!BufOrErr) {
      Err = BufOrErr.takeError();
      return;
    }
    SymbolTable = BufOrErr.get();
    if (Name == "/SYM64/")
      has64SymTable = true;

    if (Increment())
      return;
    // An archive containing only a symbol table is valid.
    if (I == E) {
      Err = Error::success();
      return;
    }
    Expected<StringRef> NameOrErr = C->getRawName();
    if (!NameOrErr) {
      Err = NameOrErr.takeError();
      return;
    }
    Name = NameOrErr.get();
  }

  // GNU string table directly after the (optional) symbol table.
  if (Name == "//") {
    Format = has64SymTable ? K_GNU64 : K_GNU;
    // The string table is never an external member, but we still
    // must check any Expected<> return value.
    Expected<StringRef> BufOrErr = C->getBuffer();
    if (!BufOrErr) {
      Err = BufOrErr.takeError();
      return;
    }
    StringTable = BufOrErr.get();
    if (Increment())
      return;
    setFirstRegular(*C);
    Err = Error::success();
    return;
  }

  // Any name that does not start with '/' is a regular member: GNU archive
  // with no string table.
  if (Name[0] != '/') {
    Format = has64SymTable ? K_GNU64 : K_GNU;
    setFirstRegular(*C);
    Err = Error::success();
    return;
  }

  // The only remaining accepted layout has a second "/" member (COFF).
  if (Name != "/") {
    Err = errorCodeToError(object_error::parse_failed);
    return;
  }

  Format = K_COFF;
  // We know that the symbol table is not an external file, but we still
  // must check any Expected<> return value.
  Expected<StringRef> BufOrErr = C->getBuffer();
  if (!BufOrErr) {
    Err = BufOrErr.takeError();
    return;
  }
  // The second "/" member replaces the symbol table read from the first one.
  SymbolTable = BufOrErr.get();

  if (Increment())
    return;

  if (I == E) {
    setFirstRegular(*C);
    Err = Error::success();
    return;
  }

  NameOrErr = C->getRawName();
  if (!NameOrErr) {
    Err = NameOrErr.takeError();
    return;
  }
  Name = NameOrErr.get();

  if (Name == "//") {
    // The string table is never an external member, but we still
    // must check any Expected<> return value.
Expected<StringRef> BufOrErr = C->getBuffer();
    if (!BufOrErr) {
      Err = BufOrErr.takeError();
      return;
    }
    StringTable = BufOrErr.get();
    if (Increment())
      return;
  }

  setFirstRegular(*C);
  Err = Error::success();
}

/// Returns an iterator to the first member. With \p SkipInternal set, the
/// iteration starts at the recorded first regular member; otherwise it starts
/// at the first child offset in the buffer. Returns child_end() for an empty
/// archive or when parsing the first member sets \p Err.
Archive::child_iterator Archive::child_begin(Error &Err,
                                             bool SkipInternal) const {
  if (isEmpty())
    return child_end();

  if (SkipInternal)
    return child_iterator::itr(
        Child(this, FirstRegularData, FirstRegularStartOfFile), Err);

  const char *Loc = Data.getBufferStart() + getFirstChildOffset();
  Child C(this, Loc, &Err);
  if (Err)
    return child_end();
  return child_iterator::itr(C, Err);
}

/// Returns the end iterator, which wraps a Child built from null pointers.
Archive::child_iterator Archive::child_end() const {
  return child_iterator::end(Child(nullptr, nullptr, nullptr));
}

/// Returns the symbol name: the C string at StringIndex bytes into the
/// parent's symbol table.
StringRef Archive::Symbol::getName() const {
  return Parent->getSymbolTable().begin() + StringIndex;
}

/// Returns the archive member that defines this symbol. The member's offset
/// is read from the symbol table using the layout for the parent's kind, then
/// a Child is parsed at that offset.
Expected<Archive::Child> Archive::Symbol::getMember() const {
  const char *Buf = Parent->getSymbolTable().begin();
  const char *Offsets = Buf;
  // Skip the leading count word: 64-bit for K_GNU64 / K_DARWIN64, 32-bit for
  // every other kind.
  if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64)
    Offsets += sizeof(uint64_t);
  else
    Offsets += sizeof(uint32_t);
  uint64_t Offset = 0;
  if (Parent->kind() == K_GNU) {
    Offset = read32be(Offsets + SymbolIndex * 4);
  } else if (Parent->kind() == K_GNU64) {
    Offset = read64be(Offsets + SymbolIndex * 8);
  } else if (Parent->kind() == K_BSD) {
    // The SymbolIndex is an index into the ranlib structs that start at
    // Offsets (the first uint32_t is the number of bytes of the ranlib
    // structs).  The ranlib structs are a pair of uint32_t's the first
    // being a string table offset and the second being the offset into
    // the archive of the member that defines the symbol.  Which is what
    // is needed here.
    Offset = read32le(Offsets + SymbolIndex * 8 + 4);
  } else if (Parent->kind() == K_DARWIN64) {
    // The SymbolIndex is an index into the ranlib_64 structs that start at
    // Offsets (the first uint64_t is the number of bytes of the ranlib_64
    // structs).  The ranlib_64 structs are a pair of uint64_t's the first
    // being a string table offset and the second being the offset into
    // the archive of the member that defines the symbol.  Which is what
    // is needed here.
    Offset = read64le(Offsets + SymbolIndex * 16 + 8);
  } else {
    // Remaining kinds (presumably the COFF second linker member — confirm):
    // a member-offset table followed by a symbol count and a table of 16-bit
    // indices into the member-offset table.
    // Skip offsets.
    uint32_t MemberCount = read32le(Buf);
    Buf += MemberCount * 4 + 4;

    uint32_t SymbolCount = read32le(Buf);
    if (SymbolIndex >= SymbolCount)
      return errorCodeToError(object_error::parse_failed);

    // Skip SymbolCount to get to the indices table.
    const char *Indices = Buf + 4;

    // Get the index of the offset in the file member offset table for this
    // symbol.
    uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
    // Subtract 1 since OffsetIndex is 1 based.
    --OffsetIndex;

    // A stored index of 0 wraps to 65535 above; it is rejected here unless
    // MemberCount exceeds that.
    if (OffsetIndex >= MemberCount)
      return errorCodeToError(object_error::parse_failed);

    Offset = read32le(Offsets + OffsetIndex * 4);
  }

  const char *Loc = Parent->getData().begin() + Offset;
  Error Err = Error::success();
  Child C(Parent, Loc, &Err);
  if (Err)
    return std::move(Err);
  return C;
}

Archive::Symbol Archive::Symbol::getNext() const {
  Symbol t(*this);
  if (Parent->kind() == K_BSD) {
    // t.StringIndex is an offset from the start of the __.SYMDEF or
    // "__.SYMDEF SORTED" member into the string table for the ranlib
    // struct indexed by t.SymbolIndex .
To change t.StringIndex to the 1018 // offset in the string table for t.SymbolIndex+1 we subtract the 1019 // its offset from the start of the string table for t.SymbolIndex 1020 // and add the offset of the string table for t.SymbolIndex+1. 1021 1022 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t 1023 // which is the number of bytes of ranlib structs that follow. The ranlib 1024 // structs are a pair of uint32_t's the first being a string table offset 1025 // and the second being the offset into the archive of the member that 1026 // define the symbol. After that the next uint32_t is the byte count of 1027 // the string table followed by the string table. 1028 const char *Buf = Parent->getSymbolTable().begin(); 1029 uint32_t RanlibCount = 0; 1030 RanlibCount = read32le(Buf) / 8; 1031 // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount) 1032 // don't change the t.StringIndex as we don't want to reference a ranlib 1033 // past RanlibCount. 1034 if (t.SymbolIndex + 1 < RanlibCount) { 1035 const char *Ranlibs = Buf + 4; 1036 uint32_t CurRanStrx = 0; 1037 uint32_t NextRanStrx = 0; 1038 CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8); 1039 NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8); 1040 t.StringIndex -= CurRanStrx; 1041 t.StringIndex += NextRanStrx; 1042 } 1043 } else { 1044 // Go to one past next null. 
1045 t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1; 1046 } 1047 ++t.SymbolIndex; 1048 return t; 1049 } 1050 1051 Archive::symbol_iterator Archive::symbol_begin() const { 1052 if (!hasSymbolTable()) 1053 return symbol_iterator(Symbol(this, 0, 0)); 1054 1055 const char *buf = getSymbolTable().begin(); 1056 if (kind() == K_GNU) { 1057 uint32_t symbol_count = 0; 1058 symbol_count = read32be(buf); 1059 buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t))); 1060 } else if (kind() == K_GNU64) { 1061 uint64_t symbol_count = read64be(buf); 1062 buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t))); 1063 } else if (kind() == K_BSD) { 1064 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t 1065 // which is the number of bytes of ranlib structs that follow. The ranlib 1066 // structs are a pair of uint32_t's the first being a string table offset 1067 // and the second being the offset into the archive of the member that 1068 // define the symbol. After that the next uint32_t is the byte count of 1069 // the string table followed by the string table. 1070 uint32_t ranlib_count = 0; 1071 ranlib_count = read32le(buf) / 8; 1072 const char *ranlibs = buf + 4; 1073 uint32_t ran_strx = 0; 1074 ran_strx = read32le(ranlibs); 1075 buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t)))); 1076 // Skip the byte count of the string table. 1077 buf += sizeof(uint32_t); 1078 buf += ran_strx; 1079 } else if (kind() == K_DARWIN64) { 1080 // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t 1081 // which is the number of bytes of ranlib_64 structs that follow. The 1082 // ranlib_64 structs are a pair of uint64_t's the first being a string 1083 // table offset and the second being the offset into the archive of the 1084 // member that define the symbol. After that the next uint64_t is the byte 1085 // count of the string table followed by the string table. 
1086 uint64_t ranlib_count = 0; 1087 ranlib_count = read64le(buf) / 16; 1088 const char *ranlibs = buf + 8; 1089 uint64_t ran_strx = 0; 1090 ran_strx = read64le(ranlibs); 1091 buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t)))); 1092 // Skip the byte count of the string table. 1093 buf += sizeof(uint64_t); 1094 buf += ran_strx; 1095 } else { 1096 uint32_t member_count = 0; 1097 uint32_t symbol_count = 0; 1098 member_count = read32le(buf); 1099 buf += 4 + (member_count * 4); // Skip offsets. 1100 symbol_count = read32le(buf); 1101 buf += 4 + (symbol_count * 2); // Skip indices. 1102 } 1103 uint32_t string_start_offset = buf - getSymbolTable().begin(); 1104 return symbol_iterator(Symbol(this, 0, string_start_offset)); 1105 } 1106 1107 Archive::symbol_iterator Archive::symbol_end() const { 1108 return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0)); 1109 } 1110 1111 uint32_t Archive::getNumberOfSymbols() const { 1112 if (!hasSymbolTable()) 1113 return 0; 1114 const char *buf = getSymbolTable().begin(); 1115 if (kind() == K_GNU) 1116 return read32be(buf); 1117 if (kind() == K_GNU64) 1118 return read64be(buf); 1119 if (kind() == K_BSD) 1120 return read32le(buf) / 8; 1121 if (kind() == K_DARWIN64) 1122 return read64le(buf) / 16; 1123 uint32_t member_count = 0; 1124 member_count = read32le(buf); 1125 buf += 4 + (member_count * 4); // Skip offsets. 1126 return read32le(buf); 1127 } 1128 1129 Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const { 1130 Archive::symbol_iterator bs = symbol_begin(); 1131 Archive::symbol_iterator es = symbol_end(); 1132 1133 for (; bs != es; ++bs) { 1134 StringRef SymName = bs->getName(); 1135 if (SymName == name) { 1136 if (auto MemberOrErr = bs->getMember()) 1137 return Child(*MemberOrErr); 1138 else 1139 return MemberOrErr.takeError(); 1140 } 1141 } 1142 return Optional<Child>(); 1143 } 1144 1145 // Returns true if archive file contains no member file. 
1146 bool Archive::isEmpty() const { 1147 return Data.getBufferSize() == getArchiveMagicLen(); 1148 } 1149 1150 bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); } 1151 1152 BigArchive::BigArchive(MemoryBufferRef Source, Error &Err) 1153 : Archive(Source, Err) { 1154 ErrorAsOutParameter ErrAsOutParam(&Err); 1155 StringRef Buffer = Data.getBuffer(); 1156 ArFixLenHdr = reinterpret_cast<const FixLenHdr *>(Buffer.data()); 1157 1158 StringRef RawOffset = getFieldRawString(ArFixLenHdr->FirstChildOffset); 1159 if (RawOffset.getAsInteger(10, FirstChildOffset)) 1160 // TODO: Out-of-line. 1161 Err = malformedError("malformed AIX big archive: first member offset \"" + 1162 RawOffset + "\" is not a number"); 1163 1164 RawOffset = getFieldRawString(ArFixLenHdr->LastChildOffset); 1165 if (RawOffset.getAsInteger(10, LastChildOffset)) 1166 // TODO: Out-of-line. 1167 Err = malformedError("malformed AIX big archive: last member offset \"" + 1168 RawOffset + "\" is not a number"); 1169 1170 child_iterator I = child_begin(Err, false); 1171 if (Err) 1172 return; 1173 child_iterator E = child_end(); 1174 if (I == E) { 1175 Err = Error::success(); 1176 return; 1177 } 1178 setFirstRegular(*I); 1179 Err = Error::success(); 1180 } 1181