1 //===- Archive.cpp - ar File Format implementation ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the ArchiveObjectFile class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Object/Archive.h" 14 #include "llvm/ADT/Optional.h" 15 #include "llvm/ADT/SmallString.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/ADT/Twine.h" 18 #include "llvm/Object/Binary.h" 19 #include "llvm/Object/Error.h" 20 #include "llvm/Support/Chrono.h" 21 #include "llvm/Support/Endian.h" 22 #include "llvm/Support/Error.h" 23 #include "llvm/Support/ErrorOr.h" 24 #include "llvm/Support/FileSystem.h" 25 #include "llvm/Support/MathExtras.h" 26 #include "llvm/Support/MemoryBuffer.h" 27 #include "llvm/Support/Path.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <algorithm> 30 #include <cassert> 31 #include <cstddef> 32 #include <cstdint> 33 #include <memory> 34 #include <string> 35 #include <system_error> 36 37 using namespace llvm; 38 using namespace object; 39 using namespace llvm::support::endian; 40 41 void Archive::anchor() {} 42 43 static Error malformedError(Twine Msg) { 44 std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")"; 45 return make_error<GenericBinaryError>(std::move(StringMsg), 46 object_error::parse_failed); 47 } 48 49 static Error 50 createMemberHeaderParseError(const AbstractArchiveMemberHeader *ArMemHeader, 51 const char *RawHeaderPtr, uint64_t Size) { 52 StringRef Msg("remaining size of archive too small for next archive " 53 "member header "); 54 55 Expected<StringRef> NameOrErr = ArMemHeader->getName(Size); 56 if (NameOrErr) 57 return malformedError(Msg + "for " + *NameOrErr); 58 59 consumeError(NameOrErr.takeError()); 60 uint64_t Offset = RawHeaderPtr - ArMemHeader->Parent->getData().data(); 61 return malformedError(Msg + "at offset " + Twine(Offset)); 62 } 63 64 template <class T, std::size_t N> 65 StringRef getFieldRawString(const T (&Field)[N]) { 66 return StringRef(Field, N).rtrim(" "); 67 } 68 69 template <class T> 70 StringRef CommonArchiveMemberHeader<T>::getRawAccessMode() const { 71 return getFieldRawString(ArMemHdr->AccessMode); 72 } 73 74 template <class T> 75 StringRef CommonArchiveMemberHeader<T>::getRawLastModified() const { 76 return getFieldRawString(ArMemHdr->LastModified); 77 } 78 79 template <class T> StringRef CommonArchiveMemberHeader<T>::getRawUID() const { 80 return getFieldRawString(ArMemHdr->UID); 81 } 82 83 template <class T> StringRef CommonArchiveMemberHeader<T>::getRawGID() const { 84 return getFieldRawString(ArMemHdr->GID); 85 } 86 87 template <class T> uint64_t CommonArchiveMemberHeader<T>::getOffset() const { 88 return reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 89 } 90 91 template class object::CommonArchiveMemberHeader<UnixArMemHdrType>; 92 template class object::CommonArchiveMemberHeader<BigArMemHdrType>; 93 94 ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent, 95 const char *RawHeaderPtr, 96 uint64_t Size, Error *Err) 97 : CommonArchiveMemberHeader<UnixArMemHdrType>( 98 Parent, reinterpret_cast<const UnixArMemHdrType *>(RawHeaderPtr)) { 99 if (RawHeaderPtr == nullptr) 100 return; 101 ErrorAsOutParameter ErrAsOutParam(Err); 102 103 if (Size < getSizeOf()) { 104 *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size); 105 return; 106 } 107 if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') { 108 if (Err) { 109 std::string Buf; 110 raw_string_ostream OS(Buf); 111 OS.write_escaped( 112 StringRef(ArMemHdr->Terminator, sizeof(ArMemHdr->Terminator))); 113 OS.flush(); 114 std::string Msg("terminator characters in archive member \"" + Buf + 115 "\" not the correct \"`\\n\" values for the archive " 116 "member header "); 117 Expected<StringRef> NameOrErr = getName(Size); 118 if (!NameOrErr) { 119 consumeError(NameOrErr.takeError()); 120 uint64_t Offset = RawHeaderPtr - Parent->getData().data(); 121 *Err = malformedError(Msg + "at offset " + Twine(Offset)); 122 } else 123 *Err = malformedError(Msg + "for " + NameOrErr.get()); 124 } 125 return; 126 } 127 } 128 129 BigArchiveMemberHeader::BigArchiveMemberHeader(const Archive *Parent, 130 const char *RawHeaderPtr, 131 uint64_t Size, Error *Err) 132 : CommonArchiveMemberHeader<BigArMemHdrType>( 133 Parent, reinterpret_cast<const BigArMemHdrType *>(RawHeaderPtr)) { 134 if (RawHeaderPtr == nullptr) 135 return; 136 ErrorAsOutParameter ErrAsOutParam(Err); 137 138 if (Size < getSizeOf()) 139 *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size); 140 } 141 142 // This gets the raw name from the ArMemHdr->Name field and checks that it is 143 // valid for the kind of archive. If it is not valid it returns an Error. 144 Expected<StringRef> ArchiveMemberHeader::getRawName() const { 145 char EndCond; 146 auto Kind = Parent->kind(); 147 if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) { 148 if (ArMemHdr->Name[0] == ' ') { 149 uint64_t Offset = 150 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 151 return malformedError("name contains a leading space for archive member " 152 "header at offset " + 153 Twine(Offset)); 154 } 155 EndCond = ' '; 156 } else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#') 157 EndCond = ' '; 158 else 159 EndCond = '/'; 160 StringRef::size_type end = 161 StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond); 162 if (end == StringRef::npos) 163 end = sizeof(ArMemHdr->Name); 164 assert(end <= sizeof(ArMemHdr->Name) && end > 0); 165 // Don't include the EndCond if there is one. 166 return StringRef(ArMemHdr->Name, end); 167 } 168 169 Expected<uint64_t> 170 getArchiveMemberDecField(Twine FieldName, const StringRef RawField, 171 const Archive *Parent, 172 const AbstractArchiveMemberHeader *MemHeader) { 173 uint64_t Value; 174 if (RawField.getAsInteger(10, Value)) { 175 uint64_t Offset = MemHeader->getOffset(); 176 return malformedError("characters in " + FieldName + 177 " field in archive member header are not " 178 "all decimal numbers: '" + 179 RawField + 180 "' for the archive " 181 "member header at offset " + 182 Twine(Offset)); 183 } 184 return Value; 185 } 186 187 Expected<uint64_t> 188 getArchiveMemberOctField(Twine FieldName, const StringRef RawField, 189 const Archive *Parent, 190 const AbstractArchiveMemberHeader *MemHeader) { 191 uint64_t Value; 192 if (RawField.getAsInteger(8, Value)) { 193 uint64_t Offset = MemHeader->getOffset(); 194 return malformedError("characters in " + FieldName + 195 " field in archive member header are not " 196 "all octal numbers: '" + 197 RawField + 198 "' for the archive " 199 "member header at offset " + 200 Twine(Offset)); 201 } 202 return Value; 203 } 204 205 Expected<StringRef> BigArchiveMemberHeader::getRawName() const { 206 Expected<uint64_t> NameLenOrErr = getArchiveMemberDecField( 207 "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this); 208 if (!NameLenOrErr) 209 // TODO: Out-of-line. 210 return NameLenOrErr.takeError(); 211 uint64_t NameLen = NameLenOrErr.get(); 212 213 // If the name length is odd, pad with '\0' to get an even length. After 214 // padding, there is the name terminator "`\n". 215 uint64_t NameLenWithPadding = alignTo(NameLen, 2); 216 StringRef NameTerminator = "`\n"; 217 StringRef NameStringWithNameTerminator = 218 StringRef(ArMemHdr->Name, NameLenWithPadding + NameTerminator.size()); 219 if (!NameStringWithNameTerminator.endswith(NameTerminator)) { 220 uint64_t Offset = 221 reinterpret_cast<const char *>(ArMemHdr->Name + NameLenWithPadding) - 222 Parent->getData().data(); 223 // TODO: Out-of-line. 224 return malformedError( 225 "name does not have name terminator \"`\\n\" for archive member" 226 "header at offset " + 227 Twine(Offset)); 228 } 229 return StringRef(ArMemHdr->Name, NameLen); 230 } 231 232 // member including the header, so the size of any name following the header 233 // is checked to make sure it does not overflow. 234 Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const { 235 236 // This can be called from the ArchiveMemberHeader constructor when the 237 // archive header is truncated to produce an error message with the name. 238 // Make sure the name field is not truncated. 239 if (Size < offsetof(UnixArMemHdrType, Name) + sizeof(ArMemHdr->Name)) { 240 uint64_t ArchiveOffset = 241 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 242 return malformedError("archive header truncated before the name field " 243 "for archive member header at offset " + 244 Twine(ArchiveOffset)); 245 } 246 247 // The raw name itself can be invalid. 248 Expected<StringRef> NameOrErr = getRawName(); 249 if (!NameOrErr) 250 return NameOrErr.takeError(); 251 StringRef Name = NameOrErr.get(); 252 253 // Check if it's a special name. 254 if (Name[0] == '/') { 255 if (Name.size() == 1) // Linker member. 256 return Name; 257 if (Name.size() == 2 && Name[1] == '/') // String table. 258 return Name; 259 // It's a long name. 260 // Get the string table offset. 261 std::size_t StringOffset; 262 if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) { 263 std::string Buf; 264 raw_string_ostream OS(Buf); 265 OS.write_escaped(Name.substr(1).rtrim(' ')); 266 OS.flush(); 267 uint64_t ArchiveOffset = 268 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 269 return malformedError("long name offset characters after the '/' are " 270 "not all decimal numbers: '" + 271 Buf + "' for archive member header at offset " + 272 Twine(ArchiveOffset)); 273 } 274 275 // Verify it. 276 if (StringOffset >= Parent->getStringTable().size()) { 277 uint64_t ArchiveOffset = 278 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 279 return malformedError("long name offset " + Twine(StringOffset) + 280 " past the end of the string table for archive " 281 "member header at offset " + 282 Twine(ArchiveOffset)); 283 } 284 285 // GNU long file names end with a "/\n". 286 if (Parent->kind() == Archive::K_GNU || 287 Parent->kind() == Archive::K_GNU64) { 288 size_t End = Parent->getStringTable().find('\n', /*From=*/StringOffset); 289 if (End == StringRef::npos || End < 1 || 290 Parent->getStringTable()[End - 1] != '/') { 291 return malformedError("string table at long name offset " + 292 Twine(StringOffset) + "not terminated"); 293 } 294 return Parent->getStringTable().slice(StringOffset, End - 1); 295 } 296 return Parent->getStringTable().begin() + StringOffset; 297 } 298 299 if (Name.startswith("#1/")) { 300 uint64_t NameLength; 301 if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) { 302 std::string Buf; 303 raw_string_ostream OS(Buf); 304 OS.write_escaped(Name.substr(3).rtrim(' ')); 305 OS.flush(); 306 uint64_t ArchiveOffset = 307 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 308 return malformedError("long name length characters after the #1/ are " 309 "not all decimal numbers: '" + 310 Buf + "' for archive member header at offset " + 311 Twine(ArchiveOffset)); 312 } 313 if (getSizeOf() + NameLength > Size) { 314 uint64_t ArchiveOffset = 315 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 316 return malformedError("long name length: " + Twine(NameLength) + 317 " extends past the end of the member or archive " 318 "for archive member header at offset " + 319 Twine(ArchiveOffset)); 320 } 321 return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(), 322 NameLength) 323 .rtrim('\0'); 324 } 325 326 // It is not a long name so trim the blanks at the end of the name. 327 if (Name[Name.size() - 1] != '/') 328 return Name.rtrim(' '); 329 330 // It's a simple name. 331 return Name.drop_back(1); 332 } 333 334 Expected<StringRef> BigArchiveMemberHeader::getName(uint64_t Size) const { 335 return getRawName(); 336 } 337 338 Expected<uint64_t> ArchiveMemberHeader::getSize() const { 339 return getArchiveMemberDecField("size", getFieldRawString(ArMemHdr->Size), 340 Parent, this); 341 } 342 343 Expected<uint64_t> BigArchiveMemberHeader::getSize() const { 344 Expected<uint64_t> SizeOrErr = getArchiveMemberDecField( 345 "size", getFieldRawString(ArMemHdr->Size), Parent, this); 346 if (!SizeOrErr) 347 return SizeOrErr.takeError(); 348 349 Expected<uint64_t> NameLenOrErr = getRawNameSize(); 350 if (!NameLenOrErr) 351 return NameLenOrErr.takeError(); 352 353 return *SizeOrErr + alignTo(*NameLenOrErr, 2); 354 } 355 356 Expected<uint64_t> BigArchiveMemberHeader::getRawNameSize() const { 357 return getArchiveMemberDecField( 358 "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this); 359 } 360 361 Expected<uint64_t> BigArchiveMemberHeader::getNextOffset() const { 362 return getArchiveMemberDecField( 363 "NextOffset", getFieldRawString(ArMemHdr->NextOffset), Parent, this); 364 } 365 366 Expected<sys::fs::perms> AbstractArchiveMemberHeader::getAccessMode() const { 367 Expected<uint64_t> AccessModeOrErr = 368 getArchiveMemberOctField("AccessMode", getRawAccessMode(), Parent, this); 369 if (!AccessModeOrErr) 370 return AccessModeOrErr.takeError(); 371 return static_cast<sys::fs::perms>(*AccessModeOrErr); 372 } 373 374 Expected<sys::TimePoint<std::chrono::seconds>> 375 AbstractArchiveMemberHeader::getLastModified() const { 376 Expected<uint64_t> SecondsOrErr = getArchiveMemberDecField( 377 "LastModified", getRawLastModified(), Parent, this); 378 379 if (!SecondsOrErr) 380 return SecondsOrErr.takeError(); 381 382 return sys::toTimePoint(*SecondsOrErr); 383 } 384 385 Expected<unsigned> AbstractArchiveMemberHeader::getUID() const { 386 StringRef User = getRawUID(); 387 if (User.empty()) 388 return 0; 389 return getArchiveMemberDecField("UID", User, Parent, this); 390 } 391 392 Expected<unsigned> AbstractArchiveMemberHeader::getGID() const { 393 StringRef Group = getRawGID(); 394 if (Group.empty()) 395 return 0; 396 return getArchiveMemberDecField("GID", Group, Parent, this); 397 } 398 399 Expected<bool> ArchiveMemberHeader::isThin() const { 400 Expected<StringRef> NameOrErr = getRawName(); 401 if (!NameOrErr) 402 return NameOrErr.takeError(); 403 StringRef Name = NameOrErr.get(); 404 return Parent->isThin() && Name != "/" && Name != "//" && Name != "/SYM64/"; 405 } 406 407 Expected<const char *> ArchiveMemberHeader::getNextChildLoc() const { 408 uint64_t Size = getSizeOf(); 409 Expected<bool> isThinOrErr = isThin(); 410 if (!isThinOrErr) 411 return isThinOrErr.takeError(); 412 413 bool isThin = isThinOrErr.get(); 414 if (!isThin) { 415 Expected<uint64_t> MemberSize = getSize(); 416 if (!MemberSize) 417 return MemberSize.takeError(); 418 419 Size += MemberSize.get(); 420 } 421 422 // If Size is odd, add 1 to make it even. 423 const char *NextLoc = 424 reinterpret_cast<const char *>(ArMemHdr) + alignTo(Size, 2); 425 426 if (NextLoc == Parent->getMemoryBufferRef().getBufferEnd()) 427 return nullptr; 428 429 return NextLoc; 430 } 431 432 Expected<const char *> BigArchiveMemberHeader::getNextChildLoc() const { 433 if (getOffset() == 434 static_cast<const BigArchive *>(Parent)->getLastChildOffset()) 435 return nullptr; 436 437 Expected<uint64_t> NextOffsetOrErr = getNextOffset(); 438 if (!NextOffsetOrErr) 439 return NextOffsetOrErr.takeError(); 440 return Parent->getData().data() + NextOffsetOrErr.get(); 441 } 442 443 Archive::Child::Child(const Archive *Parent, StringRef Data, 444 uint16_t StartOfFile) 445 : Parent(Parent), Data(Data), StartOfFile(StartOfFile) { 446 Header = Parent->createArchiveMemberHeader(Data.data(), Data.size(), nullptr); 447 } 448 449 Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err) 450 : Parent(Parent) { 451 if (!Start) { 452 Header = nullptr; 453 return; 454 } 455 456 Header = Parent->createArchiveMemberHeader( 457 Start, 458 Parent ? Parent->getData().size() - (Start - Parent->getData().data()) 459 : 0, 460 Err); 461 462 // If we are pointed to real data, Start is not a nullptr, then there must be 463 // a non-null Err pointer available to report malformed data on. Only in 464 // the case sentinel value is being constructed is Err is permitted to be a 465 // nullptr. 466 assert(Err && "Err can't be nullptr if Start is not a nullptr"); 467 468 ErrorAsOutParameter ErrAsOutParam(Err); 469 470 // If there was an error in the construction of the Header 471 // then just return with the error now set. 472 if (*Err) 473 return; 474 475 uint64_t Size = Header->getSizeOf(); 476 Data = StringRef(Start, Size); 477 Expected<bool> isThinOrErr = isThinMember(); 478 if (!isThinOrErr) { 479 *Err = isThinOrErr.takeError(); 480 return; 481 } 482 bool isThin = isThinOrErr.get(); 483 if (!isThin) { 484 Expected<uint64_t> MemberSize = getRawSize(); 485 if (!MemberSize) { 486 *Err = MemberSize.takeError(); 487 return; 488 } 489 Size += MemberSize.get(); 490 Data = StringRef(Start, Size); 491 } 492 493 // Setup StartOfFile and PaddingBytes. 494 StartOfFile = Header->getSizeOf(); 495 // Don't include attached name. 496 Expected<StringRef> NameOrErr = getRawName(); 497 if (!NameOrErr) { 498 *Err = NameOrErr.takeError(); 499 return; 500 } 501 StringRef Name = NameOrErr.get(); 502 503 if (Parent->kind() == Archive::K_AIXBIG) { 504 // The actual start of the file is after the name and any necessary 505 // even-alignment padding. 506 StartOfFile += ((Name.size() + 1) >> 1) << 1; 507 } else if (Name.startswith("#1/")) { 508 uint64_t NameSize; 509 StringRef RawNameSize = Name.substr(3).rtrim(' '); 510 if (RawNameSize.getAsInteger(10, NameSize)) { 511 uint64_t Offset = Start - Parent->getData().data(); 512 *Err = malformedError("long name length characters after the #1/ are " 513 "not all decimal numbers: '" + 514 RawNameSize + 515 "' for archive member header at offset " + 516 Twine(Offset)); 517 return; 518 } 519 StartOfFile += NameSize; 520 } 521 } 522 523 Expected<uint64_t> Archive::Child::getSize() const { 524 if (Parent->IsThin) 525 return Header->getSize(); 526 return Data.size() - StartOfFile; 527 } 528 529 Expected<uint64_t> Archive::Child::getRawSize() const { 530 return Header->getSize(); 531 } 532 533 Expected<bool> Archive::Child::isThinMember() const { return Header->isThin(); } 534 535 Expected<std::string> Archive::Child::getFullName() const { 536 Expected<bool> isThin = isThinMember(); 537 if (!isThin) 538 return isThin.takeError(); 539 assert(isThin.get()); 540 Expected<StringRef> NameOrErr = getName(); 541 if (!NameOrErr) 542 return NameOrErr.takeError(); 543 StringRef Name = *NameOrErr; 544 if (sys::path::is_absolute(Name)) 545 return std::string(Name); 546 547 SmallString<128> FullName = sys::path::parent_path( 548 Parent->getMemoryBufferRef().getBufferIdentifier()); 549 sys::path::append(FullName, Name); 550 return std::string(FullName.str()); 551 } 552 553 Expected<StringRef> Archive::Child::getBuffer() const { 554 Expected<bool> isThinOrErr = isThinMember(); 555 if (!isThinOrErr) 556 return isThinOrErr.takeError(); 557 bool isThin = isThinOrErr.get(); 558 if (!isThin) { 559 Expected<uint64_t> Size = getSize(); 560 if (!Size) 561 return Size.takeError(); 562 return StringRef(Data.data() + StartOfFile, Size.get()); 563 } 564 Expected<std::string> FullNameOrErr = getFullName(); 565 if (!FullNameOrErr) 566 return FullNameOrErr.takeError(); 567 const std::string &FullName = *FullNameOrErr; 568 ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName); 569 if (std::error_code EC = Buf.getError()) 570 return errorCodeToError(EC); 571 Parent->ThinBuffers.push_back(std::move(*Buf)); 572 return Parent->ThinBuffers.back()->getBuffer(); 573 } 574 575 Expected<Archive::Child> Archive::Child::getNext() const { 576 Expected<const char *> NextLocOrErr = Header->getNextChildLoc(); 577 if (!NextLocOrErr) 578 return NextLocOrErr.takeError(); 579 580 const char *NextLoc = *NextLocOrErr; 581 582 // Check to see if this is at the end of the archive. 583 if (NextLoc == nullptr) 584 return Child(nullptr, nullptr, nullptr); 585 586 // Check to see if this is past the end of the archive. 587 if (NextLoc > Parent->Data.getBufferEnd()) { 588 std::string Msg("offset to next archive member past the end of the archive " 589 "after member "); 590 Expected<StringRef> NameOrErr = getName(); 591 if (!NameOrErr) { 592 consumeError(NameOrErr.takeError()); 593 uint64_t Offset = Data.data() - Parent->getData().data(); 594 return malformedError(Msg + "at offset " + Twine(Offset)); 595 } else 596 return malformedError(Msg + NameOrErr.get()); 597 } 598 599 Error Err = Error::success(); 600 Child Ret(Parent, NextLoc, &Err); 601 if (Err) 602 return std::move(Err); 603 return Ret; 604 } 605 606 uint64_t Archive::Child::getChildOffset() const { 607 const char *a = Parent->Data.getBuffer().data(); 608 const char *c = Data.data(); 609 uint64_t offset = c - a; 610 return offset; 611 } 612 613 Expected<StringRef> Archive::Child::getName() const { 614 Expected<uint64_t> RawSizeOrErr = getRawSize(); 615 if (!RawSizeOrErr) 616 return RawSizeOrErr.takeError(); 617 uint64_t RawSize = RawSizeOrErr.get(); 618 Expected<StringRef> NameOrErr = 619 Header->getName(Header->getSizeOf() + RawSize); 620 if (!NameOrErr) 621 return NameOrErr.takeError(); 622 StringRef Name = NameOrErr.get(); 623 return Name; 624 } 625 626 Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const { 627 Expected<StringRef> NameOrErr = getName(); 628 if (!NameOrErr) 629 return NameOrErr.takeError(); 630 StringRef Name = NameOrErr.get(); 631 Expected<StringRef> Buf = getBuffer(); 632 if (!Buf) 633 return createFileError(Name, Buf.takeError()); 634 return MemoryBufferRef(*Buf, Name); 635 } 636 637 Expected<std::unique_ptr<Binary>> 638 Archive::Child::getAsBinary(LLVMContext *Context) const { 639 Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef(); 640 if (!BuffOrErr) 641 return BuffOrErr.takeError(); 642 643 auto BinaryOrErr = createBinary(BuffOrErr.get(), Context); 644 if (BinaryOrErr) 645 return std::move(*BinaryOrErr); 646 return BinaryOrErr.takeError(); 647 } 648 649 Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) { 650 Error Err = Error::success(); 651 std::unique_ptr<Archive> Ret; 652 StringRef Buffer = Source.getBuffer(); 653 654 if (Buffer.startswith(BigArchiveMagic)) 655 Ret = std::make_unique<BigArchive>(Source, Err); 656 else 657 Ret = std::make_unique<Archive>(Source, Err); 658 659 if (Err) 660 return std::move(Err); 661 return std::move(Ret); 662 } 663 664 std::unique_ptr<AbstractArchiveMemberHeader> 665 Archive::createArchiveMemberHeader(const char *RawHeaderPtr, uint64_t Size, 666 Error *Err) const { 667 ErrorAsOutParameter ErrAsOutParam(Err); 668 if (kind() != K_AIXBIG) 669 return std::make_unique<ArchiveMemberHeader>(this, RawHeaderPtr, Size, Err); 670 return std::make_unique<BigArchiveMemberHeader>(this, RawHeaderPtr, Size, 671 Err); 672 } 673 674 uint64_t Archive::getArchiveMagicLen() const { 675 if (isThin()) 676 return sizeof(ThinArchiveMagic) - 1; 677 678 if (Kind() == K_AIXBIG) 679 return sizeof(BigArchiveMagic) - 1; 680 681 return sizeof(ArchiveMagic) - 1; 682 } 683 684 void Archive::setFirstRegular(const Child &C) { 685 FirstRegularData = C.Data; 686 FirstRegularStartOfFile = C.StartOfFile; 687 } 688 689 Archive::Archive(MemoryBufferRef Source, Error &Err) 690 : Binary(Binary::ID_Archive, Source) { 691 ErrorAsOutParameter ErrAsOutParam(&Err); 692 StringRef Buffer = Data.getBuffer(); 693 // Check for sufficient magic. 694 if (Buffer.startswith(ThinArchiveMagic)) { 695 IsThin = true; 696 } else if (Buffer.startswith(ArchiveMagic)) { 697 IsThin = false; 698 } else if (Buffer.startswith(BigArchiveMagic)) { 699 Format = K_AIXBIG; 700 IsThin = false; 701 return; 702 } else { 703 Err = make_error<GenericBinaryError>("file too small to be an archive", 704 object_error::invalid_file_type); 705 return; 706 } 707 708 // Make sure Format is initialized before any call to 709 // ArchiveMemberHeader::getName() is made. This could be a valid empty 710 // archive which is the same in all formats. So claiming it to be gnu to is 711 // fine if not totally correct before we look for a string table or table of 712 // contents. 713 Format = K_GNU; 714 715 // Get the special members. 716 child_iterator I = child_begin(Err, false); 717 if (Err) 718 return; 719 child_iterator E = child_end(); 720 721 // See if this is a valid empty archive and if so return. 722 if (I == E) { 723 Err = Error::success(); 724 return; 725 } 726 const Child *C = &*I; 727 728 auto Increment = [&]() { 729 ++I; 730 if (Err) 731 return true; 732 C = &*I; 733 return false; 734 }; 735 736 Expected<StringRef> NameOrErr = C->getRawName(); 737 if (!NameOrErr) { 738 Err = NameOrErr.takeError(); 739 return; 740 } 741 StringRef Name = NameOrErr.get(); 742 743 // Below is the pattern that is used to figure out the archive format 744 // GNU archive format 745 // First member : / (may exist, if it exists, points to the symbol table ) 746 // Second member : // (may exist, if it exists, points to the string table) 747 // Note : The string table is used if the filename exceeds 15 characters 748 // BSD archive format 749 // First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table) 750 // There is no string table, if the filename exceeds 15 characters or has a 751 // embedded space, the filename has #1/<size>, The size represents the size 752 // of the filename that needs to be read after the archive header 753 // COFF archive format 754 // First member : / 755 // Second member : / (provides a directory of symbols) 756 // Third member : // (may exist, if it exists, contains the string table) 757 // Note: Microsoft PE/COFF Spec 8.3 says that the third member is present 758 // even if the string table is empty. However, lib.exe does not in fact 759 // seem to create the third member if there's no member whose filename 760 // exceeds 15 characters. So the third member is optional. 761 762 if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") { 763 if (Name == "__.SYMDEF") 764 Format = K_BSD; 765 else // Name == "__.SYMDEF_64" 766 Format = K_DARWIN64; 767 // We know that the symbol table is not an external file, but we still must 768 // check any Expected<> return value. 769 Expected<StringRef> BufOrErr = C->getBuffer(); 770 if (!BufOrErr) { 771 Err = BufOrErr.takeError(); 772 return; 773 } 774 SymbolTable = BufOrErr.get(); 775 if (Increment()) 776 return; 777 setFirstRegular(*C); 778 779 Err = Error::success(); 780 return; 781 } 782 783 if (Name.startswith("#1/")) { 784 Format = K_BSD; 785 // We know this is BSD, so getName will work since there is no string table. 786 Expected<StringRef> NameOrErr = C->getName(); 787 if (!NameOrErr) { 788 Err = NameOrErr.takeError(); 789 return; 790 } 791 Name = NameOrErr.get(); 792 if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") { 793 // We know that the symbol table is not an external file, but we still 794 // must check any Expected<> return value. 795 Expected<StringRef> BufOrErr = C->getBuffer(); 796 if (!BufOrErr) { 797 Err = BufOrErr.takeError(); 798 return; 799 } 800 SymbolTable = BufOrErr.get(); 801 if (Increment()) 802 return; 803 } else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") { 804 Format = K_DARWIN64; 805 // We know that the symbol table is not an external file, but we still 806 // must check any Expected<> return value. 807 Expected<StringRef> BufOrErr = C->getBuffer(); 808 if (!BufOrErr) { 809 Err = BufOrErr.takeError(); 810 return; 811 } 812 SymbolTable = BufOrErr.get(); 813 if (Increment()) 814 return; 815 } 816 setFirstRegular(*C); 817 return; 818 } 819 820 // MIPS 64-bit ELF archives use a special format of a symbol table. 821 // This format is marked by `ar_name` field equals to "/SYM64/". 822 // For detailed description see page 96 in the following document: 823 // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf 824 825 bool has64SymTable = false; 826 if (Name == "/" || Name == "/SYM64/") { 827 // We know that the symbol table is not an external file, but we still 828 // must check any Expected<> return value. 829 Expected<StringRef> BufOrErr = C->getBuffer(); 830 if (!BufOrErr) { 831 Err = BufOrErr.takeError(); 832 return; 833 } 834 SymbolTable = BufOrErr.get(); 835 if (Name == "/SYM64/") 836 has64SymTable = true; 837 838 if (Increment()) 839 return; 840 if (I == E) { 841 Err = Error::success(); 842 return; 843 } 844 Expected<StringRef> NameOrErr = C->getRawName(); 845 if (!NameOrErr) { 846 Err = NameOrErr.takeError(); 847 return; 848 } 849 Name = NameOrErr.get(); 850 } 851 852 if (Name == "//") { 853 Format = has64SymTable ? K_GNU64 : K_GNU; 854 // The string table is never an external member, but we still 855 // must check any Expected<> return value. 856 Expected<StringRef> BufOrErr = C->getBuffer(); 857 if (!BufOrErr) { 858 Err = BufOrErr.takeError(); 859 return; 860 } 861 StringTable = BufOrErr.get(); 862 if (Increment()) 863 return; 864 setFirstRegular(*C); 865 Err = Error::success(); 866 return; 867 } 868 869 if (Name[0] != '/') { 870 Format = has64SymTable ? K_GNU64 : K_GNU; 871 setFirstRegular(*C); 872 Err = Error::success(); 873 return; 874 } 875 876 if (Name != "/") { 877 Err = errorCodeToError(object_error::parse_failed); 878 return; 879 } 880 881 Format = K_COFF; 882 // We know that the symbol table is not an external file, but we still 883 // must check any Expected<> return value. 884 Expected<StringRef> BufOrErr = C->getBuffer(); 885 if (!BufOrErr) { 886 Err = BufOrErr.takeError(); 887 return; 888 } 889 SymbolTable = BufOrErr.get(); 890 891 if (Increment()) 892 return; 893 894 if (I == E) { 895 setFirstRegular(*C); 896 Err = Error::success(); 897 return; 898 } 899 900 NameOrErr = C->getRawName(); 901 if (!NameOrErr) { 902 Err = NameOrErr.takeError(); 903 return; 904 } 905 Name = NameOrErr.get(); 906 907 if (Name == "//") { 908 // The string table is never an external member, but we still 909 // must check any Expected<> return value. 910 Expected<StringRef> BufOrErr = C->getBuffer(); 911 if (!BufOrErr) { 912 Err = BufOrErr.takeError(); 913 return; 914 } 915 StringTable = BufOrErr.get(); 916 if (Increment()) 917 return; 918 } 919 920 setFirstRegular(*C); 921 Err = Error::success(); 922 } 923 924 Archive::child_iterator Archive::child_begin(Error &Err, 925 bool SkipInternal) const { 926 if (isEmpty()) 927 return child_end(); 928 929 if (SkipInternal) 930 return child_iterator::itr( 931 Child(this, FirstRegularData, FirstRegularStartOfFile), Err); 932 933 const char *Loc = Data.getBufferStart() + getFirstChildOffset(); 934 Child C(this, Loc, &Err); 935 if (Err) 936 return child_end(); 937 return child_iterator::itr(C, Err); 938 } 939 940 Archive::child_iterator Archive::child_end() const { 941 return child_iterator::end(Child(nullptr, nullptr, nullptr)); 942 } 943 944 StringRef Archive::Symbol::getName() const { 945 return Parent->getSymbolTable().begin() + StringIndex; 946 } 947 948 Expected<Archive::Child> Archive::Symbol::getMember() const { 949 const char *Buf = Parent->getSymbolTable().begin(); 950 const char *Offsets = Buf; 951 if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64) 952 Offsets += sizeof(uint64_t); 953 else 954 Offsets += sizeof(uint32_t); 955 uint64_t Offset = 0; 956 if (Parent->kind() == K_GNU) { 957 Offset = read32be(Offsets + SymbolIndex * 4); 958 } else if (Parent->kind() == K_GNU64) { 959 Offset = read64be(Offsets + SymbolIndex * 8); 960 } else if (Parent->kind() == K_BSD) { 961 // The SymbolIndex is an index into the ranlib structs that start at 962 // Offsets (the first uint32_t is the number of bytes of the ranlib 963 // structs). The ranlib structs are a pair of uint32_t's the first 964 // being a string table offset and the second being the offset into 965 // the archive of the member that defines the symbol. Which is what 966 // is needed here. 967 Offset = read32le(Offsets + SymbolIndex * 8 + 4); 968 } else if (Parent->kind() == K_DARWIN64) { 969 // The SymbolIndex is an index into the ranlib_64 structs that start at 970 // Offsets (the first uint64_t is the number of bytes of the ranlib_64 971 // structs). The ranlib_64 structs are a pair of uint64_t's the first 972 // being a string table offset and the second being the offset into 973 // the archive of the member that defines the symbol. Which is what 974 // is needed here. 975 Offset = read64le(Offsets + SymbolIndex * 16 + 8); 976 } else { 977 // Skip offsets. 978 uint32_t MemberCount = read32le(Buf); 979 Buf += MemberCount * 4 + 4; 980 981 uint32_t SymbolCount = read32le(Buf); 982 if (SymbolIndex >= SymbolCount) 983 return errorCodeToError(object_error::parse_failed); 984 985 // Skip SymbolCount to get to the indices table. 986 const char *Indices = Buf + 4; 987 988 // Get the index of the offset in the file member offset table for this 989 // symbol. 990 uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2); 991 // Subtract 1 since OffsetIndex is 1 based. 992 --OffsetIndex; 993 994 if (OffsetIndex >= MemberCount) 995 return errorCodeToError(object_error::parse_failed); 996 997 Offset = read32le(Offsets + OffsetIndex * 4); 998 } 999 1000 const char *Loc = Parent->getData().begin() + Offset; 1001 Error Err = Error::success(); 1002 Child C(Parent, Loc, &Err); 1003 if (Err) 1004 return std::move(Err); 1005 return C; 1006 } 1007 1008 Archive::Symbol Archive::Symbol::getNext() const { 1009 Symbol t(*this); 1010 if (Parent->kind() == K_BSD) { 1011 // t.StringIndex is an offset from the start of the __.SYMDEF or 1012 // "__.SYMDEF SORTED" member into the string table for the ranlib 1013 // struct indexed by t.SymbolIndex . To change t.StringIndex to the 1014 // offset in the string table for t.SymbolIndex+1 we subtract the 1015 // its offset from the start of the string table for t.SymbolIndex 1016 // and add the offset of the string table for t.SymbolIndex+1. 1017 1018 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t 1019 // which is the number of bytes of ranlib structs that follow. The ranlib 1020 // structs are a pair of uint32_t's the first being a string table offset 1021 // and the second being the offset into the archive of the member that 1022 // define the symbol. After that the next uint32_t is the byte count of 1023 // the string table followed by the string table. 1024 const char *Buf = Parent->getSymbolTable().begin(); 1025 uint32_t RanlibCount = 0; 1026 RanlibCount = read32le(Buf) / 8; 1027 // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount) 1028 // don't change the t.StringIndex as we don't want to reference a ranlib 1029 // past RanlibCount. 1030 if (t.SymbolIndex + 1 < RanlibCount) { 1031 const char *Ranlibs = Buf + 4; 1032 uint32_t CurRanStrx = 0; 1033 uint32_t NextRanStrx = 0; 1034 CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8); 1035 NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8); 1036 t.StringIndex -= CurRanStrx; 1037 t.StringIndex += NextRanStrx; 1038 } 1039 } else { 1040 // Go to one past next null. 1041 t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1; 1042 } 1043 ++t.SymbolIndex; 1044 return t; 1045 } 1046 1047 Archive::symbol_iterator Archive::symbol_begin() const { 1048 if (!hasSymbolTable()) 1049 return symbol_iterator(Symbol(this, 0, 0)); 1050 1051 const char *buf = getSymbolTable().begin(); 1052 if (kind() == K_GNU) { 1053 uint32_t symbol_count = 0; 1054 symbol_count = read32be(buf); 1055 buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t))); 1056 } else if (kind() == K_GNU64) { 1057 uint64_t symbol_count = read64be(buf); 1058 buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t))); 1059 } else if (kind() == K_BSD) { 1060 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t 1061 // which is the number of bytes of ranlib structs that follow. The ranlib 1062 // structs are a pair of uint32_t's the first being a string table offset 1063 // and the second being the offset into the archive of the member that 1064 // define the symbol. After that the next uint32_t is the byte count of 1065 // the string table followed by the string table. 1066 uint32_t ranlib_count = 0; 1067 ranlib_count = read32le(buf) / 8; 1068 const char *ranlibs = buf + 4; 1069 uint32_t ran_strx = 0; 1070 ran_strx = read32le(ranlibs); 1071 buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t)))); 1072 // Skip the byte count of the string table. 1073 buf += sizeof(uint32_t); 1074 buf += ran_strx; 1075 } else if (kind() == K_DARWIN64) { 1076 // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t 1077 // which is the number of bytes of ranlib_64 structs that follow. The 1078 // ranlib_64 structs are a pair of uint64_t's the first being a string 1079 // table offset and the second being the offset into the archive of the 1080 // member that define the symbol. After that the next uint64_t is the byte 1081 // count of the string table followed by the string table. 1082 uint64_t ranlib_count = 0; 1083 ranlib_count = read64le(buf) / 16; 1084 const char *ranlibs = buf + 8; 1085 uint64_t ran_strx = 0; 1086 ran_strx = read64le(ranlibs); 1087 buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t)))); 1088 // Skip the byte count of the string table. 1089 buf += sizeof(uint64_t); 1090 buf += ran_strx; 1091 } else { 1092 uint32_t member_count = 0; 1093 uint32_t symbol_count = 0; 1094 member_count = read32le(buf); 1095 buf += 4 + (member_count * 4); // Skip offsets. 1096 symbol_count = read32le(buf); 1097 buf += 4 + (symbol_count * 2); // Skip indices. 1098 } 1099 uint32_t string_start_offset = buf - getSymbolTable().begin(); 1100 return symbol_iterator(Symbol(this, 0, string_start_offset)); 1101 } 1102 1103 Archive::symbol_iterator Archive::symbol_end() const { 1104 return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0)); 1105 } 1106 1107 uint32_t Archive::getNumberOfSymbols() const { 1108 if (!hasSymbolTable()) 1109 return 0; 1110 const char *buf = getSymbolTable().begin(); 1111 if (kind() == K_GNU) 1112 return read32be(buf); 1113 if (kind() == K_GNU64) 1114 return read64be(buf); 1115 if (kind() == K_BSD) 1116 return read32le(buf) / 8; 1117 if (kind() == K_DARWIN64) 1118 return read64le(buf) / 16; 1119 uint32_t member_count = 0; 1120 member_count = read32le(buf); 1121 buf += 4 + (member_count * 4); // Skip offsets. 1122 return read32le(buf); 1123 } 1124 1125 Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const { 1126 Archive::symbol_iterator bs = symbol_begin(); 1127 Archive::symbol_iterator es = symbol_end(); 1128 1129 for (; bs != es; ++bs) { 1130 StringRef SymName = bs->getName(); 1131 if (SymName == name) { 1132 if (auto MemberOrErr = bs->getMember()) 1133 return Child(*MemberOrErr); 1134 else 1135 return MemberOrErr.takeError(); 1136 } 1137 } 1138 return Optional<Child>(); 1139 } 1140 1141 // Returns true if archive file contains no member file. 1142 bool Archive::isEmpty() const { 1143 return Data.getBufferSize() == getArchiveMagicLen(); 1144 } 1145 1146 bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); } 1147 1148 BigArchive::BigArchive(MemoryBufferRef Source, Error &Err) 1149 : Archive(Source, Err) { 1150 ErrorAsOutParameter ErrAsOutParam(&Err); 1151 StringRef Buffer = Data.getBuffer(); 1152 ArFixLenHdr = reinterpret_cast<const FixLenHdr *>(Buffer.data()); 1153 1154 StringRef RawOffset = getFieldRawString(ArFixLenHdr->FirstChildOffset); 1155 if (RawOffset.getAsInteger(10, FirstChildOffset)) 1156 // TODO: Out-of-line. 1157 Err = malformedError("malformed AIX big archive: first member offset \"" + 1158 RawOffset + "\" is not a number"); 1159 1160 RawOffset = getFieldRawString(ArFixLenHdr->LastChildOffset); 1161 if (RawOffset.getAsInteger(10, LastChildOffset)) 1162 // TODO: Out-of-line. 1163 Err = malformedError("malformed AIX big archive: last member offset \"" + 1164 RawOffset + "\" is not a number"); 1165 1166 child_iterator I = child_begin(Err, false); 1167 if (Err) 1168 return; 1169 child_iterator E = child_end(); 1170 if (I == E) { 1171 Err = Error::success(); 1172 return; 1173 } 1174 setFirstRegular(*I); 1175 Err = Error::success(); 1176 } 1177