1 //===- Archive.cpp - ar File Format implementation ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the ArchiveObjectFile class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Object/Archive.h" 14 #include "llvm/ADT/Optional.h" 15 #include "llvm/ADT/SmallString.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/ADT/Twine.h" 18 #include "llvm/Object/Binary.h" 19 #include "llvm/Object/Error.h" 20 #include "llvm/Support/Chrono.h" 21 #include "llvm/Support/Endian.h" 22 #include "llvm/Support/Error.h" 23 #include "llvm/Support/ErrorOr.h" 24 #include "llvm/Support/FileSystem.h" 25 #include "llvm/Support/Host.h" 26 #include "llvm/Support/MathExtras.h" 27 #include "llvm/Support/MemoryBuffer.h" 28 #include "llvm/Support/Path.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <algorithm> 31 #include <cassert> 32 #include <cstddef> 33 #include <cstdint> 34 #include <memory> 35 #include <string> 36 #include <system_error> 37 38 using namespace llvm; 39 using namespace object; 40 using namespace llvm::support::endian; 41 42 void Archive::anchor() {} 43 44 static Error malformedError(Twine Msg) { 45 std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")"; 46 return make_error<GenericBinaryError>(std::move(StringMsg), 47 object_error::parse_failed); 48 } 49 50 static Error 51 createMemberHeaderParseError(const AbstractArchiveMemberHeader *ArMemHeader, 52 const char *RawHeaderPtr, uint64_t Size) { 53 StringRef Msg("remaining size of archive too small for next archive " 54 "member header "); 55 56 Expected<StringRef> NameOrErr = ArMemHeader->getName(Size); 57 if (NameOrErr) 58 return 
malformedError(Msg + "for " + *NameOrErr); 59 60 consumeError(NameOrErr.takeError()); 61 uint64_t Offset = RawHeaderPtr - ArMemHeader->Parent->getData().data(); 62 return malformedError(Msg + "at offset " + Twine(Offset)); 63 } 64 65 template <class T, std::size_t N> 66 StringRef getFieldRawString(const T (&Field)[N]) { 67 return StringRef(Field, N).rtrim(" "); 68 } 69 70 template <class T> 71 StringRef CommonArchiveMemberHeader<T>::getRawAccessMode() const { 72 return getFieldRawString(ArMemHdr->AccessMode); 73 } 74 75 template <class T> 76 StringRef CommonArchiveMemberHeader<T>::getRawLastModified() const { 77 return getFieldRawString(ArMemHdr->LastModified); 78 } 79 80 template <class T> StringRef CommonArchiveMemberHeader<T>::getRawUID() const { 81 return getFieldRawString(ArMemHdr->UID); 82 } 83 84 template <class T> StringRef CommonArchiveMemberHeader<T>::getRawGID() const { 85 return getFieldRawString(ArMemHdr->GID); 86 } 87 88 template <class T> uint64_t CommonArchiveMemberHeader<T>::getOffset() const { 89 return reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 90 } 91 92 template class object::CommonArchiveMemberHeader<UnixArMemHdrType>; 93 template class object::CommonArchiveMemberHeader<BigArMemHdrType>; 94 95 ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent, 96 const char *RawHeaderPtr, 97 uint64_t Size, Error *Err) 98 : CommonArchiveMemberHeader<UnixArMemHdrType>( 99 Parent, reinterpret_cast<const UnixArMemHdrType *>(RawHeaderPtr)) { 100 if (RawHeaderPtr == nullptr) 101 return; 102 ErrorAsOutParameter ErrAsOutParam(Err); 103 104 if (Size < getSizeOf()) { 105 *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size); 106 return; 107 } 108 if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') { 109 if (Err) { 110 std::string Buf; 111 raw_string_ostream OS(Buf); 112 OS.write_escaped( 113 StringRef(ArMemHdr->Terminator, sizeof(ArMemHdr->Terminator))); 114 OS.flush(); 115 std::string Msg("terminator 
characters in archive member \"" + Buf + 116 "\" not the correct \"`\\n\" values for the archive " 117 "member header "); 118 Expected<StringRef> NameOrErr = getName(Size); 119 if (!NameOrErr) { 120 consumeError(NameOrErr.takeError()); 121 uint64_t Offset = RawHeaderPtr - Parent->getData().data(); 122 *Err = malformedError(Msg + "at offset " + Twine(Offset)); 123 } else 124 *Err = malformedError(Msg + "for " + NameOrErr.get()); 125 } 126 return; 127 } 128 } 129 130 BigArchiveMemberHeader::BigArchiveMemberHeader(const Archive *Parent, 131 const char *RawHeaderPtr, 132 uint64_t Size, Error *Err) 133 : CommonArchiveMemberHeader<BigArMemHdrType>( 134 Parent, reinterpret_cast<const BigArMemHdrType *>(RawHeaderPtr)) { 135 if (RawHeaderPtr == nullptr) 136 return; 137 ErrorAsOutParameter ErrAsOutParam(Err); 138 139 if (Size < getSizeOf()) 140 *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size); 141 } 142 143 // This gets the raw name from the ArMemHdr->Name field and checks that it is 144 // valid for the kind of archive. If it is not valid it returns an Error. 145 Expected<StringRef> ArchiveMemberHeader::getRawName() const { 146 char EndCond; 147 auto Kind = Parent->kind(); 148 if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) { 149 if (ArMemHdr->Name[0] == ' ') { 150 uint64_t Offset = 151 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 152 return malformedError("name contains a leading space for archive member " 153 "header at offset " + 154 Twine(Offset)); 155 } 156 EndCond = ' '; 157 } else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#') 158 EndCond = ' '; 159 else 160 EndCond = '/'; 161 StringRef::size_type end = 162 StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond); 163 if (end == StringRef::npos) 164 end = sizeof(ArMemHdr->Name); 165 assert(end <= sizeof(ArMemHdr->Name) && end > 0); 166 // Don't include the EndCond if there is one. 
167 return StringRef(ArMemHdr->Name, end); 168 } 169 170 Expected<uint64_t> 171 getArchiveMemberDecField(Twine FieldName, const StringRef RawField, 172 const Archive *Parent, 173 const AbstractArchiveMemberHeader *MemHeader) { 174 uint64_t Value; 175 if (RawField.getAsInteger(10, Value)) { 176 uint64_t Offset = MemHeader->getOffset(); 177 return malformedError("characters in " + FieldName + 178 " field in archive member header are not " 179 "all decimal numbers: '" + 180 RawField + 181 "' for the archive " 182 "member header at offset " + 183 Twine(Offset)); 184 } 185 return Value; 186 } 187 188 Expected<uint64_t> 189 getArchiveMemberOctField(Twine FieldName, const StringRef RawField, 190 const Archive *Parent, 191 const AbstractArchiveMemberHeader *MemHeader) { 192 uint64_t Value; 193 if (RawField.getAsInteger(8, Value)) { 194 uint64_t Offset = MemHeader->getOffset(); 195 return malformedError("characters in " + FieldName + 196 " field in archive member header are not " 197 "all octal numbers: '" + 198 RawField + 199 "' for the archive " 200 "member header at offset " + 201 Twine(Offset)); 202 } 203 return Value; 204 } 205 206 Expected<StringRef> BigArchiveMemberHeader::getRawName() const { 207 Expected<uint64_t> NameLenOrErr = getArchiveMemberDecField( 208 "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this); 209 if (!NameLenOrErr) 210 // TODO: Out-of-line. 211 return NameLenOrErr.takeError(); 212 uint64_t NameLen = NameLenOrErr.get(); 213 214 // If the name length is odd, pad with '\0' to get an even length. After 215 // padding, there is the name terminator "`\n". 
216 uint64_t NameLenWithPadding = alignTo(NameLen, 2); 217 StringRef NameTerminator = "`\n"; 218 StringRef NameStringWithNameTerminator = 219 StringRef(ArMemHdr->Name, NameLenWithPadding + NameTerminator.size()); 220 if (!NameStringWithNameTerminator.endswith(NameTerminator)) { 221 uint64_t Offset = 222 reinterpret_cast<const char *>(ArMemHdr->Name + NameLenWithPadding) - 223 Parent->getData().data(); 224 // TODO: Out-of-line. 225 return malformedError( 226 "name does not have name terminator \"`\\n\" for archive member" 227 "header at offset " + 228 Twine(Offset)); 229 } 230 return StringRef(ArMemHdr->Name, NameLen); 231 } 232 233 // member including the header, so the size of any name following the header 234 // is checked to make sure it does not overflow. 235 Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const { 236 237 // This can be called from the ArchiveMemberHeader constructor when the 238 // archive header is truncated to produce an error message with the name. 239 // Make sure the name field is not truncated. 240 if (Size < offsetof(UnixArMemHdrType, Name) + sizeof(ArMemHdr->Name)) { 241 uint64_t ArchiveOffset = 242 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 243 return malformedError("archive header truncated before the name field " 244 "for archive member header at offset " + 245 Twine(ArchiveOffset)); 246 } 247 248 // The raw name itself can be invalid. 249 Expected<StringRef> NameOrErr = getRawName(); 250 if (!NameOrErr) 251 return NameOrErr.takeError(); 252 StringRef Name = NameOrErr.get(); 253 254 // Check if it's a special name. 255 if (Name[0] == '/') { 256 if (Name.size() == 1) // Linker member. 257 return Name; 258 if (Name.size() == 2 && Name[1] == '/') // String table. 259 return Name; 260 // System libraries from the Windows SDK for Windows 11 contain this symbol. 261 // It looks like a CFG guard: we just skip it for now. 
262 if (Name.equals("/<XFGHASHMAP>/")) 263 return Name; 264 // It's a long name. 265 // Get the string table offset. 266 std::size_t StringOffset; 267 if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) { 268 std::string Buf; 269 raw_string_ostream OS(Buf); 270 OS.write_escaped(Name.substr(1).rtrim(' ')); 271 OS.flush(); 272 uint64_t ArchiveOffset = 273 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 274 return malformedError("long name offset characters after the '/' are " 275 "not all decimal numbers: '" + 276 Buf + "' for archive member header at offset " + 277 Twine(ArchiveOffset)); 278 } 279 280 // Verify it. 281 if (StringOffset >= Parent->getStringTable().size()) { 282 uint64_t ArchiveOffset = 283 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 284 return malformedError("long name offset " + Twine(StringOffset) + 285 " past the end of the string table for archive " 286 "member header at offset " + 287 Twine(ArchiveOffset)); 288 } 289 290 // GNU long file names end with a "/\n". 
291 if (Parent->kind() == Archive::K_GNU || 292 Parent->kind() == Archive::K_GNU64) { 293 size_t End = Parent->getStringTable().find('\n', /*From=*/StringOffset); 294 if (End == StringRef::npos || End < 1 || 295 Parent->getStringTable()[End - 1] != '/') { 296 return malformedError("string table at long name offset " + 297 Twine(StringOffset) + "not terminated"); 298 } 299 return Parent->getStringTable().slice(StringOffset, End - 1); 300 } 301 return Parent->getStringTable().begin() + StringOffset; 302 } 303 304 if (Name.startswith("#1/")) { 305 uint64_t NameLength; 306 if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) { 307 std::string Buf; 308 raw_string_ostream OS(Buf); 309 OS.write_escaped(Name.substr(3).rtrim(' ')); 310 OS.flush(); 311 uint64_t ArchiveOffset = 312 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 313 return malformedError("long name length characters after the #1/ are " 314 "not all decimal numbers: '" + 315 Buf + "' for archive member header at offset " + 316 Twine(ArchiveOffset)); 317 } 318 if (getSizeOf() + NameLength > Size) { 319 uint64_t ArchiveOffset = 320 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 321 return malformedError("long name length: " + Twine(NameLength) + 322 " extends past the end of the member or archive " 323 "for archive member header at offset " + 324 Twine(ArchiveOffset)); 325 } 326 return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(), 327 NameLength) 328 .rtrim('\0'); 329 } 330 331 // It is not a long name so trim the blanks at the end of the name. 332 if (Name[Name.size() - 1] != '/') 333 return Name.rtrim(' '); 334 335 // It's a simple name. 
336 return Name.drop_back(1); 337 } 338 339 Expected<StringRef> BigArchiveMemberHeader::getName(uint64_t Size) const { 340 return getRawName(); 341 } 342 343 Expected<uint64_t> ArchiveMemberHeader::getSize() const { 344 return getArchiveMemberDecField("size", getFieldRawString(ArMemHdr->Size), 345 Parent, this); 346 } 347 348 Expected<uint64_t> BigArchiveMemberHeader::getSize() const { 349 Expected<uint64_t> SizeOrErr = getArchiveMemberDecField( 350 "size", getFieldRawString(ArMemHdr->Size), Parent, this); 351 if (!SizeOrErr) 352 return SizeOrErr.takeError(); 353 354 Expected<uint64_t> NameLenOrErr = getRawNameSize(); 355 if (!NameLenOrErr) 356 return NameLenOrErr.takeError(); 357 358 return *SizeOrErr + alignTo(*NameLenOrErr, 2); 359 } 360 361 Expected<uint64_t> BigArchiveMemberHeader::getRawNameSize() const { 362 return getArchiveMemberDecField( 363 "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this); 364 } 365 366 Expected<uint64_t> BigArchiveMemberHeader::getNextOffset() const { 367 return getArchiveMemberDecField( 368 "NextOffset", getFieldRawString(ArMemHdr->NextOffset), Parent, this); 369 } 370 371 Expected<sys::fs::perms> AbstractArchiveMemberHeader::getAccessMode() const { 372 Expected<uint64_t> AccessModeOrErr = 373 getArchiveMemberOctField("AccessMode", getRawAccessMode(), Parent, this); 374 if (!AccessModeOrErr) 375 return AccessModeOrErr.takeError(); 376 return static_cast<sys::fs::perms>(*AccessModeOrErr); 377 } 378 379 Expected<sys::TimePoint<std::chrono::seconds>> 380 AbstractArchiveMemberHeader::getLastModified() const { 381 Expected<uint64_t> SecondsOrErr = getArchiveMemberDecField( 382 "LastModified", getRawLastModified(), Parent, this); 383 384 if (!SecondsOrErr) 385 return SecondsOrErr.takeError(); 386 387 return sys::toTimePoint(*SecondsOrErr); 388 } 389 390 Expected<unsigned> AbstractArchiveMemberHeader::getUID() const { 391 StringRef User = getRawUID(); 392 if (User.empty()) 393 return 0; 394 return getArchiveMemberDecField("UID", 
User, Parent, this); 395 } 396 397 Expected<unsigned> AbstractArchiveMemberHeader::getGID() const { 398 StringRef Group = getRawGID(); 399 if (Group.empty()) 400 return 0; 401 return getArchiveMemberDecField("GID", Group, Parent, this); 402 } 403 404 Expected<bool> ArchiveMemberHeader::isThin() const { 405 Expected<StringRef> NameOrErr = getRawName(); 406 if (!NameOrErr) 407 return NameOrErr.takeError(); 408 StringRef Name = NameOrErr.get(); 409 return Parent->isThin() && Name != "/" && Name != "//" && Name != "/SYM64/"; 410 } 411 412 Expected<const char *> ArchiveMemberHeader::getNextChildLoc() const { 413 uint64_t Size = getSizeOf(); 414 Expected<bool> isThinOrErr = isThin(); 415 if (!isThinOrErr) 416 return isThinOrErr.takeError(); 417 418 bool isThin = isThinOrErr.get(); 419 if (!isThin) { 420 Expected<uint64_t> MemberSize = getSize(); 421 if (!MemberSize) 422 return MemberSize.takeError(); 423 424 Size += MemberSize.get(); 425 } 426 427 // If Size is odd, add 1 to make it even. 428 const char *NextLoc = 429 reinterpret_cast<const char *>(ArMemHdr) + alignTo(Size, 2); 430 431 if (NextLoc == Parent->getMemoryBufferRef().getBufferEnd()) 432 return nullptr; 433 434 return NextLoc; 435 } 436 437 Expected<const char *> BigArchiveMemberHeader::getNextChildLoc() const { 438 if (getOffset() == 439 static_cast<const BigArchive *>(Parent)->getLastChildOffset()) 440 return nullptr; 441 442 Expected<uint64_t> NextOffsetOrErr = getNextOffset(); 443 if (!NextOffsetOrErr) 444 return NextOffsetOrErr.takeError(); 445 return Parent->getData().data() + NextOffsetOrErr.get(); 446 } 447 448 Archive::Child::Child(const Archive *Parent, StringRef Data, 449 uint16_t StartOfFile) 450 : Parent(Parent), Data(Data), StartOfFile(StartOfFile) { 451 Header = Parent->createArchiveMemberHeader(Data.data(), Data.size(), nullptr); 452 } 453 454 Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err) 455 : Parent(Parent) { 456 if (!Start) { 457 Header = nullptr; 458 return; 
459 } 460 461 Header = Parent->createArchiveMemberHeader( 462 Start, 463 Parent ? Parent->getData().size() - (Start - Parent->getData().data()) 464 : 0, 465 Err); 466 467 // If we are pointed to real data, Start is not a nullptr, then there must be 468 // a non-null Err pointer available to report malformed data on. Only in 469 // the case sentinel value is being constructed is Err is permitted to be a 470 // nullptr. 471 assert(Err && "Err can't be nullptr if Start is not a nullptr"); 472 473 ErrorAsOutParameter ErrAsOutParam(Err); 474 475 // If there was an error in the construction of the Header 476 // then just return with the error now set. 477 if (*Err) 478 return; 479 480 uint64_t Size = Header->getSizeOf(); 481 Data = StringRef(Start, Size); 482 Expected<bool> isThinOrErr = isThinMember(); 483 if (!isThinOrErr) { 484 *Err = isThinOrErr.takeError(); 485 return; 486 } 487 bool isThin = isThinOrErr.get(); 488 if (!isThin) { 489 Expected<uint64_t> MemberSize = getRawSize(); 490 if (!MemberSize) { 491 *Err = MemberSize.takeError(); 492 return; 493 } 494 Size += MemberSize.get(); 495 Data = StringRef(Start, Size); 496 } 497 498 // Setup StartOfFile and PaddingBytes. 499 StartOfFile = Header->getSizeOf(); 500 // Don't include attached name. 501 Expected<StringRef> NameOrErr = getRawName(); 502 if (!NameOrErr) { 503 *Err = NameOrErr.takeError(); 504 return; 505 } 506 StringRef Name = NameOrErr.get(); 507 508 if (Parent->kind() == Archive::K_AIXBIG) { 509 // The actual start of the file is after the name and any necessary 510 // even-alignment padding. 
511 StartOfFile += ((Name.size() + 1) >> 1) << 1; 512 } else if (Name.startswith("#1/")) { 513 uint64_t NameSize; 514 StringRef RawNameSize = Name.substr(3).rtrim(' '); 515 if (RawNameSize.getAsInteger(10, NameSize)) { 516 uint64_t Offset = Start - Parent->getData().data(); 517 *Err = malformedError("long name length characters after the #1/ are " 518 "not all decimal numbers: '" + 519 RawNameSize + 520 "' for archive member header at offset " + 521 Twine(Offset)); 522 return; 523 } 524 StartOfFile += NameSize; 525 } 526 } 527 528 Expected<uint64_t> Archive::Child::getSize() const { 529 if (Parent->IsThin) 530 return Header->getSize(); 531 return Data.size() - StartOfFile; 532 } 533 534 Expected<uint64_t> Archive::Child::getRawSize() const { 535 return Header->getSize(); 536 } 537 538 Expected<bool> Archive::Child::isThinMember() const { return Header->isThin(); } 539 540 Expected<std::string> Archive::Child::getFullName() const { 541 Expected<bool> isThin = isThinMember(); 542 if (!isThin) 543 return isThin.takeError(); 544 assert(isThin.get()); 545 Expected<StringRef> NameOrErr = getName(); 546 if (!NameOrErr) 547 return NameOrErr.takeError(); 548 StringRef Name = *NameOrErr; 549 if (sys::path::is_absolute(Name)) 550 return std::string(Name); 551 552 SmallString<128> FullName = sys::path::parent_path( 553 Parent->getMemoryBufferRef().getBufferIdentifier()); 554 sys::path::append(FullName, Name); 555 return std::string(FullName.str()); 556 } 557 558 Expected<StringRef> Archive::Child::getBuffer() const { 559 Expected<bool> isThinOrErr = isThinMember(); 560 if (!isThinOrErr) 561 return isThinOrErr.takeError(); 562 bool isThin = isThinOrErr.get(); 563 if (!isThin) { 564 Expected<uint64_t> Size = getSize(); 565 if (!Size) 566 return Size.takeError(); 567 return StringRef(Data.data() + StartOfFile, Size.get()); 568 } 569 Expected<std::string> FullNameOrErr = getFullName(); 570 if (!FullNameOrErr) 571 return FullNameOrErr.takeError(); 572 const std::string &FullName = 
*FullNameOrErr; 573 ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName); 574 if (std::error_code EC = Buf.getError()) 575 return errorCodeToError(EC); 576 Parent->ThinBuffers.push_back(std::move(*Buf)); 577 return Parent->ThinBuffers.back()->getBuffer(); 578 } 579 580 Expected<Archive::Child> Archive::Child::getNext() const { 581 Expected<const char *> NextLocOrErr = Header->getNextChildLoc(); 582 if (!NextLocOrErr) 583 return NextLocOrErr.takeError(); 584 585 const char *NextLoc = *NextLocOrErr; 586 587 // Check to see if this is at the end of the archive. 588 if (NextLoc == nullptr) 589 return Child(nullptr, nullptr, nullptr); 590 591 // Check to see if this is past the end of the archive. 592 if (NextLoc > Parent->Data.getBufferEnd()) { 593 std::string Msg("offset to next archive member past the end of the archive " 594 "after member "); 595 Expected<StringRef> NameOrErr = getName(); 596 if (!NameOrErr) { 597 consumeError(NameOrErr.takeError()); 598 uint64_t Offset = Data.data() - Parent->getData().data(); 599 return malformedError(Msg + "at offset " + Twine(Offset)); 600 } else 601 return malformedError(Msg + NameOrErr.get()); 602 } 603 604 Error Err = Error::success(); 605 Child Ret(Parent, NextLoc, &Err); 606 if (Err) 607 return std::move(Err); 608 return Ret; 609 } 610 611 uint64_t Archive::Child::getChildOffset() const { 612 const char *a = Parent->Data.getBuffer().data(); 613 const char *c = Data.data(); 614 uint64_t offset = c - a; 615 return offset; 616 } 617 618 Expected<StringRef> Archive::Child::getName() const { 619 Expected<uint64_t> RawSizeOrErr = getRawSize(); 620 if (!RawSizeOrErr) 621 return RawSizeOrErr.takeError(); 622 uint64_t RawSize = RawSizeOrErr.get(); 623 Expected<StringRef> NameOrErr = 624 Header->getName(Header->getSizeOf() + RawSize); 625 if (!NameOrErr) 626 return NameOrErr.takeError(); 627 StringRef Name = NameOrErr.get(); 628 return Name; 629 } 630 631 Expected<MemoryBufferRef> 
Archive::Child::getMemoryBufferRef() const { 632 Expected<StringRef> NameOrErr = getName(); 633 if (!NameOrErr) 634 return NameOrErr.takeError(); 635 StringRef Name = NameOrErr.get(); 636 Expected<StringRef> Buf = getBuffer(); 637 if (!Buf) 638 return createFileError(Name, Buf.takeError()); 639 return MemoryBufferRef(*Buf, Name); 640 } 641 642 Expected<std::unique_ptr<Binary>> 643 Archive::Child::getAsBinary(LLVMContext *Context) const { 644 Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef(); 645 if (!BuffOrErr) 646 return BuffOrErr.takeError(); 647 648 auto BinaryOrErr = createBinary(BuffOrErr.get(), Context); 649 if (BinaryOrErr) 650 return std::move(*BinaryOrErr); 651 return BinaryOrErr.takeError(); 652 } 653 654 Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) { 655 Error Err = Error::success(); 656 std::unique_ptr<Archive> Ret; 657 StringRef Buffer = Source.getBuffer(); 658 659 if (Buffer.startswith(BigArchiveMagic)) 660 Ret = std::make_unique<BigArchive>(Source, Err); 661 else 662 Ret = std::make_unique<Archive>(Source, Err); 663 664 if (Err) 665 return std::move(Err); 666 return std::move(Ret); 667 } 668 669 std::unique_ptr<AbstractArchiveMemberHeader> 670 Archive::createArchiveMemberHeader(const char *RawHeaderPtr, uint64_t Size, 671 Error *Err) const { 672 ErrorAsOutParameter ErrAsOutParam(Err); 673 if (kind() != K_AIXBIG) 674 return std::make_unique<ArchiveMemberHeader>(this, RawHeaderPtr, Size, Err); 675 return std::make_unique<BigArchiveMemberHeader>(this, RawHeaderPtr, Size, 676 Err); 677 } 678 679 uint64_t Archive::getArchiveMagicLen() const { 680 if (isThin()) 681 return sizeof(ThinArchiveMagic) - 1; 682 683 if (Kind() == K_AIXBIG) 684 return sizeof(BigArchiveMagic) - 1; 685 686 return sizeof(ArchiveMagic) - 1; 687 } 688 689 void Archive::setFirstRegular(const Child &C) { 690 FirstRegularData = C.Data; 691 FirstRegularStartOfFile = C.StartOfFile; 692 } 693 694 Archive::Archive(MemoryBufferRef Source, Error &Err) 695 
/// Parses the leading special members of \p Source to determine the archive
/// format and to locate the symbol table, the string table and the first
/// regular member.
///
/// \param Source  buffer holding the archive.
/// \param Err     set to an error when the magic is not recognised or when a
///                special member cannot be read; set to success on the
///                paths that assign Error::success() below.
///
/// Buffers starting with BigArchiveMagic only get Format/IsThin set here and
/// return before any member is inspected.
Archive::Archive(MemoryBufferRef Source, Error &Err)
    : Binary(Binary::ID_Archive, Source) {
  ErrorAsOutParameter ErrAsOutParam(&Err);
  StringRef Buffer = Data.getBuffer();
  // Check for sufficient magic.
  if (Buffer.startswith(ThinArchiveMagic)) {
    IsThin = true;
  } else if (Buffer.startswith(ArchiveMagic)) {
    IsThin = false;
  } else if (Buffer.startswith(BigArchiveMagic)) {
    Format = K_AIXBIG;
    IsThin = false;
    return;
  } else {
    Err = make_error<GenericBinaryError>("file too small to be an archive",
                                         object_error::invalid_file_type);
    return;
  }

  // Make sure Format is initialized before any call to
  // ArchiveMemberHeader::getName() is made.  This could be a valid empty
  // archive which is the same in all formats.  So claiming it to be gnu is
  // fine if not totally correct before we look for a string table or table of
  // contents.
  Format = K_GNU;

  // Get the special members.
  child_iterator I = child_begin(Err, false);
  if (Err)
    return;
  child_iterator E = child_end();

  // See if this is a valid empty archive and if so return.
  if (I == E) {
    Err = Error::success();
    return;
  }
  const Child *C = &*I;

  // Advances I to the next member and refreshes C. Returns true when
  // advancing set Err, in which case C is left unchanged and the caller
  // must bail out.
  auto Increment = [&]() {
    ++I;
    if (Err)
      return true;
    C = &*I;
    return false;
  };

  Expected<StringRef> NameOrErr = C->getRawName();
  if (!NameOrErr) {
    Err = NameOrErr.takeError();
    return;
  }
  StringRef Name = NameOrErr.get();

  // Below is the pattern that is used to figure out the archive format
  // GNU archive format
  //  First member : / (may exist, if it exists, points to the symbol table )
  //  Second member : // (may exist, if it exists, points to the string table)
  //  Note : The string table is used if the filename exceeds 15 characters
  // BSD archive format
  //  First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
  //  There is no string table, if the filename exceeds 15 characters or has a
  //  embedded space, the filename has #1/<size>, The size represents the size
  //  of the filename that needs to be read after the archive header
  // COFF archive format
  //  First member : /
  //  Second member : / (provides a directory of symbols)
  //  Third member : // (may exist, if it exists, contains the string table)
  //  Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
  //  even if the string table is empty. However, lib.exe does not in fact
  //  seem to create the third member if there's no member whose filename
  //  exceeds 15 characters. So the third member is optional.

  // BSD / Darwin64 with a short symbol-table name stored in the header.
  if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") {
    if (Name == "__.SYMDEF")
      Format = K_BSD;
    else // Name == "__.SYMDEF_64"
      Format = K_DARWIN64;
    // We know that the symbol table is not an external file, but we still must
    // check any Expected<> return value.
    Expected<StringRef> BufOrErr = C->getBuffer();
    if (!BufOrErr) {
      Err = BufOrErr.takeError();
      return;
    }
    SymbolTable = BufOrErr.get();
    if (Increment())
      return;
    setFirstRegular(*C);

    Err = Error::success();
    return;
  }

  // BSD-style long name: the real name follows the header, so resolve it
  // before deciding whether this member is a symbol table.
  if (Name.startswith("#1/")) {
    Format = K_BSD;
    // We know this is BSD, so getName will work since there is no string table.
    Expected<StringRef> NameOrErr = C->getName();
    if (!NameOrErr) {
      Err = NameOrErr.takeError();
      return;
    }
    Name = NameOrErr.get();
    if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
      // We know that the symbol table is not an external file, but we still
      // must check any Expected<> return value.
      Expected<StringRef> BufOrErr = C->getBuffer();
      if (!BufOrErr) {
        Err = BufOrErr.takeError();
        return;
      }
      SymbolTable = BufOrErr.get();
      if (Increment())
        return;
    } else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") {
      Format = K_DARWIN64;
      // We know that the symbol table is not an external file, but we still
      // must check any Expected<> return value.
      Expected<StringRef> BufOrErr = C->getBuffer();
      if (!BufOrErr) {
        Err = BufOrErr.takeError();
        return;
      }
      SymbolTable = BufOrErr.get();
      if (Increment())
        return;
    }
    // Whatever C points at now (the member after the symbol table, or the
    // first member itself when it was not a symbol table) is the first
    // regular member.
    setFirstRegular(*C);
    return;
  }

  // MIPS 64-bit ELF archives use a special format of a symbol table.
  // This format is marked by `ar_name` field equals to "/SYM64/".
  // For detailed description see page 96 in the following document:
  // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf

  bool has64SymTable = false;
  if (Name == "/" || Name == "/SYM64/") {
    // We know that the symbol table is not an external file, but we still
    // must check any Expected<> return value.
    Expected<StringRef> BufOrErr = C->getBuffer();
    if (!BufOrErr) {
      Err = BufOrErr.takeError();
      return;
    }
    SymbolTable = BufOrErr.get();
    if (Name == "/SYM64/")
      has64SymTable = true;

    if (Increment())
      return;
    if (I == E) {
      Err = Error::success();
      return;
    }
    // Re-read the name: the checks below look at the member that follows
    // the symbol table.
    Expected<StringRef> NameOrErr = C->getRawName();
    if (!NameOrErr) {
      Err = NameOrErr.takeError();
      return;
    }
    Name = NameOrErr.get();
  }

  // GNU string table.
  if (Name == "//") {
    Format = has64SymTable ? K_GNU64 : K_GNU;
    // The string table is never an external member, but we still
    // must check any Expected<> return value.
    Expected<StringRef> BufOrErr = C->getBuffer();
    if (!BufOrErr) {
      Err = BufOrErr.takeError();
      return;
    }
    StringTable = BufOrErr.get();
    if (Increment())
      return;
    setFirstRegular(*C);
    Err = Error::success();
    return;
  }

  // A name that does not start with '/' is a regular member: GNU archive
  // without a string table.
  if (Name[0] != '/') {
    Format = has64SymTable ? K_GNU64 : K_GNU;
    setFirstRegular(*C);
    Err = Error::success();
    return;
  }

  // The only '/'-prefixed name accepted at this point is a second "/"
  // member, which is the COFF layout described above.
  if (Name != "/") {
    Err = errorCodeToError(object_error::parse_failed);
    return;
  }

  Format = K_COFF;
  // We know that the symbol table is not an external file, but we still
  // must check any Expected<> return value.
  Expected<StringRef> BufOrErr = C->getBuffer();
  if (!BufOrErr) {
    Err = BufOrErr.takeError();
    return;
  }
  SymbolTable = BufOrErr.get();

  if (Increment())
    return;

  if (I == E) {
    setFirstRegular(*C);
    Err = Error::success();
    return;
  }

  NameOrErr = C->getRawName();
  if (!NameOrErr) {
    Err = NameOrErr.takeError();
    return;
  }
  Name = NameOrErr.get();

  // Optional COFF string table.
  if (Name == "//") {
    // The string table is never an external member, but we still
    // must check any Expected<> return value.
    Expected<StringRef> BufOrErr = C->getBuffer();
    if (!BufOrErr) {
      Err = BufOrErr.takeError();
      return;
    }
    StringTable = BufOrErr.get();
    if (Increment())
      return;
  }

  setFirstRegular(*C);
  Err = Error::success();
}
915 Expected<StringRef> BufOrErr = C->getBuffer(); 916 if (!BufOrErr) { 917 Err = BufOrErr.takeError(); 918 return; 919 } 920 StringTable = BufOrErr.get(); 921 if (Increment()) 922 return; 923 } 924 925 setFirstRegular(*C); 926 Err = Error::success(); 927 } 928 929 object::Archive::Kind Archive::getDefaultKindForHost() { 930 Triple HostTriple(sys::getProcessTriple()); 931 return HostTriple.isOSDarwin() 932 ? object::Archive::K_DARWIN 933 : (HostTriple.isOSAIX() ? object::Archive::K_AIXBIG 934 : object::Archive::K_GNU); 935 } 936 937 Archive::child_iterator Archive::child_begin(Error &Err, 938 bool SkipInternal) const { 939 if (isEmpty()) 940 return child_end(); 941 942 if (SkipInternal) 943 return child_iterator::itr( 944 Child(this, FirstRegularData, FirstRegularStartOfFile), Err); 945 946 const char *Loc = Data.getBufferStart() + getFirstChildOffset(); 947 Child C(this, Loc, &Err); 948 if (Err) 949 return child_end(); 950 return child_iterator::itr(C, Err); 951 } 952 953 Archive::child_iterator Archive::child_end() const { 954 return child_iterator::end(Child(nullptr, nullptr, nullptr)); 955 } 956 957 StringRef Archive::Symbol::getName() const { 958 return Parent->getSymbolTable().begin() + StringIndex; 959 } 960 961 Expected<Archive::Child> Archive::Symbol::getMember() const { 962 const char *Buf = Parent->getSymbolTable().begin(); 963 const char *Offsets = Buf; 964 if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64) 965 Offsets += sizeof(uint64_t); 966 else 967 Offsets += sizeof(uint32_t); 968 uint64_t Offset = 0; 969 if (Parent->kind() == K_GNU) { 970 Offset = read32be(Offsets + SymbolIndex * 4); 971 } else if (Parent->kind() == K_GNU64) { 972 Offset = read64be(Offsets + SymbolIndex * 8); 973 } else if (Parent->kind() == K_BSD) { 974 // The SymbolIndex is an index into the ranlib structs that start at 975 // Offsets (the first uint32_t is the number of bytes of the ranlib 976 // structs). 
The ranlib structs are a pair of uint32_t's the first 977 // being a string table offset and the second being the offset into 978 // the archive of the member that defines the symbol. Which is what 979 // is needed here. 980 Offset = read32le(Offsets + SymbolIndex * 8 + 4); 981 } else if (Parent->kind() == K_DARWIN64) { 982 // The SymbolIndex is an index into the ranlib_64 structs that start at 983 // Offsets (the first uint64_t is the number of bytes of the ranlib_64 984 // structs). The ranlib_64 structs are a pair of uint64_t's the first 985 // being a string table offset and the second being the offset into 986 // the archive of the member that defines the symbol. Which is what 987 // is needed here. 988 Offset = read64le(Offsets + SymbolIndex * 16 + 8); 989 } else { 990 // Skip offsets. 991 uint32_t MemberCount = read32le(Buf); 992 Buf += MemberCount * 4 + 4; 993 994 uint32_t SymbolCount = read32le(Buf); 995 if (SymbolIndex >= SymbolCount) 996 return errorCodeToError(object_error::parse_failed); 997 998 // Skip SymbolCount to get to the indices table. 999 const char *Indices = Buf + 4; 1000 1001 // Get the index of the offset in the file member offset table for this 1002 // symbol. 1003 uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2); 1004 // Subtract 1 since OffsetIndex is 1 based. 
1005 --OffsetIndex; 1006 1007 if (OffsetIndex >= MemberCount) 1008 return errorCodeToError(object_error::parse_failed); 1009 1010 Offset = read32le(Offsets + OffsetIndex * 4); 1011 } 1012 1013 const char *Loc = Parent->getData().begin() + Offset; 1014 Error Err = Error::success(); 1015 Child C(Parent, Loc, &Err); 1016 if (Err) 1017 return std::move(Err); 1018 return C; 1019 } 1020 1021 Archive::Symbol Archive::Symbol::getNext() const { 1022 Symbol t(*this); 1023 if (Parent->kind() == K_BSD) { 1024 // t.StringIndex is an offset from the start of the __.SYMDEF or 1025 // "__.SYMDEF SORTED" member into the string table for the ranlib 1026 // struct indexed by t.SymbolIndex . To change t.StringIndex to the 1027 // offset in the string table for t.SymbolIndex+1 we subtract the 1028 // its offset from the start of the string table for t.SymbolIndex 1029 // and add the offset of the string table for t.SymbolIndex+1. 1030 1031 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t 1032 // which is the number of bytes of ranlib structs that follow. The ranlib 1033 // structs are a pair of uint32_t's the first being a string table offset 1034 // and the second being the offset into the archive of the member that 1035 // define the symbol. After that the next uint32_t is the byte count of 1036 // the string table followed by the string table. 1037 const char *Buf = Parent->getSymbolTable().begin(); 1038 uint32_t RanlibCount = 0; 1039 RanlibCount = read32le(Buf) / 8; 1040 // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount) 1041 // don't change the t.StringIndex as we don't want to reference a ranlib 1042 // past RanlibCount. 
1043 if (t.SymbolIndex + 1 < RanlibCount) { 1044 const char *Ranlibs = Buf + 4; 1045 uint32_t CurRanStrx = 0; 1046 uint32_t NextRanStrx = 0; 1047 CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8); 1048 NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8); 1049 t.StringIndex -= CurRanStrx; 1050 t.StringIndex += NextRanStrx; 1051 } 1052 } else { 1053 // Go to one past next null. 1054 t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1; 1055 } 1056 ++t.SymbolIndex; 1057 return t; 1058 } 1059 1060 Archive::symbol_iterator Archive::symbol_begin() const { 1061 if (!hasSymbolTable()) 1062 return symbol_iterator(Symbol(this, 0, 0)); 1063 1064 const char *buf = getSymbolTable().begin(); 1065 if (kind() == K_GNU) { 1066 uint32_t symbol_count = 0; 1067 symbol_count = read32be(buf); 1068 buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t))); 1069 } else if (kind() == K_GNU64) { 1070 uint64_t symbol_count = read64be(buf); 1071 buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t))); 1072 } else if (kind() == K_BSD) { 1073 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t 1074 // which is the number of bytes of ranlib structs that follow. The ranlib 1075 // structs are a pair of uint32_t's the first being a string table offset 1076 // and the second being the offset into the archive of the member that 1077 // define the symbol. After that the next uint32_t is the byte count of 1078 // the string table followed by the string table. 1079 uint32_t ranlib_count = 0; 1080 ranlib_count = read32le(buf) / 8; 1081 const char *ranlibs = buf + 4; 1082 uint32_t ran_strx = 0; 1083 ran_strx = read32le(ranlibs); 1084 buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t)))); 1085 // Skip the byte count of the string table. 
1086 buf += sizeof(uint32_t); 1087 buf += ran_strx; 1088 } else if (kind() == K_DARWIN64) { 1089 // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t 1090 // which is the number of bytes of ranlib_64 structs that follow. The 1091 // ranlib_64 structs are a pair of uint64_t's the first being a string 1092 // table offset and the second being the offset into the archive of the 1093 // member that define the symbol. After that the next uint64_t is the byte 1094 // count of the string table followed by the string table. 1095 uint64_t ranlib_count = 0; 1096 ranlib_count = read64le(buf) / 16; 1097 const char *ranlibs = buf + 8; 1098 uint64_t ran_strx = 0; 1099 ran_strx = read64le(ranlibs); 1100 buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t)))); 1101 // Skip the byte count of the string table. 1102 buf += sizeof(uint64_t); 1103 buf += ran_strx; 1104 } else { 1105 uint32_t member_count = 0; 1106 uint32_t symbol_count = 0; 1107 member_count = read32le(buf); 1108 buf += 4 + (member_count * 4); // Skip offsets. 1109 symbol_count = read32le(buf); 1110 buf += 4 + (symbol_count * 2); // Skip indices. 1111 } 1112 uint32_t string_start_offset = buf - getSymbolTable().begin(); 1113 return symbol_iterator(Symbol(this, 0, string_start_offset)); 1114 } 1115 1116 Archive::symbol_iterator Archive::symbol_end() const { 1117 return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0)); 1118 } 1119 1120 uint32_t Archive::getNumberOfSymbols() const { 1121 if (!hasSymbolTable()) 1122 return 0; 1123 const char *buf = getSymbolTable().begin(); 1124 if (kind() == K_GNU) 1125 return read32be(buf); 1126 if (kind() == K_GNU64) 1127 return read64be(buf); 1128 if (kind() == K_BSD) 1129 return read32le(buf) / 8; 1130 if (kind() == K_DARWIN64) 1131 return read64le(buf) / 16; 1132 uint32_t member_count = 0; 1133 member_count = read32le(buf); 1134 buf += 4 + (member_count * 4); // Skip offsets. 
1135 return read32le(buf); 1136 } 1137 1138 Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const { 1139 Archive::symbol_iterator bs = symbol_begin(); 1140 Archive::symbol_iterator es = symbol_end(); 1141 1142 for (; bs != es; ++bs) { 1143 StringRef SymName = bs->getName(); 1144 if (SymName == name) { 1145 if (auto MemberOrErr = bs->getMember()) 1146 return Child(*MemberOrErr); 1147 else 1148 return MemberOrErr.takeError(); 1149 } 1150 } 1151 return Optional<Child>(); 1152 } 1153 1154 // Returns true if archive file contains no member file. 1155 bool Archive::isEmpty() const { 1156 return Data.getBufferSize() == getArchiveMagicLen(); 1157 } 1158 1159 bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); } 1160 1161 BigArchive::BigArchive(MemoryBufferRef Source, Error &Err) 1162 : Archive(Source, Err) { 1163 ErrorAsOutParameter ErrAsOutParam(&Err); 1164 StringRef Buffer = Data.getBuffer(); 1165 ArFixLenHdr = reinterpret_cast<const FixLenHdr *>(Buffer.data()); 1166 1167 StringRef RawOffset = getFieldRawString(ArFixLenHdr->FirstChildOffset); 1168 if (RawOffset.getAsInteger(10, FirstChildOffset)) 1169 // TODO: Out-of-line. 1170 Err = malformedError("malformed AIX big archive: first member offset \"" + 1171 RawOffset + "\" is not a number"); 1172 1173 RawOffset = getFieldRawString(ArFixLenHdr->LastChildOffset); 1174 if (RawOffset.getAsInteger(10, LastChildOffset)) 1175 // TODO: Out-of-line. 1176 Err = malformedError("malformed AIX big archive: last member offset \"" + 1177 RawOffset + "\" is not a number"); 1178 1179 child_iterator I = child_begin(Err, false); 1180 if (Err) 1181 return; 1182 child_iterator E = child_end(); 1183 if (I == E) { 1184 Err = Error::success(); 1185 return; 1186 } 1187 setFirstRegular(*I); 1188 Err = Error::success(); 1189 } 1190