1 //===- ArchiveWriter.cpp - ar File Format implementation --------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the writeArchive function. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Object/ArchiveWriter.h" 14 #include "llvm/ADT/ArrayRef.h" 15 #include "llvm/ADT/StringMap.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/BinaryFormat/Magic.h" 18 #include "llvm/IR/LLVMContext.h" 19 #include "llvm/Object/Archive.h" 20 #include "llvm/Object/Error.h" 21 #include "llvm/Object/ObjectFile.h" 22 #include "llvm/Object/SymbolicFile.h" 23 #include "llvm/Support/Alignment.h" 24 #include "llvm/Support/EndianStream.h" 25 #include "llvm/Support/Errc.h" 26 #include "llvm/Support/ErrorHandling.h" 27 #include "llvm/Support/Format.h" 28 #include "llvm/Support/MathExtras.h" 29 #include "llvm/Support/Path.h" 30 #include "llvm/Support/SmallVectorMemoryBuffer.h" 31 #include "llvm/Support/raw_ostream.h" 32 33 #include <map> 34 35 #if !defined(_MSC_VER) && !defined(__MINGW32__) 36 #include <unistd.h> 37 #else 38 #include <io.h> 39 #endif 40 41 using namespace llvm; 42 43 NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) 44 : Buf(MemoryBuffer::getMemBuffer(BufRef, false)), 45 MemberName(BufRef.getBufferIdentifier()) {} 46 47 Expected<NewArchiveMember> 48 NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, 49 bool Deterministic) { 50 Expected<llvm::MemoryBufferRef> BufOrErr = OldMember.getMemoryBufferRef(); 51 if (!BufOrErr) 52 return BufOrErr.takeError(); 53 54 NewArchiveMember M; 55 M.Buf = MemoryBuffer::getMemBuffer(*BufOrErr, false); 56 M.MemberName = M.Buf->getBufferIdentifier(); 57 if (!Deterministic) { 58 auto ModTimeOrErr = OldMember.getLastModified(); 59 if (!ModTimeOrErr) 60 return ModTimeOrErr.takeError(); 61 M.ModTime = ModTimeOrErr.get(); 62 Expected<unsigned> UIDOrErr = OldMember.getUID(); 63 if (!UIDOrErr) 64 return UIDOrErr.takeError(); 65 M.UID = UIDOrErr.get(); 66 Expected<unsigned> GIDOrErr = OldMember.getGID(); 67 if (!GIDOrErr) 68 return GIDOrErr.takeError(); 69 M.GID = GIDOrErr.get(); 70 Expected<sys::fs::perms> AccessModeOrErr = OldMember.getAccessMode(); 71 if (!AccessModeOrErr) 72 return AccessModeOrErr.takeError(); 73 M.Perms = AccessModeOrErr.get(); 74 } 75 return std::move(M); 76 } 77 78 Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName, 79 bool Deterministic) { 80 sys::fs::file_status Status; 81 auto FDOrErr = sys::fs::openNativeFileForRead(FileName); 82 if (!FDOrErr) 83 return FDOrErr.takeError(); 84 sys::fs::file_t FD = *FDOrErr; 85 assert(FD != sys::fs::kInvalidFile); 86 87 if (auto EC = sys::fs::status(FD, Status)) 88 return errorCodeToError(EC); 89 90 // Opening a directory doesn't make sense. Let it fail. 91 // Linux cannot open directories with open(2), although 92 // cygwin and *bsd can. 93 if (Status.type() == sys::fs::file_type::directory_file) 94 return errorCodeToError(make_error_code(errc::is_a_directory)); 95 96 ErrorOr<std::unique_ptr<MemoryBuffer>> MemberBufferOrErr = 97 MemoryBuffer::getOpenFile(FD, FileName, Status.getSize(), false); 98 if (!MemberBufferOrErr) 99 return errorCodeToError(MemberBufferOrErr.getError()); 100 101 if (auto EC = sys::fs::closeFile(FD)) 102 return errorCodeToError(EC); 103 104 NewArchiveMember M; 105 M.Buf = std::move(*MemberBufferOrErr); 106 M.MemberName = M.Buf->getBufferIdentifier(); 107 if (!Deterministic) { 108 M.ModTime = std::chrono::time_point_cast<std::chrono::seconds>( 109 Status.getLastModificationTime()); 110 M.UID = Status.getUser(); 111 M.GID = Status.getGroup(); 112 M.Perms = Status.permissions(); 113 } 114 return std::move(M); 115 } 116 117 template <typename T> 118 static void printWithSpacePadding(raw_ostream &OS, T Data, unsigned Size) { 119 uint64_t OldPos = OS.tell(); 120 OS << Data; 121 unsigned SizeSoFar = OS.tell() - OldPos; 122 assert(SizeSoFar <= Size && "Data doesn't fit in Size"); 123 OS.indent(Size - SizeSoFar); 124 } 125 126 static bool isDarwin(object::Archive::Kind Kind) { 127 return Kind == object::Archive::K_DARWIN || 128 Kind == object::Archive::K_DARWIN64; 129 } 130 131 static bool isAIXBigArchive(object::Archive::Kind Kind) { 132 return Kind == object::Archive::K_AIXBIG; 133 } 134 135 static bool isBSDLike(object::Archive::Kind Kind) { 136 switch (Kind) { 137 case object::Archive::K_GNU: 138 case object::Archive::K_GNU64: 139 case object::Archive::K_AIXBIG: 140 return false; 141 case object::Archive::K_BSD: 142 case object::Archive::K_DARWIN: 143 case object::Archive::K_DARWIN64: 144 return true; 145 case object::Archive::K_COFF: 146 break; 147 } 148 llvm_unreachable("not supported for writting"); 149 } 150 151 template <class T> 152 static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) { 153 support::endian::write(Out, Val, 154 isBSDLike(Kind) ? support::little : support::big); 155 } 156 157 static void printRestOfMemberHeader( 158 raw_ostream &Out, const sys::TimePoint<std::chrono::seconds> &ModTime, 159 unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) { 160 printWithSpacePadding(Out, sys::toTimeT(ModTime), 12); 161 162 // The format has only 6 chars for uid and gid. Truncate if the provided 163 // values don't fit. 164 printWithSpacePadding(Out, UID % 1000000, 6); 165 printWithSpacePadding(Out, GID % 1000000, 6); 166 167 printWithSpacePadding(Out, format("%o", Perms), 8); 168 printWithSpacePadding(Out, Size, 10); 169 Out << "`\n"; 170 } 171 172 static void 173 printGNUSmallMemberHeader(raw_ostream &Out, StringRef Name, 174 const sys::TimePoint<std::chrono::seconds> &ModTime, 175 unsigned UID, unsigned GID, unsigned Perms, 176 uint64_t Size) { 177 printWithSpacePadding(Out, Twine(Name) + "/", 16); 178 printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); 179 } 180 181 static void 182 printBSDMemberHeader(raw_ostream &Out, uint64_t Pos, StringRef Name, 183 const sys::TimePoint<std::chrono::seconds> &ModTime, 184 unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) { 185 uint64_t PosAfterHeader = Pos + 60 + Name.size(); 186 // Pad so that even 64 bit object files are aligned. 187 unsigned Pad = offsetToAlignment(PosAfterHeader, Align(8)); 188 unsigned NameWithPadding = Name.size() + Pad; 189 printWithSpacePadding(Out, Twine("#1/") + Twine(NameWithPadding), 16); 190 printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, 191 NameWithPadding + Size); 192 Out << Name; 193 while (Pad--) 194 Out.write(uint8_t(0)); 195 } 196 197 static void 198 printBigArchiveMemberHeader(raw_ostream &Out, StringRef Name, 199 const sys::TimePoint<std::chrono::seconds> &ModTime, 200 unsigned UID, unsigned GID, unsigned Perms, 201 uint64_t Size, unsigned PrevOffset, 202 unsigned NextOffset) { 203 unsigned NameLen = Name.size(); 204 205 printWithSpacePadding(Out, Size, 20); // File member size 206 printWithSpacePadding(Out, NextOffset, 20); // Next member header offset 207 printWithSpacePadding(Out, PrevOffset, 20); // Previous member header offset 208 printWithSpacePadding(Out, sys::toTimeT(ModTime), 12); // File member date 209 // The big archive format has 12 chars for uid and gid. 210 printWithSpacePadding(Out, UID % 1000000000000, 12); // UID 211 printWithSpacePadding(Out, GID % 1000000000000, 12); // GID 212 printWithSpacePadding(Out, format("%o", Perms), 12); // Permission 213 printWithSpacePadding(Out, NameLen, 4); // Name length 214 if (NameLen) { 215 printWithSpacePadding(Out, Name, NameLen); // Name 216 if (NameLen % 2) 217 Out.write(uint8_t(0)); // Null byte padding 218 } 219 Out << "`\n"; // Terminator 220 } 221 222 static bool useStringTable(bool Thin, StringRef Name) { 223 return Thin || Name.size() >= 16 || Name.contains('/'); 224 } 225 226 static bool is64BitKind(object::Archive::Kind Kind) { 227 switch (Kind) { 228 case object::Archive::K_GNU: 229 case object::Archive::K_BSD: 230 case object::Archive::K_DARWIN: 231 case object::Archive::K_COFF: 232 return false; 233 case object::Archive::K_AIXBIG: 234 case object::Archive::K_DARWIN64: 235 case object::Archive::K_GNU64: 236 return true; 237 } 238 llvm_unreachable("not supported for writting"); 239 } 240 241 static void 242 printMemberHeader(raw_ostream &Out, uint64_t Pos, raw_ostream &StringTable, 243 StringMap<uint64_t> &MemberNames, object::Archive::Kind Kind, 244 bool Thin, const NewArchiveMember &M, 245 sys::TimePoint<std::chrono::seconds> ModTime, uint64_t Size) { 246 if (isBSDLike(Kind)) 247 return printBSDMemberHeader(Out, Pos, M.MemberName, ModTime, M.UID, M.GID, 248 M.Perms, Size); 249 if (!useStringTable(Thin, M.MemberName)) 250 return printGNUSmallMemberHeader(Out, M.MemberName, ModTime, M.UID, M.GID, 251 M.Perms, Size); 252 Out << '/'; 253 uint64_t NamePos; 254 if (Thin) { 255 NamePos = StringTable.tell(); 256 StringTable << M.MemberName << "/\n"; 257 } else { 258 auto Insertion = MemberNames.insert({M.MemberName, uint64_t(0)}); 259 if (Insertion.second) { 260 Insertion.first->second = StringTable.tell(); 261 StringTable << M.MemberName << "/\n"; 262 } 263 NamePos = Insertion.first->second; 264 } 265 printWithSpacePadding(Out, NamePos, 15); 266 printRestOfMemberHeader(Out, ModTime, M.UID, M.GID, M.Perms, Size); 267 } 268 269 namespace { 270 struct MemberData { 271 std::vector<unsigned> Symbols; 272 std::string Header; 273 StringRef Data; 274 StringRef Padding; 275 }; 276 } // namespace 277 278 static MemberData computeStringTable(StringRef Names) { 279 unsigned Size = Names.size(); 280 unsigned Pad = offsetToAlignment(Size, Align(2)); 281 std::string Header; 282 raw_string_ostream Out(Header); 283 printWithSpacePadding(Out, "//", 48); 284 printWithSpacePadding(Out, Size + Pad, 10); 285 Out << "`\n"; 286 Out.flush(); 287 return {{}, std::move(Header), Names, Pad ? "\n" : ""}; 288 } 289 290 static sys::TimePoint<std::chrono::seconds> now(bool Deterministic) { 291 using namespace std::chrono; 292 293 if (!Deterministic) 294 return time_point_cast<seconds>(system_clock::now()); 295 return sys::TimePoint<seconds>(); 296 } 297 298 static bool isArchiveSymbol(const object::BasicSymbolRef &S) { 299 Expected<uint32_t> SymFlagsOrErr = S.getFlags(); 300 if (!SymFlagsOrErr) 301 // TODO: Actually report errors helpfully. 302 report_fatal_error(SymFlagsOrErr.takeError()); 303 if (*SymFlagsOrErr & object::SymbolRef::SF_FormatSpecific) 304 return false; 305 if (!(*SymFlagsOrErr & object::SymbolRef::SF_Global)) 306 return false; 307 if (*SymFlagsOrErr & object::SymbolRef::SF_Undefined) 308 return false; 309 return true; 310 } 311 312 static void printNBits(raw_ostream &Out, object::Archive::Kind Kind, 313 uint64_t Val) { 314 if (is64BitKind(Kind)) 315 print<uint64_t>(Out, Kind, Val); 316 else 317 print<uint32_t>(Out, Kind, Val); 318 } 319 320 static uint64_t computeSymbolTableSize(object::Archive::Kind Kind, 321 uint64_t NumSyms, uint64_t OffsetSize, 322 StringRef StringTable, 323 uint32_t *Padding = nullptr) { 324 assert((OffsetSize == 4 || OffsetSize == 8) && "Unsupported OffsetSize"); 325 uint64_t Size = OffsetSize; // Number of entries 326 if (isBSDLike(Kind)) 327 Size += NumSyms * OffsetSize * 2; // Table 328 else 329 Size += NumSyms * OffsetSize; // Table 330 if (isBSDLike(Kind)) 331 Size += OffsetSize; // byte count 332 Size += StringTable.size(); 333 // ld64 expects the members to be 8-byte aligned for 64-bit content and at 334 // least 4-byte aligned for 32-bit content. Opt for the larger encoding 335 // uniformly. 336 // We do this for all bsd formats because it simplifies aligning members. 337 // For the big archive format, the symbol table is the last member, so there 338 // is no need to align. 339 uint32_t Pad = isAIXBigArchive(Kind) 340 ? 0 341 : offsetToAlignment(Size, Align(isBSDLike(Kind) ? 8 : 2)); 342 Size += Pad; 343 if (Padding) 344 *Padding = Pad; 345 return Size; 346 } 347 348 static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind, 349 bool Deterministic, uint64_t Size, 350 uint64_t PrevMemberOffset = 0) { 351 if (isBSDLike(Kind)) { 352 const char *Name = is64BitKind(Kind) ? "__.SYMDEF_64" : "__.SYMDEF"; 353 printBSDMemberHeader(Out, Out.tell(), Name, now(Deterministic), 0, 0, 0, 354 Size); 355 } else if (isAIXBigArchive(Kind)) { 356 printBigArchiveMemberHeader(Out, "", now(Deterministic), 0, 0, 357 0, Size, PrevMemberOffset, 0); 358 } else { 359 const char *Name = is64BitKind(Kind) ? "/SYM64" : ""; 360 printGNUSmallMemberHeader(Out, Name, now(Deterministic), 0, 0, 0, Size); 361 } 362 } 363 364 static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, 365 bool Deterministic, ArrayRef<MemberData> Members, 366 StringRef StringTable, 367 uint64_t PrevMemberOffset = 0) { 368 // We don't write a symbol table on an archive with no members -- except on 369 // Darwin, where the linker will abort unless the archive has a symbol table. 370 if (StringTable.empty() && !isDarwin(Kind)) 371 return; 372 373 unsigned NumSyms = 0; 374 for (const MemberData &M : Members) 375 NumSyms += M.Symbols.size(); 376 377 uint64_t OffsetSize = is64BitKind(Kind) ? 8 : 4; 378 uint32_t Pad; 379 uint64_t Size = computeSymbolTableSize(Kind, NumSyms, OffsetSize, StringTable, &Pad); 380 writeSymbolTableHeader(Out, Kind, Deterministic, Size, PrevMemberOffset); 381 382 uint64_t Pos = isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr) 383 : Out.tell() + Size; 384 385 if (isBSDLike(Kind)) 386 printNBits(Out, Kind, NumSyms * 2 * OffsetSize); 387 else 388 printNBits(Out, Kind, NumSyms); 389 390 for (const MemberData &M : Members) { 391 for (unsigned StringOffset : M.Symbols) { 392 if (isBSDLike(Kind)) 393 printNBits(Out, Kind, StringOffset); 394 printNBits(Out, Kind, Pos); // member offset 395 } 396 Pos += M.Header.size() + M.Data.size() + M.Padding.size(); 397 } 398 399 if (isBSDLike(Kind)) 400 // byte count of the string table 401 printNBits(Out, Kind, StringTable.size()); 402 Out << StringTable; 403 404 while (Pad--) 405 Out.write(uint8_t(0)); 406 } 407 408 static Expected<std::vector<unsigned>> 409 getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) { 410 std::vector<unsigned> Ret; 411 412 // In the scenario when LLVMContext is populated SymbolicFile will contain a 413 // reference to it, thus SymbolicFile should be destroyed first. 414 LLVMContext Context; 415 std::unique_ptr<object::SymbolicFile> Obj; 416 417 const file_magic Type = identify_magic(Buf.getBuffer()); 418 // Treat unsupported file types as having no symbols. 419 if (!object::SymbolicFile::isSymbolicFile(Type, &Context)) 420 return Ret; 421 if (Type == file_magic::bitcode) { 422 auto ObjOrErr = object::SymbolicFile::createSymbolicFile( 423 Buf, file_magic::bitcode, &Context); 424 if (!ObjOrErr) 425 return ObjOrErr.takeError(); 426 Obj = std::move(*ObjOrErr); 427 } else { 428 auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf); 429 if (!ObjOrErr) 430 return ObjOrErr.takeError(); 431 Obj = std::move(*ObjOrErr); 432 } 433 434 HasObject = true; 435 for (const object::BasicSymbolRef &S : Obj->symbols()) { 436 if (!isArchiveSymbol(S)) 437 continue; 438 Ret.push_back(SymNames.tell()); 439 if (Error E = S.printName(SymNames)) 440 return std::move(E); 441 SymNames << '\0'; 442 } 443 return Ret; 444 } 445 446 static Expected<std::vector<MemberData>> 447 computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, 448 object::Archive::Kind Kind, bool Thin, bool Deterministic, 449 bool NeedSymbols, ArrayRef<NewArchiveMember> NewMembers) { 450 static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; 451 452 uint64_t Pos = 453 isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr) : 0; 454 455 std::vector<MemberData> Ret; 456 bool HasObject = false; 457 458 // Deduplicate long member names in the string table and reuse earlier name 459 // offsets. This especially saves space for COFF Import libraries where all 460 // members have the same name. 461 StringMap<uint64_t> MemberNames; 462 463 // UniqueTimestamps is a special case to improve debugging on Darwin: 464 // 465 // The Darwin linker does not link debug info into the final 466 // binary. Instead, it emits entries of type N_OSO in in the output 467 // binary's symbol table, containing references to the linked-in 468 // object files. Using that reference, the debugger can read the 469 // debug data directly from the object files. Alternatively, an 470 // invocation of 'dsymutil' will link the debug data from the object 471 // files into a dSYM bundle, which can be loaded by the debugger, 472 // instead of the object files. 473 // 474 // For an object file, the N_OSO entries contain the absolute path 475 // path to the file, and the file's timestamp. For an object 476 // included in an archive, the path is formatted like 477 // "/absolute/path/to/archive.a(member.o)", and the timestamp is the 478 // archive member's timestamp, rather than the archive's timestamp. 479 // 480 // However, this doesn't always uniquely identify an object within 481 // an archive -- an archive file can have multiple entries with the 482 // same filename. (This will happen commonly if the original object 483 // files started in different directories.) The only way they get 484 // distinguished, then, is via the timestamp. But this process is 485 // unable to find the correct object file in the archive when there 486 // are two files of the same name and timestamp. 487 // 488 // Additionally, timestamp==0 is treated specially, and causes the 489 // timestamp to be ignored as a match criteria. 490 // 491 // That will "usually" work out okay when creating an archive not in 492 // deterministic timestamp mode, because the objects will probably 493 // have been created at different timestamps. 494 // 495 // To ameliorate this problem, in deterministic archive mode (which 496 // is the default), on Darwin we will emit a unique non-zero 497 // timestamp for each entry with a duplicated name. This is still 498 // deterministic: the only thing affecting that timestamp is the 499 // order of the files in the resultant archive. 500 // 501 // See also the functions that handle the lookup: 502 // in lldb: ObjectContainerBSDArchive::Archive::FindObject() 503 // in llvm/tools/dsymutil: BinaryHolder::GetArchiveMemberBuffers(). 504 bool UniqueTimestamps = Deterministic && isDarwin(Kind); 505 std::map<StringRef, unsigned> FilenameCount; 506 if (UniqueTimestamps) { 507 for (const NewArchiveMember &M : NewMembers) 508 FilenameCount[M.MemberName]++; 509 for (auto &Entry : FilenameCount) 510 Entry.second = Entry.second > 1 ? 1 : 0; 511 } 512 513 // The big archive format needs to know the offset of the previous member 514 // header. 515 unsigned PrevOffset = 0; 516 for (const NewArchiveMember &M : NewMembers) { 517 std::string Header; 518 raw_string_ostream Out(Header); 519 520 MemoryBufferRef Buf = M.Buf->getMemBufferRef(); 521 StringRef Data = Thin ? "" : Buf.getBuffer(); 522 523 // ld64 expects the members to be 8-byte aligned for 64-bit content and at 524 // least 4-byte aligned for 32-bit content. Opt for the larger encoding 525 // uniformly. This matches the behaviour with cctools and ensures that ld64 526 // is happy with archives that we generate. 527 unsigned MemberPadding = 528 isDarwin(Kind) ? offsetToAlignment(Data.size(), Align(8)) : 0; 529 unsigned TailPadding = 530 offsetToAlignment(Data.size() + MemberPadding, Align(2)); 531 StringRef Padding = StringRef(PaddingData, MemberPadding + TailPadding); 532 533 sys::TimePoint<std::chrono::seconds> ModTime; 534 if (UniqueTimestamps) 535 // Increment timestamp for each file of a given name. 536 ModTime = sys::toTimePoint(FilenameCount[M.MemberName]++); 537 else 538 ModTime = M.ModTime; 539 540 uint64_t Size = Buf.getBufferSize() + MemberPadding; 541 if (Size > object::Archive::MaxMemberSize) { 542 std::string StringMsg = 543 "File " + M.MemberName.str() + " exceeds size limit"; 544 return make_error<object::GenericBinaryError>( 545 std::move(StringMsg), object::object_error::parse_failed); 546 } 547 548 if (isAIXBigArchive(Kind)) { 549 unsigned NextOffset = Pos + sizeof(object::BigArMemHdrType) + 550 alignTo(M.MemberName.size(), 2) + alignTo(Size, 2); 551 printBigArchiveMemberHeader(Out, M.MemberName, ModTime, M.UID, M.GID, 552 M.Perms, Size, PrevOffset, NextOffset); 553 PrevOffset = Pos; 554 } else { 555 printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, M, 556 ModTime, Size); 557 } 558 Out.flush(); 559 560 std::vector<unsigned> Symbols; 561 if (NeedSymbols) { 562 Expected<std::vector<unsigned>> SymbolsOrErr = 563 getSymbols(Buf, SymNames, HasObject); 564 if (auto E = SymbolsOrErr.takeError()) 565 return std::move(E); 566 Symbols = std::move(*SymbolsOrErr); 567 } 568 569 Pos += Header.size() + Data.size() + Padding.size(); 570 Ret.push_back({std::move(Symbols), std::move(Header), Data, Padding}); 571 } 572 // If there are no symbols, emit an empty symbol table, to satisfy Solaris 573 // tools, older versions of which expect a symbol table in a non-empty 574 // archive, regardless of whether there are any symbols in it. 575 if (HasObject && SymNames.tell() == 0) 576 SymNames << '\0' << '\0' << '\0'; 577 return Ret; 578 } 579 580 namespace llvm { 581 582 static ErrorOr<SmallString<128>> canonicalizePath(StringRef P) { 583 SmallString<128> Ret = P; 584 std::error_code Err = sys::fs::make_absolute(Ret); 585 if (Err) 586 return Err; 587 sys::path::remove_dots(Ret, /*removedotdot*/ true); 588 return Ret; 589 } 590 591 // Compute the relative path from From to To. 592 Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To) { 593 ErrorOr<SmallString<128>> PathToOrErr = canonicalizePath(To); 594 ErrorOr<SmallString<128>> DirFromOrErr = canonicalizePath(From); 595 if (!PathToOrErr || !DirFromOrErr) 596 return errorCodeToError(std::error_code(errno, std::generic_category())); 597 598 const SmallString<128> &PathTo = *PathToOrErr; 599 const SmallString<128> &DirFrom = sys::path::parent_path(*DirFromOrErr); 600 601 // Can't construct a relative path between different roots 602 if (sys::path::root_name(PathTo) != sys::path::root_name(DirFrom)) 603 return sys::path::convert_to_slash(PathTo); 604 605 // Skip common prefixes 606 auto FromTo = 607 std::mismatch(sys::path::begin(DirFrom), sys::path::end(DirFrom), 608 sys::path::begin(PathTo)); 609 auto FromI = FromTo.first; 610 auto ToI = FromTo.second; 611 612 // Construct relative path 613 SmallString<128> Relative; 614 for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI) 615 sys::path::append(Relative, sys::path::Style::posix, ".."); 616 617 for (auto ToE = sys::path::end(PathTo); ToI != ToE; ++ToI) 618 sys::path::append(Relative, sys::path::Style::posix, *ToI); 619 620 return std::string(Relative.str()); 621 } 622 623 static Error writeArchiveToStream(raw_ostream &Out, 624 ArrayRef<NewArchiveMember> NewMembers, 625 bool WriteSymtab, object::Archive::Kind Kind, 626 bool Deterministic, bool Thin) { 627 assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode"); 628 629 SmallString<0> SymNamesBuf; 630 raw_svector_ostream SymNames(SymNamesBuf); 631 SmallString<0> StringTableBuf; 632 raw_svector_ostream StringTable(StringTableBuf); 633 634 Expected<std::vector<MemberData>> DataOrErr = 635 computeMemberData(StringTable, SymNames, Kind, Thin, Deterministic, 636 WriteSymtab, NewMembers); 637 if (Error E = DataOrErr.takeError()) 638 return E; 639 std::vector<MemberData> &Data = *DataOrErr; 640 641 if (!StringTableBuf.empty() && !isAIXBigArchive(Kind)) 642 Data.insert(Data.begin(), computeStringTable(StringTableBuf)); 643 644 // We would like to detect if we need to switch to a 64-bit symbol table. 645 uint64_t LastMemberEndOffset = 646 isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr) : 8; 647 uint64_t LastMemberHeaderOffset = LastMemberEndOffset; 648 uint64_t NumSyms = 0; 649 for (const auto &M : Data) { 650 // Record the start of the member's offset 651 LastMemberHeaderOffset = LastMemberEndOffset; 652 // Account for the size of each part associated with the member. 653 LastMemberEndOffset += M.Header.size() + M.Data.size() + M.Padding.size(); 654 NumSyms += M.Symbols.size(); 655 } 656 657 // The symbol table is put at the end of the big archive file. The symbol 658 // table is at the start of the archive file for other archive formats. 659 if (WriteSymtab && !isAIXBigArchive(Kind)) { 660 // We assume 32-bit offsets to see if 32-bit symbols are possible or not. 661 uint64_t SymtabSize = computeSymbolTableSize(Kind, NumSyms, 4, SymNamesBuf); 662 auto computeSymbolTableHeaderSize = 663 [=] { 664 SmallString<0> TmpBuf; 665 raw_svector_ostream Tmp(TmpBuf); 666 writeSymbolTableHeader(Tmp, Kind, Deterministic, SymtabSize); 667 return TmpBuf.size(); 668 }; 669 LastMemberHeaderOffset += computeSymbolTableHeaderSize() + SymtabSize; 670 671 // The SYM64 format is used when an archive's member offsets are larger than 672 // 32-bits can hold. The need for this shift in format is detected by 673 // writeArchive. To test this we need to generate a file with a member that 674 // has an offset larger than 32-bits but this demands a very slow test. To 675 // speed the test up we use this environment variable to pretend like the 676 // cutoff happens before 32-bits and instead happens at some much smaller 677 // value. 678 uint64_t Sym64Threshold = 1ULL << 32; 679 const char *Sym64Env = std::getenv("SYM64_THRESHOLD"); 680 if (Sym64Env) 681 StringRef(Sym64Env).getAsInteger(10, Sym64Threshold); 682 683 // If LastMemberHeaderOffset isn't going to fit in a 32-bit varible we need 684 // to switch to 64-bit. Note that the file can be larger than 4GB as long as 685 // the last member starts before the 4GB offset. 686 if (LastMemberHeaderOffset >= Sym64Threshold) { 687 if (Kind == object::Archive::K_DARWIN) 688 Kind = object::Archive::K_DARWIN64; 689 else 690 Kind = object::Archive::K_GNU64; 691 } 692 } 693 694 if (Thin) 695 Out << "!<thin>\n"; 696 else if (isAIXBigArchive(Kind)) 697 Out << "<bigaf>\n"; 698 else 699 Out << "!<arch>\n"; 700 701 if (!isAIXBigArchive(Kind)) { 702 if (WriteSymtab) 703 writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf); 704 for (const MemberData &M : Data) 705 Out << M.Header << M.Data << M.Padding; 706 } else { 707 // For the big archive (AIX) format, compute a table of member names and 708 // offsets, used in the member table. 709 uint64_t MemberTableNameStrTblSize = 0; 710 std::vector<size_t> MemberOffsets; 711 std::vector<StringRef> MemberNames; 712 // Loop across object to find offset and names. 713 uint64_t MemberEndOffset = sizeof(object::BigArchive::FixLenHdr); 714 for (size_t I = 0, Size = NewMembers.size(); I != Size; ++I) { 715 const NewArchiveMember &Member = NewMembers[I]; 716 MemberTableNameStrTblSize += Member.MemberName.size() + 1; 717 MemberOffsets.push_back(MemberEndOffset); 718 MemberNames.push_back(Member.MemberName); 719 // File member name ended with "`\n". The length is included in 720 // BigArMemHdrType. 721 MemberEndOffset += sizeof(object::BigArMemHdrType) + 722 alignTo(Data[I].Data.size(), 2) + 723 alignTo(Member.MemberName.size(), 2); 724 } 725 726 // AIX member table size. 727 unsigned MemberTableSize = 20 + // Number of members field 728 20 * MemberOffsets.size() + 729 MemberTableNameStrTblSize; 730 731 unsigned GlobalSymbolOffset = 732 (WriteSymtab && NumSyms > 0) 733 ? LastMemberEndOffset + 734 alignTo(sizeof(object::BigArMemHdrType) + MemberTableSize, 2) 735 : 0; 736 737 // Fixed Sized Header. 738 printWithSpacePadding(Out, NewMembers.size() ? LastMemberEndOffset : 0, 739 20); // Offset to member table 740 // If there are no file members in the archive, there will be no global 741 // symbol table. 742 printWithSpacePadding(Out, NewMembers.size() ? GlobalSymbolOffset : 0, 20); 743 printWithSpacePadding( 744 Out, 0, 745 20); // Offset to 64 bits global symbol table - Not supported yet 746 printWithSpacePadding( 747 Out, NewMembers.size() ? sizeof(object::BigArchive::FixLenHdr) : 0, 748 20); // Offset to first archive member 749 printWithSpacePadding(Out, NewMembers.size() ? LastMemberHeaderOffset : 0, 750 20); // Offset to last archive member 751 printWithSpacePadding( 752 Out, 0, 753 20); // Offset to first member of free list - Not supported yet 754 755 for (const MemberData &M : Data) { 756 Out << M.Header << M.Data; 757 if (M.Data.size() % 2) 758 Out << '\0'; 759 } 760 761 if (NewMembers.size()) { 762 // Member table. 763 printBigArchiveMemberHeader(Out, "", sys::toTimePoint(0), 0, 0, 0, 764 MemberTableSize, LastMemberHeaderOffset, 765 GlobalSymbolOffset); 766 printWithSpacePadding(Out, MemberOffsets.size(), 20); // Number of members 767 for (uint64_t MemberOffset : MemberOffsets) 768 printWithSpacePadding(Out, MemberOffset, 769 20); // Offset to member file header. 770 for (StringRef MemberName : MemberNames) 771 Out << MemberName << '\0'; // Member file name, null byte padding. 772 773 if (MemberTableNameStrTblSize % 2) 774 Out << '\0'; // Name table must be tail padded to an even number of 775 // bytes. 776 777 if (WriteSymtab && NumSyms > 0) 778 writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf, 779 LastMemberEndOffset); 780 } 781 } 782 Out.flush(); 783 return Error::success(); 784 } 785 786 Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers, 787 bool WriteSymtab, object::Archive::Kind Kind, 788 bool Deterministic, bool Thin, 789 std::unique_ptr<MemoryBuffer> OldArchiveBuf) { 790 Expected<sys::fs::TempFile> Temp = 791 sys::fs::TempFile::create(ArcName + ".temp-archive-%%%%%%%.a"); 792 if (!Temp) 793 return Temp.takeError(); 794 raw_fd_ostream Out(Temp->FD, false); 795 796 if (Error E = writeArchiveToStream(Out, NewMembers, WriteSymtab, Kind, 797 Deterministic, Thin)) { 798 if (Error DiscardError = Temp->discard()) 799 return joinErrors(std::move(E), std::move(DiscardError)); 800 return E; 801 } 802 803 // At this point, we no longer need whatever backing memory 804 // was used to generate the NewMembers. On Windows, this buffer 805 // could be a mapped view of the file we want to replace (if 806 // we're updating an existing archive, say). In that case, the 807 // rename would still succeed, but it would leave behind a 808 // temporary file (actually the original file renamed) because 809 // a file cannot be deleted while there's a handle open on it, 810 // only renamed. So by freeing this buffer, this ensures that 811 // the last open handle on the destination file, if any, is 812 // closed before we attempt to rename. 813 OldArchiveBuf.reset(); 814 815 return Temp->keep(ArcName); 816 } 817 818 Expected<std::unique_ptr<MemoryBuffer>> 819 writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers, bool WriteSymtab, 820 object::Archive::Kind Kind, bool Deterministic, 821 bool Thin) { 822 SmallVector<char, 0> ArchiveBufferVector; 823 raw_svector_ostream ArchiveStream(ArchiveBufferVector); 824 825 if (Error E = writeArchiveToStream(ArchiveStream, NewMembers, WriteSymtab, 826 Kind, Deterministic, Thin)) 827 return std::move(E); 828 829 return std::make_unique<SmallVectorMemoryBuffer>( 830 std::move(ArchiveBufferVector), /*RequiresNullTerminator=*/false); 831 } 832 833 } // namespace llvm 834