1 //===- ArchiveWriter.cpp - ar File Format implementation --------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the writeArchive function. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Object/ArchiveWriter.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/BinaryFormat/Magic.h" 18 #include "llvm/IR/LLVMContext.h" 19 #include "llvm/Object/Archive.h" 20 #include "llvm/Object/ObjectFile.h" 21 #include "llvm/Object/SymbolicFile.h" 22 #include "llvm/Support/EndianStream.h" 23 #include "llvm/Support/Errc.h" 24 #include "llvm/Support/ErrorHandling.h" 25 #include "llvm/Support/Format.h" 26 #include "llvm/Support/Path.h" 27 #include "llvm/Support/ToolOutputFile.h" 28 #include "llvm/Support/raw_ostream.h" 29 30 #if !defined(_MSC_VER) && !defined(__MINGW32__) 31 #include <unistd.h> 32 #else 33 #include <io.h> 34 #endif 35 36 using namespace llvm; 37 38 NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) 39 : Buf(MemoryBuffer::getMemBuffer(BufRef, false)), 40 MemberName(BufRef.getBufferIdentifier()) {} 41 42 Expected<NewArchiveMember> 43 NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, 44 bool Deterministic) { 45 Expected<llvm::MemoryBufferRef> BufOrErr = OldMember.getMemoryBufferRef(); 46 if (!BufOrErr) 47 return BufOrErr.takeError(); 48 49 NewArchiveMember M; 50 assert(M.IsNew == false); 51 M.Buf = MemoryBuffer::getMemBuffer(*BufOrErr, false); 52 M.MemberName = M.Buf->getBufferIdentifier(); 53 if (!Deterministic) { 54 auto ModTimeOrErr = OldMember.getLastModified(); 55 if (!ModTimeOrErr) 56 return ModTimeOrErr.takeError(); 57 M.ModTime = ModTimeOrErr.get(); 58 Expected<unsigned> UIDOrErr = OldMember.getUID(); 59 if (!UIDOrErr) 60 return UIDOrErr.takeError(); 61 M.UID = UIDOrErr.get(); 62 Expected<unsigned> GIDOrErr = OldMember.getGID(); 63 if (!GIDOrErr) 64 return GIDOrErr.takeError(); 65 M.GID = GIDOrErr.get(); 66 Expected<sys::fs::perms> AccessModeOrErr = OldMember.getAccessMode(); 67 if (!AccessModeOrErr) 68 return AccessModeOrErr.takeError(); 69 M.Perms = AccessModeOrErr.get(); 70 } 71 return std::move(M); 72 } 73 74 Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName, 75 bool Deterministic) { 76 sys::fs::file_status Status; 77 int FD; 78 if (auto EC = sys::fs::openFileForRead(FileName, FD)) 79 return errorCodeToError(EC); 80 assert(FD != -1); 81 82 if (auto EC = sys::fs::status(FD, Status)) 83 return errorCodeToError(EC); 84 85 // Opening a directory doesn't make sense. Let it fail. 86 // Linux cannot open directories with open(2), although 87 // cygwin and *bsd can. 88 if (Status.type() == sys::fs::file_type::directory_file) 89 return errorCodeToError(make_error_code(errc::is_a_directory)); 90 91 ErrorOr<std::unique_ptr<MemoryBuffer>> MemberBufferOrErr = 92 MemoryBuffer::getOpenFile(FD, FileName, Status.getSize(), false); 93 if (!MemberBufferOrErr) 94 return errorCodeToError(MemberBufferOrErr.getError()); 95 96 if (close(FD) != 0) 97 return errorCodeToError(std::error_code(errno, std::generic_category())); 98 99 NewArchiveMember M; 100 M.IsNew = true; 101 M.Buf = std::move(*MemberBufferOrErr); 102 M.MemberName = M.Buf->getBufferIdentifier(); 103 if (!Deterministic) { 104 M.ModTime = std::chrono::time_point_cast<std::chrono::seconds>( 105 Status.getLastModificationTime()); 106 M.UID = Status.getUser(); 107 M.GID = Status.getGroup(); 108 M.Perms = Status.permissions(); 109 } 110 return std::move(M); 111 } 112 113 template <typename T> 114 static void printWithSpacePadding(raw_ostream &OS, T Data, unsigned Size) { 115 uint64_t OldPos = OS.tell(); 116 OS << Data; 117 unsigned SizeSoFar = OS.tell() - OldPos; 118 assert(SizeSoFar <= Size && "Data doesn't fit in Size"); 119 OS.indent(Size - SizeSoFar); 120 } 121 122 static bool isBSDLike(object::Archive::Kind Kind) { 123 switch (Kind) { 124 case object::Archive::K_GNU: 125 case object::Archive::K_GNU64: 126 return false; 127 case object::Archive::K_BSD: 128 case object::Archive::K_DARWIN: 129 return true; 130 case object::Archive::K_DARWIN64: 131 case object::Archive::K_COFF: 132 break; 133 } 134 llvm_unreachable("not supported for writting"); 135 } 136 137 template <class T> 138 static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) { 139 if (isBSDLike(Kind)) 140 support::endian::Writer<support::little>(Out).write(Val); 141 else 142 support::endian::Writer<support::big>(Out).write(Val); 143 } 144 145 static void printRestOfMemberHeader( 146 raw_ostream &Out, const sys::TimePoint<std::chrono::seconds> &ModTime, 147 unsigned UID, unsigned GID, unsigned Perms, unsigned Size) { 148 printWithSpacePadding(Out, sys::toTimeT(ModTime), 12); 149 150 // The format has only 6 chars for uid and gid. Truncate if the provided 151 // values don't fit. 152 printWithSpacePadding(Out, UID % 1000000, 6); 153 printWithSpacePadding(Out, GID % 1000000, 6); 154 155 printWithSpacePadding(Out, format("%o", Perms), 8); 156 printWithSpacePadding(Out, Size, 10); 157 Out << "`\n"; 158 } 159 160 static void 161 printGNUSmallMemberHeader(raw_ostream &Out, StringRef Name, 162 const sys::TimePoint<std::chrono::seconds> &ModTime, 163 unsigned UID, unsigned GID, unsigned Perms, 164 unsigned Size) { 165 printWithSpacePadding(Out, Twine(Name) + "/", 16); 166 printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); 167 } 168 169 static void 170 printBSDMemberHeader(raw_ostream &Out, uint64_t Pos, StringRef Name, 171 const sys::TimePoint<std::chrono::seconds> &ModTime, 172 unsigned UID, unsigned GID, unsigned Perms, 173 unsigned Size) { 174 uint64_t PosAfterHeader = Pos + 60 + Name.size(); 175 // Pad so that even 64 bit object files are aligned. 176 unsigned Pad = OffsetToAlignment(PosAfterHeader, 8); 177 unsigned NameWithPadding = Name.size() + Pad; 178 printWithSpacePadding(Out, Twine("#1/") + Twine(NameWithPadding), 16); 179 printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, 180 NameWithPadding + Size); 181 Out << Name; 182 while (Pad--) 183 Out.write(uint8_t(0)); 184 } 185 186 static bool useStringTable(bool Thin, StringRef Name) { 187 return Thin || Name.size() >= 16 || Name.contains('/'); 188 } 189 190 // Compute the relative path from From to To. 191 static std::string computeRelativePath(StringRef From, StringRef To) { 192 if (sys::path::is_absolute(From) || sys::path::is_absolute(To)) 193 return To; 194 195 StringRef DirFrom = sys::path::parent_path(From); 196 auto FromI = sys::path::begin(DirFrom); 197 auto ToI = sys::path::begin(To); 198 while (*FromI == *ToI) { 199 ++FromI; 200 ++ToI; 201 } 202 203 SmallString<128> Relative; 204 for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI) 205 sys::path::append(Relative, ".."); 206 207 for (auto ToE = sys::path::end(To); ToI != ToE; ++ToI) 208 sys::path::append(Relative, *ToI); 209 210 #ifdef LLVM_ON_WIN32 211 // Replace backslashes with slashes so that the path is portable between *nix 212 // and Windows. 213 std::replace(Relative.begin(), Relative.end(), '\\', '/'); 214 #endif 215 216 return Relative.str(); 217 } 218 219 static bool is64BitKind(object::Archive::Kind Kind) { 220 switch (Kind) { 221 case object::Archive::K_GNU: 222 case object::Archive::K_BSD: 223 case object::Archive::K_DARWIN: 224 case object::Archive::K_COFF: 225 return false; 226 case object::Archive::K_DARWIN64: 227 case object::Archive::K_GNU64: 228 return true; 229 } 230 llvm_unreachable("not supported for writting"); 231 } 232 233 static void addToStringTable(raw_ostream &Out, StringRef ArcName, 234 const NewArchiveMember &M, bool Thin) { 235 StringRef ID = M.Buf->getBufferIdentifier(); 236 if (Thin) { 237 if (M.IsNew) 238 Out << computeRelativePath(ArcName, ID); 239 else 240 Out << ID; 241 } else 242 Out << M.MemberName; 243 Out << "/\n"; 244 } 245 246 static void printMemberHeader(raw_ostream &Out, uint64_t Pos, 247 raw_ostream &StringTable, 248 object::Archive::Kind Kind, bool Thin, 249 StringRef ArcName, const NewArchiveMember &M, 250 unsigned Size) { 251 if (isBSDLike(Kind)) 252 return printBSDMemberHeader(Out, Pos, M.MemberName, M.ModTime, M.UID, M.GID, 253 M.Perms, Size); 254 if (!useStringTable(Thin, M.MemberName)) 255 return printGNUSmallMemberHeader(Out, M.MemberName, M.ModTime, M.UID, M.GID, 256 M.Perms, Size); 257 Out << '/'; 258 uint64_t NamePos = StringTable.tell(); 259 addToStringTable(StringTable, ArcName, M, Thin); 260 printWithSpacePadding(Out, NamePos, 15); 261 printRestOfMemberHeader(Out, M.ModTime, M.UID, M.GID, M.Perms, Size); 262 } 263 264 namespace { 265 struct MemberData { 266 std::vector<unsigned> Symbols; 267 std::string Header; 268 StringRef Data; 269 StringRef Padding; 270 }; 271 } // namespace 272 273 static MemberData computeStringTable(StringRef Names) { 274 unsigned Size = Names.size(); 275 unsigned Pad = OffsetToAlignment(Size, 2); 276 std::string Header; 277 raw_string_ostream Out(Header); 278 printWithSpacePadding(Out, "//", 48); 279 printWithSpacePadding(Out, Size + Pad, 10); 280 Out << "`\n"; 281 Out.flush(); 282 return {{}, std::move(Header), Names, Pad ? "\n" : ""}; 283 } 284 285 static sys::TimePoint<std::chrono::seconds> now(bool Deterministic) { 286 using namespace std::chrono; 287 288 if (!Deterministic) 289 return time_point_cast<seconds>(system_clock::now()); 290 return sys::TimePoint<seconds>(); 291 } 292 293 static bool isArchiveSymbol(const object::BasicSymbolRef &S) { 294 uint32_t Symflags = S.getFlags(); 295 if (Symflags & object::SymbolRef::SF_FormatSpecific) 296 return false; 297 if (!(Symflags & object::SymbolRef::SF_Global)) 298 return false; 299 if (Symflags & object::SymbolRef::SF_Undefined && 300 !(Symflags & object::SymbolRef::SF_Indirect)) 301 return false; 302 return true; 303 } 304 305 static void printNBits(raw_ostream &Out, object::Archive::Kind Kind, 306 uint64_t Val) { 307 if (is64BitKind(Kind)) 308 print<uint64_t>(Out, Kind, Val); 309 else 310 print<uint32_t>(Out, Kind, Val); 311 } 312 313 static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, 314 bool Deterministic, ArrayRef<MemberData> Members, 315 StringRef StringTable) { 316 if (StringTable.empty()) 317 return; 318 319 unsigned NumSyms = 0; 320 for (const MemberData &M : Members) 321 NumSyms += M.Symbols.size(); 322 323 unsigned Size = 0; 324 Size += is64BitKind(Kind) ? 8 : 4; // Number of entries 325 if (isBSDLike(Kind)) 326 Size += NumSyms * 8; // Table 327 else if (is64BitKind(Kind)) 328 Size += NumSyms * 8; // Table 329 else 330 Size += NumSyms * 4; // Table 331 if (isBSDLike(Kind)) 332 Size += 4; // byte count 333 Size += StringTable.size(); 334 // ld64 expects the members to be 8-byte aligned for 64-bit content and at 335 // least 4-byte aligned for 32-bit content. Opt for the larger encoding 336 // uniformly. 337 // We do this for all bsd formats because it simplifies aligning members. 338 unsigned Alignment = isBSDLike(Kind) ? 8 : 2; 339 unsigned Pad = OffsetToAlignment(Size, Alignment); 340 Size += Pad; 341 342 if (isBSDLike(Kind)) 343 printBSDMemberHeader(Out, Out.tell(), "__.SYMDEF", now(Deterministic), 0, 0, 344 0, Size); 345 else if (is64BitKind(Kind)) 346 printGNUSmallMemberHeader(Out, "/SYM64", now(Deterministic), 0, 0, 0, Size); 347 else 348 printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, Size); 349 350 uint64_t Pos = Out.tell() + Size; 351 352 if (isBSDLike(Kind)) 353 print<uint32_t>(Out, Kind, NumSyms * 8); 354 else 355 printNBits(Out, Kind, NumSyms); 356 357 for (const MemberData &M : Members) { 358 for (unsigned StringOffset : M.Symbols) { 359 if (isBSDLike(Kind)) 360 print<uint32_t>(Out, Kind, StringOffset); 361 printNBits(Out, Kind, Pos); // member offset 362 } 363 Pos += M.Header.size() + M.Data.size() + M.Padding.size(); 364 } 365 366 if (isBSDLike(Kind)) 367 // byte count of the string table 368 print<uint32_t>(Out, Kind, StringTable.size()); 369 Out << StringTable; 370 371 while (Pad--) 372 Out.write(uint8_t(0)); 373 } 374 375 static Expected<std::vector<unsigned>> 376 getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) { 377 std::vector<unsigned> Ret; 378 LLVMContext Context; 379 380 Expected<std::unique_ptr<object::SymbolicFile>> ObjOrErr = 381 object::SymbolicFile::createSymbolicFile(Buf, llvm::file_magic::unknown, 382 &Context); 383 if (!ObjOrErr) { 384 // FIXME: check only for "not an object file" errors. 385 consumeError(ObjOrErr.takeError()); 386 return Ret; 387 } 388 389 HasObject = true; 390 object::SymbolicFile &Obj = *ObjOrErr.get(); 391 for (const object::BasicSymbolRef &S : Obj.symbols()) { 392 if (!isArchiveSymbol(S)) 393 continue; 394 Ret.push_back(SymNames.tell()); 395 if (auto EC = S.printName(SymNames)) 396 return errorCodeToError(EC); 397 SymNames << '\0'; 398 } 399 return Ret; 400 } 401 402 static Expected<std::vector<MemberData>> 403 computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, 404 object::Archive::Kind Kind, bool Thin, StringRef ArcName, 405 ArrayRef<NewArchiveMember> NewMembers) { 406 static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; 407 408 // This ignores the symbol table, but we only need the value mod 8 and the 409 // symbol table is aligned to be a multiple of 8 bytes 410 uint64_t Pos = 0; 411 412 std::vector<MemberData> Ret; 413 bool HasObject = false; 414 for (const NewArchiveMember &M : NewMembers) { 415 std::string Header; 416 raw_string_ostream Out(Header); 417 418 MemoryBufferRef Buf = M.Buf->getMemBufferRef(); 419 StringRef Data = Thin ? "" : Buf.getBuffer(); 420 421 // ld64 expects the members to be 8-byte aligned for 64-bit content and at 422 // least 4-byte aligned for 32-bit content. Opt for the larger encoding 423 // uniformly. This matches the behaviour with cctools and ensures that ld64 424 // is happy with archives that we generate. 425 unsigned MemberPadding = Kind == object::Archive::K_DARWIN 426 ? OffsetToAlignment(Data.size(), 8) 427 : 0; 428 unsigned TailPadding = OffsetToAlignment(Data.size() + MemberPadding, 2); 429 StringRef Padding = StringRef(PaddingData, MemberPadding + TailPadding); 430 431 printMemberHeader(Out, Pos, StringTable, Kind, Thin, ArcName, M, 432 Buf.getBufferSize() + MemberPadding); 433 Out.flush(); 434 435 Expected<std::vector<unsigned>> Symbols = 436 getSymbols(Buf, SymNames, HasObject); 437 if (auto E = Symbols.takeError()) 438 return std::move(E); 439 440 Pos += Header.size() + Data.size() + Padding.size(); 441 Ret.push_back({std::move(*Symbols), std::move(Header), Data, Padding}); 442 } 443 // If there are no symbols, emit an empty symbol table, to satisfy Solaris 444 // tools, older versions of which expect a symbol table in a non-empty 445 // archive, regardless of whether there are any symbols in it. 446 if (HasObject && SymNames.tell() == 0) 447 SymNames << '\0' << '\0' << '\0'; 448 return Ret; 449 } 450 451 Error llvm::writeArchive(StringRef ArcName, 452 ArrayRef<NewArchiveMember> NewMembers, 453 bool WriteSymtab, object::Archive::Kind Kind, 454 bool Deterministic, bool Thin, 455 std::unique_ptr<MemoryBuffer> OldArchiveBuf) { 456 assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode"); 457 458 SmallString<0> SymNamesBuf; 459 raw_svector_ostream SymNames(SymNamesBuf); 460 SmallString<0> StringTableBuf; 461 raw_svector_ostream StringTable(StringTableBuf); 462 463 Expected<std::vector<MemberData>> DataOrErr = 464 computeMemberData(StringTable, SymNames, Kind, Thin, ArcName, NewMembers); 465 if (Error E = DataOrErr.takeError()) 466 return E; 467 std::vector<MemberData> &Data = *DataOrErr; 468 469 if (!StringTableBuf.empty()) 470 Data.insert(Data.begin(), computeStringTable(StringTableBuf)); 471 472 // We would like to detect if we need to switch to a 64-bit symbol table. 473 if (WriteSymtab) { 474 uint64_t MaxOffset = 0; 475 uint64_t LastOffset = MaxOffset; 476 for (const auto& M : Data) { 477 // Record the start of the member's offset 478 LastOffset = MaxOffset; 479 // Account for the size of each part associated with the member. 480 MaxOffset += M.Header.size() + M.Data.size() + M.Padding.size(); 481 // We assume 32-bit symbols to see if 32-bit symbols are possible or not. 482 MaxOffset += M.Symbols.size() * 4; 483 } 484 // If LastOffset isn't going to fit in a 32-bit varible we need to switch 485 // to 64-bit. Note that the file can be larger than 4GB as long as the last 486 // member starts before the 4GB offset. 487 if (LastOffset >> 32 != 0) 488 Kind = object::Archive::K_GNU64; 489 } 490 491 Expected<sys::fs::TempFile> Temp = 492 sys::fs::TempFile::create(ArcName + ".temp-archive-%%%%%%%.a"); 493 if (!Temp) 494 return Temp.takeError(); 495 496 raw_fd_ostream Out(Temp->FD, false); 497 if (Thin) 498 Out << "!<thin>\n"; 499 else 500 Out << "!<arch>\n"; 501 502 if (WriteSymtab) 503 writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf); 504 505 for (const MemberData &M : Data) 506 Out << M.Header << M.Data << M.Padding; 507 508 Out.flush(); 509 510 // At this point, we no longer need whatever backing memory 511 // was used to generate the NewMembers. On Windows, this buffer 512 // could be a mapped view of the file we want to replace (if 513 // we're updating an existing archive, say). In that case, the 514 // rename would still succeed, but it would leave behind a 515 // temporary file (actually the original file renamed) because 516 // a file cannot be deleted while there's a handle open on it, 517 // only renamed. So by freeing this buffer, this ensures that 518 // the last open handle on the destination file, if any, is 519 // closed before we attempt to rename. 520 OldArchiveBuf.reset(); 521 522 return Temp->keep(ArcName); 523 } 524