1 //===-- TarWriter.cpp - Tar archive file creator --------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // TarWriter class provides a feature to create a tar archive file. 11 // 12 // I put emphasis on simplicity over comprehensiveness when implementing this 13 // class because we don't need a full-fledged archive file generator in LLVM 14 // at the moment. 15 // 16 // The filename field in the Unix V7 tar header is 100 bytes. Longer filenames 17 // are stored using the PAX extension. The PAX header is standardized in 18 // POSIX.1-2001. 19 // 20 // The struct definition of UstarHeader is copied from 21 // https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 22 // 23 //===----------------------------------------------------------------------===// 24 25 #include "llvm/Support/TarWriter.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/Support/FileSystem.h" 28 #include "llvm/Support/MathExtras.h" 29 #include "llvm/Support/Path.h" 30 31 using namespace llvm; 32 33 // Each file in an archive must be aligned to this block size. 34 static const int BlockSize = 512; 35 36 struct UstarHeader { 37 char Name[100]; 38 char Mode[8]; 39 char Uid[8]; 40 char Gid[8]; 41 char Size[12]; 42 char Mtime[12]; 43 char Checksum[8]; 44 char TypeFlag; 45 char Linkname[100]; 46 char Magic[6]; 47 char Version[2]; 48 char Uname[32]; 49 char Gname[32]; 50 char DevMajor[8]; 51 char DevMinor[8]; 52 char Prefix[155]; 53 char Pad[12]; 54 }; 55 static_assert(sizeof(UstarHeader) == BlockSize, "invalid Ustar header"); 56 57 static UstarHeader makeUstarHeader() { 58 UstarHeader Hdr = {}; 59 memcpy(Hdr.Magic, "ustar", 5); // Ustar magic 60 memcpy(Hdr.Version, "00", 2); // Ustar version 61 return Hdr; 62 } 63 64 // A PAX attribute is in the form of "<length> <key>=<value>\n" 65 // where <length> is the length of the entire string including 66 // the length field itself. An example string is this. 67 // 68 // 25 ctime=1084839148.1212\n 69 // 70 // This function create such string. 71 static std::string formatPax(StringRef Key, StringRef Val) { 72 int Len = Key.size() + Val.size() + 3; // +3 for " ", "=" and "\n" 73 74 // We need to compute total size twice because appending 75 // a length field could change total size by one. 76 int Total = Len + Twine(Len).str().size(); 77 Total = Len + Twine(Total).str().size(); 78 return (Twine(Total) + " " + Key + "=" + Val + "\n").str(); 79 } 80 81 // Headers in tar files must be aligned to 512 byte boundaries. 82 // This function forwards the current file position to the next boundary. 83 static void pad(raw_fd_ostream &OS) { 84 uint64_t Pos = OS.tell(); 85 OS.seek(alignTo(Pos, BlockSize)); 86 } 87 88 // Computes a checksum for a tar header. 89 static void computeChecksum(UstarHeader &Hdr) { 90 // Before computing a checksum, checksum field must be 91 // filled with space characters. 92 memset(Hdr.Checksum, ' ', sizeof(Hdr.Checksum)); 93 94 // Compute a checksum and set it to the checksum field. 95 unsigned Chksum = 0; 96 for (size_t I = 0; I < sizeof(Hdr); ++I) 97 Chksum += reinterpret_cast<uint8_t *>(&Hdr)[I]; 98 snprintf(Hdr.Checksum, sizeof(Hdr.Checksum), "%06o", Chksum); 99 } 100 101 // Create a tar header and write it to a given output stream. 102 static void writePaxHeader(raw_fd_ostream &OS, StringRef Path) { 103 // A PAX header consists of a 512-byte header followed 104 // by key-value strings. First, create key-value strings. 105 std::string PaxAttr = formatPax("path", Path); 106 107 // Create a 512-byte header. 108 UstarHeader Hdr = makeUstarHeader(); 109 snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", PaxAttr.size()); 110 Hdr.TypeFlag = 'x'; // PAX magic 111 computeChecksum(Hdr); 112 113 // Write them down. 114 OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr)); 115 OS << PaxAttr; 116 pad(OS); 117 } 118 119 // In the Ustar header, a path can be split at any '/' to store 120 // a path into UstarHeader::Name and UstarHeader::Prefix. This 121 // function splits a given path for that purpose. 122 static std::pair<StringRef, StringRef> splitPath(StringRef Path) { 123 if (Path.size() <= sizeof(UstarHeader::Name)) 124 return {"", Path}; 125 size_t Sep = Path.rfind('/', sizeof(UstarHeader::Prefix) + 1); 126 if (Sep == StringRef::npos) 127 return {"", Path}; 128 return {Path.substr(0, Sep), Path.substr(Sep + 1)}; 129 } 130 131 // Returns true if a given path can be stored to a Ustar header 132 // without the PAX extension. 133 static bool fitsInUstar(StringRef Path) { 134 StringRef Prefix; 135 StringRef Name; 136 std::tie(Prefix, Name) = splitPath(Path); 137 return Name.size() <= sizeof(UstarHeader::Name); 138 } 139 140 // The PAX header is an extended format, so a PAX header needs 141 // to be followed by a "real" header. 142 static void writeUstarHeader(raw_fd_ostream &OS, StringRef Path, size_t Size) { 143 StringRef Prefix; 144 StringRef Name; 145 std::tie(Prefix, Name) = splitPath(Path); 146 147 UstarHeader Hdr = makeUstarHeader(); 148 memcpy(Hdr.Name, Name.data(), Name.size()); 149 memcpy(Hdr.Mode, "0000664", 8); 150 snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", Size); 151 memcpy(Hdr.Prefix, Prefix.data(), Prefix.size()); 152 computeChecksum(Hdr); 153 OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr)); 154 } 155 156 // Creates a TarWriter instance and returns it. 157 Expected<std::unique_ptr<TarWriter>> TarWriter::create(StringRef OutputPath, 158 StringRef BaseDir) { 159 int FD; 160 if (std::error_code EC = openFileForWrite(OutputPath, FD, sys::fs::F_None)) 161 return make_error<StringError>("cannot open " + OutputPath, EC); 162 return std::unique_ptr<TarWriter>(new TarWriter(FD, BaseDir)); 163 } 164 165 TarWriter::TarWriter(int FD, StringRef BaseDir) 166 : OS(FD, /*shouldClose=*/true, /*unbuffered=*/false), BaseDir(BaseDir) {} 167 168 // Append a given file to an archive. 169 void TarWriter::append(StringRef Path, StringRef Data) { 170 // Write Path and Data. 171 std::string S = BaseDir + "/" + sys::path::convert_to_slash(Path) + "\0"; 172 if (fitsInUstar(S)) { 173 writeUstarHeader(OS, S, Data.size()); 174 } else { 175 writePaxHeader(OS, S); 176 writeUstarHeader(OS, "", Data.size()); 177 } 178 179 OS << Data; 180 pad(OS); 181 182 // POSIX requires tar archives end with two null blocks. 183 // Here, we write the terminator and then seek back, so that 184 // the file being output is terminated correctly at any moment. 185 uint64_t Pos = OS.tell(); 186 OS << std::string(BlockSize * 2, '\0'); 187 OS.seek(Pos); 188 OS.flush(); 189 } 190