//===-- TarWriter.cpp - Tar archive file creator --------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// TarWriter class provides a feature to create a tar archive file.
//
// I put emphasis on simplicity over comprehensiveness when implementing this
// class because we don't need a full-fledged archive file generator in LLVM
// at the moment.
//
// The filename field in the Unix V7 tar header is 100 bytes. Longer filenames
// are stored using the PAX extension. The PAX header is standardized in
// POSIX.1-2001.
//
// The struct definition of UstarHeader is copied from
// https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/TarWriter.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include <cstdio>
#include <cstring>
#include <string>

using namespace llvm;

// Each file in an archive must be aligned to this block size.
34 static const int BlockSize = 512; 35 36 struct UstarHeader { 37 char Name[100]; 38 char Mode[8]; 39 char Uid[8]; 40 char Gid[8]; 41 char Size[12]; 42 char Mtime[12]; 43 char Checksum[8]; 44 char TypeFlag; 45 char Linkname[100]; 46 char Magic[6]; 47 char Version[2]; 48 char Uname[32]; 49 char Gname[32]; 50 char DevMajor[8]; 51 char DevMinor[8]; 52 char Prefix[155]; 53 char Pad[12]; 54 }; 55 static_assert(sizeof(UstarHeader) == BlockSize, "invalid Ustar header"); 56 57 static UstarHeader makeUstarHeader() { 58 UstarHeader Hdr = {}; 59 memcpy(Hdr.Magic, "ustar", 5); // Ustar magic 60 memcpy(Hdr.Version, "00", 2); // Ustar version 61 return Hdr; 62 } 63 64 // A PAX attribute is in the form of "<length> <key>=<value>\n" 65 // where <length> is the length of the entire string including 66 // the length field itself. An example string is this. 67 // 68 // 25 ctime=1084839148.1212\n 69 // 70 // This function create such string. 71 static std::string formatPax(StringRef Key, StringRef Val) { 72 int Len = Key.size() + Val.size() + 3; // +3 for " ", "=" and "\n" 73 74 // We need to compute total size twice because appending 75 // a length field could change total size by one. 76 int Total = Len + Twine(Len).str().size(); 77 Total = Len + Twine(Total).str().size(); 78 return (Twine(Total) + " " + Key + "=" + Val + "\n").str(); 79 } 80 81 // Headers in tar files must be aligned to 512 byte boundaries. 82 // This function forwards the current file position to the next boundary. 83 static void pad(raw_fd_ostream &OS) { 84 uint64_t Pos = OS.tell(); 85 OS.seek(alignTo(Pos, BlockSize)); 86 } 87 88 // Computes a checksum for a tar header. 89 static void computeChecksum(UstarHeader &Hdr) { 90 // Before computing a checksum, checksum field must be 91 // filled with space characters. 92 memset(Hdr.Checksum, ' ', sizeof(Hdr.Checksum)); 93 94 // Compute a checksum and set it to the checksum field. 
95 unsigned Chksum = 0; 96 for (size_t I = 0; I < sizeof(Hdr); ++I) 97 Chksum += reinterpret_cast<uint8_t *>(&Hdr)[I]; 98 snprintf(Hdr.Checksum, sizeof(Hdr.Checksum), "%06o", Chksum); 99 } 100 101 // Create a tar header and write it to a given output stream. 102 static void writePaxHeader(raw_fd_ostream &OS, StringRef Path) { 103 // A PAX header consists of a 512-byte header followed 104 // by key-value strings. First, create key-value strings. 105 std::string PaxAttr = formatPax("path", Path); 106 107 // Create a 512-byte header. 108 UstarHeader Hdr = makeUstarHeader(); 109 snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", PaxAttr.size()); 110 Hdr.TypeFlag = 'x'; // PAX magic 111 computeChecksum(Hdr); 112 113 // Write them down. 114 OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr)); 115 OS << PaxAttr; 116 pad(OS); 117 } 118 119 // Path fits in a Ustar header if 120 // 121 // - Path is less than 100 characters long, or 122 // - Path is in the form of "<prefix>/<name>" where <prefix> is less 123 // than or equal to 155 characters long and <name> is less than 100 124 // characters long. Both <prefix> and <name> can contain extra '/'. 125 // 126 // If Path fits in a Ustar header, updates Prefix and Name and returns true. 127 // Otherwise, returns false. 128 static bool splitUstar(StringRef Path, StringRef &Prefix, StringRef &Name) { 129 if (Path.size() < sizeof(UstarHeader::Name)) { 130 Prefix = ""; 131 Name = Path; 132 return true; 133 } 134 135 size_t Sep = Path.rfind('/', sizeof(UstarHeader::Prefix) + 1); 136 if (Sep == StringRef::npos) 137 return false; 138 if (Path.size() - Sep - 1 >= sizeof(UstarHeader::Name)) 139 return false; 140 141 Prefix = Path.substr(0, Sep); 142 Name = Path.substr(Sep + 1); 143 return true; 144 } 145 146 // The PAX header is an extended format, so a PAX header needs 147 // to be followed by a "real" header. 
148 static void writeUstarHeader(raw_fd_ostream &OS, StringRef Prefix, 149 StringRef Name, size_t Size) { 150 UstarHeader Hdr = makeUstarHeader(); 151 memcpy(Hdr.Name, Name.data(), Name.size()); 152 memcpy(Hdr.Mode, "0000664", 8); 153 snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", Size); 154 memcpy(Hdr.Prefix, Prefix.data(), Prefix.size()); 155 computeChecksum(Hdr); 156 OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr)); 157 } 158 159 // Creates a TarWriter instance and returns it. 160 Expected<std::unique_ptr<TarWriter>> TarWriter::create(StringRef OutputPath, 161 StringRef BaseDir) { 162 int FD; 163 if (std::error_code EC = openFileForWrite(OutputPath, FD, sys::fs::F_None)) 164 return make_error<StringError>("cannot open " + OutputPath, EC); 165 return std::unique_ptr<TarWriter>(new TarWriter(FD, BaseDir)); 166 } 167 168 TarWriter::TarWriter(int FD, StringRef BaseDir) 169 : OS(FD, /*shouldClose=*/true, /*unbuffered=*/false), BaseDir(BaseDir) {} 170 171 // Append a given file to an archive. 172 void TarWriter::append(StringRef Path, StringRef Data) { 173 // Write Path and Data. 174 std::string Fullpath = BaseDir + "/" + sys::path::convert_to_slash(Path); 175 176 // We do not want to include the same file more than once. 177 if (!Files.insert(Fullpath).second) 178 return; 179 180 StringRef Prefix; 181 StringRef Name; 182 if (splitUstar(Fullpath, Prefix, Name)) { 183 writeUstarHeader(OS, Prefix, Name, Data.size()); 184 } else { 185 writePaxHeader(OS, Fullpath); 186 writeUstarHeader(OS, "", "", Data.size()); 187 } 188 189 OS << Data; 190 pad(OS); 191 192 // POSIX requires tar archives end with two null blocks. 193 // Here, we write the terminator and then seek back, so that 194 // the file being output is terminated correctly at any moment. 195 uint64_t Pos = OS.tell(); 196 OS << std::string(BlockSize * 2, '\0'); 197 OS.seek(Pos); 198 OS.flush(); 199 } 200