1 //===-- TarWriter.cpp - Tar archive file creator --------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // TarWriter class provides a feature to create a tar archive file. 11 // 12 // I put emphasis on simplicity over comprehensiveness when 13 // implementing this class because we don't need a full-fledged 14 // archive file generator in LLVM at the moment. 15 // 16 // The filename field in the Unix V7 tar header is 100 bytes, which is 17 // apparently too small. Various extensions were proposed and 18 // implemented to fix the issue. The writer implemented in this file 19 // uses PAX extension headers. 20 // 21 // Note that we emit PAX headers even if filenames fit in the V7 22 // header for the sake of simplicity. So, generated files are N 23 // kilobyte larger than the ideal where N is the number of files in 24 // archives. In practice, I think you don't need to worry about that. 25 // 26 // The PAX header is standardized in IEEE Std 1003.1-2001. 27 // 28 // The struct definition of UstarHeader is copied from 29 // https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 30 // 31 //===----------------------------------------------------------------------===// 32 33 #include "llvm/Support/TarWriter.h" 34 #include "llvm/ADT/StringRef.h" 35 #include "llvm/Support/FileSystem.h" 36 #include "llvm/Support/MathExtras.h" 37 38 using namespace llvm; 39 40 // Each file in an archive must be aligned to this block size. 41 static const int BlockSize = 512; 42 43 struct UstarHeader { 44 char Name[100]; 45 char Mode[8]; 46 char Uid[8]; 47 char Gid[8]; 48 char Size[12]; 49 char Mtime[12]; 50 char Checksum[8]; 51 char TypeFlag; 52 char Linkname[100]; 53 char Magic[6]; 54 char Version[2]; 55 char Uname[32]; 56 char Gname[32]; 57 char DevMajor[8]; 58 char DevMinor[8]; 59 char Prefix[155]; 60 char Pad[12]; 61 }; 62 static_assert(sizeof(UstarHeader) == BlockSize, "invalid Ustar header"); 63 64 // A PAX attribute is in the form of "<length> <key>=<value>\n" 65 // where <length> is the length of the entire string including 66 // the length field itself. An example string is this. 67 // 68 // 25 ctime=1084839148.1212\n 69 // 70 // This function create such string. 71 static std::string formatPax(StringRef Key, const Twine &Val) { 72 int Len = Key.size() + Val.str().size() + 3; // +3 for " ", "=" and "\n" 73 74 // We need to compute total size twice because appending 75 // a length field could change total size by one. 76 int Total = Len + Twine(Len).str().size(); 77 Total = Len + Twine(Total).str().size(); 78 return (Twine(Total) + " " + Key + "=" + Val + "\n").str(); 79 } 80 81 // Headers in tar files must be aligned to 512 byte boundaries. 82 // This function writes null bytes so that the file is a multiple 83 // of 512 bytes. 84 static void pad(raw_fd_ostream &OS) { 85 uint64_t Pos = OS.tell(); 86 OS.seek(alignTo(Pos, BlockSize)); 87 } 88 89 // Computes a checksum for a tar header. 90 static void computeChecksum(UstarHeader &Hdr) { 91 // Before computing a checksum, checksum field must be 92 // filled with space characters. 93 memset(Hdr.Checksum, ' ', sizeof(Hdr.Checksum)); 94 95 // Compute a checksum and set it to the checksum field. 96 unsigned Chksum = 0; 97 for (size_t I = 0; I < sizeof(Hdr); ++I) 98 Chksum += reinterpret_cast<uint8_t *>(&Hdr)[I]; 99 sprintf(Hdr.Checksum, "%06o", Chksum); 100 } 101 102 // Create a tar header and write it to a given output stream. 103 static void writePaxHeader(raw_fd_ostream &OS, const Twine &Path) { 104 // A PAX header consists of a 512-byte header followed 105 // by key-value strings. First, create key-value strings. 106 std::string PaxAttr = formatPax("path", Path); 107 108 // Create a 512-byte header. 109 UstarHeader Hdr = {}; 110 sprintf(Hdr.Size, "%011lo", PaxAttr.size()); 111 Hdr.TypeFlag = 'x'; // PAX magic 112 memcpy(Hdr.Magic, "ustar", 6); // Ustar magic 113 computeChecksum(Hdr); 114 115 // Write them down. 116 OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr)); 117 OS << PaxAttr; 118 pad(OS); 119 } 120 121 // The PAX header is an extended format, so a PAX header needs 122 // to be followed by a "real" header. 123 static void writeUstarHeader(raw_fd_ostream &OS, size_t Size) { 124 UstarHeader Hdr = {}; 125 strcpy(Hdr.Mode, "0000664"); 126 sprintf(Hdr.Size, "%011lo", Size); 127 memcpy(Hdr.Magic, "ustar", 6); 128 129 computeChecksum(Hdr); 130 OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr)); 131 } 132 133 // We want to use '/' as a path separator even on Windows. 134 // This function canonicalizes a given path. 135 static std::string canonicalize(std::string S) { 136 #ifdef LLVM_ON_WIN32 137 std::replace(S.begin(), S.end(), '\\', '/'); 138 #endif 139 return S; 140 } 141 142 // Creates a TarWriter instance and returns it. 143 Expected<std::unique_ptr<TarWriter>> TarWriter::create(StringRef OutputPath, 144 StringRef BaseDir) { 145 int FD; 146 if (std::error_code EC = openFileForWrite(OutputPath, FD, sys::fs::F_None)) 147 return make_error<StringError>("cannot open " + OutputPath, EC); 148 return std::unique_ptr<TarWriter>(new TarWriter(FD, BaseDir)); 149 } 150 151 TarWriter::TarWriter(int FD, StringRef BaseDir) 152 : OS(FD, /*shouldClose=*/true, /*unbuffered=*/false), BaseDir(BaseDir) {} 153 154 // Append a given file to an archive. 155 void TarWriter::append(StringRef Path, StringRef Data) { 156 // Write Path and Data. 157 writePaxHeader(OS, BaseDir + "/" + canonicalize(Path)); 158 writeUstarHeader(OS, Data.size()); 159 OS << Data; 160 pad(OS); 161 162 // POSIX requires tar archives end with two null blocks. 163 // Here, we write the terminator and then seek back, so that 164 // the file being output is terminated correctly at any moment. 165 uint64_t Pos = OS.tell(); 166 OS << std::string(BlockSize * 2, '\0'); 167 OS.seek(Pos); 168 OS.flush(); 169 } 170