1 //===- split-file.cpp - Input splitting utility ---------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Split input into multipe parts separated by regex '^(.|//)--- ' and extract 10 // the specified part. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/DenseMap.h" 15 #include "llvm/ADT/StringExtras.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/Support/CommandLine.h" 18 #include "llvm/Support/FileOutputBuffer.h" 19 #include "llvm/Support/FileSystem.h" 20 #include "llvm/Support/LineIterator.h" 21 #include "llvm/Support/MemoryBuffer.h" 22 #include "llvm/Support/Path.h" 23 #include "llvm/Support/ToolOutputFile.h" 24 #include "llvm/Support/WithColor.h" 25 #include <string> 26 #include <system_error> 27 28 using namespace llvm; 29 30 static cl::OptionCategory cat("split-file Options"); 31 32 static cl::opt<std::string> input(cl::Positional, cl::desc("filename"), 33 cl::cat(cat)); 34 35 static cl::opt<std::string> output(cl::Positional, cl::desc("directory"), 36 cl::value_desc("directory"), cl::cat(cat)); 37 38 static cl::opt<bool> noLeadingLines("no-leading-lines", 39 cl::desc("Don't preserve line numbers"), 40 cl::cat(cat)); 41 42 static StringRef toolName; 43 static int errorCount; 44 45 LLVM_ATTRIBUTE_NORETURN static void fatal(StringRef filename, 46 const Twine &message) { 47 if (filename.empty()) 48 WithColor::error(errs(), toolName) << message << '\n'; 49 else 50 WithColor::error(errs(), toolName) << filename << ": " << message << '\n'; 51 exit(1); 52 } 53 54 static void error(StringRef filename, int64_t line, const Twine &message) { 55 ++errorCount; 56 errs() << filename << ':' << line << ": "; 57 WithColor::error(errs()) << message << '\n'; 58 } 59 60 namespace { 61 struct Part { 62 const char *begin = nullptr; 63 const char *end = nullptr; 64 int64_t leadingLines = 0; 65 }; 66 } // namespace 67 68 static int handle(MemoryBuffer &inputBuf, StringRef input) { 69 DenseMap<StringRef, Part> partToBegin; 70 StringRef lastPart, separator; 71 for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) { 72 const int64_t lineNo = i.line_number(); 73 const StringRef line = *i++; 74 const size_t markerLen = line.startswith("//") ? 6 : 5; 75 if (!(line.size() >= markerLen && 76 line.substr(markerLen - 4).startswith("--- "))) 77 continue; 78 separator = line.substr(0, markerLen); 79 const StringRef partName = line.substr(markerLen); 80 if (partName.empty()) { 81 error(input, lineNo, "empty part name"); 82 continue; 83 } 84 if (isSpace(partName.front()) || isSpace(partName.back())) { 85 error(input, lineNo, "part name cannot have leading or trailing space"); 86 continue; 87 } 88 89 auto res = partToBegin.try_emplace(partName); 90 if (!res.second) { 91 error(input, lineNo, 92 "'" + separator + partName + "' occurs more than once"); 93 continue; 94 } 95 if (!lastPart.empty()) 96 partToBegin[lastPart].end = line.data(); 97 Part &cur = res.first->second; 98 if (!i.is_at_eof()) 99 cur.begin = i->data(); 100 // If --no-leading-lines is not specified, numEmptyLines is 0. Append 101 // newlines so that the extracted part preserves line numbers. 102 cur.leadingLines = noLeadingLines ? 0 : i.line_number() - 1; 103 104 lastPart = partName; 105 } 106 if (lastPart.empty()) 107 fatal(input, "no part separator was found"); 108 if (errorCount) 109 return 1; 110 partToBegin[lastPart].end = inputBuf.getBufferEnd(); 111 112 std::vector<std::unique_ptr<ToolOutputFile>> outputFiles; 113 SmallString<256> partPath; 114 for (auto &keyValue : partToBegin) { 115 partPath.clear(); 116 sys::path::append(partPath, output, keyValue.first); 117 std::error_code ec = 118 sys::fs::create_directories(sys::path::parent_path(partPath)); 119 if (ec) 120 fatal(input, ec.message()); 121 auto f = std::make_unique<ToolOutputFile>(partPath.str(), ec, 122 llvm::sys::fs::OF_None); 123 if (!f) 124 fatal(input, ec.message()); 125 126 Part &part = keyValue.second; 127 for (int64_t i = 0; i != part.leadingLines; ++i) 128 (*f).os().write('\n'); 129 if (part.begin) 130 (*f).os().write(part.begin, part.end - part.begin); 131 outputFiles.push_back(std::move(f)); 132 } 133 134 for (std::unique_ptr<ToolOutputFile> &outputFile : outputFiles) 135 outputFile->keep(); 136 return 0; 137 } 138 139 int main(int argc, const char **argv) { 140 toolName = sys::path::stem(argv[0]); 141 cl::HideUnrelatedOptions({&cat}); 142 cl::ParseCommandLineOptions( 143 argc, argv, 144 "Split input into multiple parts separated by regex '^(.|//)--- ' and " 145 "extract the part specified by '^(.|//)--- <part>'\n", 146 nullptr, 147 /*EnvVar=*/nullptr, 148 /*LongOptionsUseDoubleDash=*/true); 149 150 if (input.empty()) 151 fatal("", "input filename is not specified"); 152 if (output.empty()) 153 fatal("", "output directory is not specified"); 154 ErrorOr<std::unique_ptr<MemoryBuffer>> bufferOrErr = 155 MemoryBuffer::getFileOrSTDIN(input); 156 if (std::error_code ec = bufferOrErr.getError()) 157 fatal(input, ec.message()); 158 159 // Delete output if it is a file or an empty directory, so that we can create 160 // a directory. 161 sys::fs::file_status status; 162 if (std::error_code ec = sys::fs::status(output, status)) 163 if (ec.value() != static_cast<int>(std::errc::no_such_file_or_directory)) 164 fatal(output, ec.message()); 165 if (status.type() != sys::fs::file_type::file_not_found && 166 status.type() != sys::fs::file_type::directory_file && 167 status.type() != sys::fs::file_type::regular_file) 168 fatal(output, "output cannot be a special file"); 169 if (std::error_code ec = sys::fs::remove(output, /*IgnoreNonExisting=*/true)) 170 if (ec.value() != static_cast<int>(std::errc::directory_not_empty) && 171 ec.value() != static_cast<int>(std::errc::file_exists)) 172 fatal(output, ec.message()); 173 return handle(**bufferOrErr, input); 174 } 175