1 //===- split-file.cpp - Input splitting utility ---------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// Split input into multiple parts separated by regex '^(.|//)--- ' and extract
10 // the specified part.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/DenseMap.h"
15 #include "llvm/ADT/StringExtras.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/FileOutputBuffer.h"
19 #include "llvm/Support/FileSystem.h"
20 #include "llvm/Support/LineIterator.h"
21 #include "llvm/Support/MemoryBuffer.h"
22 #include "llvm/Support/Path.h"
23 #include "llvm/Support/ToolOutputFile.h"
24 #include "llvm/Support/WithColor.h"
25 #include <string>
26 #include <system_error>
27 
28 using namespace llvm;
29 
30 static cl::OptionCategory cat("split-file Options");
31 
32 static cl::opt<std::string> input(cl::Positional, cl::desc("filename"),
33                                   cl::cat(cat));
34 
35 static cl::opt<std::string> output(cl::Positional, cl::desc("directory"),
36                                    cl::value_desc("directory"), cl::cat(cat));
37 
38 static cl::opt<bool> leadingLines("leading-lines",
39                                     cl::desc("Preserve line numbers"),
40                                     cl::cat(cat));
41 
42 static cl::opt<bool> noLeadingLines("no-leading-lines",
43                                     cl::desc("Don't preserve line numbers (default)"),
44                                     cl::cat(cat));
45 
46 static StringRef toolName;
47 static int errorCount;
48 
49 [[noreturn]] static void fatal(StringRef filename, const Twine &message) {
50   if (filename.empty())
51     WithColor::error(errs(), toolName) << message << '\n';
52   else
53     WithColor::error(errs(), toolName) << filename << ": " << message << '\n';
54   exit(1);
55 }
56 
57 static void error(StringRef filename, int64_t line, const Twine &message) {
58   ++errorCount;
59   errs() << filename << ':' << line << ": ";
60   WithColor::error(errs()) << message << '\n';
61 }
62 
namespace {
/// Location of one part inside the input buffer, as recorded by handle().
struct Part {
  /// First byte of the part's content (the line after its separator line).
  /// Stays null when the separator is the last line of the input.
  const char *begin{nullptr};
  /// One past the last byte of the part's content: the start of the next
  /// separator line, or the end of the input buffer for the final part.
  const char *end{nullptr};
  /// Number of '\n' characters written in front of the content.
  int64_t leadingLines{0};
};
} // namespace
70 
71 static int handle(MemoryBuffer &inputBuf, StringRef input) {
72   DenseMap<StringRef, Part> partToBegin;
73   StringRef lastPart, separator;
74   for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) {
75     const int64_t lineNo = i.line_number();
76     const StringRef line = *i++;
77     const size_t markerLen = line.startswith("//") ? 6 : 5;
78     if (!(line.size() >= markerLen &&
79           line.substr(markerLen - 4).startswith("--- ")))
80       continue;
81     separator = line.substr(0, markerLen);
82     const StringRef partName = line.substr(markerLen);
83     if (partName.empty()) {
84       error(input, lineNo, "empty part name");
85       continue;
86     }
87     if (isSpace(partName.front()) || isSpace(partName.back())) {
88       error(input, lineNo, "part name cannot have leading or trailing space");
89       continue;
90     }
91 
92     auto res = partToBegin.try_emplace(partName);
93     if (!res.second) {
94       error(input, lineNo,
95             "'" + separator + partName + "' occurs more than once");
96       continue;
97     }
98     if (!lastPart.empty())
99       partToBegin[lastPart].end = line.data();
100     Part &cur = res.first->second;
101     if (!i.is_at_eof())
102       cur.begin = i->data();
103     // If --leading-lines is specified, numEmptyLines is 0. Append newlines so
104     // that the extracted part preserves line numbers.
105     cur.leadingLines = leadingLines ? i.line_number() - 1 : 0;
106 
107     lastPart = partName;
108   }
109   if (lastPart.empty())
110     fatal(input, "no part separator was found");
111   if (errorCount)
112     return 1;
113   partToBegin[lastPart].end = inputBuf.getBufferEnd();
114 
115   std::vector<std::unique_ptr<ToolOutputFile>> outputFiles;
116   SmallString<256> partPath;
117   for (auto &keyValue : partToBegin) {
118     partPath.clear();
119     sys::path::append(partPath, output, keyValue.first);
120     std::error_code ec =
121         sys::fs::create_directories(sys::path::parent_path(partPath));
122     if (ec)
123       fatal(input, ec.message());
124     auto f = std::make_unique<ToolOutputFile>(partPath.str(), ec,
125                                               llvm::sys::fs::OF_None);
126     if (!f)
127       fatal(input, ec.message());
128 
129     Part &part = keyValue.second;
130     for (int64_t i = 0; i != part.leadingLines; ++i)
131       (*f).os().write('\n');
132     if (part.begin)
133       (*f).os().write(part.begin, part.end - part.begin);
134     outputFiles.push_back(std::move(f));
135   }
136 
137   for (std::unique_ptr<ToolOutputFile> &outputFile : outputFiles)
138     outputFile->keep();
139   return 0;
140 }
141 
142 int main(int argc, const char **argv) {
143   toolName = sys::path::stem(argv[0]);
144   cl::HideUnrelatedOptions({&cat});
145   cl::ParseCommandLineOptions(
146       argc, argv,
147       "Split input into multiple parts separated by regex '^(.|//)--- ' and "
148       "extract the part specified by '^(.|//)--- <part>'\n",
149       nullptr,
150       /*EnvVar=*/nullptr,
151       /*LongOptionsUseDoubleDash=*/true);
152 
153   if (input.empty())
154     fatal("", "input filename is not specified");
155   if (output.empty())
156     fatal("", "output directory is not specified");
157   ErrorOr<std::unique_ptr<MemoryBuffer>> bufferOrErr =
158       MemoryBuffer::getFileOrSTDIN(input);
159   if (std::error_code ec = bufferOrErr.getError())
160     fatal(input, ec.message());
161 
162   // Delete output if it is a file or an empty directory, so that we can create
163   // a directory.
164   sys::fs::file_status status;
165   if (std::error_code ec = sys::fs::status(output, status))
166     if (ec.value() != static_cast<int>(std::errc::no_such_file_or_directory))
167       fatal(output, ec.message());
168   if (status.type() != sys::fs::file_type::file_not_found &&
169       status.type() != sys::fs::file_type::directory_file &&
170       status.type() != sys::fs::file_type::regular_file)
171     fatal(output, "output cannot be a special file");
172   if (std::error_code ec = sys::fs::remove(output, /*IgnoreNonExisting=*/true))
173     if (ec.value() != static_cast<int>(std::errc::directory_not_empty) &&
174         ec.value() != static_cast<int>(std::errc::file_exists))
175       fatal(output, ec.message());
176   return handle(**bufferOrErr, input);
177 }
178