1bdd1243dSDimitry Andric //===- split-file.cpp - Input splitting utility ---------------------------===//
2bdd1243dSDimitry Andric //
3bdd1243dSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4bdd1243dSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5bdd1243dSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6bdd1243dSDimitry Andric //
7bdd1243dSDimitry Andric //===----------------------------------------------------------------------===//
8bdd1243dSDimitry Andric //
// Split input into multiple parts separated by regex '^(.|//)--- ' and extract
// the specified part.
11bdd1243dSDimitry Andric //
12bdd1243dSDimitry Andric //===----------------------------------------------------------------------===//
13bdd1243dSDimitry Andric
14bdd1243dSDimitry Andric #include "llvm/ADT/DenseMap.h"
15bdd1243dSDimitry Andric #include "llvm/ADT/StringExtras.h"
16bdd1243dSDimitry Andric #include "llvm/ADT/StringRef.h"
17bdd1243dSDimitry Andric #include "llvm/Support/CommandLine.h"
18bdd1243dSDimitry Andric #include "llvm/Support/FileOutputBuffer.h"
19bdd1243dSDimitry Andric #include "llvm/Support/FileSystem.h"
20bdd1243dSDimitry Andric #include "llvm/Support/LineIterator.h"
21bdd1243dSDimitry Andric #include "llvm/Support/MemoryBuffer.h"
22bdd1243dSDimitry Andric #include "llvm/Support/Path.h"
23bdd1243dSDimitry Andric #include "llvm/Support/ToolOutputFile.h"
24bdd1243dSDimitry Andric #include "llvm/Support/WithColor.h"
25bdd1243dSDimitry Andric #include <string>
26bdd1243dSDimitry Andric #include <system_error>
27bdd1243dSDimitry Andric
28bdd1243dSDimitry Andric using namespace llvm;
29bdd1243dSDimitry Andric
30bdd1243dSDimitry Andric static cl::OptionCategory cat("split-file Options");
31bdd1243dSDimitry Andric
32bdd1243dSDimitry Andric static cl::opt<std::string> input(cl::Positional, cl::desc("filename"),
33bdd1243dSDimitry Andric cl::cat(cat));
34bdd1243dSDimitry Andric
35bdd1243dSDimitry Andric static cl::opt<std::string> output(cl::Positional, cl::desc("directory"),
36bdd1243dSDimitry Andric cl::value_desc("directory"), cl::cat(cat));
37bdd1243dSDimitry Andric
38bdd1243dSDimitry Andric static cl::opt<bool> leadingLines("leading-lines",
39bdd1243dSDimitry Andric cl::desc("Preserve line numbers"),
40bdd1243dSDimitry Andric cl::cat(cat));
41bdd1243dSDimitry Andric
42bdd1243dSDimitry Andric static cl::opt<bool> noLeadingLines("no-leading-lines",
43bdd1243dSDimitry Andric cl::desc("Don't preserve line numbers (default)"),
44bdd1243dSDimitry Andric cl::cat(cat));
45bdd1243dSDimitry Andric
46bdd1243dSDimitry Andric static StringRef toolName;
47bdd1243dSDimitry Andric static int errorCount;
48bdd1243dSDimitry Andric
fatal(StringRef filename,const Twine & message)49bdd1243dSDimitry Andric [[noreturn]] static void fatal(StringRef filename, const Twine &message) {
50bdd1243dSDimitry Andric if (filename.empty())
51bdd1243dSDimitry Andric WithColor::error(errs(), toolName) << message << '\n';
52bdd1243dSDimitry Andric else
53bdd1243dSDimitry Andric WithColor::error(errs(), toolName) << filename << ": " << message << '\n';
54bdd1243dSDimitry Andric exit(1);
55bdd1243dSDimitry Andric }
56bdd1243dSDimitry Andric
error(StringRef filename,int64_t line,const Twine & message)57bdd1243dSDimitry Andric static void error(StringRef filename, int64_t line, const Twine &message) {
58bdd1243dSDimitry Andric ++errorCount;
59bdd1243dSDimitry Andric errs() << filename << ':' << line << ": ";
60bdd1243dSDimitry Andric WithColor::error(errs()) << message << '\n';
61bdd1243dSDimitry Andric }
62bdd1243dSDimitry Andric
namespace {
/// Location of one part inside the input buffer.
struct Part {
  /// Start of the part's content (the line following its separator). Remains
  /// null when the separator is the last line of the input.
  const char *begin{nullptr};
  /// One past the last byte of the part's content.
  const char *end{nullptr};
  /// Number of EOL sequences written before the content.
  int64_t leadingLines{0};
};
} // namespace
70bdd1243dSDimitry Andric
handle(MemoryBuffer & inputBuf,StringRef input)71bdd1243dSDimitry Andric static int handle(MemoryBuffer &inputBuf, StringRef input) {
72bdd1243dSDimitry Andric DenseMap<StringRef, Part> partToBegin;
73bdd1243dSDimitry Andric StringRef lastPart, separator;
74bdd1243dSDimitry Andric StringRef EOL = inputBuf.getBuffer().detectEOL();
75bdd1243dSDimitry Andric for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) {
76bdd1243dSDimitry Andric const int64_t lineNo = i.line_number();
77bdd1243dSDimitry Andric const StringRef line = *i++;
78*c9157d92SDimitry Andric const size_t markerLen = line.starts_with("//") ? 6 : 5;
79bdd1243dSDimitry Andric if (!(line.size() >= markerLen &&
80*c9157d92SDimitry Andric line.substr(markerLen - 4).starts_with("--- ")))
81bdd1243dSDimitry Andric continue;
82bdd1243dSDimitry Andric separator = line.substr(0, markerLen);
83bdd1243dSDimitry Andric const StringRef partName = line.substr(markerLen);
84bdd1243dSDimitry Andric if (partName.empty()) {
85bdd1243dSDimitry Andric error(input, lineNo, "empty part name");
86bdd1243dSDimitry Andric continue;
87bdd1243dSDimitry Andric }
88bdd1243dSDimitry Andric if (isSpace(partName.front()) || isSpace(partName.back())) {
89bdd1243dSDimitry Andric error(input, lineNo, "part name cannot have leading or trailing space");
90bdd1243dSDimitry Andric continue;
91bdd1243dSDimitry Andric }
92bdd1243dSDimitry Andric
93bdd1243dSDimitry Andric auto res = partToBegin.try_emplace(partName);
94bdd1243dSDimitry Andric if (!res.second) {
95bdd1243dSDimitry Andric error(input, lineNo,
96bdd1243dSDimitry Andric "'" + separator + partName + "' occurs more than once");
97bdd1243dSDimitry Andric continue;
98bdd1243dSDimitry Andric }
99bdd1243dSDimitry Andric if (!lastPart.empty())
100bdd1243dSDimitry Andric partToBegin[lastPart].end = line.data();
101bdd1243dSDimitry Andric Part &cur = res.first->second;
102bdd1243dSDimitry Andric if (!i.is_at_eof())
103bdd1243dSDimitry Andric cur.begin = i->data();
104bdd1243dSDimitry Andric // If --leading-lines is specified, numEmptyLines is 0. Append newlines so
105bdd1243dSDimitry Andric // that the extracted part preserves line numbers.
106bdd1243dSDimitry Andric cur.leadingLines = leadingLines ? i.line_number() - 1 : 0;
107bdd1243dSDimitry Andric
108bdd1243dSDimitry Andric lastPart = partName;
109bdd1243dSDimitry Andric }
110bdd1243dSDimitry Andric if (lastPart.empty())
111bdd1243dSDimitry Andric fatal(input, "no part separator was found");
112bdd1243dSDimitry Andric if (errorCount)
113bdd1243dSDimitry Andric return 1;
114bdd1243dSDimitry Andric partToBegin[lastPart].end = inputBuf.getBufferEnd();
115bdd1243dSDimitry Andric
116bdd1243dSDimitry Andric std::vector<std::unique_ptr<ToolOutputFile>> outputFiles;
117bdd1243dSDimitry Andric SmallString<256> partPath;
118bdd1243dSDimitry Andric for (auto &keyValue : partToBegin) {
119bdd1243dSDimitry Andric partPath.clear();
120bdd1243dSDimitry Andric sys::path::append(partPath, output, keyValue.first);
121bdd1243dSDimitry Andric std::error_code ec =
122bdd1243dSDimitry Andric sys::fs::create_directories(sys::path::parent_path(partPath));
123bdd1243dSDimitry Andric if (ec)
124bdd1243dSDimitry Andric fatal(input, ec.message());
125bdd1243dSDimitry Andric auto f = std::make_unique<ToolOutputFile>(partPath.str(), ec,
126bdd1243dSDimitry Andric llvm::sys::fs::OF_None);
127bdd1243dSDimitry Andric if (!f)
128bdd1243dSDimitry Andric fatal(input, ec.message());
129bdd1243dSDimitry Andric
130bdd1243dSDimitry Andric Part &part = keyValue.second;
131bdd1243dSDimitry Andric for (int64_t i = 0; i != part.leadingLines; ++i)
132bdd1243dSDimitry Andric (*f).os() << EOL;
133bdd1243dSDimitry Andric if (part.begin)
134bdd1243dSDimitry Andric (*f).os().write(part.begin, part.end - part.begin);
135bdd1243dSDimitry Andric outputFiles.push_back(std::move(f));
136bdd1243dSDimitry Andric }
137bdd1243dSDimitry Andric
138bdd1243dSDimitry Andric for (std::unique_ptr<ToolOutputFile> &outputFile : outputFiles)
139bdd1243dSDimitry Andric outputFile->keep();
140bdd1243dSDimitry Andric return 0;
141bdd1243dSDimitry Andric }
142bdd1243dSDimitry Andric
main(int argc,const char ** argv)143bdd1243dSDimitry Andric int main(int argc, const char **argv) {
144bdd1243dSDimitry Andric toolName = sys::path::stem(argv[0]);
145bdd1243dSDimitry Andric cl::HideUnrelatedOptions({&cat});
146bdd1243dSDimitry Andric cl::ParseCommandLineOptions(
147bdd1243dSDimitry Andric argc, argv,
148bdd1243dSDimitry Andric "Split input into multiple parts separated by regex '^(.|//)--- ' and "
149bdd1243dSDimitry Andric "extract the part specified by '^(.|//)--- <part>'\n",
150bdd1243dSDimitry Andric nullptr,
151bdd1243dSDimitry Andric /*EnvVar=*/nullptr,
152bdd1243dSDimitry Andric /*LongOptionsUseDoubleDash=*/true);
153bdd1243dSDimitry Andric
154bdd1243dSDimitry Andric if (input.empty())
155bdd1243dSDimitry Andric fatal("", "input filename is not specified");
156bdd1243dSDimitry Andric if (output.empty())
157bdd1243dSDimitry Andric fatal("", "output directory is not specified");
158bdd1243dSDimitry Andric ErrorOr<std::unique_ptr<MemoryBuffer>> bufferOrErr =
159bdd1243dSDimitry Andric MemoryBuffer::getFileOrSTDIN(input);
160bdd1243dSDimitry Andric if (std::error_code ec = bufferOrErr.getError())
161bdd1243dSDimitry Andric fatal(input, ec.message());
162bdd1243dSDimitry Andric
163bdd1243dSDimitry Andric // Delete output if it is a file or an empty directory, so that we can create
164bdd1243dSDimitry Andric // a directory.
165bdd1243dSDimitry Andric sys::fs::file_status status;
166bdd1243dSDimitry Andric if (std::error_code ec = sys::fs::status(output, status))
167bdd1243dSDimitry Andric if (ec.value() != static_cast<int>(std::errc::no_such_file_or_directory))
168bdd1243dSDimitry Andric fatal(output, ec.message());
169bdd1243dSDimitry Andric if (status.type() != sys::fs::file_type::file_not_found &&
170bdd1243dSDimitry Andric status.type() != sys::fs::file_type::directory_file &&
171bdd1243dSDimitry Andric status.type() != sys::fs::file_type::regular_file)
172bdd1243dSDimitry Andric fatal(output, "output cannot be a special file");
173bdd1243dSDimitry Andric if (std::error_code ec = sys::fs::remove(output, /*IgnoreNonExisting=*/true))
174bdd1243dSDimitry Andric if (ec.value() != static_cast<int>(std::errc::directory_not_empty) &&
175bdd1243dSDimitry Andric ec.value() != static_cast<int>(std::errc::file_exists))
176bdd1243dSDimitry Andric fatal(output, ec.message());
177bdd1243dSDimitry Andric return handle(**bufferOrErr, input);
178bdd1243dSDimitry Andric }
179