//===- split-file.cpp - Input splitting utility ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Split input into multiple parts separated by regex '^(.|//)--- ' and extract
// the specified part.
//
//===----------------------------------------------------------------------===//
13bcea3a7aSFangrui Song 
14bcea3a7aSFangrui Song #include "llvm/ADT/DenseMap.h"
15bcea3a7aSFangrui Song #include "llvm/ADT/StringExtras.h"
16bcea3a7aSFangrui Song #include "llvm/ADT/StringRef.h"
17bcea3a7aSFangrui Song #include "llvm/Support/CommandLine.h"
18bcea3a7aSFangrui Song #include "llvm/Support/FileOutputBuffer.h"
19ba7a92c0SNico Weber #include "llvm/Support/FileSystem.h"
20bcea3a7aSFangrui Song #include "llvm/Support/LineIterator.h"
21bcea3a7aSFangrui Song #include "llvm/Support/MemoryBuffer.h"
22bcea3a7aSFangrui Song #include "llvm/Support/Path.h"
23bcea3a7aSFangrui Song #include "llvm/Support/ToolOutputFile.h"
24bcea3a7aSFangrui Song #include "llvm/Support/WithColor.h"
25bcea3a7aSFangrui Song #include <string>
26bcea3a7aSFangrui Song #include <system_error>
27bcea3a7aSFangrui Song 
28bcea3a7aSFangrui Song using namespace llvm;
29bcea3a7aSFangrui Song 
30bcea3a7aSFangrui Song static cl::OptionCategory cat("split-file Options");
31bcea3a7aSFangrui Song 
32bcea3a7aSFangrui Song static cl::opt<std::string> input(cl::Positional, cl::desc("filename"),
33bcea3a7aSFangrui Song                                   cl::cat(cat));
34bcea3a7aSFangrui Song 
35bcea3a7aSFangrui Song static cl::opt<std::string> output(cl::Positional, cl::desc("directory"),
36bcea3a7aSFangrui Song                                    cl::value_desc("directory"), cl::cat(cat));
37bcea3a7aSFangrui Song 
3854e76cb1SFangrui Song static cl::opt<bool> leadingLines("leading-lines",
3954e76cb1SFangrui Song                                     cl::desc("Preserve line numbers"),
4054e76cb1SFangrui Song                                     cl::cat(cat));
4154e76cb1SFangrui Song 
42bcea3a7aSFangrui Song static cl::opt<bool> noLeadingLines("no-leading-lines",
4354e76cb1SFangrui Song                                     cl::desc("Don't preserve line numbers (default)"),
44bcea3a7aSFangrui Song                                     cl::cat(cat));
45bcea3a7aSFangrui Song 
46bcea3a7aSFangrui Song static StringRef toolName;
47bcea3a7aSFangrui Song static int errorCount;
48bcea3a7aSFangrui Song 
fatal(StringRef filename,const Twine & message)496da3d8b1SFangrui Song [[noreturn]] static void fatal(StringRef filename, const Twine &message) {
50bcea3a7aSFangrui Song   if (filename.empty())
51bcea3a7aSFangrui Song     WithColor::error(errs(), toolName) << message << '\n';
52bcea3a7aSFangrui Song   else
53bcea3a7aSFangrui Song     WithColor::error(errs(), toolName) << filename << ": " << message << '\n';
54bcea3a7aSFangrui Song   exit(1);
55bcea3a7aSFangrui Song }
56bcea3a7aSFangrui Song 
error(StringRef filename,int64_t line,const Twine & message)57bcea3a7aSFangrui Song static void error(StringRef filename, int64_t line, const Twine &message) {
58bcea3a7aSFangrui Song   ++errorCount;
59bcea3a7aSFangrui Song   errs() << filename << ':' << line << ": ";
60bcea3a7aSFangrui Song   WithColor::error(errs()) << message << '\n';
61bcea3a7aSFangrui Song }
62bcea3a7aSFangrui Song 
namespace {
/// One extracted part of the input: a half-open byte range plus the number of
/// empty lines to emit in front of it.
struct Part {
  /// First byte of the part's content; stays null when there is no content.
  const char *begin{nullptr};
  /// One past the last byte of the part's content.
  const char *end{nullptr};
  /// Number of line terminators written before the content.
  int64_t leadingLines{0};
};
} // namespace
70bcea3a7aSFangrui Song 
handle(MemoryBuffer & inputBuf,StringRef input)71bcea3a7aSFangrui Song static int handle(MemoryBuffer &inputBuf, StringRef input) {
72bcea3a7aSFangrui Song   DenseMap<StringRef, Part> partToBegin;
73bcea3a7aSFangrui Song   StringRef lastPart, separator;
74*13fa17dbSChris Bieneman   StringRef EOL = inputBuf.getBuffer().detectEOL();
75bcea3a7aSFangrui Song   for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) {
76bcea3a7aSFangrui Song     const int64_t lineNo = i.line_number();
77bcea3a7aSFangrui Song     const StringRef line = *i++;
78bcea3a7aSFangrui Song     const size_t markerLen = line.startswith("//") ? 6 : 5;
79bcea3a7aSFangrui Song     if (!(line.size() >= markerLen &&
80bcea3a7aSFangrui Song           line.substr(markerLen - 4).startswith("--- ")))
81bcea3a7aSFangrui Song       continue;
82bcea3a7aSFangrui Song     separator = line.substr(0, markerLen);
83bcea3a7aSFangrui Song     const StringRef partName = line.substr(markerLen);
84bcea3a7aSFangrui Song     if (partName.empty()) {
85bcea3a7aSFangrui Song       error(input, lineNo, "empty part name");
86bcea3a7aSFangrui Song       continue;
87bcea3a7aSFangrui Song     }
88bcea3a7aSFangrui Song     if (isSpace(partName.front()) || isSpace(partName.back())) {
89bcea3a7aSFangrui Song       error(input, lineNo, "part name cannot have leading or trailing space");
90bcea3a7aSFangrui Song       continue;
91bcea3a7aSFangrui Song     }
92bcea3a7aSFangrui Song 
93bcea3a7aSFangrui Song     auto res = partToBegin.try_emplace(partName);
94bcea3a7aSFangrui Song     if (!res.second) {
95bcea3a7aSFangrui Song       error(input, lineNo,
96bcea3a7aSFangrui Song             "'" + separator + partName + "' occurs more than once");
97bcea3a7aSFangrui Song       continue;
98bcea3a7aSFangrui Song     }
99bcea3a7aSFangrui Song     if (!lastPart.empty())
100bcea3a7aSFangrui Song       partToBegin[lastPart].end = line.data();
101bcea3a7aSFangrui Song     Part &cur = res.first->second;
102bcea3a7aSFangrui Song     if (!i.is_at_eof())
103bcea3a7aSFangrui Song       cur.begin = i->data();
10454e76cb1SFangrui Song     // If --leading-lines is specified, numEmptyLines is 0. Append newlines so
10554e76cb1SFangrui Song     // that the extracted part preserves line numbers.
10654e76cb1SFangrui Song     cur.leadingLines = leadingLines ? i.line_number() - 1 : 0;
107bcea3a7aSFangrui Song 
108bcea3a7aSFangrui Song     lastPart = partName;
109bcea3a7aSFangrui Song   }
110bcea3a7aSFangrui Song   if (lastPart.empty())
111bcea3a7aSFangrui Song     fatal(input, "no part separator was found");
112bcea3a7aSFangrui Song   if (errorCount)
113bcea3a7aSFangrui Song     return 1;
114bcea3a7aSFangrui Song   partToBegin[lastPart].end = inputBuf.getBufferEnd();
115bcea3a7aSFangrui Song 
116bcea3a7aSFangrui Song   std::vector<std::unique_ptr<ToolOutputFile>> outputFiles;
117bcea3a7aSFangrui Song   SmallString<256> partPath;
118bcea3a7aSFangrui Song   for (auto &keyValue : partToBegin) {
119bcea3a7aSFangrui Song     partPath.clear();
120bcea3a7aSFangrui Song     sys::path::append(partPath, output, keyValue.first);
121bcea3a7aSFangrui Song     std::error_code ec =
122bcea3a7aSFangrui Song         sys::fs::create_directories(sys::path::parent_path(partPath));
123bcea3a7aSFangrui Song     if (ec)
124bcea3a7aSFangrui Song       fatal(input, ec.message());
125bcea3a7aSFangrui Song     auto f = std::make_unique<ToolOutputFile>(partPath.str(), ec,
126bcea3a7aSFangrui Song                                               llvm::sys::fs::OF_None);
127bcea3a7aSFangrui Song     if (!f)
128bcea3a7aSFangrui Song       fatal(input, ec.message());
129bcea3a7aSFangrui Song 
130bcea3a7aSFangrui Song     Part &part = keyValue.second;
131bcea3a7aSFangrui Song     for (int64_t i = 0; i != part.leadingLines; ++i)
132*13fa17dbSChris Bieneman       (*f).os() << EOL;
133bcea3a7aSFangrui Song     if (part.begin)
134bcea3a7aSFangrui Song       (*f).os().write(part.begin, part.end - part.begin);
135bcea3a7aSFangrui Song     outputFiles.push_back(std::move(f));
136bcea3a7aSFangrui Song   }
137bcea3a7aSFangrui Song 
138bcea3a7aSFangrui Song   for (std::unique_ptr<ToolOutputFile> &outputFile : outputFiles)
139bcea3a7aSFangrui Song     outputFile->keep();
140bcea3a7aSFangrui Song   return 0;
141bcea3a7aSFangrui Song }
142bcea3a7aSFangrui Song 
main(int argc,const char ** argv)143bcea3a7aSFangrui Song int main(int argc, const char **argv) {
144bcea3a7aSFangrui Song   toolName = sys::path::stem(argv[0]);
145bcea3a7aSFangrui Song   cl::HideUnrelatedOptions({&cat});
146bcea3a7aSFangrui Song   cl::ParseCommandLineOptions(
147bcea3a7aSFangrui Song       argc, argv,
148bcea3a7aSFangrui Song       "Split input into multiple parts separated by regex '^(.|//)--- ' and "
149bcea3a7aSFangrui Song       "extract the part specified by '^(.|//)--- <part>'\n",
150bcea3a7aSFangrui Song       nullptr,
151bcea3a7aSFangrui Song       /*EnvVar=*/nullptr,
152bcea3a7aSFangrui Song       /*LongOptionsUseDoubleDash=*/true);
153bcea3a7aSFangrui Song 
154bcea3a7aSFangrui Song   if (input.empty())
155bcea3a7aSFangrui Song     fatal("", "input filename is not specified");
156bcea3a7aSFangrui Song   if (output.empty())
157bcea3a7aSFangrui Song     fatal("", "output directory is not specified");
158bcea3a7aSFangrui Song   ErrorOr<std::unique_ptr<MemoryBuffer>> bufferOrErr =
159bcea3a7aSFangrui Song       MemoryBuffer::getFileOrSTDIN(input);
160bcea3a7aSFangrui Song   if (std::error_code ec = bufferOrErr.getError())
161bcea3a7aSFangrui Song     fatal(input, ec.message());
162bcea3a7aSFangrui Song 
163bcea3a7aSFangrui Song   // Delete output if it is a file or an empty directory, so that we can create
164bcea3a7aSFangrui Song   // a directory.
165bcea3a7aSFangrui Song   sys::fs::file_status status;
166bcea3a7aSFangrui Song   if (std::error_code ec = sys::fs::status(output, status))
167bcea3a7aSFangrui Song     if (ec.value() != static_cast<int>(std::errc::no_such_file_or_directory))
168bcea3a7aSFangrui Song       fatal(output, ec.message());
169bcea3a7aSFangrui Song   if (status.type() != sys::fs::file_type::file_not_found &&
170bcea3a7aSFangrui Song       status.type() != sys::fs::file_type::directory_file &&
171bcea3a7aSFangrui Song       status.type() != sys::fs::file_type::regular_file)
172bcea3a7aSFangrui Song     fatal(output, "output cannot be a special file");
173bcea3a7aSFangrui Song   if (std::error_code ec = sys::fs::remove(output, /*IgnoreNonExisting=*/true))
174dbc468dcSFangrui Song     if (ec.value() != static_cast<int>(std::errc::directory_not_empty) &&
175dbc468dcSFangrui Song         ec.value() != static_cast<int>(std::errc::file_exists))
176bcea3a7aSFangrui Song       fatal(output, ec.message());
177bcea3a7aSFangrui Song   return handle(**bufferOrErr, input);
178bcea3a7aSFangrui Song }
179