//===-- FileCollector.cpp ---------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
886814bf6SAlex Lorenz
986814bf6SAlex Lorenz #include "llvm/Support/FileCollector.h"
1086814bf6SAlex Lorenz #include "llvm/ADT/SmallString.h"
11892df9e7SSimon Pilgrim #include "llvm/ADT/Twine.h"
1286814bf6SAlex Lorenz #include "llvm/Support/FileSystem.h"
1386814bf6SAlex Lorenz #include "llvm/Support/Path.h"
1486814bf6SAlex Lorenz #include "llvm/Support/Process.h"
1586814bf6SAlex Lorenz
1686814bf6SAlex Lorenz using namespace llvm;
1786814bf6SAlex Lorenz
18f44fb130SJonas Devlieghere FileCollectorBase::FileCollectorBase() = default;
19f44fb130SJonas Devlieghere FileCollectorBase::~FileCollectorBase() = default;
20f44fb130SJonas Devlieghere
addFile(const Twine & File)21f44fb130SJonas Devlieghere void FileCollectorBase::addFile(const Twine &File) {
22f44fb130SJonas Devlieghere std::lock_guard<std::mutex> lock(Mutex);
23f44fb130SJonas Devlieghere std::string FileStr = File.str();
24f44fb130SJonas Devlieghere if (markAsSeen(FileStr))
25f44fb130SJonas Devlieghere addFileImpl(FileStr);
26f44fb130SJonas Devlieghere }
27f44fb130SJonas Devlieghere
addDirectory(const Twine & Dir)28f44fb130SJonas Devlieghere void FileCollectorBase::addDirectory(const Twine &Dir) {
29f44fb130SJonas Devlieghere assert(sys::fs::is_directory(Dir));
30f44fb130SJonas Devlieghere std::error_code EC;
31f44fb130SJonas Devlieghere addDirectoryImpl(Dir, vfs::getRealFileSystem(), EC);
32f44fb130SJonas Devlieghere }
33f44fb130SJonas Devlieghere
isCaseSensitivePath(StringRef Path)34eb1b4c5dSJonas Devlieghere static bool isCaseSensitivePath(StringRef Path) {
35eb1b4c5dSJonas Devlieghere SmallString<256> TmpDest = Path, UpperDest, RealDest;
3686814bf6SAlex Lorenz
3786814bf6SAlex Lorenz // Remove component traversals, links, etc.
38f88b502dSRaphael Isemann if (sys::fs::real_path(Path, TmpDest))
3986814bf6SAlex Lorenz return true; // Current default value in vfs.yaml
40eb1b4c5dSJonas Devlieghere Path = TmpDest;
4186814bf6SAlex Lorenz
4286814bf6SAlex Lorenz // Change path to all upper case and ask for its real path, if the latter
4386814bf6SAlex Lorenz // exists and is equal to path, it's not case sensitive. Default to case
4486814bf6SAlex Lorenz // sensitive in the absence of real_path, since this is the YAMLVFSWriter
4586814bf6SAlex Lorenz // default.
46eb1b4c5dSJonas Devlieghere UpperDest = Path.upper();
47f88b502dSRaphael Isemann if (!sys::fs::real_path(UpperDest, RealDest) && Path.equals(RealDest))
4886814bf6SAlex Lorenz return false;
4986814bf6SAlex Lorenz return true;
5086814bf6SAlex Lorenz }
5186814bf6SAlex Lorenz
FileCollector(std::string Root,std::string OverlayRoot)52eb1b4c5dSJonas Devlieghere FileCollector::FileCollector(std::string Root, std::string OverlayRoot)
53eb1b4c5dSJonas Devlieghere : Root(std::move(Root)), OverlayRoot(std::move(OverlayRoot)) {
5486814bf6SAlex Lorenz }
5586814bf6SAlex Lorenz
updateWithRealPath(SmallVectorImpl<char> & Path)56080952a9SDuncan P. N. Exon Smith void FileCollector::PathCanonicalizer::updateWithRealPath(
57080952a9SDuncan P. N. Exon Smith SmallVectorImpl<char> &Path) {
58080952a9SDuncan P. N. Exon Smith StringRef SrcPath(Path.begin(), Path.size());
59080952a9SDuncan P. N. Exon Smith StringRef Filename = sys::path::filename(SrcPath);
60080952a9SDuncan P. N. Exon Smith StringRef Directory = sys::path::parent_path(SrcPath);
6186814bf6SAlex Lorenz
62080952a9SDuncan P. N. Exon Smith // Use real_path to fix any symbolic link component present in the directory
63080952a9SDuncan P. N. Exon Smith // part of the path, caching the search because computing the real path is
64080952a9SDuncan P. N. Exon Smith // expensive.
65080952a9SDuncan P. N. Exon Smith SmallString<256> RealPath;
66080952a9SDuncan P. N. Exon Smith auto DirWithSymlink = CachedDirs.find(Directory);
67080952a9SDuncan P. N. Exon Smith if (DirWithSymlink == CachedDirs.end()) {
68080952a9SDuncan P. N. Exon Smith // FIXME: Should this be a call to FileSystem::getRealpath(), in some
69080952a9SDuncan P. N. Exon Smith // cases? What if there is nothing on disk?
70080952a9SDuncan P. N. Exon Smith if (sys::fs::real_path(Directory, RealPath))
71080952a9SDuncan P. N. Exon Smith return;
72080952a9SDuncan P. N. Exon Smith CachedDirs[Directory] = std::string(RealPath.str());
7386814bf6SAlex Lorenz } else {
74eb1b4c5dSJonas Devlieghere RealPath = DirWithSymlink->second;
7586814bf6SAlex Lorenz }
7686814bf6SAlex Lorenz
77080952a9SDuncan P. N. Exon Smith // Finish recreating the path by appending the original filename, since we
78080952a9SDuncan P. N. Exon Smith // don't need to resolve symlinks in the filename.
79080952a9SDuncan P. N. Exon Smith //
80080952a9SDuncan P. N. Exon Smith // FIXME: If we can cope with this, maybe we can cope without calling
81080952a9SDuncan P. N. Exon Smith // getRealPath() at all when there's no ".." component.
82080952a9SDuncan P. N. Exon Smith sys::path::append(RealPath, Filename);
83080952a9SDuncan P. N. Exon Smith
84080952a9SDuncan P. N. Exon Smith // Swap to create the output.
85080952a9SDuncan P. N. Exon Smith Path.swap(RealPath);
8686814bf6SAlex Lorenz }
8786814bf6SAlex Lorenz
88080952a9SDuncan P. N. Exon Smith /// Make Path absolute.
makeAbsolute(SmallVectorImpl<char> & Path)89080952a9SDuncan P. N. Exon Smith static void makeAbsolute(SmallVectorImpl<char> &Path) {
9086814bf6SAlex Lorenz // We need an absolute src path to append to the root.
91080952a9SDuncan P. N. Exon Smith sys::fs::make_absolute(Path);
9286814bf6SAlex Lorenz
9386814bf6SAlex Lorenz // Canonicalize src to a native path to avoid mixed separator styles.
94080952a9SDuncan P. N. Exon Smith sys::path::native(Path);
9586814bf6SAlex Lorenz
9686814bf6SAlex Lorenz // Remove redundant leading "./" pieces and consecutive separators.
97080952a9SDuncan P. N. Exon Smith Path.erase(Path.begin(), sys::path::remove_leading_dotslash(
98080952a9SDuncan P. N. Exon Smith StringRef(Path.begin(), Path.size()))
99080952a9SDuncan P. N. Exon Smith .begin());
100080952a9SDuncan P. N. Exon Smith }
10186814bf6SAlex Lorenz
102080952a9SDuncan P. N. Exon Smith FileCollector::PathCanonicalizer::PathStorage
canonicalize(StringRef SrcPath)103080952a9SDuncan P. N. Exon Smith FileCollector::PathCanonicalizer::canonicalize(StringRef SrcPath) {
104080952a9SDuncan P. N. Exon Smith PathStorage Paths;
105080952a9SDuncan P. N. Exon Smith Paths.VirtualPath = SrcPath;
106080952a9SDuncan P. N. Exon Smith makeAbsolute(Paths.VirtualPath);
10786814bf6SAlex Lorenz
10886814bf6SAlex Lorenz // If a ".." component is present after a symlink component, remove_dots may
10986814bf6SAlex Lorenz // lead to the wrong real destination path. Let the source be canonicalized
11086814bf6SAlex Lorenz // like that but make sure we always use the real path for the destination.
111080952a9SDuncan P. N. Exon Smith Paths.CopyFrom = Paths.VirtualPath;
112080952a9SDuncan P. N. Exon Smith updateWithRealPath(Paths.CopyFrom);
113080952a9SDuncan P. N. Exon Smith
114080952a9SDuncan P. N. Exon Smith // Canonicalize the virtual path by removing "..", "." components.
115080952a9SDuncan P. N. Exon Smith sys::path::remove_dots(Paths.VirtualPath, /*remove_dot_dot=*/true);
116080952a9SDuncan P. N. Exon Smith
117080952a9SDuncan P. N. Exon Smith return Paths;
118080952a9SDuncan P. N. Exon Smith }
119080952a9SDuncan P. N. Exon Smith
addFileImpl(StringRef SrcPath)120080952a9SDuncan P. N. Exon Smith void FileCollector::addFileImpl(StringRef SrcPath) {
121080952a9SDuncan P. N. Exon Smith PathCanonicalizer::PathStorage Paths = Canonicalizer.canonicalize(SrcPath);
12286814bf6SAlex Lorenz
123eb1b4c5dSJonas Devlieghere SmallString<256> DstPath = StringRef(Root);
124080952a9SDuncan P. N. Exon Smith sys::path::append(DstPath, sys::path::relative_path(Paths.CopyFrom));
12586814bf6SAlex Lorenz
12686814bf6SAlex Lorenz // Always map a canonical src path to its real path into the YAML, by doing
12786814bf6SAlex Lorenz // this we map different virtual src paths to the same entry in the VFS
12886814bf6SAlex Lorenz // overlay, which is a way to emulate symlink inside the VFS; this is also
12986814bf6SAlex Lorenz // needed for correctness, not doing that can lead to module redefinition
13086814bf6SAlex Lorenz // errors.
131080952a9SDuncan P. N. Exon Smith addFileToMapping(Paths.VirtualPath, DstPath);
13286814bf6SAlex Lorenz }
13386814bf6SAlex Lorenz
1344151f2d0SJonas Devlieghere llvm::vfs::directory_iterator
addDirectoryImpl(const llvm::Twine & Dir,IntrusiveRefCntPtr<vfs::FileSystem> FS,std::error_code & EC)1354151f2d0SJonas Devlieghere FileCollector::addDirectoryImpl(const llvm::Twine &Dir,
1364151f2d0SJonas Devlieghere IntrusiveRefCntPtr<vfs::FileSystem> FS,
1374151f2d0SJonas Devlieghere std::error_code &EC) {
1384151f2d0SJonas Devlieghere auto It = FS->dir_begin(Dir, EC);
1394151f2d0SJonas Devlieghere if (EC)
1404151f2d0SJonas Devlieghere return It;
1414151f2d0SJonas Devlieghere addFile(Dir);
1424151f2d0SJonas Devlieghere for (; !EC && It != llvm::vfs::directory_iterator(); It.increment(EC)) {
1434151f2d0SJonas Devlieghere if (It->type() == sys::fs::file_type::regular_file ||
1444151f2d0SJonas Devlieghere It->type() == sys::fs::file_type::directory_file ||
1454151f2d0SJonas Devlieghere It->type() == sys::fs::file_type::symlink_file) {
1464151f2d0SJonas Devlieghere addFile(It->path());
1474151f2d0SJonas Devlieghere }
1484151f2d0SJonas Devlieghere }
1494151f2d0SJonas Devlieghere if (EC)
1504151f2d0SJonas Devlieghere return It;
1514151f2d0SJonas Devlieghere // Return a new iterator.
1524151f2d0SJonas Devlieghere return FS->dir_begin(Dir, EC);
1534151f2d0SJonas Devlieghere }
1544151f2d0SJonas Devlieghere
15586814bf6SAlex Lorenz /// Set the access and modification time for the given file from the given
15686814bf6SAlex Lorenz /// status object.
15786814bf6SAlex Lorenz static std::error_code
copyAccessAndModificationTime(StringRef Filename,const sys::fs::file_status & Stat)158eb1b4c5dSJonas Devlieghere copyAccessAndModificationTime(StringRef Filename,
159eb1b4c5dSJonas Devlieghere const sys::fs::file_status &Stat) {
160eb1b4c5dSJonas Devlieghere int FD;
16186814bf6SAlex Lorenz
162eb1b4c5dSJonas Devlieghere if (auto EC =
163eb1b4c5dSJonas Devlieghere sys::fs::openFileForWrite(Filename, FD, sys::fs::CD_OpenExisting))
164eb1b4c5dSJonas Devlieghere return EC;
16586814bf6SAlex Lorenz
166eb1b4c5dSJonas Devlieghere if (auto EC = sys::fs::setLastAccessAndModificationTime(
167eb1b4c5dSJonas Devlieghere FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime()))
168eb1b4c5dSJonas Devlieghere return EC;
16986814bf6SAlex Lorenz
170eb1b4c5dSJonas Devlieghere if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD))
171eb1b4c5dSJonas Devlieghere return EC;
17286814bf6SAlex Lorenz
17386814bf6SAlex Lorenz return {};
17486814bf6SAlex Lorenz }
17586814bf6SAlex Lorenz
copyFiles(bool StopOnError)176eb1b4c5dSJonas Devlieghere std::error_code FileCollector::copyFiles(bool StopOnError) {
1774c53f420SJan Korous auto Err = sys::fs::create_directories(Root, /*IgnoreExisting=*/true);
1784c53f420SJan Korous if (Err) {
1794c53f420SJan Korous return Err;
1804c53f420SJan Korous }
1814c53f420SJan Korous
1821e43cab3SJan Korous std::lock_guard<std::mutex> lock(Mutex);
1831e43cab3SJan Korous
184eb1b4c5dSJonas Devlieghere for (auto &entry : VFSWriter.getMappings()) {
185b680422eSAlex Lorenz // Get the status of the original file/directory.
186b680422eSAlex Lorenz sys::fs::file_status Stat;
187b680422eSAlex Lorenz if (std::error_code EC = sys::fs::status(entry.VPath, Stat)) {
188b680422eSAlex Lorenz if (StopOnError)
189b680422eSAlex Lorenz return EC;
190b680422eSAlex Lorenz continue;
191b680422eSAlex Lorenz }
192b680422eSAlex Lorenz
193295eb54dSJonas Devlieghere // Continue if the file doesn't exist.
194295eb54dSJonas Devlieghere if (Stat.type() == sys::fs::file_type::file_not_found)
195295eb54dSJonas Devlieghere continue;
196295eb54dSJonas Devlieghere
197295eb54dSJonas Devlieghere // Create directory tree.
198295eb54dSJonas Devlieghere if (std::error_code EC =
199295eb54dSJonas Devlieghere sys::fs::create_directories(sys::path::parent_path(entry.RPath),
200295eb54dSJonas Devlieghere /*IgnoreExisting=*/true)) {
201295eb54dSJonas Devlieghere if (StopOnError)
202295eb54dSJonas Devlieghere return EC;
203295eb54dSJonas Devlieghere }
204295eb54dSJonas Devlieghere
205b680422eSAlex Lorenz if (Stat.type() == sys::fs::file_type::directory_file) {
206b680422eSAlex Lorenz // Construct a directory when it's just a directory entry.
207b680422eSAlex Lorenz if (std::error_code EC =
208b680422eSAlex Lorenz sys::fs::create_directories(entry.RPath,
209b680422eSAlex Lorenz /*IgnoreExisting=*/true)) {
210b680422eSAlex Lorenz if (StopOnError)
211b680422eSAlex Lorenz return EC;
212b680422eSAlex Lorenz }
213b680422eSAlex Lorenz continue;
214b680422eSAlex Lorenz }
215b680422eSAlex Lorenz
21686814bf6SAlex Lorenz // Copy file over.
217eb1b4c5dSJonas Devlieghere if (std::error_code EC = sys::fs::copy_file(entry.VPath, entry.RPath)) {
218eb1b4c5dSJonas Devlieghere if (StopOnError)
219eb1b4c5dSJonas Devlieghere return EC;
22086814bf6SAlex Lorenz }
22186814bf6SAlex Lorenz
22286814bf6SAlex Lorenz // Copy over permissions.
22386814bf6SAlex Lorenz if (auto perms = sys::fs::getPermissions(entry.VPath)) {
224eb1b4c5dSJonas Devlieghere if (std::error_code EC = sys::fs::setPermissions(entry.RPath, *perms)) {
225eb1b4c5dSJonas Devlieghere if (StopOnError)
226eb1b4c5dSJonas Devlieghere return EC;
22786814bf6SAlex Lorenz }
22886814bf6SAlex Lorenz }
22986814bf6SAlex Lorenz
23086814bf6SAlex Lorenz // Copy over modification time.
231eb1b4c5dSJonas Devlieghere copyAccessAndModificationTime(entry.RPath, Stat);
23286814bf6SAlex Lorenz }
23386814bf6SAlex Lorenz return {};
23486814bf6SAlex Lorenz }
23586814bf6SAlex Lorenz
writeMapping(StringRef MappingFile)2364151f2d0SJonas Devlieghere std::error_code FileCollector::writeMapping(StringRef MappingFile) {
237eb1b4c5dSJonas Devlieghere std::lock_guard<std::mutex> lock(Mutex);
23886814bf6SAlex Lorenz
239eb1b4c5dSJonas Devlieghere VFSWriter.setOverlayDir(OverlayRoot);
240eb1b4c5dSJonas Devlieghere VFSWriter.setCaseSensitivity(isCaseSensitivePath(OverlayRoot));
241eb1b4c5dSJonas Devlieghere VFSWriter.setUseExternalNames(false);
24286814bf6SAlex Lorenz
243eb1b4c5dSJonas Devlieghere std::error_code EC;
244*82b3e28eSAbhina Sreeskantharajan raw_fd_ostream os(MappingFile, EC, sys::fs::OF_TextWithCRLF);
245eb1b4c5dSJonas Devlieghere if (EC)
246eb1b4c5dSJonas Devlieghere return EC;
24786814bf6SAlex Lorenz
248eb1b4c5dSJonas Devlieghere VFSWriter.write(os);
24986814bf6SAlex Lorenz
25086814bf6SAlex Lorenz return {};
25186814bf6SAlex Lorenz }
2529e38f4d9SAlex Lorenz
2534151f2d0SJonas Devlieghere namespace llvm {
2549e38f4d9SAlex Lorenz
2559e38f4d9SAlex Lorenz class FileCollectorFileSystem : public vfs::FileSystem {
2569e38f4d9SAlex Lorenz public:
FileCollectorFileSystem(IntrusiveRefCntPtr<vfs::FileSystem> FS,std::shared_ptr<FileCollector> Collector)2579e38f4d9SAlex Lorenz explicit FileCollectorFileSystem(IntrusiveRefCntPtr<vfs::FileSystem> FS,
2589e38f4d9SAlex Lorenz std::shared_ptr<FileCollector> Collector)
2599e38f4d9SAlex Lorenz : FS(std::move(FS)), Collector(std::move(Collector)) {}
2609e38f4d9SAlex Lorenz
status(const Twine & Path)2619e38f4d9SAlex Lorenz llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override {
2629e38f4d9SAlex Lorenz auto Result = FS->status(Path);
2639e38f4d9SAlex Lorenz if (Result && Result->exists())
2649e38f4d9SAlex Lorenz Collector->addFile(Path);
2659e38f4d9SAlex Lorenz return Result;
2669e38f4d9SAlex Lorenz }
2679e38f4d9SAlex Lorenz
2689e38f4d9SAlex Lorenz llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
openFileForRead(const Twine & Path)2699e38f4d9SAlex Lorenz openFileForRead(const Twine &Path) override {
2709e38f4d9SAlex Lorenz auto Result = FS->openFileForRead(Path);
2719e38f4d9SAlex Lorenz if (Result && *Result)
2729e38f4d9SAlex Lorenz Collector->addFile(Path);
2739e38f4d9SAlex Lorenz return Result;
2749e38f4d9SAlex Lorenz }
2759e38f4d9SAlex Lorenz
dir_begin(const llvm::Twine & Dir,std::error_code & EC)2769e38f4d9SAlex Lorenz llvm::vfs::directory_iterator dir_begin(const llvm::Twine &Dir,
2779e38f4d9SAlex Lorenz std::error_code &EC) override {
2784151f2d0SJonas Devlieghere return Collector->addDirectoryImpl(Dir, FS, EC);
2799e38f4d9SAlex Lorenz }
2809e38f4d9SAlex Lorenz
getRealPath(const Twine & Path,SmallVectorImpl<char> & Output) const2819e38f4d9SAlex Lorenz std::error_code getRealPath(const Twine &Path,
2829e38f4d9SAlex Lorenz SmallVectorImpl<char> &Output) const override {
2839e38f4d9SAlex Lorenz auto EC = FS->getRealPath(Path, Output);
2849e38f4d9SAlex Lorenz if (!EC) {
2859e38f4d9SAlex Lorenz Collector->addFile(Path);
2869e38f4d9SAlex Lorenz if (Output.size() > 0)
2879e38f4d9SAlex Lorenz Collector->addFile(Output);
2889e38f4d9SAlex Lorenz }
2899e38f4d9SAlex Lorenz return EC;
2909e38f4d9SAlex Lorenz }
2919e38f4d9SAlex Lorenz
isLocal(const Twine & Path,bool & Result)2929e38f4d9SAlex Lorenz std::error_code isLocal(const Twine &Path, bool &Result) override {
2939e38f4d9SAlex Lorenz return FS->isLocal(Path, Result);
2949e38f4d9SAlex Lorenz }
2959e38f4d9SAlex Lorenz
getCurrentWorkingDirectory() const2969e38f4d9SAlex Lorenz llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
2979e38f4d9SAlex Lorenz return FS->getCurrentWorkingDirectory();
2989e38f4d9SAlex Lorenz }
2999e38f4d9SAlex Lorenz
setCurrentWorkingDirectory(const llvm::Twine & Path)3009e38f4d9SAlex Lorenz std::error_code setCurrentWorkingDirectory(const llvm::Twine &Path) override {
3019e38f4d9SAlex Lorenz return FS->setCurrentWorkingDirectory(Path);
3029e38f4d9SAlex Lorenz }
3039e38f4d9SAlex Lorenz
3049e38f4d9SAlex Lorenz private:
3059e38f4d9SAlex Lorenz IntrusiveRefCntPtr<vfs::FileSystem> FS;
3069e38f4d9SAlex Lorenz std::shared_ptr<FileCollector> Collector;
3079e38f4d9SAlex Lorenz };
3089e38f4d9SAlex Lorenz
3094151f2d0SJonas Devlieghere } // namespace llvm
3109e38f4d9SAlex Lorenz
3119e38f4d9SAlex Lorenz IntrusiveRefCntPtr<vfs::FileSystem>
createCollectorVFS(IntrusiveRefCntPtr<vfs::FileSystem> BaseFS,std::shared_ptr<FileCollector> Collector)3129e38f4d9SAlex Lorenz FileCollector::createCollectorVFS(IntrusiveRefCntPtr<vfs::FileSystem> BaseFS,
3139e38f4d9SAlex Lorenz std::shared_ptr<FileCollector> Collector) {
3149e38f4d9SAlex Lorenz return new FileCollectorFileSystem(std::move(BaseFS), std::move(Collector));
3159e38f4d9SAlex Lorenz }
316