1 //===-- FileSpec.cpp --------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/FileSpec.h"
10 #include "lldb/Utility/RegularExpression.h"
11 #include "lldb/Utility/Stream.h"
12 
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/ADT/Triple.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Support/ErrorOr.h"
19 #include "llvm/Support/FileSystem.h"
20 #include "llvm/Support/Program.h"
21 #include "llvm/Support/raw_ostream.h"
22 
23 #include <algorithm>
24 #include <system_error>
25 #include <vector>
26 
27 #include <assert.h>
28 #include <limits.h>
29 #include <stdio.h>
30 #include <string.h>
31 
32 using namespace lldb;
33 using namespace lldb_private;
34 
35 namespace {
36 
37 static constexpr FileSpec::Style GetNativeStyle() {
38 #if defined(_WIN32)
39   return FileSpec::Style::windows;
40 #else
41   return FileSpec::Style::posix;
42 #endif
43 }
44 
45 bool PathStyleIsPosix(FileSpec::Style style) {
46   return (style == FileSpec::Style::posix ||
47           (style == FileSpec::Style::native &&
48            GetNativeStyle() == FileSpec::Style::posix));
49 }
50 
51 const char *GetPathSeparators(FileSpec::Style style) {
52   return llvm::sys::path::get_separator(style).data();
53 }
54 
55 char GetPreferredPathSeparator(FileSpec::Style style) {
56   return GetPathSeparators(style)[0];
57 }
58 
59 void Denormalize(llvm::SmallVectorImpl<char> &path, FileSpec::Style style) {
60   if (PathStyleIsPosix(style))
61     return;
62 
63   std::replace(path.begin(), path.end(), '/', '\\');
64 }
65 
66 } // end anonymous namespace
67 
68 FileSpec::FileSpec() : m_style(GetNativeStyle()) {}
69 
70 // Default constructor that can take an optional full path to a file on disk.
71 FileSpec::FileSpec(llvm::StringRef path, Style style) : m_style(style) {
72   SetFile(path, style);
73 }
74 
75 FileSpec::FileSpec(llvm::StringRef path, const llvm::Triple &triple)
76     : FileSpec{path, triple.isOSWindows() ? Style::windows : Style::posix} {}
77 
78 // Copy constructor
79 FileSpec::FileSpec(const FileSpec *rhs) : m_directory(), m_filename() {
80   if (rhs)
81     *this = *rhs;
82 }
83 
84 // Virtual destructor in case anyone inherits from this class.
85 FileSpec::~FileSpec() {}
86 
87 namespace {
88 /// Safely get a character at the specified index.
89 ///
90 /// \param[in] path
91 ///     A full, partial, or relative path to a file.
92 ///
93 /// \param[in] i
94 ///     An index into path which may or may not be valid.
95 ///
96 /// \return
97 ///   The character at index \a i if the index is valid, or 0 if
98 ///   the index is not valid.
99 inline char safeCharAtIndex(const llvm::StringRef &path, size_t i) {
100   if (i < path.size())
101     return path[i];
102   return 0;
103 }
104 
105 /// Check if a path needs to be normalized.
106 ///
107 /// Check if a path needs to be normalized. We currently consider a
108 /// path to need normalization if any of the following are true
109 ///  - path contains "/./"
110 ///  - path contains "/../"
111 ///  - path contains "//"
112 ///  - path ends with "/"
113 /// Paths that start with "./" or with "../" are not considered to
114 /// need normalization since we aren't trying to resolve the path,
115 /// we are just trying to remove redundant things from the path.
116 ///
117 /// \param[in] path
118 ///     A full, partial, or relative path to a file.
119 ///
120 /// \return
121 ///   Returns \b true if the path needs to be normalized.
122 bool needsNormalization(const llvm::StringRef &path) {
123   if (path.empty())
124     return false;
125   // We strip off leading "." values so these paths need to be normalized
126   if (path[0] == '.')
127     return true;
128   for (auto i = path.find_first_of("\\/"); i != llvm::StringRef::npos;
129        i = path.find_first_of("\\/", i + 1)) {
130     const auto next = safeCharAtIndex(path, i+1);
131     switch (next) {
132       case 0:
133         // path separator char at the end of the string which should be
134         // stripped unless it is the one and only character
135         return i > 0;
136       case '/':
137       case '\\':
138         // two path separator chars in the middle of a path needs to be
139         // normalized
140         if (i > 0)
141           return true;
142         ++i;
143         break;
144 
145       case '.': {
146           const auto next_next = safeCharAtIndex(path, i+2);
147           switch (next_next) {
148             default: break;
149             case 0: return true; // ends with "/."
150             case '/':
151             case '\\':
152               return true; // contains "/./"
153             case '.': {
154               const auto next_next_next = safeCharAtIndex(path, i+3);
155               switch (next_next_next) {
156                 default: break;
157                 case 0: return true; // ends with "/.."
158                 case '/':
159                 case '\\':
160                   return true; // contains "/../"
161               }
162               break;
163             }
164           }
165         }
166         break;
167 
168       default:
169         break;
170     }
171   }
172   return false;
173 }
174 
175 
176 }
177 
178 void FileSpec::SetFile(llvm::StringRef pathname) { SetFile(pathname, m_style); }
179 
180 // Update the contents of this object with a new path. The path will be split
181 // up into a directory and filename and stored as uniqued string values for
182 // quick comparison and efficient memory usage.
183 void FileSpec::SetFile(llvm::StringRef pathname, Style style) {
184   m_filename.Clear();
185   m_directory.Clear();
186   m_is_resolved = false;
187   m_style = (style == Style::native) ? GetNativeStyle() : style;
188 
189   if (pathname.empty())
190     return;
191 
192   llvm::SmallString<128> resolved(pathname);
193 
194   // Normalize the path by removing ".", ".." and other redundant components.
195   if (needsNormalization(resolved))
196     llvm::sys::path::remove_dots(resolved, true, m_style);
197 
198   // Normalize back slashes to forward slashes
199   if (m_style == Style::windows)
200     std::replace(resolved.begin(), resolved.end(), '\\', '/');
201 
202   if (resolved.empty()) {
203     // If we have no path after normalization set the path to the current
204     // directory. This matches what python does and also a few other path
205     // utilities.
206     m_filename.SetString(".");
207     return;
208   }
209 
210   // Split path into filename and directory. We rely on the underlying char
211   // pointer to be nullptr when the components are empty.
212   llvm::StringRef filename = llvm::sys::path::filename(resolved, m_style);
213   if(!filename.empty())
214     m_filename.SetString(filename);
215 
216   llvm::StringRef directory = llvm::sys::path::parent_path(resolved, m_style);
217   if(!directory.empty())
218     m_directory.SetString(directory);
219 }
220 
221 void FileSpec::SetFile(llvm::StringRef path, const llvm::Triple &triple) {
222   return SetFile(path, triple.isOSWindows() ? Style::windows : Style::posix);
223 }
224 
225 // Convert to pointer operator. This allows code to check any FileSpec objects
226 // to see if they contain anything valid using code such as:
227 //
228 //  if (file_spec)
229 //  {}
230 FileSpec::operator bool() const { return m_filename || m_directory; }
231 
232 // Logical NOT operator. This allows code to check any FileSpec objects to see
233 // if they are invalid using code such as:
234 //
235 //  if (!file_spec)
236 //  {}
237 bool FileSpec::operator!() const { return !m_directory && !m_filename; }
238 
239 bool FileSpec::DirectoryEquals(const FileSpec &rhs) const {
240   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
241   return ConstString::Equals(m_directory, rhs.m_directory, case_sensitive);
242 }
243 
244 bool FileSpec::FileEquals(const FileSpec &rhs) const {
245   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
246   return ConstString::Equals(m_filename, rhs.m_filename, case_sensitive);
247 }
248 
249 // Equal to operator
250 bool FileSpec::operator==(const FileSpec &rhs) const {
251   return FileEquals(rhs) && DirectoryEquals(rhs);
252 }
253 
254 // Not equal to operator
255 bool FileSpec::operator!=(const FileSpec &rhs) const { return !(*this == rhs); }
256 
257 // Less than operator
258 bool FileSpec::operator<(const FileSpec &rhs) const {
259   return FileSpec::Compare(*this, rhs, true) < 0;
260 }
261 
262 // Dump a FileSpec object to a stream
263 Stream &lldb_private::operator<<(Stream &s, const FileSpec &f) {
264   f.Dump(&s);
265   return s;
266 }
267 
268 // Clear this object by releasing both the directory and filename string values
269 // and making them both the empty string.
270 void FileSpec::Clear() {
271   m_directory.Clear();
272   m_filename.Clear();
273 }
274 
275 // Compare two FileSpec objects. If "full" is true, then both the directory and
276 // the filename must match. If "full" is false, then the directory names for
277 // "a" and "b" are only compared if they are both non-empty. This allows a
278 // FileSpec object to only contain a filename and it can match FileSpec objects
279 // that have matching filenames with different paths.
280 //
281 // Return -1 if the "a" is less than "b", 0 if "a" is equal to "b" and "1" if
282 // "a" is greater than "b".
283 int FileSpec::Compare(const FileSpec &a, const FileSpec &b, bool full) {
284   int result = 0;
285 
286   // case sensitivity of compare
287   const bool case_sensitive = a.IsCaseSensitive() || b.IsCaseSensitive();
288 
289   // If full is true, then we must compare both the directory and filename.
290 
291   // If full is false, then if either directory is empty, then we match on the
292   // basename only, and if both directories have valid values, we still do a
293   // full compare. This allows for matching when we just have a filename in one
294   // of the FileSpec objects.
295 
296   if (full || (a.m_directory && b.m_directory)) {
297     result = ConstString::Compare(a.m_directory, b.m_directory, case_sensitive);
298     if (result)
299       return result;
300   }
301   return ConstString::Compare(a.m_filename, b.m_filename, case_sensitive);
302 }
303 
304 bool FileSpec::Equal(const FileSpec &a, const FileSpec &b, bool full) {
305   if (full || (a.GetDirectory() && b.GetDirectory()))
306     return a == b;
307 
308   return a.FileEquals(b);
309 }
310 
311 bool FileSpec::Match(const FileSpec &pattern, const FileSpec &file) {
312   if (pattern.GetDirectory())
313     return pattern == file;
314   if (pattern.GetFilename())
315     return pattern.FileEquals(file);
316   return true;
317 }
318 
319 llvm::Optional<FileSpec::Style> FileSpec::GuessPathStyle(llvm::StringRef absolute_path) {
320   if (absolute_path.startswith("/"))
321     return Style::posix;
322   if (absolute_path.startswith(R"(\\)"))
323     return Style::windows;
324   if (absolute_path.size() > 3 && llvm::isAlpha(absolute_path[0]) &&
325       absolute_path.substr(1, 2) == R"(:\)")
326     return Style::windows;
327   return llvm::None;
328 }
329 
330 // Dump the object to the supplied stream. If the object contains a valid
331 // directory name, it will be displayed followed by a directory delimiter, and
332 // the filename.
333 void FileSpec::Dump(Stream *s) const {
334   if (s) {
335     std::string path{GetPath(true)};
336     s->PutCString(path);
337     char path_separator = GetPreferredPathSeparator(m_style);
338     if (!m_filename && !path.empty() && path.back() != path_separator)
339       s->PutChar(path_separator);
340   }
341 }
342 
343 FileSpec::Style FileSpec::GetPathStyle() const { return m_style; }
344 
345 // Directory string get accessor.
346 ConstString &FileSpec::GetDirectory() { return m_directory; }
347 
348 // Directory string const get accessor.
349 ConstString FileSpec::GetDirectory() const { return m_directory; }
350 
351 // Filename string get accessor.
352 ConstString &FileSpec::GetFilename() { return m_filename; }
353 
354 // Filename string const get accessor.
355 ConstString FileSpec::GetFilename() const { return m_filename; }
356 
357 // Extract the directory and path into a fixed buffer. This is needed as the
358 // directory and path are stored in separate string values.
359 size_t FileSpec::GetPath(char *path, size_t path_max_len,
360                          bool denormalize) const {
361   if (!path)
362     return 0;
363 
364   std::string result = GetPath(denormalize);
365   ::snprintf(path, path_max_len, "%s", result.c_str());
366   return std::min(path_max_len - 1, result.length());
367 }
368 
369 std::string FileSpec::GetPath(bool denormalize) const {
370   llvm::SmallString<64> result;
371   GetPath(result, denormalize);
372   return std::string(result.begin(), result.end());
373 }
374 
375 const char *FileSpec::GetCString(bool denormalize) const {
376   return ConstString{GetPath(denormalize)}.AsCString(nullptr);
377 }
378 
379 void FileSpec::GetPath(llvm::SmallVectorImpl<char> &path,
380                        bool denormalize) const {
381   path.append(m_directory.GetStringRef().begin(),
382               m_directory.GetStringRef().end());
383   // Since the path was normalized and all paths use '/' when stored in these
384   // objects, we don't need to look for the actual syntax specific path
385   // separator, we just look for and insert '/'.
386   if (m_directory && m_filename && m_directory.GetStringRef().back() != '/' &&
387       m_filename.GetStringRef().back() != '/')
388     path.insert(path.end(), '/');
389   path.append(m_filename.GetStringRef().begin(),
390               m_filename.GetStringRef().end());
391   if (denormalize && !path.empty())
392     Denormalize(path, m_style);
393 }
394 
395 ConstString FileSpec::GetFileNameExtension() const {
396   return ConstString(
397       llvm::sys::path::extension(m_filename.GetStringRef(), m_style));
398 }
399 
400 ConstString FileSpec::GetFileNameStrippingExtension() const {
401   return ConstString(llvm::sys::path::stem(m_filename.GetStringRef(), m_style));
402 }
403 
404 // Return the size in bytes that this object takes in memory. This returns the
405 // size in bytes of this object, not any shared string values it may refer to.
406 size_t FileSpec::MemorySize() const {
407   return m_filename.MemorySize() + m_directory.MemorySize();
408 }
409 
410 FileSpec
411 FileSpec::CopyByAppendingPathComponent(llvm::StringRef component) const {
412   FileSpec ret = *this;
413   ret.AppendPathComponent(component);
414   return ret;
415 }
416 
417 FileSpec FileSpec::CopyByRemovingLastPathComponent() const {
418   llvm::SmallString<64> current_path;
419   GetPath(current_path, false);
420   if (llvm::sys::path::has_parent_path(current_path, m_style))
421     return FileSpec(llvm::sys::path::parent_path(current_path, m_style),
422                     m_style);
423   return *this;
424 }
425 
426 ConstString FileSpec::GetLastPathComponent() const {
427   llvm::SmallString<64> current_path;
428   GetPath(current_path, false);
429   return ConstString(llvm::sys::path::filename(current_path, m_style));
430 }
431 
432 void FileSpec::PrependPathComponent(llvm::StringRef component) {
433   llvm::SmallString<64> new_path(component);
434   llvm::SmallString<64> current_path;
435   GetPath(current_path, false);
436   llvm::sys::path::append(new_path,
437                           llvm::sys::path::begin(current_path, m_style),
438                           llvm::sys::path::end(current_path), m_style);
439   SetFile(new_path, m_style);
440 }
441 
442 void FileSpec::PrependPathComponent(const FileSpec &new_path) {
443   return PrependPathComponent(new_path.GetPath(false));
444 }
445 
446 void FileSpec::AppendPathComponent(llvm::StringRef component) {
447   llvm::SmallString<64> current_path;
448   GetPath(current_path, false);
449   llvm::sys::path::append(current_path, m_style, component);
450   SetFile(current_path, m_style);
451 }
452 
453 void FileSpec::AppendPathComponent(const FileSpec &new_path) {
454   return AppendPathComponent(new_path.GetPath(false));
455 }
456 
457 bool FileSpec::RemoveLastPathComponent() {
458   llvm::SmallString<64> current_path;
459   GetPath(current_path, false);
460   if (llvm::sys::path::has_parent_path(current_path, m_style)) {
461     SetFile(llvm::sys::path::parent_path(current_path, m_style));
462     return true;
463   }
464   return false;
465 }
466 /// Returns true if the filespec represents an implementation source
467 /// file (files with a ".c", ".cpp", ".m", ".mm" (many more)
468 /// extension).
469 ///
470 /// \return
471 ///     \b true if the filespec represents an implementation source
472 ///     file, \b false otherwise.
473 bool FileSpec::IsSourceImplementationFile() const {
474   ConstString extension(GetFileNameExtension());
475   if (!extension)
476     return false;
477 
478   static RegularExpression g_source_file_regex(llvm::StringRef(
479       "^.([cC]|[mM]|[mM][mM]|[cC][pP][pP]|[cC]\\+\\+|[cC][xX][xX]|[cC][cC]|["
480       "cC][pP]|[sS]|[aA][sS][mM]|[fF]|[fF]77|[fF]90|[fF]95|[fF]03|[fF][oO]["
481       "rR]|[fF][tT][nN]|[fF][pP][pP]|[aA][dD][aA]|[aA][dD][bB]|[aA][dD][sS])"
482       "$"));
483   return g_source_file_regex.Execute(extension.GetStringRef());
484 }
485 
486 bool FileSpec::IsRelative() const {
487   return !IsAbsolute();
488 }
489 
490 bool FileSpec::IsAbsolute() const {
491   llvm::SmallString<64> current_path;
492   GetPath(current_path, false);
493 
494   // Early return if the path is empty.
495   if (current_path.empty())
496     return false;
497 
498   // We consider paths starting with ~ to be absolute.
499   if (current_path[0] == '~')
500     return true;
501 
502   return llvm::sys::path::is_absolute(current_path, m_style);
503 }
504 
505 void FileSpec::MakeAbsolute(const FileSpec &dir) {
506   if (IsRelative())
507     PrependPathComponent(dir);
508 }
509 
510 void llvm::format_provider<FileSpec>::format(const FileSpec &F,
511                                              raw_ostream &Stream,
512                                              StringRef Style) {
513   assert(
514       (Style.empty() || Style.equals_lower("F") || Style.equals_lower("D")) &&
515       "Invalid FileSpec style!");
516 
517   StringRef dir = F.GetDirectory().GetStringRef();
518   StringRef file = F.GetFilename().GetStringRef();
519 
520   if (dir.empty() && file.empty()) {
521     Stream << "(empty)";
522     return;
523   }
524 
525   if (Style.equals_lower("F")) {
526     Stream << (file.empty() ? "(empty)" : file);
527     return;
528   }
529 
530   // Style is either D or empty, either way we need to print the directory.
531   if (!dir.empty()) {
532     // Directory is stored in normalized form, which might be different than
533     // preferred form.  In order to handle this, we need to cut off the
534     // filename, then denormalize, then write the entire denorm'ed directory.
535     llvm::SmallString<64> denormalized_dir = dir;
536     Denormalize(denormalized_dir, F.GetPathStyle());
537     Stream << denormalized_dir;
538     Stream << GetPreferredPathSeparator(F.GetPathStyle());
539   }
540 
541   if (Style.equals_lower("D")) {
542     // We only want to print the directory, so now just exit.
543     if (dir.empty())
544       Stream << "(empty)";
545     return;
546   }
547 
548   if (!file.empty())
549     Stream << file;
550 }
551