1 //===-- FileSpec.cpp --------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/FileSpec.h"
10 #include "lldb/Utility/RegularExpression.h"
11 #include "lldb/Utility/Stream.h"
12 
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/ADT/Triple.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Support/ErrorOr.h"
19 #include "llvm/Support/FileSystem.h"
20 #include "llvm/Support/Program.h"
21 #include "llvm/Support/raw_ostream.h"
22 
23 #include <algorithm>
24 #include <system_error>
25 #include <vector>
26 
27 #include <assert.h>
28 #include <limits.h>
29 #include <stdio.h>
30 #include <string.h>
31 
32 using namespace lldb;
33 using namespace lldb_private;
34 
35 namespace {
36 
37 static constexpr FileSpec::Style GetNativeStyle() {
38 #if defined(_WIN32)
39   return FileSpec::Style::windows;
40 #else
41   return FileSpec::Style::posix;
42 #endif
43 }
44 
45 bool PathStyleIsPosix(FileSpec::Style style) {
46   return (style == FileSpec::Style::posix ||
47           (style == FileSpec::Style::native &&
48            GetNativeStyle() == FileSpec::Style::posix));
49 }
50 
51 const char *GetPathSeparators(FileSpec::Style style) {
52   return llvm::sys::path::get_separator(style).data();
53 }
54 
55 char GetPreferredPathSeparator(FileSpec::Style style) {
56   return GetPathSeparators(style)[0];
57 }
58 
59 void Denormalize(llvm::SmallVectorImpl<char> &path, FileSpec::Style style) {
60   if (PathStyleIsPosix(style))
61     return;
62 
63   std::replace(path.begin(), path.end(), '/', '\\');
64 }
65 
66 } // end anonymous namespace
67 
68 FileSpec::FileSpec() : m_style(GetNativeStyle()) {}
69 
70 // Default constructor that can take an optional full path to a file on disk.
71 FileSpec::FileSpec(llvm::StringRef path, Style style) : m_style(style) {
72   SetFile(path, style);
73 }
74 
75 FileSpec::FileSpec(llvm::StringRef path, const llvm::Triple &triple)
76     : FileSpec{path, triple.isOSWindows() ? Style::windows : Style::posix} {}
77 
78 // Copy constructor
79 FileSpec::FileSpec(const FileSpec *rhs) : m_directory(), m_filename() {
80   if (rhs)
81     *this = *rhs;
82 }
83 
84 // Virtual destructor in case anyone inherits from this class.
85 FileSpec::~FileSpec() {}
86 
87 namespace {
88 /// Safely get a character at the specified index.
89 ///
90 /// \param[in] path
91 ///     A full, partial, or relative path to a file.
92 ///
93 /// \param[in] i
94 ///     An index into path which may or may not be valid.
95 ///
96 /// \return
97 ///   The character at index \a i if the index is valid, or 0 if
98 ///   the index is not valid.
99 inline char safeCharAtIndex(const llvm::StringRef &path, size_t i) {
100   if (i < path.size())
101     return path[i];
102   return 0;
103 }
104 
105 /// Check if a path needs to be normalized.
106 ///
107 /// Check if a path needs to be normalized. We currently consider a
108 /// path to need normalization if any of the following are true
109 ///  - path contains "/./"
110 ///  - path contains "/../"
111 ///  - path contains "//"
112 ///  - path ends with "/"
113 /// Paths that start with "./" or with "../" are not considered to
114 /// need normalization since we aren't trying to resolve the path,
115 /// we are just trying to remove redundant things from the path.
116 ///
117 /// \param[in] path
118 ///     A full, partial, or relative path to a file.
119 ///
120 /// \return
121 ///   Returns \b true if the path needs to be normalized.
122 bool needsNormalization(const llvm::StringRef &path) {
123   if (path.empty())
124     return false;
125   // We strip off leading "." values so these paths need to be normalized
126   if (path[0] == '.')
127     return true;
128   for (auto i = path.find_first_of("\\/"); i != llvm::StringRef::npos;
129        i = path.find_first_of("\\/", i + 1)) {
130     const auto next = safeCharAtIndex(path, i+1);
131     switch (next) {
132       case 0:
133         // path separator char at the end of the string which should be
134         // stripped unless it is the one and only character
135         return i > 0;
136       case '/':
137       case '\\':
138         // two path separator chars in the middle of a path needs to be
139         // normalized
140         if (i > 0)
141           return true;
142         ++i;
143         break;
144 
145       case '.': {
146           const auto next_next = safeCharAtIndex(path, i+2);
147           switch (next_next) {
148             default: break;
149             case 0: return true; // ends with "/."
150             case '/':
151             case '\\':
152               return true; // contains "/./"
153             case '.': {
154               const auto next_next_next = safeCharAtIndex(path, i+3);
155               switch (next_next_next) {
156                 default: break;
157                 case 0: return true; // ends with "/.."
158                 case '/':
159                 case '\\':
160                   return true; // contains "/../"
161               }
162               break;
163             }
164           }
165         }
166         break;
167 
168       default:
169         break;
170     }
171   }
172   return false;
173 }
174 
175 
176 }
177 
178 void FileSpec::SetFile(llvm::StringRef pathname) { SetFile(pathname, m_style); }
179 
180 // Update the contents of this object with a new path. The path will be split
181 // up into a directory and filename and stored as uniqued string values for
182 // quick comparison and efficient memory usage.
183 void FileSpec::SetFile(llvm::StringRef pathname, Style style) {
184   m_filename.Clear();
185   m_directory.Clear();
186   m_is_resolved = false;
187   m_style = (style == Style::native) ? GetNativeStyle() : style;
188 
189   if (pathname.empty())
190     return;
191 
192   llvm::SmallString<128> resolved(pathname);
193 
194   // Normalize the path by removing ".", ".." and other redundant components.
195   if (needsNormalization(resolved))
196     llvm::sys::path::remove_dots(resolved, true, m_style);
197 
198   // Normalize back slashes to forward slashes
199   if (m_style == Style::windows)
200     std::replace(resolved.begin(), resolved.end(), '\\', '/');
201 
202   if (resolved.empty()) {
203     // If we have no path after normalization set the path to the current
204     // directory. This matches what python does and also a few other path
205     // utilities.
206     m_filename.SetString(".");
207     return;
208   }
209 
210   // Split path into filename and directory. We rely on the underlying char
211   // pointer to be nullptr when the components are empty.
212   llvm::StringRef filename = llvm::sys::path::filename(resolved, m_style);
213   if(!filename.empty())
214     m_filename.SetString(filename);
215 
216   llvm::StringRef directory = llvm::sys::path::parent_path(resolved, m_style);
217   if(!directory.empty())
218     m_directory.SetString(directory);
219 }
220 
221 void FileSpec::SetFile(llvm::StringRef path, const llvm::Triple &triple) {
222   return SetFile(path, triple.isOSWindows() ? Style::windows : Style::posix);
223 }
224 
225 // Convert to pointer operator. This allows code to check any FileSpec objects
226 // to see if they contain anything valid using code such as:
227 //
228 //  if (file_spec)
229 //  {}
230 FileSpec::operator bool() const { return m_filename || m_directory; }
231 
232 // Logical NOT operator. This allows code to check any FileSpec objects to see
233 // if they are invalid using code such as:
234 //
235 //  if (!file_spec)
236 //  {}
237 bool FileSpec::operator!() const { return !m_directory && !m_filename; }
238 
239 bool FileSpec::DirectoryEquals(const FileSpec &rhs) const {
240   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
241   return ConstString::Equals(m_directory, rhs.m_directory, case_sensitive);
242 }
243 
244 bool FileSpec::FileEquals(const FileSpec &rhs) const {
245   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
246   return ConstString::Equals(m_filename, rhs.m_filename, case_sensitive);
247 }
248 
249 // Equal to operator
250 bool FileSpec::operator==(const FileSpec &rhs) const {
251   return FileEquals(rhs) && DirectoryEquals(rhs);
252 }
253 
254 // Not equal to operator
255 bool FileSpec::operator!=(const FileSpec &rhs) const { return !(*this == rhs); }
256 
257 // Less than operator
258 bool FileSpec::operator<(const FileSpec &rhs) const {
259   return FileSpec::Compare(*this, rhs, true) < 0;
260 }
261 
262 // Dump a FileSpec object to a stream
263 Stream &lldb_private::operator<<(Stream &s, const FileSpec &f) {
264   f.Dump(&s);
265   return s;
266 }
267 
268 // Clear this object by releasing both the directory and filename string values
269 // and making them both the empty string.
270 void FileSpec::Clear() {
271   m_directory.Clear();
272   m_filename.Clear();
273 }
274 
275 // Compare two FileSpec objects. If "full" is true, then both the directory and
276 // the filename must match. If "full" is false, then the directory names for
277 // "a" and "b" are only compared if they are both non-empty. This allows a
278 // FileSpec object to only contain a filename and it can match FileSpec objects
279 // that have matching filenames with different paths.
280 //
281 // Return -1 if the "a" is less than "b", 0 if "a" is equal to "b" and "1" if
282 // "a" is greater than "b".
283 int FileSpec::Compare(const FileSpec &a, const FileSpec &b, bool full) {
284   int result = 0;
285 
286   // case sensitivity of compare
287   const bool case_sensitive = a.IsCaseSensitive() || b.IsCaseSensitive();
288 
289   // If full is true, then we must compare both the directory and filename.
290 
291   // If full is false, then if either directory is empty, then we match on the
292   // basename only, and if both directories have valid values, we still do a
293   // full compare. This allows for matching when we just have a filename in one
294   // of the FileSpec objects.
295 
296   if (full || (a.m_directory && b.m_directory)) {
297     result = ConstString::Compare(a.m_directory, b.m_directory, case_sensitive);
298     if (result)
299       return result;
300   }
301   return ConstString::Compare(a.m_filename, b.m_filename, case_sensitive);
302 }
303 
304 bool FileSpec::Equal(const FileSpec &a, const FileSpec &b, bool full) {
305   if (full || (a.GetDirectory() && b.GetDirectory()))
306     return a == b;
307 
308   return a.FileEquals(b);
309 }
310 
311 llvm::Optional<FileSpec::Style> FileSpec::GuessPathStyle(llvm::StringRef absolute_path) {
312   if (absolute_path.startswith("/"))
313     return Style::posix;
314   if (absolute_path.startswith(R"(\\)"))
315     return Style::windows;
316   if (absolute_path.size() > 3 && llvm::isAlpha(absolute_path[0]) &&
317       absolute_path.substr(1, 2) == R"(:\)")
318     return Style::windows;
319   return llvm::None;
320 }
321 
322 // Dump the object to the supplied stream. If the object contains a valid
323 // directory name, it will be displayed followed by a directory delimiter, and
324 // the filename.
325 void FileSpec::Dump(Stream *s) const {
326   if (s) {
327     std::string path{GetPath(true)};
328     s->PutCString(path);
329     char path_separator = GetPreferredPathSeparator(m_style);
330     if (!m_filename && !path.empty() && path.back() != path_separator)
331       s->PutChar(path_separator);
332   }
333 }
334 
335 FileSpec::Style FileSpec::GetPathStyle() const { return m_style; }
336 
337 // Directory string get accessor.
338 ConstString &FileSpec::GetDirectory() { return m_directory; }
339 
340 // Directory string const get accessor.
341 ConstString FileSpec::GetDirectory() const { return m_directory; }
342 
343 // Filename string get accessor.
344 ConstString &FileSpec::GetFilename() { return m_filename; }
345 
346 // Filename string const get accessor.
347 ConstString FileSpec::GetFilename() const { return m_filename; }
348 
349 // Extract the directory and path into a fixed buffer. This is needed as the
350 // directory and path are stored in separate string values.
351 size_t FileSpec::GetPath(char *path, size_t path_max_len,
352                          bool denormalize) const {
353   if (!path)
354     return 0;
355 
356   std::string result = GetPath(denormalize);
357   ::snprintf(path, path_max_len, "%s", result.c_str());
358   return std::min(path_max_len - 1, result.length());
359 }
360 
361 std::string FileSpec::GetPath(bool denormalize) const {
362   llvm::SmallString<64> result;
363   GetPath(result, denormalize);
364   return std::string(result.begin(), result.end());
365 }
366 
367 const char *FileSpec::GetCString(bool denormalize) const {
368   return ConstString{GetPath(denormalize)}.AsCString(nullptr);
369 }
370 
371 void FileSpec::GetPath(llvm::SmallVectorImpl<char> &path,
372                        bool denormalize) const {
373   path.append(m_directory.GetStringRef().begin(),
374               m_directory.GetStringRef().end());
375   // Since the path was normalized and all paths use '/' when stored in these
376   // objects, we don't need to look for the actual syntax specific path
377   // separator, we just look for and insert '/'.
378   if (m_directory && m_filename && m_directory.GetStringRef().back() != '/' &&
379       m_filename.GetStringRef().back() != '/')
380     path.insert(path.end(), '/');
381   path.append(m_filename.GetStringRef().begin(),
382               m_filename.GetStringRef().end());
383   if (denormalize && !path.empty())
384     Denormalize(path, m_style);
385 }
386 
387 ConstString FileSpec::GetFileNameExtension() const {
388   return ConstString(
389       llvm::sys::path::extension(m_filename.GetStringRef(), m_style));
390 }
391 
392 ConstString FileSpec::GetFileNameStrippingExtension() const {
393   return ConstString(llvm::sys::path::stem(m_filename.GetStringRef(), m_style));
394 }
395 
396 // Return the size in bytes that this object takes in memory. This returns the
397 // size in bytes of this object, not any shared string values it may refer to.
398 size_t FileSpec::MemorySize() const {
399   return m_filename.MemorySize() + m_directory.MemorySize();
400 }
401 
402 FileSpec
403 FileSpec::CopyByAppendingPathComponent(llvm::StringRef component) const {
404   FileSpec ret = *this;
405   ret.AppendPathComponent(component);
406   return ret;
407 }
408 
409 FileSpec FileSpec::CopyByRemovingLastPathComponent() const {
410   llvm::SmallString<64> current_path;
411   GetPath(current_path, false);
412   if (llvm::sys::path::has_parent_path(current_path, m_style))
413     return FileSpec(llvm::sys::path::parent_path(current_path, m_style),
414                     m_style);
415   return *this;
416 }
417 
418 ConstString FileSpec::GetLastPathComponent() const {
419   llvm::SmallString<64> current_path;
420   GetPath(current_path, false);
421   return ConstString(llvm::sys::path::filename(current_path, m_style));
422 }
423 
424 void FileSpec::PrependPathComponent(llvm::StringRef component) {
425   llvm::SmallString<64> new_path(component);
426   llvm::SmallString<64> current_path;
427   GetPath(current_path, false);
428   llvm::sys::path::append(new_path,
429                           llvm::sys::path::begin(current_path, m_style),
430                           llvm::sys::path::end(current_path), m_style);
431   SetFile(new_path, m_style);
432 }
433 
434 void FileSpec::PrependPathComponent(const FileSpec &new_path) {
435   return PrependPathComponent(new_path.GetPath(false));
436 }
437 
438 void FileSpec::AppendPathComponent(llvm::StringRef component) {
439   llvm::SmallString<64> current_path;
440   GetPath(current_path, false);
441   llvm::sys::path::append(current_path, m_style, component);
442   SetFile(current_path, m_style);
443 }
444 
445 void FileSpec::AppendPathComponent(const FileSpec &new_path) {
446   return AppendPathComponent(new_path.GetPath(false));
447 }
448 
449 bool FileSpec::RemoveLastPathComponent() {
450   llvm::SmallString<64> current_path;
451   GetPath(current_path, false);
452   if (llvm::sys::path::has_parent_path(current_path, m_style)) {
453     SetFile(llvm::sys::path::parent_path(current_path, m_style));
454     return true;
455   }
456   return false;
457 }
458 /// Returns true if the filespec represents an implementation source
459 /// file (files with a ".c", ".cpp", ".m", ".mm" (many more)
460 /// extension).
461 ///
462 /// \return
463 ///     \b true if the filespec represents an implementation source
464 ///     file, \b false otherwise.
465 bool FileSpec::IsSourceImplementationFile() const {
466   ConstString extension(GetFileNameExtension());
467   if (!extension)
468     return false;
469 
470   static RegularExpression g_source_file_regex(llvm::StringRef(
471       "^.([cC]|[mM]|[mM][mM]|[cC][pP][pP]|[cC]\\+\\+|[cC][xX][xX]|[cC][cC]|["
472       "cC][pP]|[sS]|[aA][sS][mM]|[fF]|[fF]77|[fF]90|[fF]95|[fF]03|[fF][oO]["
473       "rR]|[fF][tT][nN]|[fF][pP][pP]|[aA][dD][aA]|[aA][dD][bB]|[aA][dD][sS])"
474       "$"));
475   return g_source_file_regex.Execute(extension.GetStringRef());
476 }
477 
478 bool FileSpec::IsRelative() const {
479   return !IsAbsolute();
480 }
481 
482 bool FileSpec::IsAbsolute() const {
483   llvm::SmallString<64> current_path;
484   GetPath(current_path, false);
485 
486   // Early return if the path is empty.
487   if (current_path.empty())
488     return false;
489 
490   // We consider paths starting with ~ to be absolute.
491   if (current_path[0] == '~')
492     return true;
493 
494   return llvm::sys::path::is_absolute(current_path, m_style);
495 }
496 
497 void FileSpec::MakeAbsolute(const FileSpec &dir) {
498   if (IsRelative())
499     PrependPathComponent(dir);
500 }
501 
502 void llvm::format_provider<FileSpec>::format(const FileSpec &F,
503                                              raw_ostream &Stream,
504                                              StringRef Style) {
505   assert(
506       (Style.empty() || Style.equals_lower("F") || Style.equals_lower("D")) &&
507       "Invalid FileSpec style!");
508 
509   StringRef dir = F.GetDirectory().GetStringRef();
510   StringRef file = F.GetFilename().GetStringRef();
511 
512   if (dir.empty() && file.empty()) {
513     Stream << "(empty)";
514     return;
515   }
516 
517   if (Style.equals_lower("F")) {
518     Stream << (file.empty() ? "(empty)" : file);
519     return;
520   }
521 
522   // Style is either D or empty, either way we need to print the directory.
523   if (!dir.empty()) {
524     // Directory is stored in normalized form, which might be different than
525     // preferred form.  In order to handle this, we need to cut off the
526     // filename, then denormalize, then write the entire denorm'ed directory.
527     llvm::SmallString<64> denormalized_dir = dir;
528     Denormalize(denormalized_dir, F.GetPathStyle());
529     Stream << denormalized_dir;
530     Stream << GetPreferredPathSeparator(F.GetPathStyle());
531   }
532 
533   if (Style.equals_lower("D")) {
534     // We only want to print the directory, so now just exit.
535     if (dir.empty())
536       Stream << "(empty)";
537     return;
538   }
539 
540   if (!file.empty())
541     Stream << file;
542 }
543