1 //===-- FileSpec.cpp --------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/FileSpec.h"
10 #include "lldb/Utility/RegularExpression.h"
11 #include "lldb/Utility/Stream.h"
12 
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/ADT/Triple.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Support/ErrorOr.h"
19 #include "llvm/Support/FileSystem.h"
20 #include "llvm/Support/Program.h"
21 #include "llvm/Support/raw_ostream.h"
22 
23 #include <algorithm>
24 #include <system_error>
25 #include <vector>
26 
27 #include <assert.h>
28 #include <limits.h>
29 #include <stdio.h>
30 #include <string.h>
31 
32 using namespace lldb;
33 using namespace lldb_private;
34 
35 namespace {
36 
37 static constexpr FileSpec::Style GetNativeStyle() {
38 #if defined(_WIN32)
39   return FileSpec::Style::windows;
40 #else
41   return FileSpec::Style::posix;
42 #endif
43 }
44 
45 bool PathStyleIsPosix(FileSpec::Style style) {
46   return (style == FileSpec::Style::posix ||
47           (style == FileSpec::Style::native &&
48            GetNativeStyle() == FileSpec::Style::posix));
49 }
50 
51 const char *GetPathSeparators(FileSpec::Style style) {
52   return llvm::sys::path::get_separator(style).data();
53 }
54 
55 char GetPreferredPathSeparator(FileSpec::Style style) {
56   return GetPathSeparators(style)[0];
57 }
58 
59 void Denormalize(llvm::SmallVectorImpl<char> &path, FileSpec::Style style) {
60   if (PathStyleIsPosix(style))
61     return;
62 
63   std::replace(path.begin(), path.end(), '/', '\\');
64 }
65 
66 } // end anonymous namespace
67 
68 FileSpec::FileSpec() : m_style(GetNativeStyle()) {}
69 
70 //------------------------------------------------------------------
71 // Default constructor that can take an optional full path to a file on disk.
72 //------------------------------------------------------------------
73 FileSpec::FileSpec(llvm::StringRef path, Style style) : m_style(style) {
74   SetFile(path, style);
75 }
76 
77 FileSpec::FileSpec(llvm::StringRef path, const llvm::Triple &Triple)
78     : FileSpec{path, Triple.isOSWindows() ? Style::windows : Style::posix} {}
79 
80 //------------------------------------------------------------------
81 // Copy constructor
82 //------------------------------------------------------------------
83 FileSpec::FileSpec(const FileSpec &rhs)
84     : m_directory(rhs.m_directory), m_filename(rhs.m_filename),
85       m_is_resolved(rhs.m_is_resolved), m_style(rhs.m_style) {}
86 
87 //------------------------------------------------------------------
88 // Copy constructor
89 //------------------------------------------------------------------
90 FileSpec::FileSpec(const FileSpec *rhs) : m_directory(), m_filename() {
91   if (rhs)
92     *this = *rhs;
93 }
94 
95 //------------------------------------------------------------------
96 // Virtual destructor in case anyone inherits from this class.
97 //------------------------------------------------------------------
98 FileSpec::~FileSpec() {}
99 
100 namespace {
101 //------------------------------------------------------------------
102 /// Safely get a character at the specified index.
103 ///
104 /// \param[in] path
105 ///     A full, partial, or relative path to a file.
106 ///
107 /// \param[in] i
108 ///     An index into path which may or may not be valid.
109 ///
110 /// \return
111 ///   The character at index \a i if the index is valid, or 0 if
112 ///   the index is not valid.
113 //------------------------------------------------------------------
114 inline char safeCharAtIndex(const llvm::StringRef &path, size_t i) {
115   if (i < path.size())
116     return path[i];
117   return 0;
118 }
119 
120 //------------------------------------------------------------------
121 /// Check if a path needs to be normalized.
122 ///
123 /// Check if a path needs to be normalized. We currently consider a
124 /// path to need normalization if any of the following are true
125 ///  - path contains "/./"
126 ///  - path contains "/../"
127 ///  - path contains "//"
128 ///  - path ends with "/"
129 /// Paths that start with "./" or with "../" are not considered to
130 /// need normalization since we aren't trying to resolve the path,
131 /// we are just trying to remove redundant things from the path.
132 ///
133 /// \param[in] path
134 ///     A full, partial, or relative path to a file.
135 ///
136 /// \return
137 ///   Returns \b true if the path needs to be normalized.
138 //------------------------------------------------------------------
139 bool needsNormalization(const llvm::StringRef &path) {
140   if (path.empty())
141     return false;
142   // We strip off leading "." values so these paths need to be normalized
143   if (path[0] == '.')
144     return true;
145   for (auto i = path.find_first_of("\\/"); i != llvm::StringRef::npos;
146        i = path.find_first_of("\\/", i + 1)) {
147     const auto next = safeCharAtIndex(path, i+1);
148     switch (next) {
149       case 0:
150         // path separator char at the end of the string which should be
151         // stripped unless it is the one and only character
152         return i > 0;
153       case '/':
154       case '\\':
155         // two path separator chars in the middle of a path needs to be
156         // normalized
157         if (i > 0)
158           return true;
159         ++i;
160         break;
161 
162       case '.': {
163           const auto next_next = safeCharAtIndex(path, i+2);
164           switch (next_next) {
165             default: break;
166             case 0: return true; // ends with "/."
167             case '/':
168             case '\\':
169               return true; // contains "/./"
170             case '.': {
171               const auto next_next_next = safeCharAtIndex(path, i+3);
172               switch (next_next_next) {
173                 default: break;
174                 case 0: return true; // ends with "/.."
175                 case '/':
176                 case '\\':
177                   return true; // contains "/../"
178               }
179               break;
180             }
181           }
182         }
183         break;
184 
185       default:
186         break;
187     }
188   }
189   return false;
190 }
191 
192 
193 }
194 //------------------------------------------------------------------
195 // Assignment operator.
196 //------------------------------------------------------------------
197 const FileSpec &FileSpec::operator=(const FileSpec &rhs) {
198   if (this != &rhs) {
199     m_directory = rhs.m_directory;
200     m_filename = rhs.m_filename;
201     m_is_resolved = rhs.m_is_resolved;
202     m_style = rhs.m_style;
203   }
204   return *this;
205 }
206 
207 void FileSpec::SetFile(llvm::StringRef pathname) { SetFile(pathname, m_style); }
208 
209 //------------------------------------------------------------------
210 // Update the contents of this object with a new path. The path will be split
211 // up into a directory and filename and stored as uniqued string values for
212 // quick comparison and efficient memory usage.
213 //------------------------------------------------------------------
214 void FileSpec::SetFile(llvm::StringRef pathname, Style style) {
215   m_filename.Clear();
216   m_directory.Clear();
217   m_is_resolved = false;
218   m_style = (style == Style::native) ? GetNativeStyle() : style;
219 
220   if (pathname.empty())
221     return;
222 
223   llvm::SmallString<128> resolved(pathname);
224 
225   // Normalize the path by removing ".", ".." and other redundant components.
226   if (needsNormalization(resolved))
227     llvm::sys::path::remove_dots(resolved, true, m_style);
228 
229   // Normalize back slashes to forward slashes
230   if (m_style == Style::windows)
231     std::replace(resolved.begin(), resolved.end(), '\\', '/');
232 
233   if (resolved.empty()) {
234     // If we have no path after normalization set the path to the current
235     // directory. This matches what python does and also a few other path
236     // utilities.
237     m_filename.SetString(".");
238     return;
239   }
240 
241   // Split path into filename and directory. We rely on the underlying char
242   // pointer to be nullptr when the components are empty.
243   llvm::StringRef filename = llvm::sys::path::filename(resolved, m_style);
244   if(!filename.empty())
245     m_filename.SetString(filename);
246 
247   llvm::StringRef directory = llvm::sys::path::parent_path(resolved, m_style);
248   if(!directory.empty())
249     m_directory.SetString(directory);
250 }
251 
252 void FileSpec::SetFile(llvm::StringRef path, const llvm::Triple &Triple) {
253   return SetFile(path, Triple.isOSWindows() ? Style::windows : Style::posix);
254 }
255 
256 //----------------------------------------------------------------------
257 // Convert to pointer operator. This allows code to check any FileSpec objects
258 // to see if they contain anything valid using code such as:
259 //
260 //  if (file_spec)
261 //  {}
262 //----------------------------------------------------------------------
263 FileSpec::operator bool() const { return m_filename || m_directory; }
264 
265 //----------------------------------------------------------------------
266 // Logical NOT operator. This allows code to check any FileSpec objects to see
267 // if they are invalid using code such as:
268 //
269 //  if (!file_spec)
270 //  {}
271 //----------------------------------------------------------------------
272 bool FileSpec::operator!() const { return !m_directory && !m_filename; }
273 
274 bool FileSpec::DirectoryEquals(const FileSpec &rhs) const {
275   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
276   return ConstString::Equals(m_directory, rhs.m_directory, case_sensitive);
277 }
278 
279 bool FileSpec::FileEquals(const FileSpec &rhs) const {
280   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
281   return ConstString::Equals(m_filename, rhs.m_filename, case_sensitive);
282 }
283 
284 //------------------------------------------------------------------
285 // Equal to operator
286 //------------------------------------------------------------------
287 bool FileSpec::operator==(const FileSpec &rhs) const {
288   return FileEquals(rhs) && DirectoryEquals(rhs);
289 }
290 
291 //------------------------------------------------------------------
292 // Not equal to operator
293 //------------------------------------------------------------------
294 bool FileSpec::operator!=(const FileSpec &rhs) const { return !(*this == rhs); }
295 
296 //------------------------------------------------------------------
297 // Less than operator
298 //------------------------------------------------------------------
299 bool FileSpec::operator<(const FileSpec &rhs) const {
300   return FileSpec::Compare(*this, rhs, true) < 0;
301 }
302 
303 //------------------------------------------------------------------
304 // Dump a FileSpec object to a stream
305 //------------------------------------------------------------------
306 Stream &lldb_private::operator<<(Stream &s, const FileSpec &f) {
307   f.Dump(&s);
308   return s;
309 }
310 
311 //------------------------------------------------------------------
312 // Clear this object by releasing both the directory and filename string values
313 // and making them both the empty string.
314 //------------------------------------------------------------------
315 void FileSpec::Clear() {
316   m_directory.Clear();
317   m_filename.Clear();
318 }
319 
320 //------------------------------------------------------------------
321 // Compare two FileSpec objects. If "full" is true, then both the directory and
322 // the filename must match. If "full" is false, then the directory names for
323 // "a" and "b" are only compared if they are both non-empty. This allows a
324 // FileSpec object to only contain a filename and it can match FileSpec objects
325 // that have matching filenames with different paths.
326 //
327 // Return -1 if the "a" is less than "b", 0 if "a" is equal to "b" and "1" if
328 // "a" is greater than "b".
329 //------------------------------------------------------------------
330 int FileSpec::Compare(const FileSpec &a, const FileSpec &b, bool full) {
331   int result = 0;
332 
333   // case sensitivity of compare
334   const bool case_sensitive = a.IsCaseSensitive() || b.IsCaseSensitive();
335 
336   // If full is true, then we must compare both the directory and filename.
337 
338   // If full is false, then if either directory is empty, then we match on the
339   // basename only, and if both directories have valid values, we still do a
340   // full compare. This allows for matching when we just have a filename in one
341   // of the FileSpec objects.
342 
343   if (full || (a.m_directory && b.m_directory)) {
344     result = ConstString::Compare(a.m_directory, b.m_directory, case_sensitive);
345     if (result)
346       return result;
347   }
348   return ConstString::Compare(a.m_filename, b.m_filename, case_sensitive);
349 }
350 
351 bool FileSpec::Equal(const FileSpec &a, const FileSpec &b, bool full) {
352   // case sensitivity of equality test
353   const bool case_sensitive = a.IsCaseSensitive() || b.IsCaseSensitive();
354 
355   const bool filenames_equal = ConstString::Equals(a.m_filename,
356                                                    b.m_filename,
357                                                    case_sensitive);
358 
359   if (!filenames_equal)
360     return false;
361 
362   if (!full && (a.GetDirectory().IsEmpty() || b.GetDirectory().IsEmpty()))
363     return filenames_equal;
364 
365   return a == b;
366 }
367 
368 llvm::Optional<FileSpec::Style> FileSpec::GuessPathStyle(llvm::StringRef absolute_path) {
369   if (absolute_path.startswith("/"))
370     return Style::posix;
371   if (absolute_path.startswith(R"(\\)"))
372     return Style::windows;
373   if (absolute_path.size() > 3 && llvm::isAlpha(absolute_path[0]) &&
374       absolute_path.substr(1, 2) == R"(:\)")
375     return Style::windows;
376   return llvm::None;
377 }
378 
379 //------------------------------------------------------------------
380 // Dump the object to the supplied stream. If the object contains a valid
381 // directory name, it will be displayed followed by a directory delimiter, and
382 // the filename.
383 //------------------------------------------------------------------
384 void FileSpec::Dump(Stream *s) const {
385   if (s) {
386     std::string path{GetPath(true)};
387     s->PutCString(path);
388     char path_separator = GetPreferredPathSeparator(m_style);
389     if (!m_filename && !path.empty() && path.back() != path_separator)
390       s->PutChar(path_separator);
391   }
392 }
393 
394 FileSpec::Style FileSpec::GetPathStyle() const { return m_style; }
395 
396 //------------------------------------------------------------------
397 // Directory string get accessor.
398 //------------------------------------------------------------------
399 ConstString &FileSpec::GetDirectory() { return m_directory; }
400 
401 //------------------------------------------------------------------
402 // Directory string const get accessor.
403 //------------------------------------------------------------------
404 ConstString FileSpec::GetDirectory() const { return m_directory; }
405 
406 //------------------------------------------------------------------
407 // Filename string get accessor.
408 //------------------------------------------------------------------
409 ConstString &FileSpec::GetFilename() { return m_filename; }
410 
411 //------------------------------------------------------------------
412 // Filename string const get accessor.
413 //------------------------------------------------------------------
414 ConstString FileSpec::GetFilename() const { return m_filename; }
415 
416 //------------------------------------------------------------------
417 // Extract the directory and path into a fixed buffer. This is needed as the
418 // directory and path are stored in separate string values.
419 //------------------------------------------------------------------
420 size_t FileSpec::GetPath(char *path, size_t path_max_len,
421                          bool denormalize) const {
422   if (!path)
423     return 0;
424 
425   std::string result = GetPath(denormalize);
426   ::snprintf(path, path_max_len, "%s", result.c_str());
427   return std::min(path_max_len - 1, result.length());
428 }
429 
430 std::string FileSpec::GetPath(bool denormalize) const {
431   llvm::SmallString<64> result;
432   GetPath(result, denormalize);
433   return std::string(result.begin(), result.end());
434 }
435 
436 const char *FileSpec::GetCString(bool denormalize) const {
437   return ConstString{GetPath(denormalize)}.AsCString(nullptr);
438 }
439 
440 void FileSpec::GetPath(llvm::SmallVectorImpl<char> &path,
441                        bool denormalize) const {
442   path.append(m_directory.GetStringRef().begin(),
443               m_directory.GetStringRef().end());
444   // Since the path was normalized and all paths use '/' when stored in these
445   // objects, we don't need to look for the actual syntax specific path
446   // separator, we just look for and insert '/'.
447   if (m_directory && m_filename && m_directory.GetStringRef().back() != '/' &&
448       m_filename.GetStringRef().back() != '/')
449     path.insert(path.end(), '/');
450   path.append(m_filename.GetStringRef().begin(),
451               m_filename.GetStringRef().end());
452   if (denormalize && !path.empty())
453     Denormalize(path, m_style);
454 }
455 
456 ConstString FileSpec::GetFileNameExtension() const {
457   return ConstString(
458       llvm::sys::path::extension(m_filename.GetStringRef(), m_style));
459 }
460 
461 ConstString FileSpec::GetFileNameStrippingExtension() const {
462   return ConstString(llvm::sys::path::stem(m_filename.GetStringRef(), m_style));
463 }
464 
465 //------------------------------------------------------------------
466 // Return the size in bytes that this object takes in memory. This returns the
467 // size in bytes of this object, not any shared string values it may refer to.
468 //------------------------------------------------------------------
469 size_t FileSpec::MemorySize() const {
470   return m_filename.MemorySize() + m_directory.MemorySize();
471 }
472 
473 FileSpec
474 FileSpec::CopyByAppendingPathComponent(llvm::StringRef component) const {
475   FileSpec ret = *this;
476   ret.AppendPathComponent(component);
477   return ret;
478 }
479 
480 FileSpec FileSpec::CopyByRemovingLastPathComponent() const {
481   llvm::SmallString<64> current_path;
482   GetPath(current_path, false);
483   if (llvm::sys::path::has_parent_path(current_path, m_style))
484     return FileSpec(llvm::sys::path::parent_path(current_path, m_style),
485                     m_style);
486   return *this;
487 }
488 
489 ConstString FileSpec::GetLastPathComponent() const {
490   llvm::SmallString<64> current_path;
491   GetPath(current_path, false);
492   return ConstString(llvm::sys::path::filename(current_path, m_style));
493 }
494 
495 void FileSpec::PrependPathComponent(llvm::StringRef component) {
496   llvm::SmallString<64> new_path(component);
497   llvm::SmallString<64> current_path;
498   GetPath(current_path, false);
499   llvm::sys::path::append(new_path,
500                           llvm::sys::path::begin(current_path, m_style),
501                           llvm::sys::path::end(current_path), m_style);
502   SetFile(new_path, m_style);
503 }
504 
505 void FileSpec::PrependPathComponent(const FileSpec &new_path) {
506   return PrependPathComponent(new_path.GetPath(false));
507 }
508 
509 void FileSpec::AppendPathComponent(llvm::StringRef component) {
510   llvm::SmallString<64> current_path;
511   GetPath(current_path, false);
512   llvm::sys::path::append(current_path, m_style, component);
513   SetFile(current_path, m_style);
514 }
515 
516 void FileSpec::AppendPathComponent(const FileSpec &new_path) {
517   return AppendPathComponent(new_path.GetPath(false));
518 }
519 
520 bool FileSpec::RemoveLastPathComponent() {
521   llvm::SmallString<64> current_path;
522   GetPath(current_path, false);
523   if (llvm::sys::path::has_parent_path(current_path, m_style)) {
524     SetFile(llvm::sys::path::parent_path(current_path, m_style));
525     return true;
526   }
527   return false;
528 }
529 //------------------------------------------------------------------
530 /// Returns true if the filespec represents an implementation source
531 /// file (files with a ".c", ".cpp", ".m", ".mm" (many more)
532 /// extension).
533 ///
534 /// \return
535 ///     \b true if the filespec represents an implementation source
536 ///     file, \b false otherwise.
537 //------------------------------------------------------------------
538 bool FileSpec::IsSourceImplementationFile() const {
539   ConstString extension(GetFileNameExtension());
540   if (!extension)
541     return false;
542 
543   static RegularExpression g_source_file_regex(llvm::StringRef(
544       "^.([cC]|[mM]|[mM][mM]|[cC][pP][pP]|[cC]\\+\\+|[cC][xX][xX]|[cC][cC]|["
545       "cC][pP]|[sS]|[aA][sS][mM]|[fF]|[fF]77|[fF]90|[fF]95|[fF]03|[fF][oO]["
546       "rR]|[fF][tT][nN]|[fF][pP][pP]|[aA][dD][aA]|[aA][dD][bB]|[aA][dD][sS])"
547       "$"));
548   return g_source_file_regex.Execute(extension.GetStringRef());
549 }
550 
551 bool FileSpec::IsRelative() const {
552   return !IsAbsolute();
553 }
554 
555 bool FileSpec::IsAbsolute() const {
556   llvm::SmallString<64> current_path;
557   GetPath(current_path, false);
558 
559   // Early return if the path is empty.
560   if (current_path.empty())
561     return false;
562 
563   // We consider paths starting with ~ to be absolute.
564   if (current_path[0] == '~')
565     return true;
566 
567   return llvm::sys::path::is_absolute(current_path, m_style);
568 }
569 
570 void FileSpec::MakeAbsolute(const FileSpec &dir) {
571   if (IsRelative())
572     PrependPathComponent(dir);
573 }
574 
575 void llvm::format_provider<FileSpec>::format(const FileSpec &F,
576                                              raw_ostream &Stream,
577                                              StringRef Style) {
578   assert(
579       (Style.empty() || Style.equals_lower("F") || Style.equals_lower("D")) &&
580       "Invalid FileSpec style!");
581 
582   StringRef dir = F.GetDirectory().GetStringRef();
583   StringRef file = F.GetFilename().GetStringRef();
584 
585   if (dir.empty() && file.empty()) {
586     Stream << "(empty)";
587     return;
588   }
589 
590   if (Style.equals_lower("F")) {
591     Stream << (file.empty() ? "(empty)" : file);
592     return;
593   }
594 
595   // Style is either D or empty, either way we need to print the directory.
596   if (!dir.empty()) {
597     // Directory is stored in normalized form, which might be different than
598     // preferred form.  In order to handle this, we need to cut off the
599     // filename, then denormalize, then write the entire denorm'ed directory.
600     llvm::SmallString<64> denormalized_dir = dir;
601     Denormalize(denormalized_dir, F.GetPathStyle());
602     Stream << denormalized_dir;
603     Stream << GetPreferredPathSeparator(F.GetPathStyle());
604   }
605 
606   if (Style.equals_lower("D")) {
607     // We only want to print the directory, so now just exit.
608     if (dir.empty())
609       Stream << "(empty)";
610     return;
611   }
612 
613   if (!file.empty())
614     Stream << file;
615 }
616