1 //===-- FileSpec.cpp --------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/FileSpec.h"
10 #include "lldb/Utility/RegularExpression.h"
11 #include "lldb/Utility/Stream.h"
12 
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/ADT/Triple.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Support/ErrorOr.h"
19 #include "llvm/Support/FileSystem.h"
20 #include "llvm/Support/Program.h"
21 #include "llvm/Support/raw_ostream.h"
22 
23 #include <algorithm>
24 #include <system_error>
25 #include <vector>
26 
27 #include <assert.h>
28 #include <limits.h>
29 #include <stdio.h>
30 #include <string.h>
31 
32 using namespace lldb;
33 using namespace lldb_private;
34 
35 namespace {
36 
37 static constexpr FileSpec::Style GetNativeStyle() {
38 #if defined(_WIN32)
39   return FileSpec::Style::windows;
40 #else
41   return FileSpec::Style::posix;
42 #endif
43 }
44 
45 bool PathStyleIsPosix(FileSpec::Style style) {
46   return (style == FileSpec::Style::posix ||
47           (style == FileSpec::Style::native &&
48            GetNativeStyle() == FileSpec::Style::posix));
49 }
50 
51 const char *GetPathSeparators(FileSpec::Style style) {
52   return llvm::sys::path::get_separator(style).data();
53 }
54 
55 char GetPreferredPathSeparator(FileSpec::Style style) {
56   return GetPathSeparators(style)[0];
57 }
58 
59 void Denormalize(llvm::SmallVectorImpl<char> &path, FileSpec::Style style) {
60   if (PathStyleIsPosix(style))
61     return;
62 
63   std::replace(path.begin(), path.end(), '/', '\\');
64 }
65 
66 } // end anonymous namespace
67 
68 FileSpec::FileSpec() : m_style(GetNativeStyle()) {}
69 
70 // Default constructor that can take an optional full path to a file on disk.
71 FileSpec::FileSpec(llvm::StringRef path, Style style) : m_style(style) {
72   SetFile(path, style);
73 }
74 
75 FileSpec::FileSpec(llvm::StringRef path, const llvm::Triple &triple)
76     : FileSpec{path, triple.isOSWindows() ? Style::windows : Style::posix} {}
77 
78 // Copy constructor
79 FileSpec::FileSpec(const FileSpec *rhs) : m_directory(), m_filename() {
80   if (rhs)
81     *this = *rhs;
82 }
83 
84 // Virtual destructor in case anyone inherits from this class.
85 FileSpec::~FileSpec() {}
86 
87 namespace {
88 /// Safely get a character at the specified index.
89 ///
90 /// \param[in] path
91 ///     A full, partial, or relative path to a file.
92 ///
93 /// \param[in] i
94 ///     An index into path which may or may not be valid.
95 ///
96 /// \return
97 ///   The character at index \a i if the index is valid, or 0 if
98 ///   the index is not valid.
99 inline char safeCharAtIndex(const llvm::StringRef &path, size_t i) {
100   if (i < path.size())
101     return path[i];
102   return 0;
103 }
104 
105 /// Check if a path needs to be normalized.
106 ///
107 /// Check if a path needs to be normalized. We currently consider a
108 /// path to need normalization if any of the following are true
109 ///  - path contains "/./"
110 ///  - path contains "/../"
111 ///  - path contains "//"
112 ///  - path ends with "/"
113 /// Paths that start with "./" or with "../" are not considered to
114 /// need normalization since we aren't trying to resolve the path,
115 /// we are just trying to remove redundant things from the path.
116 ///
117 /// \param[in] path
118 ///     A full, partial, or relative path to a file.
119 ///
120 /// \return
121 ///   Returns \b true if the path needs to be normalized.
122 bool needsNormalization(const llvm::StringRef &path) {
123   if (path.empty())
124     return false;
125   // We strip off leading "." values so these paths need to be normalized
126   if (path[0] == '.')
127     return true;
128   for (auto i = path.find_first_of("\\/"); i != llvm::StringRef::npos;
129        i = path.find_first_of("\\/", i + 1)) {
130     const auto next = safeCharAtIndex(path, i+1);
131     switch (next) {
132       case 0:
133         // path separator char at the end of the string which should be
134         // stripped unless it is the one and only character
135         return i > 0;
136       case '/':
137       case '\\':
138         // two path separator chars in the middle of a path needs to be
139         // normalized
140         if (i > 0)
141           return true;
142         ++i;
143         break;
144 
145       case '.': {
146           const auto next_next = safeCharAtIndex(path, i+2);
147           switch (next_next) {
148             default: break;
149             case 0: return true; // ends with "/."
150             case '/':
151             case '\\':
152               return true; // contains "/./"
153             case '.': {
154               const auto next_next_next = safeCharAtIndex(path, i+3);
155               switch (next_next_next) {
156                 default: break;
157                 case 0: return true; // ends with "/.."
158                 case '/':
159                 case '\\':
160                   return true; // contains "/../"
161               }
162               break;
163             }
164           }
165         }
166         break;
167 
168       default:
169         break;
170     }
171   }
172   return false;
173 }
174 
175 
176 }
177 
178 void FileSpec::SetFile(llvm::StringRef pathname) { SetFile(pathname, m_style); }
179 
180 // Update the contents of this object with a new path. The path will be split
181 // up into a directory and filename and stored as uniqued string values for
182 // quick comparison and efficient memory usage.
183 void FileSpec::SetFile(llvm::StringRef pathname, Style style) {
184   m_filename.Clear();
185   m_directory.Clear();
186   m_is_resolved = false;
187   m_style = (style == Style::native) ? GetNativeStyle() : style;
188 
189   if (pathname.empty())
190     return;
191 
192   llvm::SmallString<128> resolved(pathname);
193 
194   // Normalize the path by removing ".", ".." and other redundant components.
195   if (needsNormalization(resolved))
196     llvm::sys::path::remove_dots(resolved, true, m_style);
197 
198   // Normalize back slashes to forward slashes
199   if (m_style == Style::windows)
200     std::replace(resolved.begin(), resolved.end(), '\\', '/');
201 
202   if (resolved.empty()) {
203     // If we have no path after normalization set the path to the current
204     // directory. This matches what python does and also a few other path
205     // utilities.
206     m_filename.SetString(".");
207     return;
208   }
209 
210   // Split path into filename and directory. We rely on the underlying char
211   // pointer to be nullptr when the components are empty.
212   llvm::StringRef filename = llvm::sys::path::filename(resolved, m_style);
213   if(!filename.empty())
214     m_filename.SetString(filename);
215 
216   llvm::StringRef directory = llvm::sys::path::parent_path(resolved, m_style);
217   if(!directory.empty())
218     m_directory.SetString(directory);
219 }
220 
221 void FileSpec::SetFile(llvm::StringRef path, const llvm::Triple &triple) {
222   return SetFile(path, triple.isOSWindows() ? Style::windows : Style::posix);
223 }
224 
225 // Convert to pointer operator. This allows code to check any FileSpec objects
226 // to see if they contain anything valid using code such as:
227 //
228 //  if (file_spec)
229 //  {}
230 FileSpec::operator bool() const { return m_filename || m_directory; }
231 
232 // Logical NOT operator. This allows code to check any FileSpec objects to see
233 // if they are invalid using code such as:
234 //
235 //  if (!file_spec)
236 //  {}
237 bool FileSpec::operator!() const { return !m_directory && !m_filename; }
238 
239 bool FileSpec::DirectoryEquals(const FileSpec &rhs) const {
240   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
241   return ConstString::Equals(m_directory, rhs.m_directory, case_sensitive);
242 }
243 
244 bool FileSpec::FileEquals(const FileSpec &rhs) const {
245   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
246   return ConstString::Equals(m_filename, rhs.m_filename, case_sensitive);
247 }
248 
249 // Equal to operator
250 bool FileSpec::operator==(const FileSpec &rhs) const {
251   return FileEquals(rhs) && DirectoryEquals(rhs);
252 }
253 
254 // Not equal to operator
255 bool FileSpec::operator!=(const FileSpec &rhs) const { return !(*this == rhs); }
256 
257 // Less than operator
258 bool FileSpec::operator<(const FileSpec &rhs) const {
259   return FileSpec::Compare(*this, rhs, true) < 0;
260 }
261 
262 // Dump a FileSpec object to a stream
263 Stream &lldb_private::operator<<(Stream &s, const FileSpec &f) {
264   f.Dump(&s);
265   return s;
266 }
267 
268 // Clear this object by releasing both the directory and filename string values
269 // and making them both the empty string.
270 void FileSpec::Clear() {
271   m_directory.Clear();
272   m_filename.Clear();
273 }
274 
275 // Compare two FileSpec objects. If "full" is true, then both the directory and
276 // the filename must match. If "full" is false, then the directory names for
277 // "a" and "b" are only compared if they are both non-empty. This allows a
278 // FileSpec object to only contain a filename and it can match FileSpec objects
279 // that have matching filenames with different paths.
280 //
281 // Return -1 if the "a" is less than "b", 0 if "a" is equal to "b" and "1" if
282 // "a" is greater than "b".
283 int FileSpec::Compare(const FileSpec &a, const FileSpec &b, bool full) {
284   int result = 0;
285 
286   // case sensitivity of compare
287   const bool case_sensitive = a.IsCaseSensitive() || b.IsCaseSensitive();
288 
289   // If full is true, then we must compare both the directory and filename.
290 
291   // If full is false, then if either directory is empty, then we match on the
292   // basename only, and if both directories have valid values, we still do a
293   // full compare. This allows for matching when we just have a filename in one
294   // of the FileSpec objects.
295 
296   if (full || (a.m_directory && b.m_directory)) {
297     result = ConstString::Compare(a.m_directory, b.m_directory, case_sensitive);
298     if (result)
299       return result;
300   }
301   return ConstString::Compare(a.m_filename, b.m_filename, case_sensitive);
302 }
303 
304 bool FileSpec::Equal(const FileSpec &a, const FileSpec &b, bool full) {
305   // case sensitivity of equality test
306   const bool case_sensitive = a.IsCaseSensitive() || b.IsCaseSensitive();
307 
308   const bool filenames_equal = ConstString::Equals(a.m_filename,
309                                                    b.m_filename,
310                                                    case_sensitive);
311 
312   if (!filenames_equal)
313     return false;
314 
315   if (!full && (a.GetDirectory().IsEmpty() || b.GetDirectory().IsEmpty()))
316     return filenames_equal;
317 
318   return a == b;
319 }
320 
321 llvm::Optional<FileSpec::Style> FileSpec::GuessPathStyle(llvm::StringRef absolute_path) {
322   if (absolute_path.startswith("/"))
323     return Style::posix;
324   if (absolute_path.startswith(R"(\\)"))
325     return Style::windows;
326   if (absolute_path.size() > 3 && llvm::isAlpha(absolute_path[0]) &&
327       absolute_path.substr(1, 2) == R"(:\)")
328     return Style::windows;
329   return llvm::None;
330 }
331 
332 // Dump the object to the supplied stream. If the object contains a valid
333 // directory name, it will be displayed followed by a directory delimiter, and
334 // the filename.
335 void FileSpec::Dump(Stream *s) const {
336   if (s) {
337     std::string path{GetPath(true)};
338     s->PutCString(path);
339     char path_separator = GetPreferredPathSeparator(m_style);
340     if (!m_filename && !path.empty() && path.back() != path_separator)
341       s->PutChar(path_separator);
342   }
343 }
344 
345 FileSpec::Style FileSpec::GetPathStyle() const { return m_style; }
346 
347 // Directory string get accessor.
348 ConstString &FileSpec::GetDirectory() { return m_directory; }
349 
350 // Directory string const get accessor.
351 ConstString FileSpec::GetDirectory() const { return m_directory; }
352 
353 // Filename string get accessor.
354 ConstString &FileSpec::GetFilename() { return m_filename; }
355 
356 // Filename string const get accessor.
357 ConstString FileSpec::GetFilename() const { return m_filename; }
358 
359 // Extract the directory and path into a fixed buffer. This is needed as the
360 // directory and path are stored in separate string values.
361 size_t FileSpec::GetPath(char *path, size_t path_max_len,
362                          bool denormalize) const {
363   if (!path)
364     return 0;
365 
366   std::string result = GetPath(denormalize);
367   ::snprintf(path, path_max_len, "%s", result.c_str());
368   return std::min(path_max_len - 1, result.length());
369 }
370 
371 std::string FileSpec::GetPath(bool denormalize) const {
372   llvm::SmallString<64> result;
373   GetPath(result, denormalize);
374   return std::string(result.begin(), result.end());
375 }
376 
377 const char *FileSpec::GetCString(bool denormalize) const {
378   return ConstString{GetPath(denormalize)}.AsCString(nullptr);
379 }
380 
381 void FileSpec::GetPath(llvm::SmallVectorImpl<char> &path,
382                        bool denormalize) const {
383   path.append(m_directory.GetStringRef().begin(),
384               m_directory.GetStringRef().end());
385   // Since the path was normalized and all paths use '/' when stored in these
386   // objects, we don't need to look for the actual syntax specific path
387   // separator, we just look for and insert '/'.
388   if (m_directory && m_filename && m_directory.GetStringRef().back() != '/' &&
389       m_filename.GetStringRef().back() != '/')
390     path.insert(path.end(), '/');
391   path.append(m_filename.GetStringRef().begin(),
392               m_filename.GetStringRef().end());
393   if (denormalize && !path.empty())
394     Denormalize(path, m_style);
395 }
396 
397 ConstString FileSpec::GetFileNameExtension() const {
398   return ConstString(
399       llvm::sys::path::extension(m_filename.GetStringRef(), m_style));
400 }
401 
402 ConstString FileSpec::GetFileNameStrippingExtension() const {
403   return ConstString(llvm::sys::path::stem(m_filename.GetStringRef(), m_style));
404 }
405 
406 // Return the size in bytes that this object takes in memory. This returns the
407 // size in bytes of this object, not any shared string values it may refer to.
408 size_t FileSpec::MemorySize() const {
409   return m_filename.MemorySize() + m_directory.MemorySize();
410 }
411 
412 FileSpec
413 FileSpec::CopyByAppendingPathComponent(llvm::StringRef component) const {
414   FileSpec ret = *this;
415   ret.AppendPathComponent(component);
416   return ret;
417 }
418 
419 FileSpec FileSpec::CopyByRemovingLastPathComponent() const {
420   llvm::SmallString<64> current_path;
421   GetPath(current_path, false);
422   if (llvm::sys::path::has_parent_path(current_path, m_style))
423     return FileSpec(llvm::sys::path::parent_path(current_path, m_style),
424                     m_style);
425   return *this;
426 }
427 
428 ConstString FileSpec::GetLastPathComponent() const {
429   llvm::SmallString<64> current_path;
430   GetPath(current_path, false);
431   return ConstString(llvm::sys::path::filename(current_path, m_style));
432 }
433 
434 void FileSpec::PrependPathComponent(llvm::StringRef component) {
435   llvm::SmallString<64> new_path(component);
436   llvm::SmallString<64> current_path;
437   GetPath(current_path, false);
438   llvm::sys::path::append(new_path,
439                           llvm::sys::path::begin(current_path, m_style),
440                           llvm::sys::path::end(current_path), m_style);
441   SetFile(new_path, m_style);
442 }
443 
444 void FileSpec::PrependPathComponent(const FileSpec &new_path) {
445   return PrependPathComponent(new_path.GetPath(false));
446 }
447 
448 void FileSpec::AppendPathComponent(llvm::StringRef component) {
449   llvm::SmallString<64> current_path;
450   GetPath(current_path, false);
451   llvm::sys::path::append(current_path, m_style, component);
452   SetFile(current_path, m_style);
453 }
454 
455 void FileSpec::AppendPathComponent(const FileSpec &new_path) {
456   return AppendPathComponent(new_path.GetPath(false));
457 }
458 
459 bool FileSpec::RemoveLastPathComponent() {
460   llvm::SmallString<64> current_path;
461   GetPath(current_path, false);
462   if (llvm::sys::path::has_parent_path(current_path, m_style)) {
463     SetFile(llvm::sys::path::parent_path(current_path, m_style));
464     return true;
465   }
466   return false;
467 }
468 /// Returns true if the filespec represents an implementation source
469 /// file (files with a ".c", ".cpp", ".m", ".mm" (many more)
470 /// extension).
471 ///
472 /// \return
473 ///     \b true if the filespec represents an implementation source
474 ///     file, \b false otherwise.
475 bool FileSpec::IsSourceImplementationFile() const {
476   ConstString extension(GetFileNameExtension());
477   if (!extension)
478     return false;
479 
480   static RegularExpression g_source_file_regex(llvm::StringRef(
481       "^.([cC]|[mM]|[mM][mM]|[cC][pP][pP]|[cC]\\+\\+|[cC][xX][xX]|[cC][cC]|["
482       "cC][pP]|[sS]|[aA][sS][mM]|[fF]|[fF]77|[fF]90|[fF]95|[fF]03|[fF][oO]["
483       "rR]|[fF][tT][nN]|[fF][pP][pP]|[aA][dD][aA]|[aA][dD][bB]|[aA][dD][sS])"
484       "$"));
485   return g_source_file_regex.Execute(extension.GetStringRef());
486 }
487 
488 bool FileSpec::IsRelative() const {
489   return !IsAbsolute();
490 }
491 
492 bool FileSpec::IsAbsolute() const {
493   llvm::SmallString<64> current_path;
494   GetPath(current_path, false);
495 
496   // Early return if the path is empty.
497   if (current_path.empty())
498     return false;
499 
500   // We consider paths starting with ~ to be absolute.
501   if (current_path[0] == '~')
502     return true;
503 
504   return llvm::sys::path::is_absolute(current_path, m_style);
505 }
506 
507 void FileSpec::MakeAbsolute(const FileSpec &dir) {
508   if (IsRelative())
509     PrependPathComponent(dir);
510 }
511 
512 void llvm::format_provider<FileSpec>::format(const FileSpec &F,
513                                              raw_ostream &Stream,
514                                              StringRef Style) {
515   assert(
516       (Style.empty() || Style.equals_lower("F") || Style.equals_lower("D")) &&
517       "Invalid FileSpec style!");
518 
519   StringRef dir = F.GetDirectory().GetStringRef();
520   StringRef file = F.GetFilename().GetStringRef();
521 
522   if (dir.empty() && file.empty()) {
523     Stream << "(empty)";
524     return;
525   }
526 
527   if (Style.equals_lower("F")) {
528     Stream << (file.empty() ? "(empty)" : file);
529     return;
530   }
531 
532   // Style is either D or empty, either way we need to print the directory.
533   if (!dir.empty()) {
534     // Directory is stored in normalized form, which might be different than
535     // preferred form.  In order to handle this, we need to cut off the
536     // filename, then denormalize, then write the entire denorm'ed directory.
537     llvm::SmallString<64> denormalized_dir = dir;
538     Denormalize(denormalized_dir, F.GetPathStyle());
539     Stream << denormalized_dir;
540     Stream << GetPreferredPathSeparator(F.GetPathStyle());
541   }
542 
543   if (Style.equals_lower("D")) {
544     // We only want to print the directory, so now just exit.
545     if (dir.empty())
546       Stream << "(empty)";
547     return;
548   }
549 
550   if (!file.empty())
551     Stream << file;
552 }
553