1 //===-- FileSpec.cpp --------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/FileSpec.h"
10 #include "lldb/Utility/RegularExpression.h"
11 #include "lldb/Utility/Stream.h"
12 
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/ADT/Triple.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Support/ErrorOr.h"
19 #include "llvm/Support/FileSystem.h"
20 #include "llvm/Support/Program.h"
21 #include "llvm/Support/raw_ostream.h"
22 
23 #include <algorithm>
24 #include <system_error>
25 #include <vector>
26 
27 #include <assert.h>
28 #include <limits.h>
29 #include <stdio.h>
30 #include <string.h>
31 
32 using namespace lldb;
33 using namespace lldb_private;
34 
35 namespace {
36 
37 static constexpr FileSpec::Style GetNativeStyle() {
38 #if defined(_WIN32)
39   return FileSpec::Style::windows;
40 #else
41   return FileSpec::Style::posix;
42 #endif
43 }
44 
45 bool PathStyleIsPosix(FileSpec::Style style) {
46   return (style == FileSpec::Style::posix ||
47           (style == FileSpec::Style::native &&
48            GetNativeStyle() == FileSpec::Style::posix));
49 }
50 
51 const char *GetPathSeparators(FileSpec::Style style) {
52   return llvm::sys::path::get_separator(style).data();
53 }
54 
55 char GetPreferredPathSeparator(FileSpec::Style style) {
56   return GetPathSeparators(style)[0];
57 }
58 
59 void Denormalize(llvm::SmallVectorImpl<char> &path, FileSpec::Style style) {
60   if (PathStyleIsPosix(style))
61     return;
62 
63   std::replace(path.begin(), path.end(), '/', '\\');
64 }
65 
66 } // end anonymous namespace
67 
68 FileSpec::FileSpec() : m_style(GetNativeStyle()) {}
69 
70 // Default constructor that can take an optional full path to a file on disk.
71 FileSpec::FileSpec(llvm::StringRef path, Style style) : m_style(style) {
72   SetFile(path, style);
73 }
74 
75 FileSpec::FileSpec(llvm::StringRef path, const llvm::Triple &triple)
76     : FileSpec{path, triple.isOSWindows() ? Style::windows : Style::posix} {}
77 
78 namespace {
79 /// Safely get a character at the specified index.
80 ///
81 /// \param[in] path
82 ///     A full, partial, or relative path to a file.
83 ///
84 /// \param[in] i
85 ///     An index into path which may or may not be valid.
86 ///
87 /// \return
88 ///   The character at index \a i if the index is valid, or 0 if
89 ///   the index is not valid.
90 inline char safeCharAtIndex(const llvm::StringRef &path, size_t i) {
91   if (i < path.size())
92     return path[i];
93   return 0;
94 }
95 
/// Check if a path needs to be normalized.
///
/// Both '/' and '\\' are treated as path separators here. A path is reported
/// as needing normalization if any of the following are true
///  - path starts with "."
///  - path contains "/./" or ends with "/."
///  - path contains "/../" or ends with "/.."
///  - path contains "//" anywhere except at the very beginning
///  - path ends with "/" and consists of more than that one character
/// An empty path never needs normalization. A double separator at the very
/// start of the path is skipped over rather than reported.
///
/// \param[in] path
///     A full, partial, or relative path to a file.
///
/// \return
///   Returns \b true if the path needs to be normalized.
bool needsNormalization(const llvm::StringRef &path) {
  if (path.empty())
    return false;
  // We strip off leading "." values so these paths need to be normalized
  if (path[0] == '.')
    return true;
  // Visit every separator and classify the path by the characters that
  // follow it.
  for (auto i = path.find_first_of("\\/"); i != llvm::StringRef::npos;
       i = path.find_first_of("\\/", i + 1)) {
    const auto next = safeCharAtIndex(path, i+1);
    switch (next) {
      case 0:
        // path separator char at the end of the string which should be
        // stripped unless it is the one and only character
        return i > 0;
      case '/':
      case '\\':
        // two path separator chars in the middle of a path needs to be
        // normalized
        if (i > 0)
          return true;
        // Leading double separator: step over the second one so that the
        // search resumes after it.
        ++i;
        break;

      case '.': {
          const auto next_next = safeCharAtIndex(path, i+2);
          switch (next_next) {
            default: break;
            case 0: return true; // ends with "/."
            case '/':
            case '\\':
              return true; // contains "/./"
            case '.': {
              const auto next_next_next = safeCharAtIndex(path, i+3);
              switch (next_next_next) {
                default: break;
                case 0: return true; // ends with "/.."
                case '/':
                case '\\':
                  return true; // contains "/../"
              }
              break;
            }
          }
        }
        break;

      default:
        break;
    }
  }
  return false;
}
165 
166 
167 }
168 
169 void FileSpec::SetFile(llvm::StringRef pathname) { SetFile(pathname, m_style); }
170 
171 // Update the contents of this object with a new path. The path will be split
172 // up into a directory and filename and stored as uniqued string values for
173 // quick comparison and efficient memory usage.
174 void FileSpec::SetFile(llvm::StringRef pathname, Style style) {
175   m_filename.Clear();
176   m_directory.Clear();
177   m_is_resolved = false;
178   m_style = (style == Style::native) ? GetNativeStyle() : style;
179 
180   if (pathname.empty())
181     return;
182 
183   llvm::SmallString<128> resolved(pathname);
184 
185   // Normalize the path by removing ".", ".." and other redundant components.
186   if (needsNormalization(resolved))
187     llvm::sys::path::remove_dots(resolved, true, m_style);
188 
189   // Normalize back slashes to forward slashes
190   if (m_style == Style::windows)
191     std::replace(resolved.begin(), resolved.end(), '\\', '/');
192 
193   if (resolved.empty()) {
194     // If we have no path after normalization set the path to the current
195     // directory. This matches what python does and also a few other path
196     // utilities.
197     m_filename.SetString(".");
198     return;
199   }
200 
201   // Split path into filename and directory. We rely on the underlying char
202   // pointer to be nullptr when the components are empty.
203   llvm::StringRef filename = llvm::sys::path::filename(resolved, m_style);
204   if(!filename.empty())
205     m_filename.SetString(filename);
206 
207   llvm::StringRef directory = llvm::sys::path::parent_path(resolved, m_style);
208   if(!directory.empty())
209     m_directory.SetString(directory);
210 }
211 
212 void FileSpec::SetFile(llvm::StringRef path, const llvm::Triple &triple) {
213   return SetFile(path, triple.isOSWindows() ? Style::windows : Style::posix);
214 }
215 
216 // Convert to pointer operator. This allows code to check any FileSpec objects
217 // to see if they contain anything valid using code such as:
218 //
219 //  if (file_spec)
220 //  {}
221 FileSpec::operator bool() const { return m_filename || m_directory; }
222 
223 // Logical NOT operator. This allows code to check any FileSpec objects to see
224 // if they are invalid using code such as:
225 //
226 //  if (!file_spec)
227 //  {}
228 bool FileSpec::operator!() const { return !m_directory && !m_filename; }
229 
230 bool FileSpec::DirectoryEquals(const FileSpec &rhs) const {
231   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
232   return ConstString::Equals(m_directory, rhs.m_directory, case_sensitive);
233 }
234 
235 bool FileSpec::FileEquals(const FileSpec &rhs) const {
236   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
237   return ConstString::Equals(m_filename, rhs.m_filename, case_sensitive);
238 }
239 
240 // Equal to operator
241 bool FileSpec::operator==(const FileSpec &rhs) const {
242   return FileEquals(rhs) && DirectoryEquals(rhs);
243 }
244 
245 // Not equal to operator
246 bool FileSpec::operator!=(const FileSpec &rhs) const { return !(*this == rhs); }
247 
248 // Less than operator
249 bool FileSpec::operator<(const FileSpec &rhs) const {
250   return FileSpec::Compare(*this, rhs, true) < 0;
251 }
252 
253 // Dump a FileSpec object to a stream
254 Stream &lldb_private::operator<<(Stream &s, const FileSpec &f) {
255   f.Dump(&s);
256   return s;
257 }
258 
259 // Clear this object by releasing both the directory and filename string values
260 // and making them both the empty string.
261 void FileSpec::Clear() {
262   m_directory.Clear();
263   m_filename.Clear();
264 }
265 
266 // Compare two FileSpec objects. If "full" is true, then both the directory and
267 // the filename must match. If "full" is false, then the directory names for
268 // "a" and "b" are only compared if they are both non-empty. This allows a
269 // FileSpec object to only contain a filename and it can match FileSpec objects
270 // that have matching filenames with different paths.
271 //
272 // Return -1 if the "a" is less than "b", 0 if "a" is equal to "b" and "1" if
273 // "a" is greater than "b".
274 int FileSpec::Compare(const FileSpec &a, const FileSpec &b, bool full) {
275   int result = 0;
276 
277   // case sensitivity of compare
278   const bool case_sensitive = a.IsCaseSensitive() || b.IsCaseSensitive();
279 
280   // If full is true, then we must compare both the directory and filename.
281 
282   // If full is false, then if either directory is empty, then we match on the
283   // basename only, and if both directories have valid values, we still do a
284   // full compare. This allows for matching when we just have a filename in one
285   // of the FileSpec objects.
286 
287   if (full || (a.m_directory && b.m_directory)) {
288     result = ConstString::Compare(a.m_directory, b.m_directory, case_sensitive);
289     if (result)
290       return result;
291   }
292   return ConstString::Compare(a.m_filename, b.m_filename, case_sensitive);
293 }
294 
295 bool FileSpec::Equal(const FileSpec &a, const FileSpec &b, bool full) {
296   if (full || (a.GetDirectory() && b.GetDirectory()))
297     return a == b;
298 
299   return a.FileEquals(b);
300 }
301 
302 bool FileSpec::Match(const FileSpec &pattern, const FileSpec &file) {
303   if (pattern.GetDirectory())
304     return pattern == file;
305   if (pattern.GetFilename())
306     return pattern.FileEquals(file);
307   return true;
308 }
309 
310 llvm::Optional<FileSpec::Style> FileSpec::GuessPathStyle(llvm::StringRef absolute_path) {
311   if (absolute_path.startswith("/"))
312     return Style::posix;
313   if (absolute_path.startswith(R"(\\)"))
314     return Style::windows;
315   if (absolute_path.size() > 3 && llvm::isAlpha(absolute_path[0]) &&
316       absolute_path.substr(1, 2) == R"(:\)")
317     return Style::windows;
318   return llvm::None;
319 }
320 
321 // Dump the object to the supplied stream. If the object contains a valid
322 // directory name, it will be displayed followed by a directory delimiter, and
323 // the filename.
324 void FileSpec::Dump(Stream *s) const {
325   if (s) {
326     std::string path{GetPath(true)};
327     s->PutCString(path);
328     char path_separator = GetPreferredPathSeparator(m_style);
329     if (!m_filename && !path.empty() && path.back() != path_separator)
330       s->PutChar(path_separator);
331   }
332 }
333 
334 FileSpec::Style FileSpec::GetPathStyle() const { return m_style; }
335 
336 // Directory string get accessor.
337 ConstString &FileSpec::GetDirectory() { return m_directory; }
338 
339 // Directory string const get accessor.
340 ConstString FileSpec::GetDirectory() const { return m_directory; }
341 
342 // Filename string get accessor.
343 ConstString &FileSpec::GetFilename() { return m_filename; }
344 
345 // Filename string const get accessor.
346 ConstString FileSpec::GetFilename() const { return m_filename; }
347 
348 // Extract the directory and path into a fixed buffer. This is needed as the
349 // directory and path are stored in separate string values.
350 size_t FileSpec::GetPath(char *path, size_t path_max_len,
351                          bool denormalize) const {
352   if (!path)
353     return 0;
354 
355   std::string result = GetPath(denormalize);
356   ::snprintf(path, path_max_len, "%s", result.c_str());
357   return std::min(path_max_len - 1, result.length());
358 }
359 
360 std::string FileSpec::GetPath(bool denormalize) const {
361   llvm::SmallString<64> result;
362   GetPath(result, denormalize);
363   return std::string(result.begin(), result.end());
364 }
365 
366 const char *FileSpec::GetCString(bool denormalize) const {
367   return ConstString{GetPath(denormalize)}.AsCString(nullptr);
368 }
369 
370 void FileSpec::GetPath(llvm::SmallVectorImpl<char> &path,
371                        bool denormalize) const {
372   path.append(m_directory.GetStringRef().begin(),
373               m_directory.GetStringRef().end());
374   // Since the path was normalized and all paths use '/' when stored in these
375   // objects, we don't need to look for the actual syntax specific path
376   // separator, we just look for and insert '/'.
377   if (m_directory && m_filename && m_directory.GetStringRef().back() != '/' &&
378       m_filename.GetStringRef().back() != '/')
379     path.insert(path.end(), '/');
380   path.append(m_filename.GetStringRef().begin(),
381               m_filename.GetStringRef().end());
382   if (denormalize && !path.empty())
383     Denormalize(path, m_style);
384 }
385 
386 ConstString FileSpec::GetFileNameExtension() const {
387   return ConstString(
388       llvm::sys::path::extension(m_filename.GetStringRef(), m_style));
389 }
390 
391 ConstString FileSpec::GetFileNameStrippingExtension() const {
392   return ConstString(llvm::sys::path::stem(m_filename.GetStringRef(), m_style));
393 }
394 
395 // Return the size in bytes that this object takes in memory. This returns the
396 // size in bytes of this object, not any shared string values it may refer to.
397 size_t FileSpec::MemorySize() const {
398   return m_filename.MemorySize() + m_directory.MemorySize();
399 }
400 
401 FileSpec
402 FileSpec::CopyByAppendingPathComponent(llvm::StringRef component) const {
403   FileSpec ret = *this;
404   ret.AppendPathComponent(component);
405   return ret;
406 }
407 
408 FileSpec FileSpec::CopyByRemovingLastPathComponent() const {
409   llvm::SmallString<64> current_path;
410   GetPath(current_path, false);
411   if (llvm::sys::path::has_parent_path(current_path, m_style))
412     return FileSpec(llvm::sys::path::parent_path(current_path, m_style),
413                     m_style);
414   return *this;
415 }
416 
417 ConstString FileSpec::GetLastPathComponent() const {
418   llvm::SmallString<64> current_path;
419   GetPath(current_path, false);
420   return ConstString(llvm::sys::path::filename(current_path, m_style));
421 }
422 
423 void FileSpec::PrependPathComponent(llvm::StringRef component) {
424   llvm::SmallString<64> new_path(component);
425   llvm::SmallString<64> current_path;
426   GetPath(current_path, false);
427   llvm::sys::path::append(new_path,
428                           llvm::sys::path::begin(current_path, m_style),
429                           llvm::sys::path::end(current_path), m_style);
430   SetFile(new_path, m_style);
431 }
432 
433 void FileSpec::PrependPathComponent(const FileSpec &new_path) {
434   return PrependPathComponent(new_path.GetPath(false));
435 }
436 
437 void FileSpec::AppendPathComponent(llvm::StringRef component) {
438   llvm::SmallString<64> current_path;
439   GetPath(current_path, false);
440   llvm::sys::path::append(current_path, m_style, component);
441   SetFile(current_path, m_style);
442 }
443 
444 void FileSpec::AppendPathComponent(const FileSpec &new_path) {
445   return AppendPathComponent(new_path.GetPath(false));
446 }
447 
448 bool FileSpec::RemoveLastPathComponent() {
449   llvm::SmallString<64> current_path;
450   GetPath(current_path, false);
451   if (llvm::sys::path::has_parent_path(current_path, m_style)) {
452     SetFile(llvm::sys::path::parent_path(current_path, m_style));
453     return true;
454   }
455   return false;
456 }
457 /// Returns true if the filespec represents an implementation source
458 /// file (files with a ".c", ".cpp", ".m", ".mm" (many more)
459 /// extension).
460 ///
461 /// \return
462 ///     \b true if the filespec represents an implementation source
463 ///     file, \b false otherwise.
464 bool FileSpec::IsSourceImplementationFile() const {
465   ConstString extension(GetFileNameExtension());
466   if (!extension)
467     return false;
468 
469   static RegularExpression g_source_file_regex(llvm::StringRef(
470       "^.([cC]|[mM]|[mM][mM]|[cC][pP][pP]|[cC]\\+\\+|[cC][xX][xX]|[cC][cC]|["
471       "cC][pP]|[sS]|[aA][sS][mM]|[fF]|[fF]77|[fF]90|[fF]95|[fF]03|[fF][oO]["
472       "rR]|[fF][tT][nN]|[fF][pP][pP]|[aA][dD][aA]|[aA][dD][bB]|[aA][dD][sS])"
473       "$"));
474   return g_source_file_regex.Execute(extension.GetStringRef());
475 }
476 
477 bool FileSpec::IsRelative() const {
478   return !IsAbsolute();
479 }
480 
481 bool FileSpec::IsAbsolute() const {
482   llvm::SmallString<64> current_path;
483   GetPath(current_path, false);
484 
485   // Early return if the path is empty.
486   if (current_path.empty())
487     return false;
488 
489   // We consider paths starting with ~ to be absolute.
490   if (current_path[0] == '~')
491     return true;
492 
493   return llvm::sys::path::is_absolute(current_path, m_style);
494 }
495 
496 void FileSpec::MakeAbsolute(const FileSpec &dir) {
497   if (IsRelative())
498     PrependPathComponent(dir);
499 }
500 
501 void llvm::format_provider<FileSpec>::format(const FileSpec &F,
502                                              raw_ostream &Stream,
503                                              StringRef Style) {
504   assert(
505       (Style.empty() || Style.equals_lower("F") || Style.equals_lower("D")) &&
506       "Invalid FileSpec style!");
507 
508   StringRef dir = F.GetDirectory().GetStringRef();
509   StringRef file = F.GetFilename().GetStringRef();
510 
511   if (dir.empty() && file.empty()) {
512     Stream << "(empty)";
513     return;
514   }
515 
516   if (Style.equals_lower("F")) {
517     Stream << (file.empty() ? "(empty)" : file);
518     return;
519   }
520 
521   // Style is either D or empty, either way we need to print the directory.
522   if (!dir.empty()) {
523     // Directory is stored in normalized form, which might be different than
524     // preferred form.  In order to handle this, we need to cut off the
525     // filename, then denormalize, then write the entire denorm'ed directory.
526     llvm::SmallString<64> denormalized_dir = dir;
527     Denormalize(denormalized_dir, F.GetPathStyle());
528     Stream << denormalized_dir;
529     Stream << GetPreferredPathSeparator(F.GetPathStyle());
530   }
531 
532   if (Style.equals_lower("D")) {
533     // We only want to print the directory, so now just exit.
534     if (dir.empty())
535       Stream << "(empty)";
536     return;
537   }
538 
539   if (!file.empty())
540     Stream << file;
541 }
542