1 //===-- FileSpec.cpp ------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/FileSpec.h"
10 #include "lldb/Utility/RegularExpression.h"
11 #include "lldb/Utility/Stream.h"
12 
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/ADT/Triple.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Support/ErrorOr.h"
19 #include "llvm/Support/FileSystem.h"
20 #include "llvm/Support/Program.h"
21 #include "llvm/Support/raw_ostream.h"
22 
23 #include <algorithm>
24 #include <system_error>
25 #include <vector>
26 
27 #include <cassert>
28 #include <climits>
29 #include <cstdio>
30 #include <cstring>
31 
32 using namespace lldb;
33 using namespace lldb_private;
34 
35 namespace {
36 
37 static constexpr FileSpec::Style GetNativeStyle() {
38 #if defined(_WIN32)
39   return FileSpec::Style::windows;
40 #else
41   return FileSpec::Style::posix;
42 #endif
43 }
44 
45 bool PathStyleIsPosix(FileSpec::Style style) {
46   return llvm::sys::path::is_style_posix(style);
47 }
48 
49 const char *GetPathSeparators(FileSpec::Style style) {
50   return llvm::sys::path::get_separator(style).data();
51 }
52 
53 char GetPreferredPathSeparator(FileSpec::Style style) {
54   return GetPathSeparators(style)[0];
55 }
56 
57 void Denormalize(llvm::SmallVectorImpl<char> &path, FileSpec::Style style) {
58   if (PathStyleIsPosix(style))
59     return;
60 
61   std::replace(path.begin(), path.end(), '/', '\\');
62 }
63 
64 } // end anonymous namespace
65 
66 FileSpec::FileSpec() : m_style(GetNativeStyle()) {}
67 
68 // Default constructor that can take an optional full path to a file on disk.
69 FileSpec::FileSpec(llvm::StringRef path, Style style) : m_style(style) {
70   SetFile(path, style);
71 }
72 
73 FileSpec::FileSpec(llvm::StringRef path, const llvm::Triple &triple)
74     : FileSpec{path, triple.isOSWindows() ? Style::windows : Style::posix} {}
75 
76 namespace {
77 /// Safely get a character at the specified index.
78 ///
79 /// \param[in] path
80 ///     A full, partial, or relative path to a file.
81 ///
82 /// \param[in] i
83 ///     An index into path which may or may not be valid.
84 ///
85 /// \return
86 ///   The character at index \a i if the index is valid, or 0 if
87 ///   the index is not valid.
88 inline char safeCharAtIndex(const llvm::StringRef &path, size_t i) {
89   if (i < path.size())
90     return path[i];
91   return 0;
92 }
93 
94 /// Check if a path needs to be normalized.
95 ///
96 /// Check if a path needs to be normalized. We currently consider a
97 /// path to need normalization if any of the following are true
98 ///  - path contains "/./"
99 ///  - path contains "/../"
100 ///  - path contains "//"
101 ///  - path ends with "/"
102 /// Paths that start with "./" or with "../" are not considered to
103 /// need normalization since we aren't trying to resolve the path,
104 /// we are just trying to remove redundant things from the path.
105 ///
106 /// \param[in] path
107 ///     A full, partial, or relative path to a file.
108 ///
109 /// \return
110 ///   Returns \b true if the path needs to be normalized.
111 bool needsNormalization(const llvm::StringRef &path) {
112   if (path.empty())
113     return false;
114   // We strip off leading "." values so these paths need to be normalized
115   if (path[0] == '.')
116     return true;
117   for (auto i = path.find_first_of("\\/"); i != llvm::StringRef::npos;
118        i = path.find_first_of("\\/", i + 1)) {
119     const auto next = safeCharAtIndex(path, i+1);
120     switch (next) {
121       case 0:
122         // path separator char at the end of the string which should be
123         // stripped unless it is the one and only character
124         return i > 0;
125       case '/':
126       case '\\':
127         // two path separator chars in the middle of a path needs to be
128         // normalized
129         if (i > 0)
130           return true;
131         ++i;
132         break;
133 
134       case '.': {
135           const auto next_next = safeCharAtIndex(path, i+2);
136           switch (next_next) {
137             default: break;
138             case 0: return true; // ends with "/."
139             case '/':
140             case '\\':
141               return true; // contains "/./"
142             case '.': {
143               const auto next_next_next = safeCharAtIndex(path, i+3);
144               switch (next_next_next) {
145                 default: break;
146                 case 0: return true; // ends with "/.."
147                 case '/':
148                 case '\\':
149                   return true; // contains "/../"
150               }
151               break;
152             }
153           }
154         }
155         break;
156 
157       default:
158         break;
159     }
160   }
161   return false;
162 }
163 
164 
165 }
166 
167 void FileSpec::SetFile(llvm::StringRef pathname) { SetFile(pathname, m_style); }
168 
169 // Update the contents of this object with a new path. The path will be split
170 // up into a directory and filename and stored as uniqued string values for
171 // quick comparison and efficient memory usage.
172 void FileSpec::SetFile(llvm::StringRef pathname, Style style) {
173   Clear();
174   m_style = (style == Style::native) ? GetNativeStyle() : style;
175 
176   if (pathname.empty())
177     return;
178 
179   llvm::SmallString<128> resolved(pathname);
180 
181   // Normalize the path by removing ".", ".." and other redundant components.
182   if (needsNormalization(resolved))
183     llvm::sys::path::remove_dots(resolved, true, m_style);
184 
185   // Normalize back slashes to forward slashes
186   if (m_style == Style::windows)
187     std::replace(resolved.begin(), resolved.end(), '\\', '/');
188 
189   if (resolved.empty()) {
190     // If we have no path after normalization set the path to the current
191     // directory. This matches what python does and also a few other path
192     // utilities.
193     m_filename.SetString(".");
194     return;
195   }
196 
197   // Split path into filename and directory. We rely on the underlying char
198   // pointer to be nullptr when the components are empty.
199   llvm::StringRef filename = llvm::sys::path::filename(resolved, m_style);
200   if(!filename.empty())
201     m_filename.SetString(filename);
202 
203   llvm::StringRef directory = llvm::sys::path::parent_path(resolved, m_style);
204   if(!directory.empty())
205     m_directory.SetString(directory);
206 }
207 
208 void FileSpec::SetFile(llvm::StringRef path, const llvm::Triple &triple) {
209   return SetFile(path, triple.isOSWindows() ? Style::windows : Style::posix);
210 }
211 
212 // Convert to pointer operator. This allows code to check any FileSpec objects
213 // to see if they contain anything valid using code such as:
214 //
215 //  if (file_spec)
216 //  {}
217 FileSpec::operator bool() const { return m_filename || m_directory; }
218 
219 // Logical NOT operator. This allows code to check any FileSpec objects to see
220 // if they are invalid using code such as:
221 //
222 //  if (!file_spec)
223 //  {}
224 bool FileSpec::operator!() const { return !m_directory && !m_filename; }
225 
226 bool FileSpec::DirectoryEquals(const FileSpec &rhs) const {
227   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
228   return ConstString::Equals(m_directory, rhs.m_directory, case_sensitive);
229 }
230 
231 bool FileSpec::FileEquals(const FileSpec &rhs) const {
232   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
233   return ConstString::Equals(m_filename, rhs.m_filename, case_sensitive);
234 }
235 
236 // Equal to operator
237 bool FileSpec::operator==(const FileSpec &rhs) const {
238   return FileEquals(rhs) && DirectoryEquals(rhs);
239 }
240 
241 // Not equal to operator
242 bool FileSpec::operator!=(const FileSpec &rhs) const { return !(*this == rhs); }
243 
244 // Less than operator
245 bool FileSpec::operator<(const FileSpec &rhs) const {
246   return FileSpec::Compare(*this, rhs, true) < 0;
247 }
248 
249 // Dump a FileSpec object to a stream
250 Stream &lldb_private::operator<<(Stream &s, const FileSpec &f) {
251   f.Dump(s.AsRawOstream());
252   return s;
253 }
254 
255 // Clear this object by releasing both the directory and filename string values
256 // and making them both the empty string.
257 void FileSpec::Clear() {
258   m_directory.Clear();
259   m_filename.Clear();
260   PathWasModified();
261 }
262 
263 // Compare two FileSpec objects. If "full" is true, then both the directory and
264 // the filename must match. If "full" is false, then the directory names for
265 // "a" and "b" are only compared if they are both non-empty. This allows a
266 // FileSpec object to only contain a filename and it can match FileSpec objects
267 // that have matching filenames with different paths.
268 //
269 // Return -1 if the "a" is less than "b", 0 if "a" is equal to "b" and "1" if
270 // "a" is greater than "b".
271 int FileSpec::Compare(const FileSpec &a, const FileSpec &b, bool full) {
272   int result = 0;
273 
274   // case sensitivity of compare
275   const bool case_sensitive = a.IsCaseSensitive() || b.IsCaseSensitive();
276 
277   // If full is true, then we must compare both the directory and filename.
278 
279   // If full is false, then if either directory is empty, then we match on the
280   // basename only, and if both directories have valid values, we still do a
281   // full compare. This allows for matching when we just have a filename in one
282   // of the FileSpec objects.
283 
284   if (full || (a.m_directory && b.m_directory)) {
285     result = ConstString::Compare(a.m_directory, b.m_directory, case_sensitive);
286     if (result)
287       return result;
288   }
289   return ConstString::Compare(a.m_filename, b.m_filename, case_sensitive);
290 }
291 
292 bool FileSpec::Equal(const FileSpec &a, const FileSpec &b, bool full) {
293   if (full || (a.GetDirectory() && b.GetDirectory()))
294     return a == b;
295 
296   return a.FileEquals(b);
297 }
298 
299 bool FileSpec::Match(const FileSpec &pattern, const FileSpec &file) {
300   if (pattern.GetDirectory())
301     return pattern == file;
302   if (pattern.GetFilename())
303     return pattern.FileEquals(file);
304   return true;
305 }
306 
307 llvm::Optional<FileSpec::Style> FileSpec::GuessPathStyle(llvm::StringRef absolute_path) {
308   if (absolute_path.startswith("/"))
309     return Style::posix;
310   if (absolute_path.startswith(R"(\\)"))
311     return Style::windows;
312   if (absolute_path.size() >= 3 && llvm::isAlpha(absolute_path[0]) &&
313       (absolute_path.substr(1, 2) == R"(:\)" ||
314        absolute_path.substr(1, 2) == R"(:/)"))
315     return Style::windows;
316   return llvm::None;
317 }
318 
319 // Dump the object to the supplied stream. If the object contains a valid
320 // directory name, it will be displayed followed by a directory delimiter, and
321 // the filename.
322 void FileSpec::Dump(llvm::raw_ostream &s) const {
323   std::string path{GetPath(true)};
324   s << path;
325   char path_separator = GetPreferredPathSeparator(m_style);
326   if (!m_filename && !path.empty() && path.back() != path_separator)
327     s << path_separator;
328 }
329 
330 FileSpec::Style FileSpec::GetPathStyle() const { return m_style; }
331 
332 void FileSpec::SetDirectory(ConstString directory) {
333   m_directory = directory;
334   PathWasModified();
335 }
336 
337 void FileSpec::SetDirectory(llvm::StringRef directory) {
338   m_directory = ConstString(directory);
339   PathWasModified();
340 }
341 
342 void FileSpec::SetFilename(ConstString filename) {
343   m_filename = filename;
344   PathWasModified();
345 }
346 
347 void FileSpec::SetFilename(llvm::StringRef filename) {
348   m_filename = ConstString(filename);
349   PathWasModified();
350 }
351 
352 void FileSpec::ClearFilename() {
353   m_filename.Clear();
354   PathWasModified();
355 }
356 
357 void FileSpec::ClearDirectory() {
358   m_directory.Clear();
359   PathWasModified();
360 }
361 
362 // Extract the directory and path into a fixed buffer. This is needed as the
363 // directory and path are stored in separate string values.
364 size_t FileSpec::GetPath(char *path, size_t path_max_len,
365                          bool denormalize) const {
366   if (!path)
367     return 0;
368 
369   std::string result = GetPath(denormalize);
370   ::snprintf(path, path_max_len, "%s", result.c_str());
371   return std::min(path_max_len - 1, result.length());
372 }
373 
374 std::string FileSpec::GetPath(bool denormalize) const {
375   llvm::SmallString<64> result;
376   GetPath(result, denormalize);
377   return static_cast<std::string>(result);
378 }
379 
380 ConstString FileSpec::GetPathAsConstString(bool denormalize) const {
381   return ConstString{GetPath(denormalize)};
382 }
383 
384 void FileSpec::GetPath(llvm::SmallVectorImpl<char> &path,
385                        bool denormalize) const {
386   path.append(m_directory.GetStringRef().begin(),
387               m_directory.GetStringRef().end());
388   // Since the path was normalized and all paths use '/' when stored in these
389   // objects, we don't need to look for the actual syntax specific path
390   // separator, we just look for and insert '/'.
391   if (m_directory && m_filename && m_directory.GetStringRef().back() != '/' &&
392       m_filename.GetStringRef().back() != '/')
393     path.insert(path.end(), '/');
394   path.append(m_filename.GetStringRef().begin(),
395               m_filename.GetStringRef().end());
396   if (denormalize && !path.empty())
397     Denormalize(path, m_style);
398 }
399 
400 ConstString FileSpec::GetFileNameExtension() const {
401   return ConstString(
402       llvm::sys::path::extension(m_filename.GetStringRef(), m_style));
403 }
404 
405 ConstString FileSpec::GetFileNameStrippingExtension() const {
406   return ConstString(llvm::sys::path::stem(m_filename.GetStringRef(), m_style));
407 }
408 
409 // Return the size in bytes that this object takes in memory. This returns the
410 // size in bytes of this object, not any shared string values it may refer to.
411 size_t FileSpec::MemorySize() const {
412   return m_filename.MemorySize() + m_directory.MemorySize();
413 }
414 
415 FileSpec
416 FileSpec::CopyByAppendingPathComponent(llvm::StringRef component) const {
417   FileSpec ret = *this;
418   ret.AppendPathComponent(component);
419   return ret;
420 }
421 
422 FileSpec FileSpec::CopyByRemovingLastPathComponent() const {
423   llvm::SmallString<64> current_path;
424   GetPath(current_path, false);
425   if (llvm::sys::path::has_parent_path(current_path, m_style))
426     return FileSpec(llvm::sys::path::parent_path(current_path, m_style),
427                     m_style);
428   return *this;
429 }
430 
431 ConstString FileSpec::GetLastPathComponent() const {
432   llvm::SmallString<64> current_path;
433   GetPath(current_path, false);
434   return ConstString(llvm::sys::path::filename(current_path, m_style));
435 }
436 
437 void FileSpec::PrependPathComponent(llvm::StringRef component) {
438   llvm::SmallString<64> new_path(component);
439   llvm::SmallString<64> current_path;
440   GetPath(current_path, false);
441   llvm::sys::path::append(new_path,
442                           llvm::sys::path::begin(current_path, m_style),
443                           llvm::sys::path::end(current_path), m_style);
444   SetFile(new_path, m_style);
445 }
446 
447 void FileSpec::PrependPathComponent(const FileSpec &new_path) {
448   return PrependPathComponent(new_path.GetPath(false));
449 }
450 
451 void FileSpec::AppendPathComponent(llvm::StringRef component) {
452   llvm::SmallString<64> current_path;
453   GetPath(current_path, false);
454   llvm::sys::path::append(current_path, m_style, component);
455   SetFile(current_path, m_style);
456 }
457 
458 void FileSpec::AppendPathComponent(const FileSpec &new_path) {
459   return AppendPathComponent(new_path.GetPath(false));
460 }
461 
462 bool FileSpec::RemoveLastPathComponent() {
463   llvm::SmallString<64> current_path;
464   GetPath(current_path, false);
465   if (llvm::sys::path::has_parent_path(current_path, m_style)) {
466     SetFile(llvm::sys::path::parent_path(current_path, m_style));
467     return true;
468   }
469   return false;
470 }
471 /// Returns true if the filespec represents an implementation source
472 /// file (files with a ".c", ".cpp", ".m", ".mm" (many more)
473 /// extension).
474 ///
475 /// \return
476 ///     \b true if the filespec represents an implementation source
477 ///     file, \b false otherwise.
478 bool FileSpec::IsSourceImplementationFile() const {
479   ConstString extension(GetFileNameExtension());
480   if (!extension)
481     return false;
482 
483   static RegularExpression g_source_file_regex(llvm::StringRef(
484       "^.([cC]|[mM]|[mM][mM]|[cC][pP][pP]|[cC]\\+\\+|[cC][xX][xX]|[cC][cC]|["
485       "cC][pP]|[sS]|[aA][sS][mM]|[fF]|[fF]77|[fF]90|[fF]95|[fF]03|[fF][oO]["
486       "rR]|[fF][tT][nN]|[fF][pP][pP]|[aA][dD][aA]|[aA][dD][bB]|[aA][dD][sS])"
487       "$"));
488   return g_source_file_regex.Execute(extension.GetStringRef());
489 }
490 
491 bool FileSpec::IsRelative() const {
492   return !IsAbsolute();
493 }
494 
495 bool FileSpec::IsAbsolute() const {
496   // Check if we have cached if this path is absolute to avoid recalculating.
497   if (m_absolute != Absolute::Calculate)
498     return m_absolute == Absolute::Yes;
499 
500   m_absolute = Absolute::No;
501 
502   llvm::SmallString<64> path;
503   GetPath(path, false);
504 
505   if (!path.empty()) {
506     // We consider paths starting with ~ to be absolute.
507     if (path[0] == '~' || llvm::sys::path::is_absolute(path, m_style))
508       m_absolute = Absolute::Yes;
509   }
510 
511   return m_absolute == Absolute::Yes;
512 }
513 
514 void FileSpec::MakeAbsolute(const FileSpec &dir) {
515   if (IsRelative())
516     PrependPathComponent(dir);
517 }
518 
519 void llvm::format_provider<FileSpec>::format(const FileSpec &F,
520                                              raw_ostream &Stream,
521                                              StringRef Style) {
522   assert((Style.empty() || Style.equals_insensitive("F") ||
523           Style.equals_insensitive("D")) &&
524          "Invalid FileSpec style!");
525 
526   StringRef dir = F.GetDirectory().GetStringRef();
527   StringRef file = F.GetFilename().GetStringRef();
528 
529   if (dir.empty() && file.empty()) {
530     Stream << "(empty)";
531     return;
532   }
533 
534   if (Style.equals_insensitive("F")) {
535     Stream << (file.empty() ? "(empty)" : file);
536     return;
537   }
538 
539   // Style is either D or empty, either way we need to print the directory.
540   if (!dir.empty()) {
541     // Directory is stored in normalized form, which might be different than
542     // preferred form.  In order to handle this, we need to cut off the
543     // filename, then denormalize, then write the entire denorm'ed directory.
544     llvm::SmallString<64> denormalized_dir = dir;
545     Denormalize(denormalized_dir, F.GetPathStyle());
546     Stream << denormalized_dir;
547     Stream << GetPreferredPathSeparator(F.GetPathStyle());
548   }
549 
550   if (Style.equals_insensitive("D")) {
551     // We only want to print the directory, so now just exit.
552     if (dir.empty())
553       Stream << "(empty)";
554     return;
555   }
556 
557   if (!file.empty())
558     Stream << file;
559 }
560 
561 void llvm::yaml::ScalarEnumerationTraits<FileSpecStyle>::enumeration(
562     IO &io, FileSpecStyle &value) {
563   io.enumCase(value, "windows", FileSpecStyle(FileSpec::Style::windows));
564   io.enumCase(value, "posix", FileSpecStyle(FileSpec::Style::posix));
565   io.enumCase(value, "native", FileSpecStyle(FileSpec::Style::native));
566 }
567 
568 void llvm::yaml::MappingTraits<FileSpec>::mapping(IO &io, FileSpec &f) {
569   io.mapRequired("directory", f.m_directory);
570   io.mapRequired("file", f.m_filename);
571   io.mapRequired("resolved", f.m_is_resolved);
572   FileSpecStyle style = f.m_style;
573   io.mapRequired("style", style);
574   f.m_style = style;
575 }
576