1e710425bSDimitry Andric //===--- MatchFilePath.cpp - Match file path with pattern -------*- C++ -*-===//
2e710425bSDimitry Andric //
3e710425bSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e710425bSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e710425bSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e710425bSDimitry Andric //
7e710425bSDimitry Andric //===----------------------------------------------------------------------===//
8e710425bSDimitry Andric ///
9e710425bSDimitry Andric /// \file
10e710425bSDimitry Andric /// This file implements the functionality of matching a file path name to
11e710425bSDimitry Andric /// a pattern, similar to the POSIX fnmatch() function.
12e710425bSDimitry Andric ///
13e710425bSDimitry Andric //===----------------------------------------------------------------------===//
14e710425bSDimitry Andric 
15e710425bSDimitry Andric #include "MatchFilePath.h"
16e710425bSDimitry Andric 
17e710425bSDimitry Andric using namespace llvm;
18e710425bSDimitry Andric 
19e710425bSDimitry Andric namespace clang {
20e710425bSDimitry Andric namespace format {
21e710425bSDimitry Andric 
22*de8261c4SDimitry Andric // Check whether `FilePath` matches `Pattern` based on POSIX 2.13.1, 2.13.2, and
23*de8261c4SDimitry Andric // Rule 1 of 2.13.3.
matchFilePath(StringRef Pattern,StringRef FilePath)24e710425bSDimitry Andric bool matchFilePath(StringRef Pattern, StringRef FilePath) {
25e710425bSDimitry Andric   assert(!Pattern.empty());
26e710425bSDimitry Andric   assert(!FilePath.empty());
27e710425bSDimitry Andric 
28e710425bSDimitry Andric   // No match if `Pattern` ends with a non-meta character not equal to the last
29e710425bSDimitry Andric   // character of `FilePath`.
30e710425bSDimitry Andric   if (const auto C = Pattern.back(); !strchr("?*]", C) && C != FilePath.back())
31e710425bSDimitry Andric     return false;
32e710425bSDimitry Andric 
33e710425bSDimitry Andric   constexpr auto Separator = '/';
34e710425bSDimitry Andric   const auto EOP = Pattern.size();  // End of `Pattern`.
35e710425bSDimitry Andric   const auto End = FilePath.size(); // End of `FilePath`.
36e710425bSDimitry Andric   unsigned I = 0;                   // Index to `Pattern`.
37e710425bSDimitry Andric 
38e710425bSDimitry Andric   for (unsigned J = 0; J < End; ++J) {
39e710425bSDimitry Andric     if (I == EOP)
40e710425bSDimitry Andric       return false;
41e710425bSDimitry Andric 
42e710425bSDimitry Andric     switch (const auto F = FilePath[J]; Pattern[I]) {
43e710425bSDimitry Andric     case '\\':
44e710425bSDimitry Andric       if (++I == EOP || F != Pattern[I])
45e710425bSDimitry Andric         return false;
46e710425bSDimitry Andric       break;
47e710425bSDimitry Andric     case '?':
48e710425bSDimitry Andric       if (F == Separator)
49e710425bSDimitry Andric         return false;
50e710425bSDimitry Andric       break;
51e710425bSDimitry Andric     case '*': {
52e710425bSDimitry Andric       while (++I < EOP && Pattern[I] == '*') { // Skip consecutive stars.
53e710425bSDimitry Andric       }
54e710425bSDimitry Andric       const auto K = FilePath.find(Separator, J); // Index of next `Separator`.
55e710425bSDimitry Andric       const bool NoMoreSeparatorsInFilePath = K == StringRef::npos;
56e710425bSDimitry Andric       if (I == EOP) // `Pattern` ends with a star.
57e710425bSDimitry Andric         return NoMoreSeparatorsInFilePath;
58e710425bSDimitry Andric       // `Pattern` ends with a lone backslash.
59e710425bSDimitry Andric       if (Pattern[I] == '\\' && ++I == EOP)
60e710425bSDimitry Andric         return false;
61e710425bSDimitry Andric       // The star is followed by a (possibly escaped) `Separator`.
62e710425bSDimitry Andric       if (Pattern[I] == Separator) {
63e710425bSDimitry Andric         if (NoMoreSeparatorsInFilePath)
64e710425bSDimitry Andric           return false;
65e710425bSDimitry Andric         J = K; // Skip to next `Separator` in `FilePath`.
66e710425bSDimitry Andric         break;
67e710425bSDimitry Andric       }
68e710425bSDimitry Andric       // Recurse.
69e710425bSDimitry Andric       for (auto Pat = Pattern.substr(I); J < End && FilePath[J] != Separator;
70e710425bSDimitry Andric            ++J) {
71e710425bSDimitry Andric         if (matchFilePath(Pat, FilePath.substr(J)))
72e710425bSDimitry Andric           return true;
73e710425bSDimitry Andric       }
74e710425bSDimitry Andric       return false;
75e710425bSDimitry Andric     }
76e710425bSDimitry Andric     case '[':
77e710425bSDimitry Andric       // Skip e.g. `[!]`.
78e710425bSDimitry Andric       if (I + 3 < EOP || (I + 3 == EOP && Pattern[I + 1] != '!')) {
79e710425bSDimitry Andric         // Skip unpaired `[`, brackets containing slashes, and `[]`.
80e710425bSDimitry Andric         if (const auto K = Pattern.find_first_of("]/", I + 1);
81e710425bSDimitry Andric             K != StringRef::npos && Pattern[K] == ']' && K > I + 1) {
82e710425bSDimitry Andric           if (F == Separator)
83e710425bSDimitry Andric             return false;
84e710425bSDimitry Andric           ++I; // After the `[`.
85e710425bSDimitry Andric           bool Negated = false;
86e710425bSDimitry Andric           if (Pattern[I] == '!') {
87e710425bSDimitry Andric             Negated = true;
88e710425bSDimitry Andric             ++I; // After the `!`.
89e710425bSDimitry Andric           }
90e710425bSDimitry Andric           bool Match = false;
91e710425bSDimitry Andric           do {
92e710425bSDimitry Andric             if (I + 2 < K && Pattern[I + 1] == '-') {
93e710425bSDimitry Andric               Match = Pattern[I] <= F && F <= Pattern[I + 2];
94e710425bSDimitry Andric               I += 3; // After the range, e.g. `A-Z`.
95e710425bSDimitry Andric             } else {
96e710425bSDimitry Andric               Match = F == Pattern[I++];
97e710425bSDimitry Andric             }
98e710425bSDimitry Andric           } while (!Match && I < K);
99e710425bSDimitry Andric           if (Negated ? Match : !Match)
100e710425bSDimitry Andric             return false;
101e710425bSDimitry Andric           I = K + 1; // After the `]`.
102e710425bSDimitry Andric           continue;
103e710425bSDimitry Andric         }
104e710425bSDimitry Andric       }
105e710425bSDimitry Andric       [[fallthrough]]; // Match `[` literally.
106e710425bSDimitry Andric     default:
107e710425bSDimitry Andric       if (F != Pattern[I])
108e710425bSDimitry Andric         return false;
109e710425bSDimitry Andric     }
110e710425bSDimitry Andric 
111e710425bSDimitry Andric     ++I;
112e710425bSDimitry Andric   }
113e710425bSDimitry Andric 
114e710425bSDimitry Andric   // Match trailing stars with null strings.
115e710425bSDimitry Andric   while (I < EOP && Pattern[I] == '*')
116e710425bSDimitry Andric     ++I;
117e710425bSDimitry Andric 
118e710425bSDimitry Andric   return I == EOP;
119e710425bSDimitry Andric }
120e710425bSDimitry Andric 
121e710425bSDimitry Andric } // namespace format
122e710425bSDimitry Andric } // namespace clang
123