1e710425bSDimitry Andric //===--- MatchFilePath.cpp - Match file path with pattern -------*- C++ -*-===//
2e710425bSDimitry Andric //
3e710425bSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e710425bSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e710425bSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e710425bSDimitry Andric //
7e710425bSDimitry Andric //===----------------------------------------------------------------------===//
8e710425bSDimitry Andric ///
9e710425bSDimitry Andric /// \file
10e710425bSDimitry Andric /// This file implements the functionality of matching a file path name to
11e710425bSDimitry Andric /// a pattern, similar to the POSIX fnmatch() function.
12e710425bSDimitry Andric ///
13e710425bSDimitry Andric //===----------------------------------------------------------------------===//
14e710425bSDimitry Andric
15e710425bSDimitry Andric #include "MatchFilePath.h"
16e710425bSDimitry Andric
17e710425bSDimitry Andric using namespace llvm;
18e710425bSDimitry Andric
19e710425bSDimitry Andric namespace clang {
20e710425bSDimitry Andric namespace format {
21e710425bSDimitry Andric
22*de8261c4SDimitry Andric // Check whether `FilePath` matches `Pattern` based on POSIX 2.13.1, 2.13.2, and
23*de8261c4SDimitry Andric // Rule 1 of 2.13.3.
matchFilePath(StringRef Pattern,StringRef FilePath)24e710425bSDimitry Andric bool matchFilePath(StringRef Pattern, StringRef FilePath) {
25e710425bSDimitry Andric assert(!Pattern.empty());
26e710425bSDimitry Andric assert(!FilePath.empty());
27e710425bSDimitry Andric
28e710425bSDimitry Andric // No match if `Pattern` ends with a non-meta character not equal to the last
29e710425bSDimitry Andric // character of `FilePath`.
30e710425bSDimitry Andric if (const auto C = Pattern.back(); !strchr("?*]", C) && C != FilePath.back())
31e710425bSDimitry Andric return false;
32e710425bSDimitry Andric
33e710425bSDimitry Andric constexpr auto Separator = '/';
34e710425bSDimitry Andric const auto EOP = Pattern.size(); // End of `Pattern`.
35e710425bSDimitry Andric const auto End = FilePath.size(); // End of `FilePath`.
36e710425bSDimitry Andric unsigned I = 0; // Index to `Pattern`.
37e710425bSDimitry Andric
38e710425bSDimitry Andric for (unsigned J = 0; J < End; ++J) {
39e710425bSDimitry Andric if (I == EOP)
40e710425bSDimitry Andric return false;
41e710425bSDimitry Andric
42e710425bSDimitry Andric switch (const auto F = FilePath[J]; Pattern[I]) {
43e710425bSDimitry Andric case '\\':
44e710425bSDimitry Andric if (++I == EOP || F != Pattern[I])
45e710425bSDimitry Andric return false;
46e710425bSDimitry Andric break;
47e710425bSDimitry Andric case '?':
48e710425bSDimitry Andric if (F == Separator)
49e710425bSDimitry Andric return false;
50e710425bSDimitry Andric break;
51e710425bSDimitry Andric case '*': {
52e710425bSDimitry Andric while (++I < EOP && Pattern[I] == '*') { // Skip consecutive stars.
53e710425bSDimitry Andric }
54e710425bSDimitry Andric const auto K = FilePath.find(Separator, J); // Index of next `Separator`.
55e710425bSDimitry Andric const bool NoMoreSeparatorsInFilePath = K == StringRef::npos;
56e710425bSDimitry Andric if (I == EOP) // `Pattern` ends with a star.
57e710425bSDimitry Andric return NoMoreSeparatorsInFilePath;
58e710425bSDimitry Andric // `Pattern` ends with a lone backslash.
59e710425bSDimitry Andric if (Pattern[I] == '\\' && ++I == EOP)
60e710425bSDimitry Andric return false;
61e710425bSDimitry Andric // The star is followed by a (possibly escaped) `Separator`.
62e710425bSDimitry Andric if (Pattern[I] == Separator) {
63e710425bSDimitry Andric if (NoMoreSeparatorsInFilePath)
64e710425bSDimitry Andric return false;
65e710425bSDimitry Andric J = K; // Skip to next `Separator` in `FilePath`.
66e710425bSDimitry Andric break;
67e710425bSDimitry Andric }
68e710425bSDimitry Andric // Recurse.
69e710425bSDimitry Andric for (auto Pat = Pattern.substr(I); J < End && FilePath[J] != Separator;
70e710425bSDimitry Andric ++J) {
71e710425bSDimitry Andric if (matchFilePath(Pat, FilePath.substr(J)))
72e710425bSDimitry Andric return true;
73e710425bSDimitry Andric }
74e710425bSDimitry Andric return false;
75e710425bSDimitry Andric }
76e710425bSDimitry Andric case '[':
77e710425bSDimitry Andric // Skip e.g. `[!]`.
78e710425bSDimitry Andric if (I + 3 < EOP || (I + 3 == EOP && Pattern[I + 1] != '!')) {
79e710425bSDimitry Andric // Skip unpaired `[`, brackets containing slashes, and `[]`.
80e710425bSDimitry Andric if (const auto K = Pattern.find_first_of("]/", I + 1);
81e710425bSDimitry Andric K != StringRef::npos && Pattern[K] == ']' && K > I + 1) {
82e710425bSDimitry Andric if (F == Separator)
83e710425bSDimitry Andric return false;
84e710425bSDimitry Andric ++I; // After the `[`.
85e710425bSDimitry Andric bool Negated = false;
86e710425bSDimitry Andric if (Pattern[I] == '!') {
87e710425bSDimitry Andric Negated = true;
88e710425bSDimitry Andric ++I; // After the `!`.
89e710425bSDimitry Andric }
90e710425bSDimitry Andric bool Match = false;
91e710425bSDimitry Andric do {
92e710425bSDimitry Andric if (I + 2 < K && Pattern[I + 1] == '-') {
93e710425bSDimitry Andric Match = Pattern[I] <= F && F <= Pattern[I + 2];
94e710425bSDimitry Andric I += 3; // After the range, e.g. `A-Z`.
95e710425bSDimitry Andric } else {
96e710425bSDimitry Andric Match = F == Pattern[I++];
97e710425bSDimitry Andric }
98e710425bSDimitry Andric } while (!Match && I < K);
99e710425bSDimitry Andric if (Negated ? Match : !Match)
100e710425bSDimitry Andric return false;
101e710425bSDimitry Andric I = K + 1; // After the `]`.
102e710425bSDimitry Andric continue;
103e710425bSDimitry Andric }
104e710425bSDimitry Andric }
105e710425bSDimitry Andric [[fallthrough]]; // Match `[` literally.
106e710425bSDimitry Andric default:
107e710425bSDimitry Andric if (F != Pattern[I])
108e710425bSDimitry Andric return false;
109e710425bSDimitry Andric }
110e710425bSDimitry Andric
111e710425bSDimitry Andric ++I;
112e710425bSDimitry Andric }
113e710425bSDimitry Andric
114e710425bSDimitry Andric // Match trailing stars with null strings.
115e710425bSDimitry Andric while (I < EOP && Pattern[I] == '*')
116e710425bSDimitry Andric ++I;
117e710425bSDimitry Andric
118e710425bSDimitry Andric return I == EOP;
119e710425bSDimitry Andric }
120e710425bSDimitry Andric
121e710425bSDimitry Andric } // namespace format
122e710425bSDimitry Andric } // namespace clang
123