1 //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a glob pattern matcher. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Support/GlobPattern.h" 14 #include "llvm/ADT/ArrayRef.h" 15 #include "llvm/ADT/BitVector.h" 16 #include "llvm/ADT/Optional.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/Support/Errc.h" 19 20 using namespace llvm; 21 22 static bool hasWildcard(StringRef S) { 23 return S.find_first_of("?*[\\") != StringRef::npos; 24 } 25 26 // Expands character ranges and returns a bitmap. 27 // For example, "a-cf-hz" is expanded to "abcfghz". 28 static Expected<BitVector> expand(StringRef S, StringRef Original) { 29 BitVector BV(256, false); 30 31 // Expand X-Y. 32 for (;;) { 33 if (S.size() < 3) 34 break; 35 36 uint8_t Start = S[0]; 37 uint8_t End = S[2]; 38 39 // If it doesn't start with something like X-Y, 40 // consume the first character and proceed. 41 if (S[1] != '-') { 42 BV[Start] = true; 43 S = S.substr(1); 44 continue; 45 } 46 47 // It must be in the form of X-Y. 48 // Validate it and then interpret the range. 49 if (Start > End) 50 return make_error<StringError>("invalid glob pattern: " + Original, 51 errc::invalid_argument); 52 53 for (int C = Start; C <= End; ++C) 54 BV[(uint8_t)C] = true; 55 S = S.substr(3); 56 } 57 58 for (char C : S) 59 BV[(uint8_t)C] = true; 60 return BV; 61 } 62 63 // This is a scanner for the glob pattern. 64 // A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]" 65 // (which is a negative form of "[<chars>]"), "[!<chars>]" (which is 66 // equivalent to "[^<chars>]"), or a non-meta character. 67 // This function returns the first token in S. 68 static Expected<BitVector> scan(StringRef &S, StringRef Original) { 69 switch (S[0]) { 70 case '*': 71 S = S.substr(1); 72 // '*' is represented by an empty bitvector. 73 // All other bitvectors are 256-bit long. 74 return BitVector(); 75 case '?': 76 S = S.substr(1); 77 return BitVector(256, true); 78 case '[': { 79 // ']' is allowed as the first character of a character class. '[]' is 80 // invalid. So, just skip the first character. 81 size_t End = S.find(']', 2); 82 if (End == StringRef::npos) 83 return make_error<StringError>("invalid glob pattern: " + Original, 84 errc::invalid_argument); 85 86 StringRef Chars = S.substr(1, End - 1); 87 S = S.substr(End + 1); 88 if (Chars.startswith("^") || Chars.startswith("!")) { 89 Expected<BitVector> BV = expand(Chars.substr(1), Original); 90 if (!BV) 91 return BV.takeError(); 92 return BV->flip(); 93 } 94 return expand(Chars, Original); 95 } 96 case '\\': 97 // Eat this character and fall through below to treat it like a non-meta 98 // character. 99 S = S.substr(1); 100 LLVM_FALLTHROUGH; 101 default: 102 BitVector BV(256, false); 103 BV[(uint8_t)S[0]] = true; 104 S = S.substr(1); 105 return BV; 106 } 107 } 108 109 Expected<GlobPattern> GlobPattern::create(StringRef S) { 110 GlobPattern Pat; 111 112 // S doesn't contain any metacharacter, 113 // so the regular string comparison should work. 114 if (!hasWildcard(S)) { 115 Pat.Exact = S; 116 return Pat; 117 } 118 119 // S is something like "foo*", and the "* is not escaped. We can use 120 // startswith(). 121 if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) { 122 Pat.Prefix = S.drop_back(); 123 return Pat; 124 } 125 126 // S is something like "*foo". We can use endswith(). 127 if (S.startswith("*") && !hasWildcard(S.drop_front())) { 128 Pat.Suffix = S.drop_front(); 129 return Pat; 130 } 131 132 // Otherwise, we need to do real glob pattern matching. 133 // Parse the pattern now. 134 StringRef Original = S; 135 while (!S.empty()) { 136 Expected<BitVector> BV = scan(S, Original); 137 if (!BV) 138 return BV.takeError(); 139 Pat.Tokens.push_back(*BV); 140 } 141 return Pat; 142 } 143 144 bool GlobPattern::match(StringRef S) const { 145 if (Exact) 146 return S == *Exact; 147 if (Prefix) 148 return S.startswith(*Prefix); 149 if (Suffix) 150 return S.endswith(*Suffix); 151 return matchOne(Tokens, S); 152 } 153 154 // Runs glob pattern Pats against string S. 155 bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const { 156 for (;;) { 157 if (Pats.empty()) 158 return S.empty(); 159 160 // If Pats[0] is '*', try to match Pats[1..] against all possible 161 // tail strings of S to see at least one pattern succeeds. 162 if (Pats[0].size() == 0) { 163 Pats = Pats.slice(1); 164 if (Pats.empty()) 165 // Fast path. If a pattern is '*', it matches anything. 166 return true; 167 for (size_t I = 0, E = S.size(); I < E; ++I) 168 if (matchOne(Pats, S.substr(I))) 169 return true; 170 return false; 171 } 172 173 // If Pats[0] is not '*', it must consume one character. 174 if (S.empty() || !Pats[0][(uint8_t)S[0]]) 175 return false; 176 Pats = Pats.slice(1); 177 S = S.substr(1); 178 } 179 } 180