1 //===-- StringRef.cpp - Lightweight String References ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/ADT/StringRef.h" 11 12 using namespace llvm; 13 14 // MSVC emits references to this into the translation units which reference it. 15 #ifndef _MSC_VER 16 const size_t StringRef::npos; 17 #endif 18 19 static char ascii_tolower(char x) { 20 if (x >= 'A' && x <= 'Z') 21 return x - 'A' + 'a'; 22 return x; 23 } 24 25 /// compare_lower - Compare strings, ignoring case. 26 int StringRef::compare_lower(StringRef RHS) const { 27 for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) { 28 char LHC = ascii_tolower(Data[I]); 29 char RHC = ascii_tolower(RHS.Data[I]); 30 if (LHC != RHC) 31 return LHC < RHC ? -1 : 1; 32 } 33 34 if (Length == RHS.Length) 35 return 0; 36 return Length < RHS.Length ? -1 : 1; 37 } 38 39 // Compute the edit distance between the two given strings. 40 unsigned StringRef::edit_distance(llvm::StringRef Other, 41 bool AllowReplacements) { 42 // The algorithm implemented below is the "classic" 43 // dynamic-programming algorithm for computing the Levenshtein 44 // distance, which is described here: 45 // 46 // http://en.wikipedia.org/wiki/Levenshtein_distance 47 // 48 // Although the algorithm is typically described using an m x n 49 // array, only two rows are used at a time, so this implemenation 50 // just keeps two separate vectors for those two rows. 51 size_type m = size(); 52 size_type n = Other.size(); 53 54 const unsigned SmallBufferSize = 64; 55 unsigned SmallBuffer[SmallBufferSize]; 56 unsigned *Allocated = 0; 57 unsigned *previous = SmallBuffer; 58 if (2*(n + 1) > SmallBufferSize) 59 Allocated = previous = new unsigned [2*(n+1)]; 60 unsigned *current = previous + (n + 1); 61 62 for (unsigned i = 0; i <= n; ++i) 63 previous[i] = i; 64 65 for (size_type y = 1; y <= m; ++y) { 66 current[0] = y; 67 for (size_type x = 1; x <= n; ++x) { 68 if (AllowReplacements) { 69 current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u), 70 min(current[x-1], previous[x])+1); 71 } 72 else { 73 if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1]; 74 else current[x] = min(current[x-1], previous[x]) + 1; 75 } 76 } 77 78 unsigned *tmp = current; 79 current = previous; 80 previous = tmp; 81 } 82 83 unsigned Result = previous[n]; 84 delete [] Allocated; 85 86 return Result; 87 } 88 89 //===----------------------------------------------------------------------===// 90 // String Searching 91 //===----------------------------------------------------------------------===// 92 93 94 /// find - Search for the first string \arg Str in the string. 95 /// 96 /// \return - The index of the first occurence of \arg Str, or npos if not 97 /// found. 98 size_t StringRef::find(StringRef Str, size_t From) const { 99 size_t N = Str.size(); 100 if (N > Length) 101 return npos; 102 for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i) 103 if (substr(i, N).equals(Str)) 104 return i; 105 return npos; 106 } 107 108 /// rfind - Search for the last string \arg Str in the string. 109 /// 110 /// \return - The index of the last occurence of \arg Str, or npos if not 111 /// found. 112 size_t StringRef::rfind(StringRef Str) const { 113 size_t N = Str.size(); 114 if (N > Length) 115 return npos; 116 for (size_t i = Length - N + 1, e = 0; i != e;) { 117 --i; 118 if (substr(i, N).equals(Str)) 119 return i; 120 } 121 return npos; 122 } 123 124 /// find_first_of - Find the first character in the string that is in \arg 125 /// Chars, or npos if not found. 126 /// 127 /// Note: O(size() * Chars.size()) 128 StringRef::size_type StringRef::find_first_of(StringRef Chars, 129 size_t From) const { 130 for (size_type i = min(From, Length), e = Length; i != e; ++i) 131 if (Chars.find(Data[i]) != npos) 132 return i; 133 return npos; 134 } 135 136 /// find_first_not_of - Find the first character in the string that is not 137 /// \arg C or npos if not found. 138 StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const { 139 for (size_type i = min(From, Length), e = Length; i != e; ++i) 140 if (Data[i] != C) 141 return i; 142 return npos; 143 } 144 145 /// find_first_not_of - Find the first character in the string that is not 146 /// in the string \arg Chars, or npos if not found. 147 /// 148 /// Note: O(size() * Chars.size()) 149 StringRef::size_type StringRef::find_first_not_of(StringRef Chars, 150 size_t From) const { 151 for (size_type i = min(From, Length), e = Length; i != e; ++i) 152 if (Chars.find(Data[i]) == npos) 153 return i; 154 return npos; 155 } 156 157 158 //===----------------------------------------------------------------------===// 159 // Helpful Algorithms 160 //===----------------------------------------------------------------------===// 161 162 /// count - Return the number of non-overlapped occurrences of \arg Str in 163 /// the string. 164 size_t StringRef::count(StringRef Str) const { 165 size_t Count = 0; 166 size_t N = Str.size(); 167 if (N > Length) 168 return 0; 169 for (size_t i = 0, e = Length - N + 1; i != e; ++i) 170 if (substr(i, N).equals(Str)) 171 ++Count; 172 return Count; 173 } 174 175 /// GetAsUnsignedInteger - Workhorse method that converts a integer character 176 /// sequence of radix up to 36 to an unsigned long long value. 177 static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix, 178 unsigned long long &Result) { 179 // Autosense radix if not specified. 180 if (Radix == 0) { 181 if (Str.startswith("0x")) { 182 Str = Str.substr(2); 183 Radix = 16; 184 } else if (Str.startswith("0b")) { 185 Str = Str.substr(2); 186 Radix = 2; 187 } else if (Str.startswith("0")) 188 Radix = 8; 189 else 190 Radix = 10; 191 } 192 193 // Empty strings (after the radix autosense) are invalid. 194 if (Str.empty()) return true; 195 196 // Parse all the bytes of the string given this radix. Watch for overflow. 197 Result = 0; 198 while (!Str.empty()) { 199 unsigned CharVal; 200 if (Str[0] >= '0' && Str[0] <= '9') 201 CharVal = Str[0]-'0'; 202 else if (Str[0] >= 'a' && Str[0] <= 'z') 203 CharVal = Str[0]-'a'+10; 204 else if (Str[0] >= 'A' && Str[0] <= 'Z') 205 CharVal = Str[0]-'A'+10; 206 else 207 return true; 208 209 // If the parsed value is larger than the integer radix, the string is 210 // invalid. 211 if (CharVal >= Radix) 212 return true; 213 214 // Add in this character. 215 unsigned long long PrevResult = Result; 216 Result = Result*Radix+CharVal; 217 218 // Check for overflow. 219 if (Result < PrevResult) 220 return true; 221 222 Str = Str.substr(1); 223 } 224 225 return false; 226 } 227 228 bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const { 229 return GetAsUnsignedInteger(*this, Radix, Result); 230 } 231 232 233 bool StringRef::getAsInteger(unsigned Radix, long long &Result) const { 234 unsigned long long ULLVal; 235 236 // Handle positive strings first. 237 if (empty() || front() != '-') { 238 if (GetAsUnsignedInteger(*this, Radix, ULLVal) || 239 // Check for value so large it overflows a signed value. 240 (long long)ULLVal < 0) 241 return true; 242 Result = ULLVal; 243 return false; 244 } 245 246 // Get the positive part of the value. 247 if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) || 248 // Reject values so large they'd overflow as negative signed, but allow 249 // "-0". This negates the unsigned so that the negative isn't undefined 250 // on signed overflow. 251 (long long)-ULLVal > 0) 252 return true; 253 254 Result = -ULLVal; 255 return false; 256 } 257 258 bool StringRef::getAsInteger(unsigned Radix, int &Result) const { 259 long long Val; 260 if (getAsInteger(Radix, Val) || 261 (int)Val != Val) 262 return true; 263 Result = Val; 264 return false; 265 } 266 267 bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const { 268 unsigned long long Val; 269 if (getAsInteger(Radix, Val) || 270 (unsigned)Val != Val) 271 return true; 272 Result = Val; 273 return false; 274 } 275