1 //===-- StringRef.cpp - Lightweight String References ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/ADT/StringRef.h" 11 #include "llvm/ADT/APInt.h" 12 13 using namespace llvm; 14 15 // MSVC emits references to this into the translation units which reference it. 16 #ifndef _MSC_VER 17 const size_t StringRef::npos; 18 #endif 19 20 static char ascii_tolower(char x) { 21 if (x >= 'A' && x <= 'Z') 22 return x - 'A' + 'a'; 23 return x; 24 } 25 26 /// compare_lower - Compare strings, ignoring case. 27 int StringRef::compare_lower(StringRef RHS) const { 28 for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) { 29 char LHC = ascii_tolower(Data[I]); 30 char RHC = ascii_tolower(RHS.Data[I]); 31 if (LHC != RHC) 32 return LHC < RHC ? -1 : 1; 33 } 34 35 if (Length == RHS.Length) 36 return 0; 37 return Length < RHS.Length ? -1 : 1; 38 } 39 40 // Compute the edit distance between the two given strings. 41 unsigned StringRef::edit_distance(llvm::StringRef Other, 42 bool AllowReplacements) { 43 // The algorithm implemented below is the "classic" 44 // dynamic-programming algorithm for computing the Levenshtein 45 // distance, which is described here: 46 // 47 // http://en.wikipedia.org/wiki/Levenshtein_distance 48 // 49 // Although the algorithm is typically described using an m x n 50 // array, only two rows are used at a time, so this implemenation 51 // just keeps two separate vectors for those two rows. 52 size_type m = size(); 53 size_type n = Other.size(); 54 55 const unsigned SmallBufferSize = 64; 56 unsigned SmallBuffer[SmallBufferSize]; 57 unsigned *Allocated = 0; 58 unsigned *previous = SmallBuffer; 59 if (2*(n + 1) > SmallBufferSize) 60 Allocated = previous = new unsigned [2*(n+1)]; 61 unsigned *current = previous + (n + 1); 62 63 for (unsigned i = 0; i <= n; ++i) 64 previous[i] = i; 65 66 for (size_type y = 1; y <= m; ++y) { 67 current[0] = y; 68 for (size_type x = 1; x <= n; ++x) { 69 if (AllowReplacements) { 70 current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u), 71 min(current[x-1], previous[x])+1); 72 } 73 else { 74 if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1]; 75 else current[x] = min(current[x-1], previous[x]) + 1; 76 } 77 } 78 79 unsigned *tmp = current; 80 current = previous; 81 previous = tmp; 82 } 83 84 unsigned Result = previous[n]; 85 delete [] Allocated; 86 87 return Result; 88 } 89 90 //===----------------------------------------------------------------------===// 91 // String Searching 92 //===----------------------------------------------------------------------===// 93 94 95 /// find - Search for the first string \arg Str in the string. 96 /// 97 /// \return - The index of the first occurence of \arg Str, or npos if not 98 /// found. 99 size_t StringRef::find(StringRef Str, size_t From) const { 100 size_t N = Str.size(); 101 if (N > Length) 102 return npos; 103 for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i) 104 if (substr(i, N).equals(Str)) 105 return i; 106 return npos; 107 } 108 109 /// rfind - Search for the last string \arg Str in the string. 110 /// 111 /// \return - The index of the last occurence of \arg Str, or npos if not 112 /// found. 113 size_t StringRef::rfind(StringRef Str) const { 114 size_t N = Str.size(); 115 if (N > Length) 116 return npos; 117 for (size_t i = Length - N + 1, e = 0; i != e;) { 118 --i; 119 if (substr(i, N).equals(Str)) 120 return i; 121 } 122 return npos; 123 } 124 125 /// find_first_of - Find the first character in the string that is in \arg 126 /// Chars, or npos if not found. 127 /// 128 /// Note: O(size() * Chars.size()) 129 StringRef::size_type StringRef::find_first_of(StringRef Chars, 130 size_t From) const { 131 for (size_type i = min(From, Length), e = Length; i != e; ++i) 132 if (Chars.find(Data[i]) != npos) 133 return i; 134 return npos; 135 } 136 137 /// find_first_not_of - Find the first character in the string that is not 138 /// \arg C or npos if not found. 139 StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const { 140 for (size_type i = min(From, Length), e = Length; i != e; ++i) 141 if (Data[i] != C) 142 return i; 143 return npos; 144 } 145 146 /// find_first_not_of - Find the first character in the string that is not 147 /// in the string \arg Chars, or npos if not found. 148 /// 149 /// Note: O(size() * Chars.size()) 150 StringRef::size_type StringRef::find_first_not_of(StringRef Chars, 151 size_t From) const { 152 for (size_type i = min(From, Length), e = Length; i != e; ++i) 153 if (Chars.find(Data[i]) == npos) 154 return i; 155 return npos; 156 } 157 158 159 //===----------------------------------------------------------------------===// 160 // Helpful Algorithms 161 //===----------------------------------------------------------------------===// 162 163 /// count - Return the number of non-overlapped occurrences of \arg Str in 164 /// the string. 165 size_t StringRef::count(StringRef Str) const { 166 size_t Count = 0; 167 size_t N = Str.size(); 168 if (N > Length) 169 return 0; 170 for (size_t i = 0, e = Length - N + 1; i != e; ++i) 171 if (substr(i, N).equals(Str)) 172 ++Count; 173 return Count; 174 } 175 176 static unsigned GetAutoSenseRadix(StringRef &Str) { 177 if (Str.startswith("0x")) { 178 Str = Str.substr(2); 179 return 16; 180 } else if (Str.startswith("0b")) { 181 Str = Str.substr(2); 182 return 2; 183 } else if (Str.startswith("0")) { 184 return 8; 185 } else { 186 return 10; 187 } 188 } 189 190 191 /// GetAsUnsignedInteger - Workhorse method that converts a integer character 192 /// sequence of radix up to 36 to an unsigned long long value. 193 static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix, 194 unsigned long long &Result) { 195 // Autosense radix if not specified. 196 if (Radix == 0) 197 Radix = GetAutoSenseRadix(Str); 198 199 // Empty strings (after the radix autosense) are invalid. 200 if (Str.empty()) return true; 201 202 // Parse all the bytes of the string given this radix. Watch for overflow. 203 Result = 0; 204 while (!Str.empty()) { 205 unsigned CharVal; 206 if (Str[0] >= '0' && Str[0] <= '9') 207 CharVal = Str[0]-'0'; 208 else if (Str[0] >= 'a' && Str[0] <= 'z') 209 CharVal = Str[0]-'a'+10; 210 else if (Str[0] >= 'A' && Str[0] <= 'Z') 211 CharVal = Str[0]-'A'+10; 212 else 213 return true; 214 215 // If the parsed value is larger than the integer radix, the string is 216 // invalid. 217 if (CharVal >= Radix) 218 return true; 219 220 // Add in this character. 221 unsigned long long PrevResult = Result; 222 Result = Result*Radix+CharVal; 223 224 // Check for overflow. 225 if (Result < PrevResult) 226 return true; 227 228 Str = Str.substr(1); 229 } 230 231 return false; 232 } 233 234 bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const { 235 return GetAsUnsignedInteger(*this, Radix, Result); 236 } 237 238 239 bool StringRef::getAsInteger(unsigned Radix, long long &Result) const { 240 unsigned long long ULLVal; 241 242 // Handle positive strings first. 243 if (empty() || front() != '-') { 244 if (GetAsUnsignedInteger(*this, Radix, ULLVal) || 245 // Check for value so large it overflows a signed value. 246 (long long)ULLVal < 0) 247 return true; 248 Result = ULLVal; 249 return false; 250 } 251 252 // Get the positive part of the value. 253 if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) || 254 // Reject values so large they'd overflow as negative signed, but allow 255 // "-0". This negates the unsigned so that the negative isn't undefined 256 // on signed overflow. 257 (long long)-ULLVal > 0) 258 return true; 259 260 Result = -ULLVal; 261 return false; 262 } 263 264 bool StringRef::getAsInteger(unsigned Radix, int &Result) const { 265 long long Val; 266 if (getAsInteger(Radix, Val) || 267 (int)Val != Val) 268 return true; 269 Result = Val; 270 return false; 271 } 272 273 bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const { 274 unsigned long long Val; 275 if (getAsInteger(Radix, Val) || 276 (unsigned)Val != Val) 277 return true; 278 Result = Val; 279 return false; 280 } 281 282 bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { 283 StringRef Str = *this; 284 285 // Autosense radix if not specified. 286 if (Radix == 0) 287 Radix = GetAutoSenseRadix(Str); 288 289 assert(Radix > 1 && Radix <= 36); 290 291 // Empty strings (after the radix autosense) are invalid. 292 if (Str.empty()) return true; 293 294 // Skip leading zeroes. This can be a significant improvement if 295 // it means we don't need > 64 bits. 296 while (!Str.empty() && Str.front() == '0') 297 Str = Str.substr(1); 298 299 // If it was nothing but zeroes.... 300 if (Str.empty()) { 301 Result = APInt(64, 0); 302 return false; 303 } 304 305 // (Over-)estimate the required number of bits. 306 unsigned Log2Radix = 0; 307 while ((1U << Log2Radix) < Radix) Log2Radix++; 308 bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix); 309 310 unsigned BitWidth = Log2Radix * Str.size(); 311 if (BitWidth < Result.getBitWidth()) 312 BitWidth = Result.getBitWidth(); // don't shrink the result 313 else 314 Result.zext(BitWidth); 315 316 APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix 317 if (!IsPowerOf2Radix) { 318 // These must have the same bit-width as Result. 319 RadixAP = APInt(BitWidth, Radix); 320 CharAP = APInt(BitWidth, 0); 321 } 322 323 // Parse all the bytes of the string given this radix. 324 Result = 0; 325 while (!Str.empty()) { 326 unsigned CharVal; 327 if (Str[0] >= '0' && Str[0] <= '9') 328 CharVal = Str[0]-'0'; 329 else if (Str[0] >= 'a' && Str[0] <= 'z') 330 CharVal = Str[0]-'a'+10; 331 else if (Str[0] >= 'A' && Str[0] <= 'Z') 332 CharVal = Str[0]-'A'+10; 333 else 334 return true; 335 336 // If the parsed value is larger than the integer radix, the string is 337 // invalid. 338 if (CharVal >= Radix) 339 return true; 340 341 // Add in this character. 342 if (IsPowerOf2Radix) { 343 Result <<= Log2Radix; 344 Result |= CharVal; 345 } else { 346 Result *= RadixAP; 347 CharAP = CharVal; 348 Result += CharAP; 349 } 350 351 Str = Str.substr(1); 352 } 353 354 return false; 355 } 356