10b57cec5SDimitry Andric //===-- StringRef.cpp - Lightweight String References ---------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
100b57cec5SDimitry Andric #include "llvm/ADT/APFloat.h"
110b57cec5SDimitry Andric #include "llvm/ADT/APInt.h"
120b57cec5SDimitry Andric #include "llvm/ADT/Hashing.h"
130b57cec5SDimitry Andric #include "llvm/ADT/StringExtras.h"
140b57cec5SDimitry Andric #include "llvm/ADT/edit_distance.h"
15480093f4SDimitry Andric #include "llvm/Support/Error.h"
160b57cec5SDimitry Andric #include <bitset>
170b57cec5SDimitry Andric 
180b57cec5SDimitry Andric using namespace llvm;
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric // MSVC emits references to this into the translation units which reference it.
210b57cec5SDimitry Andric #ifndef _MSC_VER
225ffd83dbSDimitry Andric constexpr size_t StringRef::npos;
230b57cec5SDimitry Andric #endif
240b57cec5SDimitry Andric 
250b57cec5SDimitry Andric // strncasecmp() is not available on non-POSIX systems, so define an
260b57cec5SDimitry Andric // alternative function here.
ascii_strncasecmp(const char * LHS,const char * RHS,size_t Length)270b57cec5SDimitry Andric static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) {
280b57cec5SDimitry Andric   for (size_t I = 0; I < Length; ++I) {
290b57cec5SDimitry Andric     unsigned char LHC = toLower(LHS[I]);
300b57cec5SDimitry Andric     unsigned char RHC = toLower(RHS[I]);
310b57cec5SDimitry Andric     if (LHC != RHC)
320b57cec5SDimitry Andric       return LHC < RHC ? -1 : 1;
330b57cec5SDimitry Andric   }
340b57cec5SDimitry Andric   return 0;
350b57cec5SDimitry Andric }
360b57cec5SDimitry Andric 
compare_insensitive(StringRef RHS) const37*5f7ddb14SDimitry Andric int StringRef::compare_insensitive(StringRef RHS) const {
380b57cec5SDimitry Andric   if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(Length, RHS.Length)))
390b57cec5SDimitry Andric     return Res;
400b57cec5SDimitry Andric   if (Length == RHS.Length)
410b57cec5SDimitry Andric     return 0;
420b57cec5SDimitry Andric   return Length < RHS.Length ? -1 : 1;
430b57cec5SDimitry Andric }
440b57cec5SDimitry Andric 
startswith_insensitive(StringRef Prefix) const45*5f7ddb14SDimitry Andric bool StringRef::startswith_insensitive(StringRef Prefix) const {
460b57cec5SDimitry Andric   return Length >= Prefix.Length &&
470b57cec5SDimitry Andric       ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0;
480b57cec5SDimitry Andric }
490b57cec5SDimitry Andric 
endswith_insensitive(StringRef Suffix) const50*5f7ddb14SDimitry Andric bool StringRef::endswith_insensitive(StringRef Suffix) const {
510b57cec5SDimitry Andric   return Length >= Suffix.Length &&
520b57cec5SDimitry Andric       ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
530b57cec5SDimitry Andric }
540b57cec5SDimitry Andric 
find_insensitive(char C,size_t From) const55*5f7ddb14SDimitry Andric size_t StringRef::find_insensitive(char C, size_t From) const {
560b57cec5SDimitry Andric   char L = toLower(C);
570b57cec5SDimitry Andric   return find_if([L](char D) { return toLower(D) == L; }, From);
580b57cec5SDimitry Andric }
590b57cec5SDimitry Andric 
600b57cec5SDimitry Andric /// compare_numeric - Compare strings, handle embedded numbers.
compare_numeric(StringRef RHS) const610b57cec5SDimitry Andric int StringRef::compare_numeric(StringRef RHS) const {
620b57cec5SDimitry Andric   for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
630b57cec5SDimitry Andric     // Check for sequences of digits.
640b57cec5SDimitry Andric     if (isDigit(Data[I]) && isDigit(RHS.Data[I])) {
650b57cec5SDimitry Andric       // The longer sequence of numbers is considered larger.
660b57cec5SDimitry Andric       // This doesn't really handle prefixed zeros well.
670b57cec5SDimitry Andric       size_t J;
680b57cec5SDimitry Andric       for (J = I + 1; J != E + 1; ++J) {
690b57cec5SDimitry Andric         bool ld = J < Length && isDigit(Data[J]);
700b57cec5SDimitry Andric         bool rd = J < RHS.Length && isDigit(RHS.Data[J]);
710b57cec5SDimitry Andric         if (ld != rd)
720b57cec5SDimitry Andric           return rd ? -1 : 1;
730b57cec5SDimitry Andric         if (!rd)
740b57cec5SDimitry Andric           break;
750b57cec5SDimitry Andric       }
760b57cec5SDimitry Andric       // The two number sequences have the same length (J-I), just memcmp them.
770b57cec5SDimitry Andric       if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
780b57cec5SDimitry Andric         return Res < 0 ? -1 : 1;
790b57cec5SDimitry Andric       // Identical number sequences, continue search after the numbers.
800b57cec5SDimitry Andric       I = J - 1;
810b57cec5SDimitry Andric       continue;
820b57cec5SDimitry Andric     }
830b57cec5SDimitry Andric     if (Data[I] != RHS.Data[I])
840b57cec5SDimitry Andric       return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
850b57cec5SDimitry Andric   }
860b57cec5SDimitry Andric   if (Length == RHS.Length)
870b57cec5SDimitry Andric     return 0;
880b57cec5SDimitry Andric   return Length < RHS.Length ? -1 : 1;
890b57cec5SDimitry Andric }
900b57cec5SDimitry Andric 
910b57cec5SDimitry Andric // Compute the edit distance between the two given strings.
edit_distance(llvm::StringRef Other,bool AllowReplacements,unsigned MaxEditDistance) const920b57cec5SDimitry Andric unsigned StringRef::edit_distance(llvm::StringRef Other,
930b57cec5SDimitry Andric                                   bool AllowReplacements,
940b57cec5SDimitry Andric                                   unsigned MaxEditDistance) const {
950b57cec5SDimitry Andric   return llvm::ComputeEditDistance(
960b57cec5SDimitry Andric       makeArrayRef(data(), size()),
970b57cec5SDimitry Andric       makeArrayRef(Other.data(), Other.size()),
980b57cec5SDimitry Andric       AllowReplacements, MaxEditDistance);
990b57cec5SDimitry Andric }
1000b57cec5SDimitry Andric 
1010b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
1020b57cec5SDimitry Andric // String Operations
1030b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
1040b57cec5SDimitry Andric 
lower() const1050b57cec5SDimitry Andric std::string StringRef::lower() const {
1065ffd83dbSDimitry Andric   return std::string(map_iterator(begin(), toLower),
1075ffd83dbSDimitry Andric                      map_iterator(end(), toLower));
1080b57cec5SDimitry Andric }
1090b57cec5SDimitry Andric 
upper() const1100b57cec5SDimitry Andric std::string StringRef::upper() const {
1115ffd83dbSDimitry Andric   return std::string(map_iterator(begin(), toUpper),
1125ffd83dbSDimitry Andric                      map_iterator(end(), toUpper));
1130b57cec5SDimitry Andric }
1140b57cec5SDimitry Andric 
1150b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
1160b57cec5SDimitry Andric // String Searching
1170b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
1180b57cec5SDimitry Andric 
1190b57cec5SDimitry Andric 
1200b57cec5SDimitry Andric /// find - Search for the first string \arg Str in the string.
1210b57cec5SDimitry Andric ///
1220b57cec5SDimitry Andric /// \return - The index of the first occurrence of \arg Str, or npos if not
1230b57cec5SDimitry Andric /// found.
find(StringRef Str,size_t From) const1240b57cec5SDimitry Andric size_t StringRef::find(StringRef Str, size_t From) const {
1250b57cec5SDimitry Andric   if (From > Length)
1260b57cec5SDimitry Andric     return npos;
1270b57cec5SDimitry Andric 
1280b57cec5SDimitry Andric   const char *Start = Data + From;
1290b57cec5SDimitry Andric   size_t Size = Length - From;
1300b57cec5SDimitry Andric 
1310b57cec5SDimitry Andric   const char *Needle = Str.data();
1320b57cec5SDimitry Andric   size_t N = Str.size();
1330b57cec5SDimitry Andric   if (N == 0)
1340b57cec5SDimitry Andric     return From;
1350b57cec5SDimitry Andric   if (Size < N)
1360b57cec5SDimitry Andric     return npos;
1370b57cec5SDimitry Andric   if (N == 1) {
1380b57cec5SDimitry Andric     const char *Ptr = (const char *)::memchr(Start, Needle[0], Size);
1390b57cec5SDimitry Andric     return Ptr == nullptr ? npos : Ptr - Data;
1400b57cec5SDimitry Andric   }
1410b57cec5SDimitry Andric 
1420b57cec5SDimitry Andric   const char *Stop = Start + (Size - N + 1);
1430b57cec5SDimitry Andric 
1440b57cec5SDimitry Andric   // For short haystacks or unsupported needles fall back to the naive algorithm
1450b57cec5SDimitry Andric   if (Size < 16 || N > 255) {
1460b57cec5SDimitry Andric     do {
1470b57cec5SDimitry Andric       if (std::memcmp(Start, Needle, N) == 0)
1480b57cec5SDimitry Andric         return Start - Data;
1490b57cec5SDimitry Andric       ++Start;
1500b57cec5SDimitry Andric     } while (Start < Stop);
1510b57cec5SDimitry Andric     return npos;
1520b57cec5SDimitry Andric   }
1530b57cec5SDimitry Andric 
1540b57cec5SDimitry Andric   // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
1550b57cec5SDimitry Andric   uint8_t BadCharSkip[256];
1560b57cec5SDimitry Andric   std::memset(BadCharSkip, N, 256);
1570b57cec5SDimitry Andric   for (unsigned i = 0; i != N-1; ++i)
1580b57cec5SDimitry Andric     BadCharSkip[(uint8_t)Str[i]] = N-1-i;
1590b57cec5SDimitry Andric 
1600b57cec5SDimitry Andric   do {
1610b57cec5SDimitry Andric     uint8_t Last = Start[N - 1];
1620b57cec5SDimitry Andric     if (LLVM_UNLIKELY(Last == (uint8_t)Needle[N - 1]))
1630b57cec5SDimitry Andric       if (std::memcmp(Start, Needle, N - 1) == 0)
1640b57cec5SDimitry Andric         return Start - Data;
1650b57cec5SDimitry Andric 
1660b57cec5SDimitry Andric     // Otherwise skip the appropriate number of bytes.
1670b57cec5SDimitry Andric     Start += BadCharSkip[Last];
1680b57cec5SDimitry Andric   } while (Start < Stop);
1690b57cec5SDimitry Andric 
1700b57cec5SDimitry Andric   return npos;
1710b57cec5SDimitry Andric }
1720b57cec5SDimitry Andric 
find_insensitive(StringRef Str,size_t From) const173*5f7ddb14SDimitry Andric size_t StringRef::find_insensitive(StringRef Str, size_t From) const {
1740b57cec5SDimitry Andric   StringRef This = substr(From);
1750b57cec5SDimitry Andric   while (This.size() >= Str.size()) {
176*5f7ddb14SDimitry Andric     if (This.startswith_insensitive(Str))
1770b57cec5SDimitry Andric       return From;
1780b57cec5SDimitry Andric     This = This.drop_front();
1790b57cec5SDimitry Andric     ++From;
1800b57cec5SDimitry Andric   }
1810b57cec5SDimitry Andric   return npos;
1820b57cec5SDimitry Andric }
1830b57cec5SDimitry Andric 
rfind_insensitive(char C,size_t From) const184*5f7ddb14SDimitry Andric size_t StringRef::rfind_insensitive(char C, size_t From) const {
1850b57cec5SDimitry Andric   From = std::min(From, Length);
1860b57cec5SDimitry Andric   size_t i = From;
1870b57cec5SDimitry Andric   while (i != 0) {
1880b57cec5SDimitry Andric     --i;
1890b57cec5SDimitry Andric     if (toLower(Data[i]) == toLower(C))
1900b57cec5SDimitry Andric       return i;
1910b57cec5SDimitry Andric   }
1920b57cec5SDimitry Andric   return npos;
1930b57cec5SDimitry Andric }
1940b57cec5SDimitry Andric 
1950b57cec5SDimitry Andric /// rfind - Search for the last string \arg Str in the string.
1960b57cec5SDimitry Andric ///
1970b57cec5SDimitry Andric /// \return - The index of the last occurrence of \arg Str, or npos if not
1980b57cec5SDimitry Andric /// found.
rfind(StringRef Str) const1990b57cec5SDimitry Andric size_t StringRef::rfind(StringRef Str) const {
2000b57cec5SDimitry Andric   size_t N = Str.size();
2010b57cec5SDimitry Andric   if (N > Length)
2020b57cec5SDimitry Andric     return npos;
2030b57cec5SDimitry Andric   for (size_t i = Length - N + 1, e = 0; i != e;) {
2040b57cec5SDimitry Andric     --i;
2050b57cec5SDimitry Andric     if (substr(i, N).equals(Str))
2060b57cec5SDimitry Andric       return i;
2070b57cec5SDimitry Andric   }
2080b57cec5SDimitry Andric   return npos;
2090b57cec5SDimitry Andric }
2100b57cec5SDimitry Andric 
rfind_insensitive(StringRef Str) const211*5f7ddb14SDimitry Andric size_t StringRef::rfind_insensitive(StringRef Str) const {
2120b57cec5SDimitry Andric   size_t N = Str.size();
2130b57cec5SDimitry Andric   if (N > Length)
2140b57cec5SDimitry Andric     return npos;
2150b57cec5SDimitry Andric   for (size_t i = Length - N + 1, e = 0; i != e;) {
2160b57cec5SDimitry Andric     --i;
217*5f7ddb14SDimitry Andric     if (substr(i, N).equals_insensitive(Str))
2180b57cec5SDimitry Andric       return i;
2190b57cec5SDimitry Andric   }
2200b57cec5SDimitry Andric   return npos;
2210b57cec5SDimitry Andric }
2220b57cec5SDimitry Andric 
2230b57cec5SDimitry Andric /// find_first_of - Find the first character in the string that is in \arg
2240b57cec5SDimitry Andric /// Chars, or npos if not found.
2250b57cec5SDimitry Andric ///
2260b57cec5SDimitry Andric /// Note: O(size() + Chars.size())
find_first_of(StringRef Chars,size_t From) const2270b57cec5SDimitry Andric StringRef::size_type StringRef::find_first_of(StringRef Chars,
2280b57cec5SDimitry Andric                                               size_t From) const {
2290b57cec5SDimitry Andric   std::bitset<1 << CHAR_BIT> CharBits;
2300b57cec5SDimitry Andric   for (size_type i = 0; i != Chars.size(); ++i)
2310b57cec5SDimitry Andric     CharBits.set((unsigned char)Chars[i]);
2320b57cec5SDimitry Andric 
2330b57cec5SDimitry Andric   for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
2340b57cec5SDimitry Andric     if (CharBits.test((unsigned char)Data[i]))
2350b57cec5SDimitry Andric       return i;
2360b57cec5SDimitry Andric   return npos;
2370b57cec5SDimitry Andric }
2380b57cec5SDimitry Andric 
2390b57cec5SDimitry Andric /// find_first_not_of - Find the first character in the string that is not
2400b57cec5SDimitry Andric /// \arg C or npos if not found.
find_first_not_of(char C,size_t From) const2410b57cec5SDimitry Andric StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
2420b57cec5SDimitry Andric   for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
2430b57cec5SDimitry Andric     if (Data[i] != C)
2440b57cec5SDimitry Andric       return i;
2450b57cec5SDimitry Andric   return npos;
2460b57cec5SDimitry Andric }
2470b57cec5SDimitry Andric 
2480b57cec5SDimitry Andric /// find_first_not_of - Find the first character in the string that is not
2490b57cec5SDimitry Andric /// in the string \arg Chars, or npos if not found.
2500b57cec5SDimitry Andric ///
2510b57cec5SDimitry Andric /// Note: O(size() + Chars.size())
find_first_not_of(StringRef Chars,size_t From) const2520b57cec5SDimitry Andric StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
2530b57cec5SDimitry Andric                                                   size_t From) const {
2540b57cec5SDimitry Andric   std::bitset<1 << CHAR_BIT> CharBits;
2550b57cec5SDimitry Andric   for (size_type i = 0; i != Chars.size(); ++i)
2560b57cec5SDimitry Andric     CharBits.set((unsigned char)Chars[i]);
2570b57cec5SDimitry Andric 
2580b57cec5SDimitry Andric   for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
2590b57cec5SDimitry Andric     if (!CharBits.test((unsigned char)Data[i]))
2600b57cec5SDimitry Andric       return i;
2610b57cec5SDimitry Andric   return npos;
2620b57cec5SDimitry Andric }
2630b57cec5SDimitry Andric 
2640b57cec5SDimitry Andric /// find_last_of - Find the last character in the string that is in \arg C,
2650b57cec5SDimitry Andric /// or npos if not found.
2660b57cec5SDimitry Andric ///
2670b57cec5SDimitry Andric /// Note: O(size() + Chars.size())
find_last_of(StringRef Chars,size_t From) const2680b57cec5SDimitry Andric StringRef::size_type StringRef::find_last_of(StringRef Chars,
2690b57cec5SDimitry Andric                                              size_t From) const {
2700b57cec5SDimitry Andric   std::bitset<1 << CHAR_BIT> CharBits;
2710b57cec5SDimitry Andric   for (size_type i = 0; i != Chars.size(); ++i)
2720b57cec5SDimitry Andric     CharBits.set((unsigned char)Chars[i]);
2730b57cec5SDimitry Andric 
2740b57cec5SDimitry Andric   for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
2750b57cec5SDimitry Andric     if (CharBits.test((unsigned char)Data[i]))
2760b57cec5SDimitry Andric       return i;
2770b57cec5SDimitry Andric   return npos;
2780b57cec5SDimitry Andric }
2790b57cec5SDimitry Andric 
2800b57cec5SDimitry Andric /// find_last_not_of - Find the last character in the string that is not
2810b57cec5SDimitry Andric /// \arg C, or npos if not found.
find_last_not_of(char C,size_t From) const2820b57cec5SDimitry Andric StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
2830b57cec5SDimitry Andric   for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
2840b57cec5SDimitry Andric     if (Data[i] != C)
2850b57cec5SDimitry Andric       return i;
2860b57cec5SDimitry Andric   return npos;
2870b57cec5SDimitry Andric }
2880b57cec5SDimitry Andric 
2890b57cec5SDimitry Andric /// find_last_not_of - Find the last character in the string that is not in
2900b57cec5SDimitry Andric /// \arg Chars, or npos if not found.
2910b57cec5SDimitry Andric ///
2920b57cec5SDimitry Andric /// Note: O(size() + Chars.size())
find_last_not_of(StringRef Chars,size_t From) const2930b57cec5SDimitry Andric StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
2940b57cec5SDimitry Andric                                                  size_t From) const {
2950b57cec5SDimitry Andric   std::bitset<1 << CHAR_BIT> CharBits;
2960b57cec5SDimitry Andric   for (size_type i = 0, e = Chars.size(); i != e; ++i)
2970b57cec5SDimitry Andric     CharBits.set((unsigned char)Chars[i]);
2980b57cec5SDimitry Andric 
2990b57cec5SDimitry Andric   for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
3000b57cec5SDimitry Andric     if (!CharBits.test((unsigned char)Data[i]))
3010b57cec5SDimitry Andric       return i;
3020b57cec5SDimitry Andric   return npos;
3030b57cec5SDimitry Andric }
3040b57cec5SDimitry Andric 
split(SmallVectorImpl<StringRef> & A,StringRef Separator,int MaxSplit,bool KeepEmpty) const3050b57cec5SDimitry Andric void StringRef::split(SmallVectorImpl<StringRef> &A,
3060b57cec5SDimitry Andric                       StringRef Separator, int MaxSplit,
3070b57cec5SDimitry Andric                       bool KeepEmpty) const {
3080b57cec5SDimitry Andric   StringRef S = *this;
3090b57cec5SDimitry Andric 
3100b57cec5SDimitry Andric   // Count down from MaxSplit. When MaxSplit is -1, this will just split
3110b57cec5SDimitry Andric   // "forever". This doesn't support splitting more than 2^31 times
3120b57cec5SDimitry Andric   // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
3130b57cec5SDimitry Andric   // but that seems unlikely to be useful.
3140b57cec5SDimitry Andric   while (MaxSplit-- != 0) {
3150b57cec5SDimitry Andric     size_t Idx = S.find(Separator);
3160b57cec5SDimitry Andric     if (Idx == npos)
3170b57cec5SDimitry Andric       break;
3180b57cec5SDimitry Andric 
3190b57cec5SDimitry Andric     // Push this split.
3200b57cec5SDimitry Andric     if (KeepEmpty || Idx > 0)
3210b57cec5SDimitry Andric       A.push_back(S.slice(0, Idx));
3220b57cec5SDimitry Andric 
3230b57cec5SDimitry Andric     // Jump forward.
3240b57cec5SDimitry Andric     S = S.slice(Idx + Separator.size(), npos);
3250b57cec5SDimitry Andric   }
3260b57cec5SDimitry Andric 
3270b57cec5SDimitry Andric   // Push the tail.
3280b57cec5SDimitry Andric   if (KeepEmpty || !S.empty())
3290b57cec5SDimitry Andric     A.push_back(S);
3300b57cec5SDimitry Andric }
3310b57cec5SDimitry Andric 
split(SmallVectorImpl<StringRef> & A,char Separator,int MaxSplit,bool KeepEmpty) const3320b57cec5SDimitry Andric void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator,
3330b57cec5SDimitry Andric                       int MaxSplit, bool KeepEmpty) const {
3340b57cec5SDimitry Andric   StringRef S = *this;
3350b57cec5SDimitry Andric 
3360b57cec5SDimitry Andric   // Count down from MaxSplit. When MaxSplit is -1, this will just split
3370b57cec5SDimitry Andric   // "forever". This doesn't support splitting more than 2^31 times
3380b57cec5SDimitry Andric   // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
3390b57cec5SDimitry Andric   // but that seems unlikely to be useful.
3400b57cec5SDimitry Andric   while (MaxSplit-- != 0) {
3410b57cec5SDimitry Andric     size_t Idx = S.find(Separator);
3420b57cec5SDimitry Andric     if (Idx == npos)
3430b57cec5SDimitry Andric       break;
3440b57cec5SDimitry Andric 
3450b57cec5SDimitry Andric     // Push this split.
3460b57cec5SDimitry Andric     if (KeepEmpty || Idx > 0)
3470b57cec5SDimitry Andric       A.push_back(S.slice(0, Idx));
3480b57cec5SDimitry Andric 
3490b57cec5SDimitry Andric     // Jump forward.
3500b57cec5SDimitry Andric     S = S.slice(Idx + 1, npos);
3510b57cec5SDimitry Andric   }
3520b57cec5SDimitry Andric 
3530b57cec5SDimitry Andric   // Push the tail.
3540b57cec5SDimitry Andric   if (KeepEmpty || !S.empty())
3550b57cec5SDimitry Andric     A.push_back(S);
3560b57cec5SDimitry Andric }
3570b57cec5SDimitry Andric 
3580b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
3590b57cec5SDimitry Andric // Helpful Algorithms
3600b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
3610b57cec5SDimitry Andric 
3620b57cec5SDimitry Andric /// count - Return the number of non-overlapped occurrences of \arg Str in
3630b57cec5SDimitry Andric /// the string.
count(StringRef Str) const3640b57cec5SDimitry Andric size_t StringRef::count(StringRef Str) const {
3650b57cec5SDimitry Andric   size_t Count = 0;
3660b57cec5SDimitry Andric   size_t N = Str.size();
367480093f4SDimitry Andric   if (!N || N > Length)
3680b57cec5SDimitry Andric     return 0;
369480093f4SDimitry Andric   for (size_t i = 0, e = Length - N + 1; i < e;) {
370480093f4SDimitry Andric     if (substr(i, N).equals(Str)) {
3710b57cec5SDimitry Andric       ++Count;
372480093f4SDimitry Andric       i += N;
373480093f4SDimitry Andric     }
374480093f4SDimitry Andric     else
375480093f4SDimitry Andric       ++i;
376480093f4SDimitry Andric   }
3770b57cec5SDimitry Andric   return Count;
3780b57cec5SDimitry Andric }
3790b57cec5SDimitry Andric 
GetAutoSenseRadix(StringRef & Str)3800b57cec5SDimitry Andric static unsigned GetAutoSenseRadix(StringRef &Str) {
3810b57cec5SDimitry Andric   if (Str.empty())
3820b57cec5SDimitry Andric     return 10;
3830b57cec5SDimitry Andric 
3840b57cec5SDimitry Andric   if (Str.startswith("0x") || Str.startswith("0X")) {
3850b57cec5SDimitry Andric     Str = Str.substr(2);
3860b57cec5SDimitry Andric     return 16;
3870b57cec5SDimitry Andric   }
3880b57cec5SDimitry Andric 
3890b57cec5SDimitry Andric   if (Str.startswith("0b") || Str.startswith("0B")) {
3900b57cec5SDimitry Andric     Str = Str.substr(2);
3910b57cec5SDimitry Andric     return 2;
3920b57cec5SDimitry Andric   }
3930b57cec5SDimitry Andric 
3940b57cec5SDimitry Andric   if (Str.startswith("0o")) {
3950b57cec5SDimitry Andric     Str = Str.substr(2);
3960b57cec5SDimitry Andric     return 8;
3970b57cec5SDimitry Andric   }
3980b57cec5SDimitry Andric 
3990b57cec5SDimitry Andric   if (Str[0] == '0' && Str.size() > 1 && isDigit(Str[1])) {
4000b57cec5SDimitry Andric     Str = Str.substr(1);
4010b57cec5SDimitry Andric     return 8;
4020b57cec5SDimitry Andric   }
4030b57cec5SDimitry Andric 
4040b57cec5SDimitry Andric   return 10;
4050b57cec5SDimitry Andric }
4060b57cec5SDimitry Andric 
consumeUnsignedInteger(StringRef & Str,unsigned Radix,unsigned long long & Result)4070b57cec5SDimitry Andric bool llvm::consumeUnsignedInteger(StringRef &Str, unsigned Radix,
4080b57cec5SDimitry Andric                                   unsigned long long &Result) {
4090b57cec5SDimitry Andric   // Autosense radix if not specified.
4100b57cec5SDimitry Andric   if (Radix == 0)
4110b57cec5SDimitry Andric     Radix = GetAutoSenseRadix(Str);
4120b57cec5SDimitry Andric 
4130b57cec5SDimitry Andric   // Empty strings (after the radix autosense) are invalid.
4140b57cec5SDimitry Andric   if (Str.empty()) return true;
4150b57cec5SDimitry Andric 
4160b57cec5SDimitry Andric   // Parse all the bytes of the string given this radix.  Watch for overflow.
4170b57cec5SDimitry Andric   StringRef Str2 = Str;
4180b57cec5SDimitry Andric   Result = 0;
4190b57cec5SDimitry Andric   while (!Str2.empty()) {
4200b57cec5SDimitry Andric     unsigned CharVal;
4210b57cec5SDimitry Andric     if (Str2[0] >= '0' && Str2[0] <= '9')
4220b57cec5SDimitry Andric       CharVal = Str2[0] - '0';
4230b57cec5SDimitry Andric     else if (Str2[0] >= 'a' && Str2[0] <= 'z')
4240b57cec5SDimitry Andric       CharVal = Str2[0] - 'a' + 10;
4250b57cec5SDimitry Andric     else if (Str2[0] >= 'A' && Str2[0] <= 'Z')
4260b57cec5SDimitry Andric       CharVal = Str2[0] - 'A' + 10;
4270b57cec5SDimitry Andric     else
4280b57cec5SDimitry Andric       break;
4290b57cec5SDimitry Andric 
4300b57cec5SDimitry Andric     // If the parsed value is larger than the integer radix, we cannot
4310b57cec5SDimitry Andric     // consume any more characters.
4320b57cec5SDimitry Andric     if (CharVal >= Radix)
4330b57cec5SDimitry Andric       break;
4340b57cec5SDimitry Andric 
4350b57cec5SDimitry Andric     // Add in this character.
4360b57cec5SDimitry Andric     unsigned long long PrevResult = Result;
4370b57cec5SDimitry Andric     Result = Result * Radix + CharVal;
4380b57cec5SDimitry Andric 
4390b57cec5SDimitry Andric     // Check for overflow by shifting back and seeing if bits were lost.
4400b57cec5SDimitry Andric     if (Result / Radix < PrevResult)
4410b57cec5SDimitry Andric       return true;
4420b57cec5SDimitry Andric 
4430b57cec5SDimitry Andric     Str2 = Str2.substr(1);
4440b57cec5SDimitry Andric   }
4450b57cec5SDimitry Andric 
4460b57cec5SDimitry Andric   // We consider the operation a failure if no characters were consumed
4470b57cec5SDimitry Andric   // successfully.
4480b57cec5SDimitry Andric   if (Str.size() == Str2.size())
4490b57cec5SDimitry Andric     return true;
4500b57cec5SDimitry Andric 
4510b57cec5SDimitry Andric   Str = Str2;
4520b57cec5SDimitry Andric   return false;
4530b57cec5SDimitry Andric }
4540b57cec5SDimitry Andric 
consumeSignedInteger(StringRef & Str,unsigned Radix,long long & Result)4550b57cec5SDimitry Andric bool llvm::consumeSignedInteger(StringRef &Str, unsigned Radix,
4560b57cec5SDimitry Andric                                 long long &Result) {
4570b57cec5SDimitry Andric   unsigned long long ULLVal;
4580b57cec5SDimitry Andric 
4590b57cec5SDimitry Andric   // Handle positive strings first.
4600b57cec5SDimitry Andric   if (Str.empty() || Str.front() != '-') {
4610b57cec5SDimitry Andric     if (consumeUnsignedInteger(Str, Radix, ULLVal) ||
4620b57cec5SDimitry Andric         // Check for value so large it overflows a signed value.
4630b57cec5SDimitry Andric         (long long)ULLVal < 0)
4640b57cec5SDimitry Andric       return true;
4650b57cec5SDimitry Andric     Result = ULLVal;
4660b57cec5SDimitry Andric     return false;
4670b57cec5SDimitry Andric   }
4680b57cec5SDimitry Andric 
4690b57cec5SDimitry Andric   // Get the positive part of the value.
4700b57cec5SDimitry Andric   StringRef Str2 = Str.drop_front(1);
4710b57cec5SDimitry Andric   if (consumeUnsignedInteger(Str2, Radix, ULLVal) ||
4720b57cec5SDimitry Andric       // Reject values so large they'd overflow as negative signed, but allow
4730b57cec5SDimitry Andric       // "-0".  This negates the unsigned so that the negative isn't undefined
4740b57cec5SDimitry Andric       // on signed overflow.
4750b57cec5SDimitry Andric       (long long)-ULLVal > 0)
4760b57cec5SDimitry Andric     return true;
4770b57cec5SDimitry Andric 
4780b57cec5SDimitry Andric   Str = Str2;
4790b57cec5SDimitry Andric   Result = -ULLVal;
4800b57cec5SDimitry Andric   return false;
4810b57cec5SDimitry Andric }
4820b57cec5SDimitry Andric 
4830b57cec5SDimitry Andric /// GetAsUnsignedInteger - Workhorse method that converts a integer character
4840b57cec5SDimitry Andric /// sequence of radix up to 36 to an unsigned long long value.
getAsUnsignedInteger(StringRef Str,unsigned Radix,unsigned long long & Result)4850b57cec5SDimitry Andric bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
4860b57cec5SDimitry Andric                                 unsigned long long &Result) {
4870b57cec5SDimitry Andric   if (consumeUnsignedInteger(Str, Radix, Result))
4880b57cec5SDimitry Andric     return true;
4890b57cec5SDimitry Andric 
4900b57cec5SDimitry Andric   // For getAsUnsignedInteger, we require the whole string to be consumed or
4910b57cec5SDimitry Andric   // else we consider it a failure.
4920b57cec5SDimitry Andric   return !Str.empty();
4930b57cec5SDimitry Andric }
4940b57cec5SDimitry Andric 
getAsSignedInteger(StringRef Str,unsigned Radix,long long & Result)4950b57cec5SDimitry Andric bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
4960b57cec5SDimitry Andric                               long long &Result) {
4970b57cec5SDimitry Andric   if (consumeSignedInteger(Str, Radix, Result))
4980b57cec5SDimitry Andric     return true;
4990b57cec5SDimitry Andric 
5000b57cec5SDimitry Andric   // For getAsSignedInteger, we require the whole string to be consumed or else
5010b57cec5SDimitry Andric   // we consider it a failure.
5020b57cec5SDimitry Andric   return !Str.empty();
5030b57cec5SDimitry Andric }
5040b57cec5SDimitry Andric 
getAsInteger(unsigned Radix,APInt & Result) const5050b57cec5SDimitry Andric bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
5060b57cec5SDimitry Andric   StringRef Str = *this;
5070b57cec5SDimitry Andric 
5080b57cec5SDimitry Andric   // Autosense radix if not specified.
5090b57cec5SDimitry Andric   if (Radix == 0)
5100b57cec5SDimitry Andric     Radix = GetAutoSenseRadix(Str);
5110b57cec5SDimitry Andric 
5120b57cec5SDimitry Andric   assert(Radix > 1 && Radix <= 36);
5130b57cec5SDimitry Andric 
5140b57cec5SDimitry Andric   // Empty strings (after the radix autosense) are invalid.
5150b57cec5SDimitry Andric   if (Str.empty()) return true;
5160b57cec5SDimitry Andric 
5170b57cec5SDimitry Andric   // Skip leading zeroes.  This can be a significant improvement if
5180b57cec5SDimitry Andric   // it means we don't need > 64 bits.
5190b57cec5SDimitry Andric   while (!Str.empty() && Str.front() == '0')
5200b57cec5SDimitry Andric     Str = Str.substr(1);
5210b57cec5SDimitry Andric 
5220b57cec5SDimitry Andric   // If it was nothing but zeroes....
5230b57cec5SDimitry Andric   if (Str.empty()) {
5240b57cec5SDimitry Andric     Result = APInt(64, 0);
5250b57cec5SDimitry Andric     return false;
5260b57cec5SDimitry Andric   }
5270b57cec5SDimitry Andric 
5280b57cec5SDimitry Andric   // (Over-)estimate the required number of bits.
5290b57cec5SDimitry Andric   unsigned Log2Radix = 0;
5300b57cec5SDimitry Andric   while ((1U << Log2Radix) < Radix) Log2Radix++;
5310b57cec5SDimitry Andric   bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
5320b57cec5SDimitry Andric 
5330b57cec5SDimitry Andric   unsigned BitWidth = Log2Radix * Str.size();
5340b57cec5SDimitry Andric   if (BitWidth < Result.getBitWidth())
5350b57cec5SDimitry Andric     BitWidth = Result.getBitWidth(); // don't shrink the result
5360b57cec5SDimitry Andric   else if (BitWidth > Result.getBitWidth())
5370b57cec5SDimitry Andric     Result = Result.zext(BitWidth);
5380b57cec5SDimitry Andric 
5390b57cec5SDimitry Andric   APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
5400b57cec5SDimitry Andric   if (!IsPowerOf2Radix) {
5410b57cec5SDimitry Andric     // These must have the same bit-width as Result.
5420b57cec5SDimitry Andric     RadixAP = APInt(BitWidth, Radix);
5430b57cec5SDimitry Andric     CharAP = APInt(BitWidth, 0);
5440b57cec5SDimitry Andric   }
5450b57cec5SDimitry Andric 
5460b57cec5SDimitry Andric   // Parse all the bytes of the string given this radix.
5470b57cec5SDimitry Andric   Result = 0;
5480b57cec5SDimitry Andric   while (!Str.empty()) {
5490b57cec5SDimitry Andric     unsigned CharVal;
5500b57cec5SDimitry Andric     if (Str[0] >= '0' && Str[0] <= '9')
5510b57cec5SDimitry Andric       CharVal = Str[0]-'0';
5520b57cec5SDimitry Andric     else if (Str[0] >= 'a' && Str[0] <= 'z')
5530b57cec5SDimitry Andric       CharVal = Str[0]-'a'+10;
5540b57cec5SDimitry Andric     else if (Str[0] >= 'A' && Str[0] <= 'Z')
5550b57cec5SDimitry Andric       CharVal = Str[0]-'A'+10;
5560b57cec5SDimitry Andric     else
5570b57cec5SDimitry Andric       return true;
5580b57cec5SDimitry Andric 
5590b57cec5SDimitry Andric     // If the parsed value is larger than the integer radix, the string is
5600b57cec5SDimitry Andric     // invalid.
5610b57cec5SDimitry Andric     if (CharVal >= Radix)
5620b57cec5SDimitry Andric       return true;
5630b57cec5SDimitry Andric 
5640b57cec5SDimitry Andric     // Add in this character.
5650b57cec5SDimitry Andric     if (IsPowerOf2Radix) {
5660b57cec5SDimitry Andric       Result <<= Log2Radix;
5670b57cec5SDimitry Andric       Result |= CharVal;
5680b57cec5SDimitry Andric     } else {
5690b57cec5SDimitry Andric       Result *= RadixAP;
5700b57cec5SDimitry Andric       CharAP = CharVal;
5710b57cec5SDimitry Andric       Result += CharAP;
5720b57cec5SDimitry Andric     }
5730b57cec5SDimitry Andric 
5740b57cec5SDimitry Andric     Str = Str.substr(1);
5750b57cec5SDimitry Andric   }
5760b57cec5SDimitry Andric 
5770b57cec5SDimitry Andric   return false;
5780b57cec5SDimitry Andric }
5790b57cec5SDimitry Andric 
getAsDouble(double & Result,bool AllowInexact) const5800b57cec5SDimitry Andric bool StringRef::getAsDouble(double &Result, bool AllowInexact) const {
5810b57cec5SDimitry Andric   APFloat F(0.0);
582480093f4SDimitry Andric   auto StatusOrErr = F.convertFromString(*this, APFloat::rmNearestTiesToEven);
583480093f4SDimitry Andric   if (errorToBool(StatusOrErr.takeError()))
584480093f4SDimitry Andric     return true;
585480093f4SDimitry Andric 
586480093f4SDimitry Andric   APFloat::opStatus Status = *StatusOrErr;
5870b57cec5SDimitry Andric   if (Status != APFloat::opOK) {
5880b57cec5SDimitry Andric     if (!AllowInexact || !(Status & APFloat::opInexact))
5890b57cec5SDimitry Andric       return true;
5900b57cec5SDimitry Andric   }
5910b57cec5SDimitry Andric 
5920b57cec5SDimitry Andric   Result = F.convertToDouble();
5930b57cec5SDimitry Andric   return false;
5940b57cec5SDimitry Andric }
5950b57cec5SDimitry Andric 
5960b57cec5SDimitry Andric // Implementation of StringRef hashing.
hash_value(StringRef S)5970b57cec5SDimitry Andric hash_code llvm::hash_value(StringRef S) {
5980b57cec5SDimitry Andric   return hash_combine_range(S.begin(), S.end());
5990b57cec5SDimitry Andric }
600