1f22ef01cSRoman Divacky //===-- StringRef.cpp - Lightweight String References ---------------------===//
2f22ef01cSRoman Divacky //
3f22ef01cSRoman Divacky //                     The LLVM Compiler Infrastructure
4f22ef01cSRoman Divacky //
5f22ef01cSRoman Divacky // This file is distributed under the University of Illinois Open Source
6f22ef01cSRoman Divacky // License. See LICENSE.TXT for details.
7f22ef01cSRoman Divacky //
8f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
9f22ef01cSRoman Divacky 
10f22ef01cSRoman Divacky #include "llvm/ADT/StringRef.h"
117a7e6055SDimitry Andric #include "llvm/ADT/APFloat.h"
12f22ef01cSRoman Divacky #include "llvm/ADT/APInt.h"
13dff0c46cSDimitry Andric #include "llvm/ADT/Hashing.h"
142cab237bSDimitry Andric #include "llvm/ADT/StringExtras.h"
15dff0c46cSDimitry Andric #include "llvm/ADT/edit_distance.h"
16e580952dSDimitry Andric #include <bitset>
17f22ef01cSRoman Divacky 
18f22ef01cSRoman Divacky using namespace llvm;
19f22ef01cSRoman Divacky 
20f22ef01cSRoman Divacky // MSVC emits references to this into the translation units which reference it.
21f22ef01cSRoman Divacky #ifndef _MSC_VER
22f22ef01cSRoman Divacky const size_t StringRef::npos;
23f22ef01cSRoman Divacky #endif
24f22ef01cSRoman Divacky 
25f785676fSDimitry Andric // strncasecmp() is not available on non-POSIX systems, so define an
26f785676fSDimitry Andric // alternative function here.
ascii_strncasecmp(const char * LHS,const char * RHS,size_t Length)27f785676fSDimitry Andric static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) {
28f785676fSDimitry Andric   for (size_t I = 0; I < Length; ++I) {
292cab237bSDimitry Andric     unsigned char LHC = toLower(LHS[I]);
302cab237bSDimitry Andric     unsigned char RHC = toLower(RHS[I]);
31f22ef01cSRoman Divacky     if (LHC != RHC)
32f22ef01cSRoman Divacky       return LHC < RHC ? -1 : 1;
33f22ef01cSRoman Divacky   }
34f785676fSDimitry Andric   return 0;
35f785676fSDimitry Andric }
36f22ef01cSRoman Divacky 
37f785676fSDimitry Andric /// compare_lower - Compare strings, ignoring case.
compare_lower(StringRef RHS) const38f785676fSDimitry Andric int StringRef::compare_lower(StringRef RHS) const {
3939d628a0SDimitry Andric   if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(Length, RHS.Length)))
40f785676fSDimitry Andric     return Res;
41f22ef01cSRoman Divacky   if (Length == RHS.Length)
42f22ef01cSRoman Divacky     return 0;
43f22ef01cSRoman Divacky   return Length < RHS.Length ? -1 : 1;
44f22ef01cSRoman Divacky }
45f22ef01cSRoman Divacky 
46f785676fSDimitry Andric /// Check if this string starts with the given \p Prefix, ignoring case.
startswith_lower(StringRef Prefix) const47f785676fSDimitry Andric bool StringRef::startswith_lower(StringRef Prefix) const {
48f785676fSDimitry Andric   return Length >= Prefix.Length &&
49f785676fSDimitry Andric       ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0;
50f785676fSDimitry Andric }
51f785676fSDimitry Andric 
52f785676fSDimitry Andric /// Check if this string ends with the given \p Suffix, ignoring case.
endswith_lower(StringRef Suffix) const53f785676fSDimitry Andric bool StringRef::endswith_lower(StringRef Suffix) const {
54f785676fSDimitry Andric   return Length >= Suffix.Length &&
55f785676fSDimitry Andric       ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
56f785676fSDimitry Andric }
57f785676fSDimitry Andric 
find_lower(char C,size_t From) const58d88c1a5aSDimitry Andric size_t StringRef::find_lower(char C, size_t From) const {
592cab237bSDimitry Andric   char L = toLower(C);
602cab237bSDimitry Andric   return find_if([L](char D) { return toLower(D) == L; }, From);
61d88c1a5aSDimitry Andric }
62d88c1a5aSDimitry Andric 
63f22ef01cSRoman Divacky /// compare_numeric - Compare strings, handle embedded numbers.
compare_numeric(StringRef RHS) const64f22ef01cSRoman Divacky int StringRef::compare_numeric(StringRef RHS) const {
6539d628a0SDimitry Andric   for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
666122f3e6SDimitry Andric     // Check for sequences of digits.
672cab237bSDimitry Andric     if (isDigit(Data[I]) && isDigit(RHS.Data[I])) {
686122f3e6SDimitry Andric       // The longer sequence of numbers is considered larger.
696122f3e6SDimitry Andric       // This doesn't really handle prefixed zeros well.
706122f3e6SDimitry Andric       size_t J;
716122f3e6SDimitry Andric       for (J = I + 1; J != E + 1; ++J) {
722cab237bSDimitry Andric         bool ld = J < Length && isDigit(Data[J]);
732cab237bSDimitry Andric         bool rd = J < RHS.Length && isDigit(RHS.Data[J]);
74f22ef01cSRoman Divacky         if (ld != rd)
75f22ef01cSRoman Divacky           return rd ? -1 : 1;
76f22ef01cSRoman Divacky         if (!rd)
77f22ef01cSRoman Divacky           break;
78f22ef01cSRoman Divacky       }
796122f3e6SDimitry Andric       // The two number sequences have the same length (J-I), just memcmp them.
806122f3e6SDimitry Andric       if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
816122f3e6SDimitry Andric         return Res < 0 ? -1 : 1;
826122f3e6SDimitry Andric       // Identical number sequences, continue search after the numbers.
836122f3e6SDimitry Andric       I = J - 1;
846122f3e6SDimitry Andric       continue;
85f22ef01cSRoman Divacky     }
866122f3e6SDimitry Andric     if (Data[I] != RHS.Data[I])
87e580952dSDimitry Andric       return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
88f22ef01cSRoman Divacky   }
89f22ef01cSRoman Divacky   if (Length == RHS.Length)
90f22ef01cSRoman Divacky     return 0;
91f22ef01cSRoman Divacky   return Length < RHS.Length ? -1 : 1;
92f22ef01cSRoman Divacky }
93f22ef01cSRoman Divacky 
94f22ef01cSRoman Divacky // Compute the edit distance between the two given strings.
edit_distance(llvm::StringRef Other,bool AllowReplacements,unsigned MaxEditDistance) const95f22ef01cSRoman Divacky unsigned StringRef::edit_distance(llvm::StringRef Other,
962754fe60SDimitry Andric                                   bool AllowReplacements,
97f785676fSDimitry Andric                                   unsigned MaxEditDistance) const {
98dff0c46cSDimitry Andric   return llvm::ComputeEditDistance(
9939d628a0SDimitry Andric       makeArrayRef(data(), size()),
10039d628a0SDimitry Andric       makeArrayRef(Other.data(), Other.size()),
101dff0c46cSDimitry Andric       AllowReplacements, MaxEditDistance);
102f22ef01cSRoman Divacky }
103f22ef01cSRoman Divacky 
104dff0c46cSDimitry Andric //===----------------------------------------------------------------------===//
105dff0c46cSDimitry Andric // String Operations
106dff0c46cSDimitry Andric //===----------------------------------------------------------------------===//
1072754fe60SDimitry Andric 
lower() const108dff0c46cSDimitry Andric std::string StringRef::lower() const {
109dff0c46cSDimitry Andric   std::string Result(size(), char());
110dff0c46cSDimitry Andric   for (size_type i = 0, e = size(); i != e; ++i) {
1112cab237bSDimitry Andric     Result[i] = toLower(Data[i]);
112dff0c46cSDimitry Andric   }
113dff0c46cSDimitry Andric   return Result;
114f22ef01cSRoman Divacky }
115f22ef01cSRoman Divacky 
upper() const116dff0c46cSDimitry Andric std::string StringRef::upper() const {
117dff0c46cSDimitry Andric   std::string Result(size(), char());
118dff0c46cSDimitry Andric   for (size_type i = 0, e = size(); i != e; ++i) {
1192cab237bSDimitry Andric     Result[i] = toUpper(Data[i]);
120dff0c46cSDimitry Andric   }
121f22ef01cSRoman Divacky   return Result;
122f22ef01cSRoman Divacky }
123f22ef01cSRoman Divacky 
124f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
125f22ef01cSRoman Divacky // String Searching
126f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
127f22ef01cSRoman Divacky 
128f22ef01cSRoman Divacky 
129f22ef01cSRoman Divacky /// find - Search for the first string \arg Str in the string.
130f22ef01cSRoman Divacky ///
1313b0f4066SDimitry Andric /// \return - The index of the first occurrence of \arg Str, or npos if not
132f22ef01cSRoman Divacky /// found.
find(StringRef Str,size_t From) const133f22ef01cSRoman Divacky size_t StringRef::find(StringRef Str, size_t From) const {
1347d523365SDimitry Andric   if (From > Length)
135f22ef01cSRoman Divacky     return npos;
136dff0c46cSDimitry Andric 
137d88c1a5aSDimitry Andric   const char *Start = Data + From;
138d88c1a5aSDimitry Andric   size_t Size = Length - From;
139d88c1a5aSDimitry Andric 
1407d523365SDimitry Andric   const char *Needle = Str.data();
1417d523365SDimitry Andric   size_t N = Str.size();
1427d523365SDimitry Andric   if (N == 0)
1437d523365SDimitry Andric     return From;
1447d523365SDimitry Andric   if (Size < N)
1457d523365SDimitry Andric     return npos;
146d88c1a5aSDimitry Andric   if (N == 1) {
147d88c1a5aSDimitry Andric     const char *Ptr = (const char *)::memchr(Start, Needle[0], Size);
148d88c1a5aSDimitry Andric     return Ptr == nullptr ? npos : Ptr - Data;
149d88c1a5aSDimitry Andric   }
1507d523365SDimitry Andric 
1517d523365SDimitry Andric   const char *Stop = Start + (Size - N + 1);
1527d523365SDimitry Andric 
153dff0c46cSDimitry Andric   // For short haystacks or unsupported needles fall back to the naive algorithm
1547d523365SDimitry Andric   if (Size < 16 || N > 255) {
1557d523365SDimitry Andric     do {
1567d523365SDimitry Andric       if (std::memcmp(Start, Needle, N) == 0)
1577d523365SDimitry Andric         return Start - Data;
1587d523365SDimitry Andric       ++Start;
1597d523365SDimitry Andric     } while (Start < Stop);
160f22ef01cSRoman Divacky     return npos;
161f22ef01cSRoman Divacky   }
162f22ef01cSRoman Divacky 
163dff0c46cSDimitry Andric   // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
164dff0c46cSDimitry Andric   uint8_t BadCharSkip[256];
165dff0c46cSDimitry Andric   std::memset(BadCharSkip, N, 256);
166dff0c46cSDimitry Andric   for (unsigned i = 0; i != N-1; ++i)
167dff0c46cSDimitry Andric     BadCharSkip[(uint8_t)Str[i]] = N-1-i;
168dff0c46cSDimitry Andric 
1697d523365SDimitry Andric   do {
170d88c1a5aSDimitry Andric     uint8_t Last = Start[N - 1];
171d88c1a5aSDimitry Andric     if (LLVM_UNLIKELY(Last == (uint8_t)Needle[N - 1]))
172d88c1a5aSDimitry Andric       if (std::memcmp(Start, Needle, N - 1) == 0)
1737d523365SDimitry Andric         return Start - Data;
174dff0c46cSDimitry Andric 
175dff0c46cSDimitry Andric     // Otherwise skip the appropriate number of bytes.
176d88c1a5aSDimitry Andric     Start += BadCharSkip[Last];
1777d523365SDimitry Andric   } while (Start < Stop);
178dff0c46cSDimitry Andric 
179dff0c46cSDimitry Andric   return npos;
180dff0c46cSDimitry Andric }
181dff0c46cSDimitry Andric 
find_lower(StringRef Str,size_t From) const182d88c1a5aSDimitry Andric size_t StringRef::find_lower(StringRef Str, size_t From) const {
183d88c1a5aSDimitry Andric   StringRef This = substr(From);
184d88c1a5aSDimitry Andric   while (This.size() >= Str.size()) {
185d88c1a5aSDimitry Andric     if (This.startswith_lower(Str))
186d88c1a5aSDimitry Andric       return From;
187d88c1a5aSDimitry Andric     This = This.drop_front();
188d88c1a5aSDimitry Andric     ++From;
189d88c1a5aSDimitry Andric   }
190d88c1a5aSDimitry Andric   return npos;
191d88c1a5aSDimitry Andric }
192d88c1a5aSDimitry Andric 
rfind_lower(char C,size_t From) const193d88c1a5aSDimitry Andric size_t StringRef::rfind_lower(char C, size_t From) const {
194d88c1a5aSDimitry Andric   From = std::min(From, Length);
195d88c1a5aSDimitry Andric   size_t i = From;
196d88c1a5aSDimitry Andric   while (i != 0) {
197d88c1a5aSDimitry Andric     --i;
1982cab237bSDimitry Andric     if (toLower(Data[i]) == toLower(C))
199d88c1a5aSDimitry Andric       return i;
200d88c1a5aSDimitry Andric   }
201d88c1a5aSDimitry Andric   return npos;
202d88c1a5aSDimitry Andric }
203d88c1a5aSDimitry Andric 
204f22ef01cSRoman Divacky /// rfind - Search for the last string \arg Str in the string.
205f22ef01cSRoman Divacky ///
2063b0f4066SDimitry Andric /// \return - The index of the last occurrence of \arg Str, or npos if not
207f22ef01cSRoman Divacky /// found.
rfind(StringRef Str) const208f22ef01cSRoman Divacky size_t StringRef::rfind(StringRef Str) const {
209f22ef01cSRoman Divacky   size_t N = Str.size();
210f22ef01cSRoman Divacky   if (N > Length)
211f22ef01cSRoman Divacky     return npos;
212f22ef01cSRoman Divacky   for (size_t i = Length - N + 1, e = 0; i != e;) {
213f22ef01cSRoman Divacky     --i;
214f22ef01cSRoman Divacky     if (substr(i, N).equals(Str))
215f22ef01cSRoman Divacky       return i;
216f22ef01cSRoman Divacky   }
217f22ef01cSRoman Divacky   return npos;
218f22ef01cSRoman Divacky }
219f22ef01cSRoman Divacky 
rfind_lower(StringRef Str) const220d88c1a5aSDimitry Andric size_t StringRef::rfind_lower(StringRef Str) const {
221d88c1a5aSDimitry Andric   size_t N = Str.size();
222d88c1a5aSDimitry Andric   if (N > Length)
223d88c1a5aSDimitry Andric     return npos;
224d88c1a5aSDimitry Andric   for (size_t i = Length - N + 1, e = 0; i != e;) {
225d88c1a5aSDimitry Andric     --i;
226d88c1a5aSDimitry Andric     if (substr(i, N).equals_lower(Str))
227d88c1a5aSDimitry Andric       return i;
228d88c1a5aSDimitry Andric   }
229d88c1a5aSDimitry Andric   return npos;
230d88c1a5aSDimitry Andric }
231d88c1a5aSDimitry Andric 
232f22ef01cSRoman Divacky /// find_first_of - Find the first character in the string that is in \arg
233f22ef01cSRoman Divacky /// Chars, or npos if not found.
234f22ef01cSRoman Divacky ///
235e580952dSDimitry Andric /// Note: O(size() + Chars.size())
find_first_of(StringRef Chars,size_t From) const236f22ef01cSRoman Divacky StringRef::size_type StringRef::find_first_of(StringRef Chars,
237f22ef01cSRoman Divacky                                               size_t From) const {
238e580952dSDimitry Andric   std::bitset<1 << CHAR_BIT> CharBits;
239e580952dSDimitry Andric   for (size_type i = 0; i != Chars.size(); ++i)
240e580952dSDimitry Andric     CharBits.set((unsigned char)Chars[i]);
241e580952dSDimitry Andric 
24239d628a0SDimitry Andric   for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
243e580952dSDimitry Andric     if (CharBits.test((unsigned char)Data[i]))
244f22ef01cSRoman Divacky       return i;
245f22ef01cSRoman Divacky   return npos;
246f22ef01cSRoman Divacky }
247f22ef01cSRoman Divacky 
248f22ef01cSRoman Divacky /// find_first_not_of - Find the first character in the string that is not
249f22ef01cSRoman Divacky /// \arg C or npos if not found.
find_first_not_of(char C,size_t From) const250f22ef01cSRoman Divacky StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
25139d628a0SDimitry Andric   for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
252f22ef01cSRoman Divacky     if (Data[i] != C)
253f22ef01cSRoman Divacky       return i;
254f22ef01cSRoman Divacky   return npos;
255f22ef01cSRoman Divacky }
256f22ef01cSRoman Divacky 
257f22ef01cSRoman Divacky /// find_first_not_of - Find the first character in the string that is not
258f22ef01cSRoman Divacky /// in the string \arg Chars, or npos if not found.
259f22ef01cSRoman Divacky ///
260e580952dSDimitry Andric /// Note: O(size() + Chars.size())
find_first_not_of(StringRef Chars,size_t From) const261f22ef01cSRoman Divacky StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
262f22ef01cSRoman Divacky                                                   size_t From) const {
263e580952dSDimitry Andric   std::bitset<1 << CHAR_BIT> CharBits;
264e580952dSDimitry Andric   for (size_type i = 0; i != Chars.size(); ++i)
265e580952dSDimitry Andric     CharBits.set((unsigned char)Chars[i]);
266e580952dSDimitry Andric 
26739d628a0SDimitry Andric   for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
268e580952dSDimitry Andric     if (!CharBits.test((unsigned char)Data[i]))
269f22ef01cSRoman Divacky       return i;
270f22ef01cSRoman Divacky   return npos;
271f22ef01cSRoman Divacky }
272f22ef01cSRoman Divacky 
2732754fe60SDimitry Andric /// find_last_of - Find the last character in the string that is in \arg C,
2742754fe60SDimitry Andric /// or npos if not found.
2752754fe60SDimitry Andric ///
2762754fe60SDimitry Andric /// Note: O(size() + Chars.size())
find_last_of(StringRef Chars,size_t From) const2772754fe60SDimitry Andric StringRef::size_type StringRef::find_last_of(StringRef Chars,
2782754fe60SDimitry Andric                                              size_t From) const {
2792754fe60SDimitry Andric   std::bitset<1 << CHAR_BIT> CharBits;
2802754fe60SDimitry Andric   for (size_type i = 0; i != Chars.size(); ++i)
2812754fe60SDimitry Andric     CharBits.set((unsigned char)Chars[i]);
2822754fe60SDimitry Andric 
28339d628a0SDimitry Andric   for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
2842754fe60SDimitry Andric     if (CharBits.test((unsigned char)Data[i]))
2852754fe60SDimitry Andric       return i;
2862754fe60SDimitry Andric   return npos;
2872754fe60SDimitry Andric }
288f22ef01cSRoman Divacky 
2897ae0e2c9SDimitry Andric /// find_last_not_of - Find the last character in the string that is not
2907ae0e2c9SDimitry Andric /// \arg C, or npos if not found.
find_last_not_of(char C,size_t From) const2917ae0e2c9SDimitry Andric StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
29239d628a0SDimitry Andric   for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
2937ae0e2c9SDimitry Andric     if (Data[i] != C)
2947ae0e2c9SDimitry Andric       return i;
2957ae0e2c9SDimitry Andric   return npos;
2967ae0e2c9SDimitry Andric }
2977ae0e2c9SDimitry Andric 
2987ae0e2c9SDimitry Andric /// find_last_not_of - Find the last character in the string that is not in
2997ae0e2c9SDimitry Andric /// \arg Chars, or npos if not found.
3007ae0e2c9SDimitry Andric ///
3017ae0e2c9SDimitry Andric /// Note: O(size() + Chars.size())
find_last_not_of(StringRef Chars,size_t From) const3027ae0e2c9SDimitry Andric StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
3037ae0e2c9SDimitry Andric                                                  size_t From) const {
3047ae0e2c9SDimitry Andric   std::bitset<1 << CHAR_BIT> CharBits;
3057ae0e2c9SDimitry Andric   for (size_type i = 0, e = Chars.size(); i != e; ++i)
3067ae0e2c9SDimitry Andric     CharBits.set((unsigned char)Chars[i]);
3077ae0e2c9SDimitry Andric 
30839d628a0SDimitry Andric   for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
3097ae0e2c9SDimitry Andric     if (!CharBits.test((unsigned char)Data[i]))
3107ae0e2c9SDimitry Andric       return i;
3117ae0e2c9SDimitry Andric   return npos;
3127ae0e2c9SDimitry Andric }
3137ae0e2c9SDimitry Andric 
split(SmallVectorImpl<StringRef> & A,StringRef Separator,int MaxSplit,bool KeepEmpty) const314dff0c46cSDimitry Andric void StringRef::split(SmallVectorImpl<StringRef> &A,
3157d523365SDimitry Andric                       StringRef Separator, int MaxSplit,
316dff0c46cSDimitry Andric                       bool KeepEmpty) const {
3177d523365SDimitry Andric   StringRef S = *this;
318dff0c46cSDimitry Andric 
3197d523365SDimitry Andric   // Count down from MaxSplit. When MaxSplit is -1, this will just split
3207d523365SDimitry Andric   // "forever". This doesn't support splitting more than 2^31 times
3217d523365SDimitry Andric   // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
3227d523365SDimitry Andric   // but that seems unlikely to be useful.
3237d523365SDimitry Andric   while (MaxSplit-- != 0) {
3247d523365SDimitry Andric     size_t Idx = S.find(Separator);
3257d523365SDimitry Andric     if (Idx == npos)
3267d523365SDimitry Andric       break;
327dff0c46cSDimitry Andric 
3287d523365SDimitry Andric     // Push this split.
3297d523365SDimitry Andric     if (KeepEmpty || Idx > 0)
3307d523365SDimitry Andric       A.push_back(S.slice(0, Idx));
3317d523365SDimitry Andric 
3327d523365SDimitry Andric     // Jump forward.
3337d523365SDimitry Andric     S = S.slice(Idx + Separator.size(), npos);
334dff0c46cSDimitry Andric   }
3357d523365SDimitry Andric 
3367d523365SDimitry Andric   // Push the tail.
3377d523365SDimitry Andric   if (KeepEmpty || !S.empty())
3387d523365SDimitry Andric     A.push_back(S);
3397d523365SDimitry Andric }
3407d523365SDimitry Andric 
split(SmallVectorImpl<StringRef> & A,char Separator,int MaxSplit,bool KeepEmpty) const3417d523365SDimitry Andric void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator,
3427d523365SDimitry Andric                       int MaxSplit, bool KeepEmpty) const {
3437d523365SDimitry Andric   StringRef S = *this;
3447d523365SDimitry Andric 
3457d523365SDimitry Andric   // Count down from MaxSplit. When MaxSplit is -1, this will just split
3467d523365SDimitry Andric   // "forever". This doesn't support splitting more than 2^31 times
3477d523365SDimitry Andric   // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
3487d523365SDimitry Andric   // but that seems unlikely to be useful.
3497d523365SDimitry Andric   while (MaxSplit-- != 0) {
3507d523365SDimitry Andric     size_t Idx = S.find(Separator);
3517d523365SDimitry Andric     if (Idx == npos)
3527d523365SDimitry Andric       break;
3537d523365SDimitry Andric 
3547d523365SDimitry Andric     // Push this split.
3557d523365SDimitry Andric     if (KeepEmpty || Idx > 0)
3567d523365SDimitry Andric       A.push_back(S.slice(0, Idx));
3577d523365SDimitry Andric 
3587d523365SDimitry Andric     // Jump forward.
3597d523365SDimitry Andric     S = S.slice(Idx + 1, npos);
3607d523365SDimitry Andric   }
3617d523365SDimitry Andric 
3627d523365SDimitry Andric   // Push the tail.
3637d523365SDimitry Andric   if (KeepEmpty || !S.empty())
3647d523365SDimitry Andric     A.push_back(S);
365dff0c46cSDimitry Andric }
366dff0c46cSDimitry Andric 
367f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
368f22ef01cSRoman Divacky // Helpful Algorithms
369f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
370f22ef01cSRoman Divacky 
371f22ef01cSRoman Divacky /// count - Return the number of non-overlapped occurrences of \arg Str in
372f22ef01cSRoman Divacky /// the string.
count(StringRef Str) const373f22ef01cSRoman Divacky size_t StringRef::count(StringRef Str) const {
374f22ef01cSRoman Divacky   size_t Count = 0;
375f22ef01cSRoman Divacky   size_t N = Str.size();
376f22ef01cSRoman Divacky   if (N > Length)
377f22ef01cSRoman Divacky     return 0;
378f22ef01cSRoman Divacky   for (size_t i = 0, e = Length - N + 1; i != e; ++i)
379f22ef01cSRoman Divacky     if (substr(i, N).equals(Str))
380f22ef01cSRoman Divacky       ++Count;
381f22ef01cSRoman Divacky   return Count;
382f22ef01cSRoman Divacky }
383f22ef01cSRoman Divacky 
GetAutoSenseRadix(StringRef & Str)384f22ef01cSRoman Divacky static unsigned GetAutoSenseRadix(StringRef &Str) {
385d88c1a5aSDimitry Andric   if (Str.empty())
386d88c1a5aSDimitry Andric     return 10;
387d88c1a5aSDimitry Andric 
3883ca95b02SDimitry Andric   if (Str.startswith("0x") || Str.startswith("0X")) {
389f22ef01cSRoman Divacky     Str = Str.substr(2);
390f22ef01cSRoman Divacky     return 16;
3917ae0e2c9SDimitry Andric   }
3927ae0e2c9SDimitry Andric 
3933ca95b02SDimitry Andric   if (Str.startswith("0b") || Str.startswith("0B")) {
394f22ef01cSRoman Divacky     Str = Str.substr(2);
395f22ef01cSRoman Divacky     return 2;
396f22ef01cSRoman Divacky   }
3977ae0e2c9SDimitry Andric 
3987ae0e2c9SDimitry Andric   if (Str.startswith("0o")) {
3997ae0e2c9SDimitry Andric     Str = Str.substr(2);
4007ae0e2c9SDimitry Andric     return 8;
4017ae0e2c9SDimitry Andric   }
4027ae0e2c9SDimitry Andric 
4032cab237bSDimitry Andric   if (Str[0] == '0' && Str.size() > 1 && isDigit(Str[1])) {
404d88c1a5aSDimitry Andric     Str = Str.substr(1);
4057ae0e2c9SDimitry Andric     return 8;
406d88c1a5aSDimitry Andric   }
4077ae0e2c9SDimitry Andric 
4087ae0e2c9SDimitry Andric   return 10;
409f22ef01cSRoman Divacky }
410f22ef01cSRoman Divacky 
consumeUnsignedInteger(StringRef & Str,unsigned Radix,unsigned long long & Result)411d88c1a5aSDimitry Andric bool llvm::consumeUnsignedInteger(StringRef &Str, unsigned Radix,
412f22ef01cSRoman Divacky                                   unsigned long long &Result) {
413f22ef01cSRoman Divacky   // Autosense radix if not specified.
414f22ef01cSRoman Divacky   if (Radix == 0)
415f22ef01cSRoman Divacky     Radix = GetAutoSenseRadix(Str);
416f22ef01cSRoman Divacky 
417f22ef01cSRoman Divacky   // Empty strings (after the radix autosense) are invalid.
418f22ef01cSRoman Divacky   if (Str.empty()) return true;
419f22ef01cSRoman Divacky 
420f22ef01cSRoman Divacky   // Parse all the bytes of the string given this radix.  Watch for overflow.
421d88c1a5aSDimitry Andric   StringRef Str2 = Str;
422f22ef01cSRoman Divacky   Result = 0;
423d88c1a5aSDimitry Andric   while (!Str2.empty()) {
424f22ef01cSRoman Divacky     unsigned CharVal;
425d88c1a5aSDimitry Andric     if (Str2[0] >= '0' && Str2[0] <= '9')
426d88c1a5aSDimitry Andric       CharVal = Str2[0] - '0';
427d88c1a5aSDimitry Andric     else if (Str2[0] >= 'a' && Str2[0] <= 'z')
428d88c1a5aSDimitry Andric       CharVal = Str2[0] - 'a' + 10;
429d88c1a5aSDimitry Andric     else if (Str2[0] >= 'A' && Str2[0] <= 'Z')
430d88c1a5aSDimitry Andric       CharVal = Str2[0] - 'A' + 10;
431f22ef01cSRoman Divacky     else
432d88c1a5aSDimitry Andric       break;
433f22ef01cSRoman Divacky 
434d88c1a5aSDimitry Andric     // If the parsed value is larger than the integer radix, we cannot
435d88c1a5aSDimitry Andric     // consume any more characters.
436f22ef01cSRoman Divacky     if (CharVal >= Radix)
437d88c1a5aSDimitry Andric       break;
438f22ef01cSRoman Divacky 
439f22ef01cSRoman Divacky     // Add in this character.
440f22ef01cSRoman Divacky     unsigned long long PrevResult = Result;
441f22ef01cSRoman Divacky     Result = Result * Radix + CharVal;
442f22ef01cSRoman Divacky 
4433861d79fSDimitry Andric     // Check for overflow by shifting back and seeing if bits were lost.
4443861d79fSDimitry Andric     if (Result / Radix < PrevResult)
445f22ef01cSRoman Divacky       return true;
446f22ef01cSRoman Divacky 
447d88c1a5aSDimitry Andric     Str2 = Str2.substr(1);
448f22ef01cSRoman Divacky   }
449f22ef01cSRoman Divacky 
450d88c1a5aSDimitry Andric   // We consider the operation a failure if no characters were consumed
451d88c1a5aSDimitry Andric   // successfully.
452d88c1a5aSDimitry Andric   if (Str.size() == Str2.size())
453d88c1a5aSDimitry Andric     return true;
454d88c1a5aSDimitry Andric 
455d88c1a5aSDimitry Andric   Str = Str2;
456f22ef01cSRoman Divacky   return false;
457f22ef01cSRoman Divacky }
458f22ef01cSRoman Divacky 
consumeSignedInteger(StringRef & Str,unsigned Radix,long long & Result)459d88c1a5aSDimitry Andric bool llvm::consumeSignedInteger(StringRef &Str, unsigned Radix,
460dff0c46cSDimitry Andric                                 long long &Result) {
461f22ef01cSRoman Divacky   unsigned long long ULLVal;
462f22ef01cSRoman Divacky 
463f22ef01cSRoman Divacky   // Handle positive strings first.
464dff0c46cSDimitry Andric   if (Str.empty() || Str.front() != '-') {
465d88c1a5aSDimitry Andric     if (consumeUnsignedInteger(Str, Radix, ULLVal) ||
466f22ef01cSRoman Divacky         // Check for value so large it overflows a signed value.
467f22ef01cSRoman Divacky         (long long)ULLVal < 0)
468f22ef01cSRoman Divacky       return true;
469f22ef01cSRoman Divacky     Result = ULLVal;
470f22ef01cSRoman Divacky     return false;
471f22ef01cSRoman Divacky   }
472f22ef01cSRoman Divacky 
473f22ef01cSRoman Divacky   // Get the positive part of the value.
474d88c1a5aSDimitry Andric   StringRef Str2 = Str.drop_front(1);
475d88c1a5aSDimitry Andric   if (consumeUnsignedInteger(Str2, Radix, ULLVal) ||
476f22ef01cSRoman Divacky       // Reject values so large they'd overflow as negative signed, but allow
477f22ef01cSRoman Divacky       // "-0".  This negates the unsigned so that the negative isn't undefined
478f22ef01cSRoman Divacky       // on signed overflow.
479f22ef01cSRoman Divacky       (long long)-ULLVal > 0)
480f22ef01cSRoman Divacky     return true;
481f22ef01cSRoman Divacky 
482d88c1a5aSDimitry Andric   Str = Str2;
483f22ef01cSRoman Divacky   Result = -ULLVal;
484f22ef01cSRoman Divacky   return false;
485f22ef01cSRoman Divacky }
486f22ef01cSRoman Divacky 
487d88c1a5aSDimitry Andric /// GetAsUnsignedInteger - Workhorse method that converts a integer character
488d88c1a5aSDimitry Andric /// sequence of radix up to 36 to an unsigned long long value.
getAsUnsignedInteger(StringRef Str,unsigned Radix,unsigned long long & Result)489d88c1a5aSDimitry Andric bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
490d88c1a5aSDimitry Andric                                 unsigned long long &Result) {
491d88c1a5aSDimitry Andric   if (consumeUnsignedInteger(Str, Radix, Result))
492d88c1a5aSDimitry Andric     return true;
493d88c1a5aSDimitry Andric 
494d88c1a5aSDimitry Andric   // For getAsUnsignedInteger, we require the whole string to be consumed or
495d88c1a5aSDimitry Andric   // else we consider it a failure.
496d88c1a5aSDimitry Andric   return !Str.empty();
497d88c1a5aSDimitry Andric }
498d88c1a5aSDimitry Andric 
getAsSignedInteger(StringRef Str,unsigned Radix,long long & Result)499d88c1a5aSDimitry Andric bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
500d88c1a5aSDimitry Andric                               long long &Result) {
501d88c1a5aSDimitry Andric   if (consumeSignedInteger(Str, Radix, Result))
502d88c1a5aSDimitry Andric     return true;
503d88c1a5aSDimitry Andric 
504d88c1a5aSDimitry Andric   // For getAsSignedInteger, we require the whole string to be consumed or else
505d88c1a5aSDimitry Andric   // we consider it a failure.
506d88c1a5aSDimitry Andric   return !Str.empty();
507d88c1a5aSDimitry Andric }
508d88c1a5aSDimitry Andric 
getAsInteger(unsigned Radix,APInt & Result) const509f22ef01cSRoman Divacky bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
510f22ef01cSRoman Divacky   StringRef Str = *this;
511f22ef01cSRoman Divacky 
512f22ef01cSRoman Divacky   // Autosense radix if not specified.
513f22ef01cSRoman Divacky   if (Radix == 0)
514f22ef01cSRoman Divacky     Radix = GetAutoSenseRadix(Str);
515f22ef01cSRoman Divacky 
516f22ef01cSRoman Divacky   assert(Radix > 1 && Radix <= 36);
517f22ef01cSRoman Divacky 
518f22ef01cSRoman Divacky   // Empty strings (after the radix autosense) are invalid.
519f22ef01cSRoman Divacky   if (Str.empty()) return true;
520f22ef01cSRoman Divacky 
521f22ef01cSRoman Divacky   // Skip leading zeroes.  This can be a significant improvement if
522f22ef01cSRoman Divacky   // it means we don't need > 64 bits.
523f22ef01cSRoman Divacky   while (!Str.empty() && Str.front() == '0')
524f22ef01cSRoman Divacky     Str = Str.substr(1);
525f22ef01cSRoman Divacky 
526f22ef01cSRoman Divacky   // If it was nothing but zeroes....
527f22ef01cSRoman Divacky   if (Str.empty()) {
528f22ef01cSRoman Divacky     Result = APInt(64, 0);
529f22ef01cSRoman Divacky     return false;
530f22ef01cSRoman Divacky   }
531f22ef01cSRoman Divacky 
532f22ef01cSRoman Divacky   // (Over-)estimate the required number of bits.
533f22ef01cSRoman Divacky   unsigned Log2Radix = 0;
534f22ef01cSRoman Divacky   while ((1U << Log2Radix) < Radix) Log2Radix++;
535f22ef01cSRoman Divacky   bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
536f22ef01cSRoman Divacky 
537f22ef01cSRoman Divacky   unsigned BitWidth = Log2Radix * Str.size();
538f22ef01cSRoman Divacky   if (BitWidth < Result.getBitWidth())
539f22ef01cSRoman Divacky     BitWidth = Result.getBitWidth(); // don't shrink the result
5407ae0e2c9SDimitry Andric   else if (BitWidth > Result.getBitWidth())
5412754fe60SDimitry Andric     Result = Result.zext(BitWidth);
542f22ef01cSRoman Divacky 
543f22ef01cSRoman Divacky   APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
544f22ef01cSRoman Divacky   if (!IsPowerOf2Radix) {
545f22ef01cSRoman Divacky     // These must have the same bit-width as Result.
546f22ef01cSRoman Divacky     RadixAP = APInt(BitWidth, Radix);
547f22ef01cSRoman Divacky     CharAP = APInt(BitWidth, 0);
548f22ef01cSRoman Divacky   }
549f22ef01cSRoman Divacky 
550f22ef01cSRoman Divacky   // Parse all the bytes of the string given this radix.
551f22ef01cSRoman Divacky   Result = 0;
552f22ef01cSRoman Divacky   while (!Str.empty()) {
553f22ef01cSRoman Divacky     unsigned CharVal;
554f22ef01cSRoman Divacky     if (Str[0] >= '0' && Str[0] <= '9')
555f22ef01cSRoman Divacky       CharVal = Str[0]-'0';
556f22ef01cSRoman Divacky     else if (Str[0] >= 'a' && Str[0] <= 'z')
557f22ef01cSRoman Divacky       CharVal = Str[0]-'a'+10;
558f22ef01cSRoman Divacky     else if (Str[0] >= 'A' && Str[0] <= 'Z')
559f22ef01cSRoman Divacky       CharVal = Str[0]-'A'+10;
560f22ef01cSRoman Divacky     else
561f22ef01cSRoman Divacky       return true;
562f22ef01cSRoman Divacky 
563f22ef01cSRoman Divacky     // If the parsed value is larger than the integer radix, the string is
564f22ef01cSRoman Divacky     // invalid.
565f22ef01cSRoman Divacky     if (CharVal >= Radix)
566f22ef01cSRoman Divacky       return true;
567f22ef01cSRoman Divacky 
568f22ef01cSRoman Divacky     // Add in this character.
569f22ef01cSRoman Divacky     if (IsPowerOf2Radix) {
570f22ef01cSRoman Divacky       Result <<= Log2Radix;
571f22ef01cSRoman Divacky       Result |= CharVal;
572f22ef01cSRoman Divacky     } else {
573f22ef01cSRoman Divacky       Result *= RadixAP;
574f22ef01cSRoman Divacky       CharAP = CharVal;
575f22ef01cSRoman Divacky       Result += CharAP;
576f22ef01cSRoman Divacky     }
577f22ef01cSRoman Divacky 
578f22ef01cSRoman Divacky     Str = Str.substr(1);
579f22ef01cSRoman Divacky   }
580f22ef01cSRoman Divacky 
581f22ef01cSRoman Divacky   return false;
582f22ef01cSRoman Divacky }
583dff0c46cSDimitry Andric 
getAsDouble(double & Result,bool AllowInexact) const5847a7e6055SDimitry Andric bool StringRef::getAsDouble(double &Result, bool AllowInexact) const {
5857a7e6055SDimitry Andric   APFloat F(0.0);
5867a7e6055SDimitry Andric   APFloat::opStatus Status =
5877a7e6055SDimitry Andric       F.convertFromString(*this, APFloat::rmNearestTiesToEven);
5887a7e6055SDimitry Andric   if (Status != APFloat::opOK) {
589da09e106SDimitry Andric     if (!AllowInexact || !(Status & APFloat::opInexact))
5907a7e6055SDimitry Andric       return true;
5917a7e6055SDimitry Andric   }
5927a7e6055SDimitry Andric 
5937a7e6055SDimitry Andric   Result = F.convertToDouble();
5947a7e6055SDimitry Andric   return false;
5957a7e6055SDimitry Andric }
596dff0c46cSDimitry Andric 
597dff0c46cSDimitry Andric // Implementation of StringRef hashing.
hash_value(StringRef S)598dff0c46cSDimitry Andric hash_code llvm::hash_value(StringRef S) {
599dff0c46cSDimitry Andric   return hash_combine_range(S.begin(), S.end());
600dff0c46cSDimitry Andric }
601