1f22ef01cSRoman Divacky //===-- StringRef.cpp - Lightweight String References ---------------------===//
2f22ef01cSRoman Divacky //
3f22ef01cSRoman Divacky // The LLVM Compiler Infrastructure
4f22ef01cSRoman Divacky //
5f22ef01cSRoman Divacky // This file is distributed under the University of Illinois Open Source
6f22ef01cSRoman Divacky // License. See LICENSE.TXT for details.
7f22ef01cSRoman Divacky //
8f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
9f22ef01cSRoman Divacky
10f22ef01cSRoman Divacky #include "llvm/ADT/StringRef.h"
117a7e6055SDimitry Andric #include "llvm/ADT/APFloat.h"
12f22ef01cSRoman Divacky #include "llvm/ADT/APInt.h"
13dff0c46cSDimitry Andric #include "llvm/ADT/Hashing.h"
142cab237bSDimitry Andric #include "llvm/ADT/StringExtras.h"
15dff0c46cSDimitry Andric #include "llvm/ADT/edit_distance.h"
16e580952dSDimitry Andric #include <bitset>
17f22ef01cSRoman Divacky
18f22ef01cSRoman Divacky using namespace llvm;
19f22ef01cSRoman Divacky
20f22ef01cSRoman Divacky // MSVC emits references to this into the translation units which reference it.
21f22ef01cSRoman Divacky #ifndef _MSC_VER
22f22ef01cSRoman Divacky const size_t StringRef::npos;
23f22ef01cSRoman Divacky #endif
24f22ef01cSRoman Divacky
25f785676fSDimitry Andric // strncasecmp() is not available on non-POSIX systems, so define an
26f785676fSDimitry Andric // alternative function here.
ascii_strncasecmp(const char * LHS,const char * RHS,size_t Length)27f785676fSDimitry Andric static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) {
28f785676fSDimitry Andric for (size_t I = 0; I < Length; ++I) {
292cab237bSDimitry Andric unsigned char LHC = toLower(LHS[I]);
302cab237bSDimitry Andric unsigned char RHC = toLower(RHS[I]);
31f22ef01cSRoman Divacky if (LHC != RHC)
32f22ef01cSRoman Divacky return LHC < RHC ? -1 : 1;
33f22ef01cSRoman Divacky }
34f785676fSDimitry Andric return 0;
35f785676fSDimitry Andric }
36f22ef01cSRoman Divacky
37f785676fSDimitry Andric /// compare_lower - Compare strings, ignoring case.
compare_lower(StringRef RHS) const38f785676fSDimitry Andric int StringRef::compare_lower(StringRef RHS) const {
3939d628a0SDimitry Andric if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(Length, RHS.Length)))
40f785676fSDimitry Andric return Res;
41f22ef01cSRoman Divacky if (Length == RHS.Length)
42f22ef01cSRoman Divacky return 0;
43f22ef01cSRoman Divacky return Length < RHS.Length ? -1 : 1;
44f22ef01cSRoman Divacky }
45f22ef01cSRoman Divacky
46f785676fSDimitry Andric /// Check if this string starts with the given \p Prefix, ignoring case.
startswith_lower(StringRef Prefix) const47f785676fSDimitry Andric bool StringRef::startswith_lower(StringRef Prefix) const {
48f785676fSDimitry Andric return Length >= Prefix.Length &&
49f785676fSDimitry Andric ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0;
50f785676fSDimitry Andric }
51f785676fSDimitry Andric
52f785676fSDimitry Andric /// Check if this string ends with the given \p Suffix, ignoring case.
endswith_lower(StringRef Suffix) const53f785676fSDimitry Andric bool StringRef::endswith_lower(StringRef Suffix) const {
54f785676fSDimitry Andric return Length >= Suffix.Length &&
55f785676fSDimitry Andric ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
56f785676fSDimitry Andric }
57f785676fSDimitry Andric
find_lower(char C,size_t From) const58d88c1a5aSDimitry Andric size_t StringRef::find_lower(char C, size_t From) const {
592cab237bSDimitry Andric char L = toLower(C);
602cab237bSDimitry Andric return find_if([L](char D) { return toLower(D) == L; }, From);
61d88c1a5aSDimitry Andric }
62d88c1a5aSDimitry Andric
63f22ef01cSRoman Divacky /// compare_numeric - Compare strings, handle embedded numbers.
compare_numeric(StringRef RHS) const64f22ef01cSRoman Divacky int StringRef::compare_numeric(StringRef RHS) const {
6539d628a0SDimitry Andric for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
666122f3e6SDimitry Andric // Check for sequences of digits.
672cab237bSDimitry Andric if (isDigit(Data[I]) && isDigit(RHS.Data[I])) {
686122f3e6SDimitry Andric // The longer sequence of numbers is considered larger.
696122f3e6SDimitry Andric // This doesn't really handle prefixed zeros well.
706122f3e6SDimitry Andric size_t J;
716122f3e6SDimitry Andric for (J = I + 1; J != E + 1; ++J) {
722cab237bSDimitry Andric bool ld = J < Length && isDigit(Data[J]);
732cab237bSDimitry Andric bool rd = J < RHS.Length && isDigit(RHS.Data[J]);
74f22ef01cSRoman Divacky if (ld != rd)
75f22ef01cSRoman Divacky return rd ? -1 : 1;
76f22ef01cSRoman Divacky if (!rd)
77f22ef01cSRoman Divacky break;
78f22ef01cSRoman Divacky }
796122f3e6SDimitry Andric // The two number sequences have the same length (J-I), just memcmp them.
806122f3e6SDimitry Andric if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
816122f3e6SDimitry Andric return Res < 0 ? -1 : 1;
826122f3e6SDimitry Andric // Identical number sequences, continue search after the numbers.
836122f3e6SDimitry Andric I = J - 1;
846122f3e6SDimitry Andric continue;
85f22ef01cSRoman Divacky }
866122f3e6SDimitry Andric if (Data[I] != RHS.Data[I])
87e580952dSDimitry Andric return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
88f22ef01cSRoman Divacky }
89f22ef01cSRoman Divacky if (Length == RHS.Length)
90f22ef01cSRoman Divacky return 0;
91f22ef01cSRoman Divacky return Length < RHS.Length ? -1 : 1;
92f22ef01cSRoman Divacky }
93f22ef01cSRoman Divacky
94f22ef01cSRoman Divacky // Compute the edit distance between the two given strings.
edit_distance(llvm::StringRef Other,bool AllowReplacements,unsigned MaxEditDistance) const95f22ef01cSRoman Divacky unsigned StringRef::edit_distance(llvm::StringRef Other,
962754fe60SDimitry Andric bool AllowReplacements,
97f785676fSDimitry Andric unsigned MaxEditDistance) const {
98dff0c46cSDimitry Andric return llvm::ComputeEditDistance(
9939d628a0SDimitry Andric makeArrayRef(data(), size()),
10039d628a0SDimitry Andric makeArrayRef(Other.data(), Other.size()),
101dff0c46cSDimitry Andric AllowReplacements, MaxEditDistance);
102f22ef01cSRoman Divacky }
103f22ef01cSRoman Divacky
104dff0c46cSDimitry Andric //===----------------------------------------------------------------------===//
105dff0c46cSDimitry Andric // String Operations
106dff0c46cSDimitry Andric //===----------------------------------------------------------------------===//
1072754fe60SDimitry Andric
lower() const108dff0c46cSDimitry Andric std::string StringRef::lower() const {
109dff0c46cSDimitry Andric std::string Result(size(), char());
110dff0c46cSDimitry Andric for (size_type i = 0, e = size(); i != e; ++i) {
1112cab237bSDimitry Andric Result[i] = toLower(Data[i]);
112dff0c46cSDimitry Andric }
113dff0c46cSDimitry Andric return Result;
114f22ef01cSRoman Divacky }
115f22ef01cSRoman Divacky
upper() const116dff0c46cSDimitry Andric std::string StringRef::upper() const {
117dff0c46cSDimitry Andric std::string Result(size(), char());
118dff0c46cSDimitry Andric for (size_type i = 0, e = size(); i != e; ++i) {
1192cab237bSDimitry Andric Result[i] = toUpper(Data[i]);
120dff0c46cSDimitry Andric }
121f22ef01cSRoman Divacky return Result;
122f22ef01cSRoman Divacky }
123f22ef01cSRoman Divacky
124f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
125f22ef01cSRoman Divacky // String Searching
126f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
127f22ef01cSRoman Divacky
128f22ef01cSRoman Divacky
129f22ef01cSRoman Divacky /// find - Search for the first string \arg Str in the string.
130f22ef01cSRoman Divacky ///
1313b0f4066SDimitry Andric /// \return - The index of the first occurrence of \arg Str, or npos if not
132f22ef01cSRoman Divacky /// found.
find(StringRef Str,size_t From) const133f22ef01cSRoman Divacky size_t StringRef::find(StringRef Str, size_t From) const {
1347d523365SDimitry Andric if (From > Length)
135f22ef01cSRoman Divacky return npos;
136dff0c46cSDimitry Andric
137d88c1a5aSDimitry Andric const char *Start = Data + From;
138d88c1a5aSDimitry Andric size_t Size = Length - From;
139d88c1a5aSDimitry Andric
1407d523365SDimitry Andric const char *Needle = Str.data();
1417d523365SDimitry Andric size_t N = Str.size();
1427d523365SDimitry Andric if (N == 0)
1437d523365SDimitry Andric return From;
1447d523365SDimitry Andric if (Size < N)
1457d523365SDimitry Andric return npos;
146d88c1a5aSDimitry Andric if (N == 1) {
147d88c1a5aSDimitry Andric const char *Ptr = (const char *)::memchr(Start, Needle[0], Size);
148d88c1a5aSDimitry Andric return Ptr == nullptr ? npos : Ptr - Data;
149d88c1a5aSDimitry Andric }
1507d523365SDimitry Andric
1517d523365SDimitry Andric const char *Stop = Start + (Size - N + 1);
1527d523365SDimitry Andric
153dff0c46cSDimitry Andric // For short haystacks or unsupported needles fall back to the naive algorithm
1547d523365SDimitry Andric if (Size < 16 || N > 255) {
1557d523365SDimitry Andric do {
1567d523365SDimitry Andric if (std::memcmp(Start, Needle, N) == 0)
1577d523365SDimitry Andric return Start - Data;
1587d523365SDimitry Andric ++Start;
1597d523365SDimitry Andric } while (Start < Stop);
160f22ef01cSRoman Divacky return npos;
161f22ef01cSRoman Divacky }
162f22ef01cSRoman Divacky
163dff0c46cSDimitry Andric // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
164dff0c46cSDimitry Andric uint8_t BadCharSkip[256];
165dff0c46cSDimitry Andric std::memset(BadCharSkip, N, 256);
166dff0c46cSDimitry Andric for (unsigned i = 0; i != N-1; ++i)
167dff0c46cSDimitry Andric BadCharSkip[(uint8_t)Str[i]] = N-1-i;
168dff0c46cSDimitry Andric
1697d523365SDimitry Andric do {
170d88c1a5aSDimitry Andric uint8_t Last = Start[N - 1];
171d88c1a5aSDimitry Andric if (LLVM_UNLIKELY(Last == (uint8_t)Needle[N - 1]))
172d88c1a5aSDimitry Andric if (std::memcmp(Start, Needle, N - 1) == 0)
1737d523365SDimitry Andric return Start - Data;
174dff0c46cSDimitry Andric
175dff0c46cSDimitry Andric // Otherwise skip the appropriate number of bytes.
176d88c1a5aSDimitry Andric Start += BadCharSkip[Last];
1777d523365SDimitry Andric } while (Start < Stop);
178dff0c46cSDimitry Andric
179dff0c46cSDimitry Andric return npos;
180dff0c46cSDimitry Andric }
181dff0c46cSDimitry Andric
find_lower(StringRef Str,size_t From) const182d88c1a5aSDimitry Andric size_t StringRef::find_lower(StringRef Str, size_t From) const {
183d88c1a5aSDimitry Andric StringRef This = substr(From);
184d88c1a5aSDimitry Andric while (This.size() >= Str.size()) {
185d88c1a5aSDimitry Andric if (This.startswith_lower(Str))
186d88c1a5aSDimitry Andric return From;
187d88c1a5aSDimitry Andric This = This.drop_front();
188d88c1a5aSDimitry Andric ++From;
189d88c1a5aSDimitry Andric }
190d88c1a5aSDimitry Andric return npos;
191d88c1a5aSDimitry Andric }
192d88c1a5aSDimitry Andric
rfind_lower(char C,size_t From) const193d88c1a5aSDimitry Andric size_t StringRef::rfind_lower(char C, size_t From) const {
194d88c1a5aSDimitry Andric From = std::min(From, Length);
195d88c1a5aSDimitry Andric size_t i = From;
196d88c1a5aSDimitry Andric while (i != 0) {
197d88c1a5aSDimitry Andric --i;
1982cab237bSDimitry Andric if (toLower(Data[i]) == toLower(C))
199d88c1a5aSDimitry Andric return i;
200d88c1a5aSDimitry Andric }
201d88c1a5aSDimitry Andric return npos;
202d88c1a5aSDimitry Andric }
203d88c1a5aSDimitry Andric
204f22ef01cSRoman Divacky /// rfind - Search for the last string \arg Str in the string.
205f22ef01cSRoman Divacky ///
2063b0f4066SDimitry Andric /// \return - The index of the last occurrence of \arg Str, or npos if not
207f22ef01cSRoman Divacky /// found.
rfind(StringRef Str) const208f22ef01cSRoman Divacky size_t StringRef::rfind(StringRef Str) const {
209f22ef01cSRoman Divacky size_t N = Str.size();
210f22ef01cSRoman Divacky if (N > Length)
211f22ef01cSRoman Divacky return npos;
212f22ef01cSRoman Divacky for (size_t i = Length - N + 1, e = 0; i != e;) {
213f22ef01cSRoman Divacky --i;
214f22ef01cSRoman Divacky if (substr(i, N).equals(Str))
215f22ef01cSRoman Divacky return i;
216f22ef01cSRoman Divacky }
217f22ef01cSRoman Divacky return npos;
218f22ef01cSRoman Divacky }
219f22ef01cSRoman Divacky
rfind_lower(StringRef Str) const220d88c1a5aSDimitry Andric size_t StringRef::rfind_lower(StringRef Str) const {
221d88c1a5aSDimitry Andric size_t N = Str.size();
222d88c1a5aSDimitry Andric if (N > Length)
223d88c1a5aSDimitry Andric return npos;
224d88c1a5aSDimitry Andric for (size_t i = Length - N + 1, e = 0; i != e;) {
225d88c1a5aSDimitry Andric --i;
226d88c1a5aSDimitry Andric if (substr(i, N).equals_lower(Str))
227d88c1a5aSDimitry Andric return i;
228d88c1a5aSDimitry Andric }
229d88c1a5aSDimitry Andric return npos;
230d88c1a5aSDimitry Andric }
231d88c1a5aSDimitry Andric
232f22ef01cSRoman Divacky /// find_first_of - Find the first character in the string that is in \arg
233f22ef01cSRoman Divacky /// Chars, or npos if not found.
234f22ef01cSRoman Divacky ///
235e580952dSDimitry Andric /// Note: O(size() + Chars.size())
find_first_of(StringRef Chars,size_t From) const236f22ef01cSRoman Divacky StringRef::size_type StringRef::find_first_of(StringRef Chars,
237f22ef01cSRoman Divacky size_t From) const {
238e580952dSDimitry Andric std::bitset<1 << CHAR_BIT> CharBits;
239e580952dSDimitry Andric for (size_type i = 0; i != Chars.size(); ++i)
240e580952dSDimitry Andric CharBits.set((unsigned char)Chars[i]);
241e580952dSDimitry Andric
24239d628a0SDimitry Andric for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
243e580952dSDimitry Andric if (CharBits.test((unsigned char)Data[i]))
244f22ef01cSRoman Divacky return i;
245f22ef01cSRoman Divacky return npos;
246f22ef01cSRoman Divacky }
247f22ef01cSRoman Divacky
248f22ef01cSRoman Divacky /// find_first_not_of - Find the first character in the string that is not
249f22ef01cSRoman Divacky /// \arg C or npos if not found.
find_first_not_of(char C,size_t From) const250f22ef01cSRoman Divacky StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
25139d628a0SDimitry Andric for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
252f22ef01cSRoman Divacky if (Data[i] != C)
253f22ef01cSRoman Divacky return i;
254f22ef01cSRoman Divacky return npos;
255f22ef01cSRoman Divacky }
256f22ef01cSRoman Divacky
257f22ef01cSRoman Divacky /// find_first_not_of - Find the first character in the string that is not
258f22ef01cSRoman Divacky /// in the string \arg Chars, or npos if not found.
259f22ef01cSRoman Divacky ///
260e580952dSDimitry Andric /// Note: O(size() + Chars.size())
find_first_not_of(StringRef Chars,size_t From) const261f22ef01cSRoman Divacky StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
262f22ef01cSRoman Divacky size_t From) const {
263e580952dSDimitry Andric std::bitset<1 << CHAR_BIT> CharBits;
264e580952dSDimitry Andric for (size_type i = 0; i != Chars.size(); ++i)
265e580952dSDimitry Andric CharBits.set((unsigned char)Chars[i]);
266e580952dSDimitry Andric
26739d628a0SDimitry Andric for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
268e580952dSDimitry Andric if (!CharBits.test((unsigned char)Data[i]))
269f22ef01cSRoman Divacky return i;
270f22ef01cSRoman Divacky return npos;
271f22ef01cSRoman Divacky }
272f22ef01cSRoman Divacky
2732754fe60SDimitry Andric /// find_last_of - Find the last character in the string that is in \arg C,
2742754fe60SDimitry Andric /// or npos if not found.
2752754fe60SDimitry Andric ///
2762754fe60SDimitry Andric /// Note: O(size() + Chars.size())
find_last_of(StringRef Chars,size_t From) const2772754fe60SDimitry Andric StringRef::size_type StringRef::find_last_of(StringRef Chars,
2782754fe60SDimitry Andric size_t From) const {
2792754fe60SDimitry Andric std::bitset<1 << CHAR_BIT> CharBits;
2802754fe60SDimitry Andric for (size_type i = 0; i != Chars.size(); ++i)
2812754fe60SDimitry Andric CharBits.set((unsigned char)Chars[i]);
2822754fe60SDimitry Andric
28339d628a0SDimitry Andric for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
2842754fe60SDimitry Andric if (CharBits.test((unsigned char)Data[i]))
2852754fe60SDimitry Andric return i;
2862754fe60SDimitry Andric return npos;
2872754fe60SDimitry Andric }
288f22ef01cSRoman Divacky
2897ae0e2c9SDimitry Andric /// find_last_not_of - Find the last character in the string that is not
2907ae0e2c9SDimitry Andric /// \arg C, or npos if not found.
find_last_not_of(char C,size_t From) const2917ae0e2c9SDimitry Andric StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
29239d628a0SDimitry Andric for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
2937ae0e2c9SDimitry Andric if (Data[i] != C)
2947ae0e2c9SDimitry Andric return i;
2957ae0e2c9SDimitry Andric return npos;
2967ae0e2c9SDimitry Andric }
2977ae0e2c9SDimitry Andric
2987ae0e2c9SDimitry Andric /// find_last_not_of - Find the last character in the string that is not in
2997ae0e2c9SDimitry Andric /// \arg Chars, or npos if not found.
3007ae0e2c9SDimitry Andric ///
3017ae0e2c9SDimitry Andric /// Note: O(size() + Chars.size())
find_last_not_of(StringRef Chars,size_t From) const3027ae0e2c9SDimitry Andric StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
3037ae0e2c9SDimitry Andric size_t From) const {
3047ae0e2c9SDimitry Andric std::bitset<1 << CHAR_BIT> CharBits;
3057ae0e2c9SDimitry Andric for (size_type i = 0, e = Chars.size(); i != e; ++i)
3067ae0e2c9SDimitry Andric CharBits.set((unsigned char)Chars[i]);
3077ae0e2c9SDimitry Andric
30839d628a0SDimitry Andric for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
3097ae0e2c9SDimitry Andric if (!CharBits.test((unsigned char)Data[i]))
3107ae0e2c9SDimitry Andric return i;
3117ae0e2c9SDimitry Andric return npos;
3127ae0e2c9SDimitry Andric }
3137ae0e2c9SDimitry Andric
split(SmallVectorImpl<StringRef> & A,StringRef Separator,int MaxSplit,bool KeepEmpty) const314dff0c46cSDimitry Andric void StringRef::split(SmallVectorImpl<StringRef> &A,
3157d523365SDimitry Andric StringRef Separator, int MaxSplit,
316dff0c46cSDimitry Andric bool KeepEmpty) const {
3177d523365SDimitry Andric StringRef S = *this;
318dff0c46cSDimitry Andric
3197d523365SDimitry Andric // Count down from MaxSplit. When MaxSplit is -1, this will just split
3207d523365SDimitry Andric // "forever". This doesn't support splitting more than 2^31 times
3217d523365SDimitry Andric // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
3227d523365SDimitry Andric // but that seems unlikely to be useful.
3237d523365SDimitry Andric while (MaxSplit-- != 0) {
3247d523365SDimitry Andric size_t Idx = S.find(Separator);
3257d523365SDimitry Andric if (Idx == npos)
3267d523365SDimitry Andric break;
327dff0c46cSDimitry Andric
3287d523365SDimitry Andric // Push this split.
3297d523365SDimitry Andric if (KeepEmpty || Idx > 0)
3307d523365SDimitry Andric A.push_back(S.slice(0, Idx));
3317d523365SDimitry Andric
3327d523365SDimitry Andric // Jump forward.
3337d523365SDimitry Andric S = S.slice(Idx + Separator.size(), npos);
334dff0c46cSDimitry Andric }
3357d523365SDimitry Andric
3367d523365SDimitry Andric // Push the tail.
3377d523365SDimitry Andric if (KeepEmpty || !S.empty())
3387d523365SDimitry Andric A.push_back(S);
3397d523365SDimitry Andric }
3407d523365SDimitry Andric
split(SmallVectorImpl<StringRef> & A,char Separator,int MaxSplit,bool KeepEmpty) const3417d523365SDimitry Andric void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator,
3427d523365SDimitry Andric int MaxSplit, bool KeepEmpty) const {
3437d523365SDimitry Andric StringRef S = *this;
3447d523365SDimitry Andric
3457d523365SDimitry Andric // Count down from MaxSplit. When MaxSplit is -1, this will just split
3467d523365SDimitry Andric // "forever". This doesn't support splitting more than 2^31 times
3477d523365SDimitry Andric // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
3487d523365SDimitry Andric // but that seems unlikely to be useful.
3497d523365SDimitry Andric while (MaxSplit-- != 0) {
3507d523365SDimitry Andric size_t Idx = S.find(Separator);
3517d523365SDimitry Andric if (Idx == npos)
3527d523365SDimitry Andric break;
3537d523365SDimitry Andric
3547d523365SDimitry Andric // Push this split.
3557d523365SDimitry Andric if (KeepEmpty || Idx > 0)
3567d523365SDimitry Andric A.push_back(S.slice(0, Idx));
3577d523365SDimitry Andric
3587d523365SDimitry Andric // Jump forward.
3597d523365SDimitry Andric S = S.slice(Idx + 1, npos);
3607d523365SDimitry Andric }
3617d523365SDimitry Andric
3627d523365SDimitry Andric // Push the tail.
3637d523365SDimitry Andric if (KeepEmpty || !S.empty())
3647d523365SDimitry Andric A.push_back(S);
365dff0c46cSDimitry Andric }
366dff0c46cSDimitry Andric
367f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
368f22ef01cSRoman Divacky // Helpful Algorithms
369f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
370f22ef01cSRoman Divacky
371f22ef01cSRoman Divacky /// count - Return the number of non-overlapped occurrences of \arg Str in
372f22ef01cSRoman Divacky /// the string.
count(StringRef Str) const373f22ef01cSRoman Divacky size_t StringRef::count(StringRef Str) const {
374f22ef01cSRoman Divacky size_t Count = 0;
375f22ef01cSRoman Divacky size_t N = Str.size();
376f22ef01cSRoman Divacky if (N > Length)
377f22ef01cSRoman Divacky return 0;
378f22ef01cSRoman Divacky for (size_t i = 0, e = Length - N + 1; i != e; ++i)
379f22ef01cSRoman Divacky if (substr(i, N).equals(Str))
380f22ef01cSRoman Divacky ++Count;
381f22ef01cSRoman Divacky return Count;
382f22ef01cSRoman Divacky }
383f22ef01cSRoman Divacky
GetAutoSenseRadix(StringRef & Str)384f22ef01cSRoman Divacky static unsigned GetAutoSenseRadix(StringRef &Str) {
385d88c1a5aSDimitry Andric if (Str.empty())
386d88c1a5aSDimitry Andric return 10;
387d88c1a5aSDimitry Andric
3883ca95b02SDimitry Andric if (Str.startswith("0x") || Str.startswith("0X")) {
389f22ef01cSRoman Divacky Str = Str.substr(2);
390f22ef01cSRoman Divacky return 16;
3917ae0e2c9SDimitry Andric }
3927ae0e2c9SDimitry Andric
3933ca95b02SDimitry Andric if (Str.startswith("0b") || Str.startswith("0B")) {
394f22ef01cSRoman Divacky Str = Str.substr(2);
395f22ef01cSRoman Divacky return 2;
396f22ef01cSRoman Divacky }
3977ae0e2c9SDimitry Andric
3987ae0e2c9SDimitry Andric if (Str.startswith("0o")) {
3997ae0e2c9SDimitry Andric Str = Str.substr(2);
4007ae0e2c9SDimitry Andric return 8;
4017ae0e2c9SDimitry Andric }
4027ae0e2c9SDimitry Andric
4032cab237bSDimitry Andric if (Str[0] == '0' && Str.size() > 1 && isDigit(Str[1])) {
404d88c1a5aSDimitry Andric Str = Str.substr(1);
4057ae0e2c9SDimitry Andric return 8;
406d88c1a5aSDimitry Andric }
4077ae0e2c9SDimitry Andric
4087ae0e2c9SDimitry Andric return 10;
409f22ef01cSRoman Divacky }
410f22ef01cSRoman Divacky
consumeUnsignedInteger(StringRef & Str,unsigned Radix,unsigned long long & Result)411d88c1a5aSDimitry Andric bool llvm::consumeUnsignedInteger(StringRef &Str, unsigned Radix,
412f22ef01cSRoman Divacky unsigned long long &Result) {
413f22ef01cSRoman Divacky // Autosense radix if not specified.
414f22ef01cSRoman Divacky if (Radix == 0)
415f22ef01cSRoman Divacky Radix = GetAutoSenseRadix(Str);
416f22ef01cSRoman Divacky
417f22ef01cSRoman Divacky // Empty strings (after the radix autosense) are invalid.
418f22ef01cSRoman Divacky if (Str.empty()) return true;
419f22ef01cSRoman Divacky
420f22ef01cSRoman Divacky // Parse all the bytes of the string given this radix. Watch for overflow.
421d88c1a5aSDimitry Andric StringRef Str2 = Str;
422f22ef01cSRoman Divacky Result = 0;
423d88c1a5aSDimitry Andric while (!Str2.empty()) {
424f22ef01cSRoman Divacky unsigned CharVal;
425d88c1a5aSDimitry Andric if (Str2[0] >= '0' && Str2[0] <= '9')
426d88c1a5aSDimitry Andric CharVal = Str2[0] - '0';
427d88c1a5aSDimitry Andric else if (Str2[0] >= 'a' && Str2[0] <= 'z')
428d88c1a5aSDimitry Andric CharVal = Str2[0] - 'a' + 10;
429d88c1a5aSDimitry Andric else if (Str2[0] >= 'A' && Str2[0] <= 'Z')
430d88c1a5aSDimitry Andric CharVal = Str2[0] - 'A' + 10;
431f22ef01cSRoman Divacky else
432d88c1a5aSDimitry Andric break;
433f22ef01cSRoman Divacky
434d88c1a5aSDimitry Andric // If the parsed value is larger than the integer radix, we cannot
435d88c1a5aSDimitry Andric // consume any more characters.
436f22ef01cSRoman Divacky if (CharVal >= Radix)
437d88c1a5aSDimitry Andric break;
438f22ef01cSRoman Divacky
439f22ef01cSRoman Divacky // Add in this character.
440f22ef01cSRoman Divacky unsigned long long PrevResult = Result;
441f22ef01cSRoman Divacky Result = Result * Radix + CharVal;
442f22ef01cSRoman Divacky
4433861d79fSDimitry Andric // Check for overflow by shifting back and seeing if bits were lost.
4443861d79fSDimitry Andric if (Result / Radix < PrevResult)
445f22ef01cSRoman Divacky return true;
446f22ef01cSRoman Divacky
447d88c1a5aSDimitry Andric Str2 = Str2.substr(1);
448f22ef01cSRoman Divacky }
449f22ef01cSRoman Divacky
450d88c1a5aSDimitry Andric // We consider the operation a failure if no characters were consumed
451d88c1a5aSDimitry Andric // successfully.
452d88c1a5aSDimitry Andric if (Str.size() == Str2.size())
453d88c1a5aSDimitry Andric return true;
454d88c1a5aSDimitry Andric
455d88c1a5aSDimitry Andric Str = Str2;
456f22ef01cSRoman Divacky return false;
457f22ef01cSRoman Divacky }
458f22ef01cSRoman Divacky
consumeSignedInteger(StringRef & Str,unsigned Radix,long long & Result)459d88c1a5aSDimitry Andric bool llvm::consumeSignedInteger(StringRef &Str, unsigned Radix,
460dff0c46cSDimitry Andric long long &Result) {
461f22ef01cSRoman Divacky unsigned long long ULLVal;
462f22ef01cSRoman Divacky
463f22ef01cSRoman Divacky // Handle positive strings first.
464dff0c46cSDimitry Andric if (Str.empty() || Str.front() != '-') {
465d88c1a5aSDimitry Andric if (consumeUnsignedInteger(Str, Radix, ULLVal) ||
466f22ef01cSRoman Divacky // Check for value so large it overflows a signed value.
467f22ef01cSRoman Divacky (long long)ULLVal < 0)
468f22ef01cSRoman Divacky return true;
469f22ef01cSRoman Divacky Result = ULLVal;
470f22ef01cSRoman Divacky return false;
471f22ef01cSRoman Divacky }
472f22ef01cSRoman Divacky
473f22ef01cSRoman Divacky // Get the positive part of the value.
474d88c1a5aSDimitry Andric StringRef Str2 = Str.drop_front(1);
475d88c1a5aSDimitry Andric if (consumeUnsignedInteger(Str2, Radix, ULLVal) ||
476f22ef01cSRoman Divacky // Reject values so large they'd overflow as negative signed, but allow
477f22ef01cSRoman Divacky // "-0". This negates the unsigned so that the negative isn't undefined
478f22ef01cSRoman Divacky // on signed overflow.
479f22ef01cSRoman Divacky (long long)-ULLVal > 0)
480f22ef01cSRoman Divacky return true;
481f22ef01cSRoman Divacky
482d88c1a5aSDimitry Andric Str = Str2;
483f22ef01cSRoman Divacky Result = -ULLVal;
484f22ef01cSRoman Divacky return false;
485f22ef01cSRoman Divacky }
486f22ef01cSRoman Divacky
487d88c1a5aSDimitry Andric /// GetAsUnsignedInteger - Workhorse method that converts a integer character
488d88c1a5aSDimitry Andric /// sequence of radix up to 36 to an unsigned long long value.
getAsUnsignedInteger(StringRef Str,unsigned Radix,unsigned long long & Result)489d88c1a5aSDimitry Andric bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
490d88c1a5aSDimitry Andric unsigned long long &Result) {
491d88c1a5aSDimitry Andric if (consumeUnsignedInteger(Str, Radix, Result))
492d88c1a5aSDimitry Andric return true;
493d88c1a5aSDimitry Andric
494d88c1a5aSDimitry Andric // For getAsUnsignedInteger, we require the whole string to be consumed or
495d88c1a5aSDimitry Andric // else we consider it a failure.
496d88c1a5aSDimitry Andric return !Str.empty();
497d88c1a5aSDimitry Andric }
498d88c1a5aSDimitry Andric
getAsSignedInteger(StringRef Str,unsigned Radix,long long & Result)499d88c1a5aSDimitry Andric bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
500d88c1a5aSDimitry Andric long long &Result) {
501d88c1a5aSDimitry Andric if (consumeSignedInteger(Str, Radix, Result))
502d88c1a5aSDimitry Andric return true;
503d88c1a5aSDimitry Andric
504d88c1a5aSDimitry Andric // For getAsSignedInteger, we require the whole string to be consumed or else
505d88c1a5aSDimitry Andric // we consider it a failure.
506d88c1a5aSDimitry Andric return !Str.empty();
507d88c1a5aSDimitry Andric }
508d88c1a5aSDimitry Andric
getAsInteger(unsigned Radix,APInt & Result) const509f22ef01cSRoman Divacky bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
510f22ef01cSRoman Divacky StringRef Str = *this;
511f22ef01cSRoman Divacky
512f22ef01cSRoman Divacky // Autosense radix if not specified.
513f22ef01cSRoman Divacky if (Radix == 0)
514f22ef01cSRoman Divacky Radix = GetAutoSenseRadix(Str);
515f22ef01cSRoman Divacky
516f22ef01cSRoman Divacky assert(Radix > 1 && Radix <= 36);
517f22ef01cSRoman Divacky
518f22ef01cSRoman Divacky // Empty strings (after the radix autosense) are invalid.
519f22ef01cSRoman Divacky if (Str.empty()) return true;
520f22ef01cSRoman Divacky
521f22ef01cSRoman Divacky // Skip leading zeroes. This can be a significant improvement if
522f22ef01cSRoman Divacky // it means we don't need > 64 bits.
523f22ef01cSRoman Divacky while (!Str.empty() && Str.front() == '0')
524f22ef01cSRoman Divacky Str = Str.substr(1);
525f22ef01cSRoman Divacky
526f22ef01cSRoman Divacky // If it was nothing but zeroes....
527f22ef01cSRoman Divacky if (Str.empty()) {
528f22ef01cSRoman Divacky Result = APInt(64, 0);
529f22ef01cSRoman Divacky return false;
530f22ef01cSRoman Divacky }
531f22ef01cSRoman Divacky
532f22ef01cSRoman Divacky // (Over-)estimate the required number of bits.
533f22ef01cSRoman Divacky unsigned Log2Radix = 0;
534f22ef01cSRoman Divacky while ((1U << Log2Radix) < Radix) Log2Radix++;
535f22ef01cSRoman Divacky bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
536f22ef01cSRoman Divacky
537f22ef01cSRoman Divacky unsigned BitWidth = Log2Radix * Str.size();
538f22ef01cSRoman Divacky if (BitWidth < Result.getBitWidth())
539f22ef01cSRoman Divacky BitWidth = Result.getBitWidth(); // don't shrink the result
5407ae0e2c9SDimitry Andric else if (BitWidth > Result.getBitWidth())
5412754fe60SDimitry Andric Result = Result.zext(BitWidth);
542f22ef01cSRoman Divacky
543f22ef01cSRoman Divacky APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
544f22ef01cSRoman Divacky if (!IsPowerOf2Radix) {
545f22ef01cSRoman Divacky // These must have the same bit-width as Result.
546f22ef01cSRoman Divacky RadixAP = APInt(BitWidth, Radix);
547f22ef01cSRoman Divacky CharAP = APInt(BitWidth, 0);
548f22ef01cSRoman Divacky }
549f22ef01cSRoman Divacky
550f22ef01cSRoman Divacky // Parse all the bytes of the string given this radix.
551f22ef01cSRoman Divacky Result = 0;
552f22ef01cSRoman Divacky while (!Str.empty()) {
553f22ef01cSRoman Divacky unsigned CharVal;
554f22ef01cSRoman Divacky if (Str[0] >= '0' && Str[0] <= '9')
555f22ef01cSRoman Divacky CharVal = Str[0]-'0';
556f22ef01cSRoman Divacky else if (Str[0] >= 'a' && Str[0] <= 'z')
557f22ef01cSRoman Divacky CharVal = Str[0]-'a'+10;
558f22ef01cSRoman Divacky else if (Str[0] >= 'A' && Str[0] <= 'Z')
559f22ef01cSRoman Divacky CharVal = Str[0]-'A'+10;
560f22ef01cSRoman Divacky else
561f22ef01cSRoman Divacky return true;
562f22ef01cSRoman Divacky
563f22ef01cSRoman Divacky // If the parsed value is larger than the integer radix, the string is
564f22ef01cSRoman Divacky // invalid.
565f22ef01cSRoman Divacky if (CharVal >= Radix)
566f22ef01cSRoman Divacky return true;
567f22ef01cSRoman Divacky
568f22ef01cSRoman Divacky // Add in this character.
569f22ef01cSRoman Divacky if (IsPowerOf2Radix) {
570f22ef01cSRoman Divacky Result <<= Log2Radix;
571f22ef01cSRoman Divacky Result |= CharVal;
572f22ef01cSRoman Divacky } else {
573f22ef01cSRoman Divacky Result *= RadixAP;
574f22ef01cSRoman Divacky CharAP = CharVal;
575f22ef01cSRoman Divacky Result += CharAP;
576f22ef01cSRoman Divacky }
577f22ef01cSRoman Divacky
578f22ef01cSRoman Divacky Str = Str.substr(1);
579f22ef01cSRoman Divacky }
580f22ef01cSRoman Divacky
581f22ef01cSRoman Divacky return false;
582f22ef01cSRoman Divacky }
583dff0c46cSDimitry Andric
getAsDouble(double & Result,bool AllowInexact) const5847a7e6055SDimitry Andric bool StringRef::getAsDouble(double &Result, bool AllowInexact) const {
5857a7e6055SDimitry Andric APFloat F(0.0);
5867a7e6055SDimitry Andric APFloat::opStatus Status =
5877a7e6055SDimitry Andric F.convertFromString(*this, APFloat::rmNearestTiesToEven);
5887a7e6055SDimitry Andric if (Status != APFloat::opOK) {
589da09e106SDimitry Andric if (!AllowInexact || !(Status & APFloat::opInexact))
5907a7e6055SDimitry Andric return true;
5917a7e6055SDimitry Andric }
5927a7e6055SDimitry Andric
5937a7e6055SDimitry Andric Result = F.convertToDouble();
5947a7e6055SDimitry Andric return false;
5957a7e6055SDimitry Andric }
596dff0c46cSDimitry Andric
597dff0c46cSDimitry Andric // Implementation of StringRef hashing.
hash_value(StringRef S)598dff0c46cSDimitry Andric hash_code llvm::hash_value(StringRef S) {
599dff0c46cSDimitry Andric return hash_combine_range(S.begin(), S.end());
600dff0c46cSDimitry Andric }
601