10b57cec5SDimitry Andric //===-- StringRef.cpp - Lightweight String References ---------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric
90b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
100b57cec5SDimitry Andric #include "llvm/ADT/APFloat.h"
110b57cec5SDimitry Andric #include "llvm/ADT/APInt.h"
120b57cec5SDimitry Andric #include "llvm/ADT/Hashing.h"
130b57cec5SDimitry Andric #include "llvm/ADT/StringExtras.h"
140b57cec5SDimitry Andric #include "llvm/ADT/edit_distance.h"
15480093f4SDimitry Andric #include "llvm/Support/Error.h"
160b57cec5SDimitry Andric #include <bitset>
170b57cec5SDimitry Andric
180b57cec5SDimitry Andric using namespace llvm;
190b57cec5SDimitry Andric
200b57cec5SDimitry Andric // MSVC emits references to this into the translation units which reference it.
210b57cec5SDimitry Andric #ifndef _MSC_VER
225ffd83dbSDimitry Andric constexpr size_t StringRef::npos;
230b57cec5SDimitry Andric #endif
240b57cec5SDimitry Andric
250b57cec5SDimitry Andric // strncasecmp() is not available on non-POSIX systems, so define an
260b57cec5SDimitry Andric // alternative function here.
ascii_strncasecmp(const char * LHS,const char * RHS,size_t Length)270b57cec5SDimitry Andric static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) {
280b57cec5SDimitry Andric for (size_t I = 0; I < Length; ++I) {
290b57cec5SDimitry Andric unsigned char LHC = toLower(LHS[I]);
300b57cec5SDimitry Andric unsigned char RHC = toLower(RHS[I]);
310b57cec5SDimitry Andric if (LHC != RHC)
320b57cec5SDimitry Andric return LHC < RHC ? -1 : 1;
330b57cec5SDimitry Andric }
340b57cec5SDimitry Andric return 0;
350b57cec5SDimitry Andric }
360b57cec5SDimitry Andric
compare_insensitive(StringRef RHS) const37fe6060f1SDimitry Andric int StringRef::compare_insensitive(StringRef RHS) const {
380b57cec5SDimitry Andric if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(Length, RHS.Length)))
390b57cec5SDimitry Andric return Res;
400b57cec5SDimitry Andric if (Length == RHS.Length)
410b57cec5SDimitry Andric return 0;
420b57cec5SDimitry Andric return Length < RHS.Length ? -1 : 1;
430b57cec5SDimitry Andric }
440b57cec5SDimitry Andric
starts_with_insensitive(StringRef Prefix) const45bdd1243dSDimitry Andric bool StringRef::starts_with_insensitive(StringRef Prefix) const {
460b57cec5SDimitry Andric return Length >= Prefix.Length &&
470b57cec5SDimitry Andric ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0;
480b57cec5SDimitry Andric }
490b57cec5SDimitry Andric
ends_with_insensitive(StringRef Suffix) const50bdd1243dSDimitry Andric bool StringRef::ends_with_insensitive(StringRef Suffix) const {
510b57cec5SDimitry Andric return Length >= Suffix.Length &&
520b57cec5SDimitry Andric ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
530b57cec5SDimitry Andric }
540b57cec5SDimitry Andric
find_insensitive(char C,size_t From) const55fe6060f1SDimitry Andric size_t StringRef::find_insensitive(char C, size_t From) const {
560b57cec5SDimitry Andric char L = toLower(C);
570b57cec5SDimitry Andric return find_if([L](char D) { return toLower(D) == L; }, From);
580b57cec5SDimitry Andric }
590b57cec5SDimitry Andric
600b57cec5SDimitry Andric /// compare_numeric - Compare strings, handle embedded numbers.
compare_numeric(StringRef RHS) const610b57cec5SDimitry Andric int StringRef::compare_numeric(StringRef RHS) const {
620b57cec5SDimitry Andric for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
630b57cec5SDimitry Andric // Check for sequences of digits.
640b57cec5SDimitry Andric if (isDigit(Data[I]) && isDigit(RHS.Data[I])) {
650b57cec5SDimitry Andric // The longer sequence of numbers is considered larger.
660b57cec5SDimitry Andric // This doesn't really handle prefixed zeros well.
670b57cec5SDimitry Andric size_t J;
680b57cec5SDimitry Andric for (J = I + 1; J != E + 1; ++J) {
690b57cec5SDimitry Andric bool ld = J < Length && isDigit(Data[J]);
700b57cec5SDimitry Andric bool rd = J < RHS.Length && isDigit(RHS.Data[J]);
710b57cec5SDimitry Andric if (ld != rd)
720b57cec5SDimitry Andric return rd ? -1 : 1;
730b57cec5SDimitry Andric if (!rd)
740b57cec5SDimitry Andric break;
750b57cec5SDimitry Andric }
760b57cec5SDimitry Andric // The two number sequences have the same length (J-I), just memcmp them.
770b57cec5SDimitry Andric if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
780b57cec5SDimitry Andric return Res < 0 ? -1 : 1;
790b57cec5SDimitry Andric // Identical number sequences, continue search after the numbers.
800b57cec5SDimitry Andric I = J - 1;
810b57cec5SDimitry Andric continue;
820b57cec5SDimitry Andric }
830b57cec5SDimitry Andric if (Data[I] != RHS.Data[I])
840b57cec5SDimitry Andric return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
850b57cec5SDimitry Andric }
860b57cec5SDimitry Andric if (Length == RHS.Length)
870b57cec5SDimitry Andric return 0;
880b57cec5SDimitry Andric return Length < RHS.Length ? -1 : 1;
890b57cec5SDimitry Andric }
900b57cec5SDimitry Andric
910b57cec5SDimitry Andric // Compute the edit distance between the two given strings.
edit_distance(llvm::StringRef Other,bool AllowReplacements,unsigned MaxEditDistance) const920b57cec5SDimitry Andric unsigned StringRef::edit_distance(llvm::StringRef Other,
930b57cec5SDimitry Andric bool AllowReplacements,
940b57cec5SDimitry Andric unsigned MaxEditDistance) const {
95bdd1243dSDimitry Andric return llvm::ComputeEditDistance(ArrayRef(data(), size()),
96bdd1243dSDimitry Andric ArrayRef(Other.data(), Other.size()),
970b57cec5SDimitry Andric AllowReplacements, MaxEditDistance);
980b57cec5SDimitry Andric }
990b57cec5SDimitry Andric
edit_distance_insensitive(StringRef Other,bool AllowReplacements,unsigned MaxEditDistance) const10081ad6265SDimitry Andric unsigned llvm::StringRef::edit_distance_insensitive(
10181ad6265SDimitry Andric StringRef Other, bool AllowReplacements, unsigned MaxEditDistance) const {
10281ad6265SDimitry Andric return llvm::ComputeMappedEditDistance(
103bdd1243dSDimitry Andric ArrayRef(data(), size()), ArrayRef(Other.data(), Other.size()),
10481ad6265SDimitry Andric llvm::toLower, AllowReplacements, MaxEditDistance);
10581ad6265SDimitry Andric }
10681ad6265SDimitry Andric
1070b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
1080b57cec5SDimitry Andric // String Operations
1090b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
1100b57cec5SDimitry Andric
lower() const1110b57cec5SDimitry Andric std::string StringRef::lower() const {
1125ffd83dbSDimitry Andric return std::string(map_iterator(begin(), toLower),
1135ffd83dbSDimitry Andric map_iterator(end(), toLower));
1140b57cec5SDimitry Andric }
1150b57cec5SDimitry Andric
upper() const1160b57cec5SDimitry Andric std::string StringRef::upper() const {
1175ffd83dbSDimitry Andric return std::string(map_iterator(begin(), toUpper),
1185ffd83dbSDimitry Andric map_iterator(end(), toUpper));
1190b57cec5SDimitry Andric }
1200b57cec5SDimitry Andric
1210b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
1220b57cec5SDimitry Andric // String Searching
1230b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
1240b57cec5SDimitry Andric
1250b57cec5SDimitry Andric
1260b57cec5SDimitry Andric /// find - Search for the first string \arg Str in the string.
1270b57cec5SDimitry Andric ///
1280b57cec5SDimitry Andric /// \return - The index of the first occurrence of \arg Str, or npos if not
1290b57cec5SDimitry Andric /// found.
find(StringRef Str,size_t From) const1300b57cec5SDimitry Andric size_t StringRef::find(StringRef Str, size_t From) const {
1310b57cec5SDimitry Andric if (From > Length)
1320b57cec5SDimitry Andric return npos;
1330b57cec5SDimitry Andric
1340b57cec5SDimitry Andric const char *Start = Data + From;
1350b57cec5SDimitry Andric size_t Size = Length - From;
1360b57cec5SDimitry Andric
1370b57cec5SDimitry Andric const char *Needle = Str.data();
1380b57cec5SDimitry Andric size_t N = Str.size();
1390b57cec5SDimitry Andric if (N == 0)
1400b57cec5SDimitry Andric return From;
1410b57cec5SDimitry Andric if (Size < N)
1420b57cec5SDimitry Andric return npos;
1430b57cec5SDimitry Andric if (N == 1) {
1440b57cec5SDimitry Andric const char *Ptr = (const char *)::memchr(Start, Needle[0], Size);
1450b57cec5SDimitry Andric return Ptr == nullptr ? npos : Ptr - Data;
1460b57cec5SDimitry Andric }
1470b57cec5SDimitry Andric
1480b57cec5SDimitry Andric const char *Stop = Start + (Size - N + 1);
1490b57cec5SDimitry Andric
150bdd1243dSDimitry Andric if (N == 2) {
151bdd1243dSDimitry Andric // Provide a fast path for newline finding (CRLF case) in InclusionRewriter.
152bdd1243dSDimitry Andric // Not the most optimized strategy, but getting memcmp inlined should be
153bdd1243dSDimitry Andric // good enough.
154bdd1243dSDimitry Andric do {
155bdd1243dSDimitry Andric if (std::memcmp(Start, Needle, 2) == 0)
156bdd1243dSDimitry Andric return Start - Data;
157bdd1243dSDimitry Andric ++Start;
158bdd1243dSDimitry Andric } while (Start < Stop);
159bdd1243dSDimitry Andric return npos;
160bdd1243dSDimitry Andric }
161bdd1243dSDimitry Andric
1620b57cec5SDimitry Andric // For short haystacks or unsupported needles fall back to the naive algorithm
1630b57cec5SDimitry Andric if (Size < 16 || N > 255) {
1640b57cec5SDimitry Andric do {
1650b57cec5SDimitry Andric if (std::memcmp(Start, Needle, N) == 0)
1660b57cec5SDimitry Andric return Start - Data;
1670b57cec5SDimitry Andric ++Start;
1680b57cec5SDimitry Andric } while (Start < Stop);
1690b57cec5SDimitry Andric return npos;
1700b57cec5SDimitry Andric }
1710b57cec5SDimitry Andric
1720b57cec5SDimitry Andric // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
1730b57cec5SDimitry Andric uint8_t BadCharSkip[256];
1740b57cec5SDimitry Andric std::memset(BadCharSkip, N, 256);
1750b57cec5SDimitry Andric for (unsigned i = 0; i != N-1; ++i)
1760b57cec5SDimitry Andric BadCharSkip[(uint8_t)Str[i]] = N-1-i;
1770b57cec5SDimitry Andric
1780b57cec5SDimitry Andric do {
1790b57cec5SDimitry Andric uint8_t Last = Start[N - 1];
1800b57cec5SDimitry Andric if (LLVM_UNLIKELY(Last == (uint8_t)Needle[N - 1]))
1810b57cec5SDimitry Andric if (std::memcmp(Start, Needle, N - 1) == 0)
1820b57cec5SDimitry Andric return Start - Data;
1830b57cec5SDimitry Andric
1840b57cec5SDimitry Andric // Otherwise skip the appropriate number of bytes.
1850b57cec5SDimitry Andric Start += BadCharSkip[Last];
1860b57cec5SDimitry Andric } while (Start < Stop);
1870b57cec5SDimitry Andric
1880b57cec5SDimitry Andric return npos;
1890b57cec5SDimitry Andric }
1900b57cec5SDimitry Andric
find_insensitive(StringRef Str,size_t From) const191fe6060f1SDimitry Andric size_t StringRef::find_insensitive(StringRef Str, size_t From) const {
1920b57cec5SDimitry Andric StringRef This = substr(From);
1930b57cec5SDimitry Andric while (This.size() >= Str.size()) {
194fe013be4SDimitry Andric if (This.starts_with_insensitive(Str))
1950b57cec5SDimitry Andric return From;
1960b57cec5SDimitry Andric This = This.drop_front();
1970b57cec5SDimitry Andric ++From;
1980b57cec5SDimitry Andric }
1990b57cec5SDimitry Andric return npos;
2000b57cec5SDimitry Andric }
2010b57cec5SDimitry Andric
rfind_insensitive(char C,size_t From) const202fe6060f1SDimitry Andric size_t StringRef::rfind_insensitive(char C, size_t From) const {
2030b57cec5SDimitry Andric From = std::min(From, Length);
2040b57cec5SDimitry Andric size_t i = From;
2050b57cec5SDimitry Andric while (i != 0) {
2060b57cec5SDimitry Andric --i;
2070b57cec5SDimitry Andric if (toLower(Data[i]) == toLower(C))
2080b57cec5SDimitry Andric return i;
2090b57cec5SDimitry Andric }
2100b57cec5SDimitry Andric return npos;
2110b57cec5SDimitry Andric }
2120b57cec5SDimitry Andric
2130b57cec5SDimitry Andric /// rfind - Search for the last string \arg Str in the string.
2140b57cec5SDimitry Andric ///
2150b57cec5SDimitry Andric /// \return - The index of the last occurrence of \arg Str, or npos if not
2160b57cec5SDimitry Andric /// found.
rfind(StringRef Str) const2170b57cec5SDimitry Andric size_t StringRef::rfind(StringRef Str) const {
218bdd1243dSDimitry Andric return std::string_view(*this).rfind(Str);
2190b57cec5SDimitry Andric }
2200b57cec5SDimitry Andric
rfind_insensitive(StringRef Str) const221fe6060f1SDimitry Andric size_t StringRef::rfind_insensitive(StringRef Str) const {
2220b57cec5SDimitry Andric size_t N = Str.size();
2230b57cec5SDimitry Andric if (N > Length)
2240b57cec5SDimitry Andric return npos;
2250b57cec5SDimitry Andric for (size_t i = Length - N + 1, e = 0; i != e;) {
2260b57cec5SDimitry Andric --i;
227fe6060f1SDimitry Andric if (substr(i, N).equals_insensitive(Str))
2280b57cec5SDimitry Andric return i;
2290b57cec5SDimitry Andric }
2300b57cec5SDimitry Andric return npos;
2310b57cec5SDimitry Andric }
2320b57cec5SDimitry Andric
2330b57cec5SDimitry Andric /// find_first_of - Find the first character in the string that is in \arg
2340b57cec5SDimitry Andric /// Chars, or npos if not found.
2350b57cec5SDimitry Andric ///
2360b57cec5SDimitry Andric /// Note: O(size() + Chars.size())
find_first_of(StringRef Chars,size_t From) const2370b57cec5SDimitry Andric StringRef::size_type StringRef::find_first_of(StringRef Chars,
2380b57cec5SDimitry Andric size_t From) const {
2390b57cec5SDimitry Andric std::bitset<1 << CHAR_BIT> CharBits;
2404824e7fdSDimitry Andric for (char C : Chars)
2414824e7fdSDimitry Andric CharBits.set((unsigned char)C);
2420b57cec5SDimitry Andric
2430b57cec5SDimitry Andric for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
2440b57cec5SDimitry Andric if (CharBits.test((unsigned char)Data[i]))
2450b57cec5SDimitry Andric return i;
2460b57cec5SDimitry Andric return npos;
2470b57cec5SDimitry Andric }
2480b57cec5SDimitry Andric
2490b57cec5SDimitry Andric /// find_first_not_of - Find the first character in the string that is not
2500b57cec5SDimitry Andric /// \arg C or npos if not found.
find_first_not_of(char C,size_t From) const2510b57cec5SDimitry Andric StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
252bdd1243dSDimitry Andric return std::string_view(*this).find_first_not_of(C, From);
2530b57cec5SDimitry Andric }
2540b57cec5SDimitry Andric
2550b57cec5SDimitry Andric /// find_first_not_of - Find the first character in the string that is not
2560b57cec5SDimitry Andric /// in the string \arg Chars, or npos if not found.
2570b57cec5SDimitry Andric ///
2580b57cec5SDimitry Andric /// Note: O(size() + Chars.size())
find_first_not_of(StringRef Chars,size_t From) const2590b57cec5SDimitry Andric StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
2600b57cec5SDimitry Andric size_t From) const {
2610b57cec5SDimitry Andric std::bitset<1 << CHAR_BIT> CharBits;
2624824e7fdSDimitry Andric for (char C : Chars)
2634824e7fdSDimitry Andric CharBits.set((unsigned char)C);
2640b57cec5SDimitry Andric
2650b57cec5SDimitry Andric for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
2660b57cec5SDimitry Andric if (!CharBits.test((unsigned char)Data[i]))
2670b57cec5SDimitry Andric return i;
2680b57cec5SDimitry Andric return npos;
2690b57cec5SDimitry Andric }
2700b57cec5SDimitry Andric
2710b57cec5SDimitry Andric /// find_last_of - Find the last character in the string that is in \arg C,
2720b57cec5SDimitry Andric /// or npos if not found.
2730b57cec5SDimitry Andric ///
2740b57cec5SDimitry Andric /// Note: O(size() + Chars.size())
find_last_of(StringRef Chars,size_t From) const2750b57cec5SDimitry Andric StringRef::size_type StringRef::find_last_of(StringRef Chars,
2760b57cec5SDimitry Andric size_t From) const {
2770b57cec5SDimitry Andric std::bitset<1 << CHAR_BIT> CharBits;
2784824e7fdSDimitry Andric for (char C : Chars)
2794824e7fdSDimitry Andric CharBits.set((unsigned char)C);
2800b57cec5SDimitry Andric
2810b57cec5SDimitry Andric for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
2820b57cec5SDimitry Andric if (CharBits.test((unsigned char)Data[i]))
2830b57cec5SDimitry Andric return i;
2840b57cec5SDimitry Andric return npos;
2850b57cec5SDimitry Andric }
2860b57cec5SDimitry Andric
2870b57cec5SDimitry Andric /// find_last_not_of - Find the last character in the string that is not
2880b57cec5SDimitry Andric /// \arg C, or npos if not found.
find_last_not_of(char C,size_t From) const2890b57cec5SDimitry Andric StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
2900b57cec5SDimitry Andric for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
2910b57cec5SDimitry Andric if (Data[i] != C)
2920b57cec5SDimitry Andric return i;
2930b57cec5SDimitry Andric return npos;
2940b57cec5SDimitry Andric }
2950b57cec5SDimitry Andric
2960b57cec5SDimitry Andric /// find_last_not_of - Find the last character in the string that is not in
2970b57cec5SDimitry Andric /// \arg Chars, or npos if not found.
2980b57cec5SDimitry Andric ///
2990b57cec5SDimitry Andric /// Note: O(size() + Chars.size())
find_last_not_of(StringRef Chars,size_t From) const3000b57cec5SDimitry Andric StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
3010b57cec5SDimitry Andric size_t From) const {
3020b57cec5SDimitry Andric std::bitset<1 << CHAR_BIT> CharBits;
3034824e7fdSDimitry Andric for (char C : Chars)
3044824e7fdSDimitry Andric CharBits.set((unsigned char)C);
3050b57cec5SDimitry Andric
3060b57cec5SDimitry Andric for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
3070b57cec5SDimitry Andric if (!CharBits.test((unsigned char)Data[i]))
3080b57cec5SDimitry Andric return i;
3090b57cec5SDimitry Andric return npos;
3100b57cec5SDimitry Andric }
3110b57cec5SDimitry Andric
split(SmallVectorImpl<StringRef> & A,StringRef Separator,int MaxSplit,bool KeepEmpty) const3120b57cec5SDimitry Andric void StringRef::split(SmallVectorImpl<StringRef> &A,
3130b57cec5SDimitry Andric StringRef Separator, int MaxSplit,
3140b57cec5SDimitry Andric bool KeepEmpty) const {
3150b57cec5SDimitry Andric StringRef S = *this;
3160b57cec5SDimitry Andric
3170b57cec5SDimitry Andric // Count down from MaxSplit. When MaxSplit is -1, this will just split
3180b57cec5SDimitry Andric // "forever". This doesn't support splitting more than 2^31 times
3190b57cec5SDimitry Andric // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
3200b57cec5SDimitry Andric // but that seems unlikely to be useful.
3210b57cec5SDimitry Andric while (MaxSplit-- != 0) {
3220b57cec5SDimitry Andric size_t Idx = S.find(Separator);
3230b57cec5SDimitry Andric if (Idx == npos)
3240b57cec5SDimitry Andric break;
3250b57cec5SDimitry Andric
3260b57cec5SDimitry Andric // Push this split.
3270b57cec5SDimitry Andric if (KeepEmpty || Idx > 0)
3280b57cec5SDimitry Andric A.push_back(S.slice(0, Idx));
3290b57cec5SDimitry Andric
3300b57cec5SDimitry Andric // Jump forward.
3310b57cec5SDimitry Andric S = S.slice(Idx + Separator.size(), npos);
3320b57cec5SDimitry Andric }
3330b57cec5SDimitry Andric
3340b57cec5SDimitry Andric // Push the tail.
3350b57cec5SDimitry Andric if (KeepEmpty || !S.empty())
3360b57cec5SDimitry Andric A.push_back(S);
3370b57cec5SDimitry Andric }
3380b57cec5SDimitry Andric
split(SmallVectorImpl<StringRef> & A,char Separator,int MaxSplit,bool KeepEmpty) const3390b57cec5SDimitry Andric void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator,
3400b57cec5SDimitry Andric int MaxSplit, bool KeepEmpty) const {
3410b57cec5SDimitry Andric StringRef S = *this;
3420b57cec5SDimitry Andric
3430b57cec5SDimitry Andric // Count down from MaxSplit. When MaxSplit is -1, this will just split
3440b57cec5SDimitry Andric // "forever". This doesn't support splitting more than 2^31 times
3450b57cec5SDimitry Andric // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
3460b57cec5SDimitry Andric // but that seems unlikely to be useful.
3470b57cec5SDimitry Andric while (MaxSplit-- != 0) {
3480b57cec5SDimitry Andric size_t Idx = S.find(Separator);
3490b57cec5SDimitry Andric if (Idx == npos)
3500b57cec5SDimitry Andric break;
3510b57cec5SDimitry Andric
3520b57cec5SDimitry Andric // Push this split.
3530b57cec5SDimitry Andric if (KeepEmpty || Idx > 0)
3540b57cec5SDimitry Andric A.push_back(S.slice(0, Idx));
3550b57cec5SDimitry Andric
3560b57cec5SDimitry Andric // Jump forward.
3570b57cec5SDimitry Andric S = S.slice(Idx + 1, npos);
3580b57cec5SDimitry Andric }
3590b57cec5SDimitry Andric
3600b57cec5SDimitry Andric // Push the tail.
3610b57cec5SDimitry Andric if (KeepEmpty || !S.empty())
3620b57cec5SDimitry Andric A.push_back(S);
3630b57cec5SDimitry Andric }
3640b57cec5SDimitry Andric
3650b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
3660b57cec5SDimitry Andric // Helpful Algorithms
3670b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
3680b57cec5SDimitry Andric
3690b57cec5SDimitry Andric /// count - Return the number of non-overlapped occurrences of \arg Str in
3700b57cec5SDimitry Andric /// the string.
count(StringRef Str) const3710b57cec5SDimitry Andric size_t StringRef::count(StringRef Str) const {
3720b57cec5SDimitry Andric size_t Count = 0;
373bdd1243dSDimitry Andric size_t Pos = 0;
3740b57cec5SDimitry Andric size_t N = Str.size();
375bdd1243dSDimitry Andric // TODO: For an empty `Str` we return 0 for legacy reasons. Consider changing
376bdd1243dSDimitry Andric // this to `Length + 1` which is more in-line with the function
377bdd1243dSDimitry Andric // description.
378bdd1243dSDimitry Andric if (!N)
3790b57cec5SDimitry Andric return 0;
380bdd1243dSDimitry Andric while ((Pos = find(Str, Pos)) != npos) {
3810b57cec5SDimitry Andric ++Count;
382bdd1243dSDimitry Andric Pos += N;
383480093f4SDimitry Andric }
3840b57cec5SDimitry Andric return Count;
3850b57cec5SDimitry Andric }
3860b57cec5SDimitry Andric
GetAutoSenseRadix(StringRef & Str)3870b57cec5SDimitry Andric static unsigned GetAutoSenseRadix(StringRef &Str) {
3880b57cec5SDimitry Andric if (Str.empty())
3890b57cec5SDimitry Andric return 10;
3900b57cec5SDimitry Andric
391*a58f00eaSDimitry Andric if (Str.consume_front_insensitive("0x"))
3920b57cec5SDimitry Andric return 16;
3930b57cec5SDimitry Andric
394*a58f00eaSDimitry Andric if (Str.consume_front_insensitive("0b"))
3950b57cec5SDimitry Andric return 2;
3960b57cec5SDimitry Andric
397*a58f00eaSDimitry Andric if (Str.consume_front("0o"))
3980b57cec5SDimitry Andric return 8;
3990b57cec5SDimitry Andric
4000b57cec5SDimitry Andric if (Str[0] == '0' && Str.size() > 1 && isDigit(Str[1])) {
4010b57cec5SDimitry Andric Str = Str.substr(1);
4020b57cec5SDimitry Andric return 8;
4030b57cec5SDimitry Andric }
4040b57cec5SDimitry Andric
4050b57cec5SDimitry Andric return 10;
4060b57cec5SDimitry Andric }
4070b57cec5SDimitry Andric
consumeUnsignedInteger(StringRef & Str,unsigned Radix,unsigned long long & Result)4080b57cec5SDimitry Andric bool llvm::consumeUnsignedInteger(StringRef &Str, unsigned Radix,
4090b57cec5SDimitry Andric unsigned long long &Result) {
4100b57cec5SDimitry Andric // Autosense radix if not specified.
4110b57cec5SDimitry Andric if (Radix == 0)
4120b57cec5SDimitry Andric Radix = GetAutoSenseRadix(Str);
4130b57cec5SDimitry Andric
4140b57cec5SDimitry Andric // Empty strings (after the radix autosense) are invalid.
4150b57cec5SDimitry Andric if (Str.empty()) return true;
4160b57cec5SDimitry Andric
4170b57cec5SDimitry Andric // Parse all the bytes of the string given this radix. Watch for overflow.
4180b57cec5SDimitry Andric StringRef Str2 = Str;
4190b57cec5SDimitry Andric Result = 0;
4200b57cec5SDimitry Andric while (!Str2.empty()) {
4210b57cec5SDimitry Andric unsigned CharVal;
4220b57cec5SDimitry Andric if (Str2[0] >= '0' && Str2[0] <= '9')
4230b57cec5SDimitry Andric CharVal = Str2[0] - '0';
4240b57cec5SDimitry Andric else if (Str2[0] >= 'a' && Str2[0] <= 'z')
4250b57cec5SDimitry Andric CharVal = Str2[0] - 'a' + 10;
4260b57cec5SDimitry Andric else if (Str2[0] >= 'A' && Str2[0] <= 'Z')
4270b57cec5SDimitry Andric CharVal = Str2[0] - 'A' + 10;
4280b57cec5SDimitry Andric else
4290b57cec5SDimitry Andric break;
4300b57cec5SDimitry Andric
4310b57cec5SDimitry Andric // If the parsed value is larger than the integer radix, we cannot
4320b57cec5SDimitry Andric // consume any more characters.
4330b57cec5SDimitry Andric if (CharVal >= Radix)
4340b57cec5SDimitry Andric break;
4350b57cec5SDimitry Andric
4360b57cec5SDimitry Andric // Add in this character.
4370b57cec5SDimitry Andric unsigned long long PrevResult = Result;
4380b57cec5SDimitry Andric Result = Result * Radix + CharVal;
4390b57cec5SDimitry Andric
4400b57cec5SDimitry Andric // Check for overflow by shifting back and seeing if bits were lost.
4410b57cec5SDimitry Andric if (Result / Radix < PrevResult)
4420b57cec5SDimitry Andric return true;
4430b57cec5SDimitry Andric
4440b57cec5SDimitry Andric Str2 = Str2.substr(1);
4450b57cec5SDimitry Andric }
4460b57cec5SDimitry Andric
4470b57cec5SDimitry Andric // We consider the operation a failure if no characters were consumed
4480b57cec5SDimitry Andric // successfully.
4490b57cec5SDimitry Andric if (Str.size() == Str2.size())
4500b57cec5SDimitry Andric return true;
4510b57cec5SDimitry Andric
4520b57cec5SDimitry Andric Str = Str2;
4530b57cec5SDimitry Andric return false;
4540b57cec5SDimitry Andric }
4550b57cec5SDimitry Andric
consumeSignedInteger(StringRef & Str,unsigned Radix,long long & Result)4560b57cec5SDimitry Andric bool llvm::consumeSignedInteger(StringRef &Str, unsigned Radix,
4570b57cec5SDimitry Andric long long &Result) {
4580b57cec5SDimitry Andric unsigned long long ULLVal;
4590b57cec5SDimitry Andric
4600b57cec5SDimitry Andric // Handle positive strings first.
4610b57cec5SDimitry Andric if (Str.empty() || Str.front() != '-') {
4620b57cec5SDimitry Andric if (consumeUnsignedInteger(Str, Radix, ULLVal) ||
4630b57cec5SDimitry Andric // Check for value so large it overflows a signed value.
4640b57cec5SDimitry Andric (long long)ULLVal < 0)
4650b57cec5SDimitry Andric return true;
4660b57cec5SDimitry Andric Result = ULLVal;
4670b57cec5SDimitry Andric return false;
4680b57cec5SDimitry Andric }
4690b57cec5SDimitry Andric
4700b57cec5SDimitry Andric // Get the positive part of the value.
4710b57cec5SDimitry Andric StringRef Str2 = Str.drop_front(1);
4720b57cec5SDimitry Andric if (consumeUnsignedInteger(Str2, Radix, ULLVal) ||
4730b57cec5SDimitry Andric // Reject values so large they'd overflow as negative signed, but allow
4740b57cec5SDimitry Andric // "-0". This negates the unsigned so that the negative isn't undefined
4750b57cec5SDimitry Andric // on signed overflow.
4760b57cec5SDimitry Andric (long long)-ULLVal > 0)
4770b57cec5SDimitry Andric return true;
4780b57cec5SDimitry Andric
4790b57cec5SDimitry Andric Str = Str2;
4800b57cec5SDimitry Andric Result = -ULLVal;
4810b57cec5SDimitry Andric return false;
4820b57cec5SDimitry Andric }
4830b57cec5SDimitry Andric
4840b57cec5SDimitry Andric /// GetAsUnsignedInteger - Workhorse method that converts a integer character
4850b57cec5SDimitry Andric /// sequence of radix up to 36 to an unsigned long long value.
getAsUnsignedInteger(StringRef Str,unsigned Radix,unsigned long long & Result)4860b57cec5SDimitry Andric bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
4870b57cec5SDimitry Andric unsigned long long &Result) {
4880b57cec5SDimitry Andric if (consumeUnsignedInteger(Str, Radix, Result))
4890b57cec5SDimitry Andric return true;
4900b57cec5SDimitry Andric
4910b57cec5SDimitry Andric // For getAsUnsignedInteger, we require the whole string to be consumed or
4920b57cec5SDimitry Andric // else we consider it a failure.
4930b57cec5SDimitry Andric return !Str.empty();
4940b57cec5SDimitry Andric }
4950b57cec5SDimitry Andric
getAsSignedInteger(StringRef Str,unsigned Radix,long long & Result)4960b57cec5SDimitry Andric bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
4970b57cec5SDimitry Andric long long &Result) {
4980b57cec5SDimitry Andric if (consumeSignedInteger(Str, Radix, Result))
4990b57cec5SDimitry Andric return true;
5000b57cec5SDimitry Andric
5010b57cec5SDimitry Andric // For getAsSignedInteger, we require the whole string to be consumed or else
5020b57cec5SDimitry Andric // we consider it a failure.
5030b57cec5SDimitry Andric return !Str.empty();
5040b57cec5SDimitry Andric }
5050b57cec5SDimitry Andric
consumeInteger(unsigned Radix,APInt & Result)506fe013be4SDimitry Andric bool StringRef::consumeInteger(unsigned Radix, APInt &Result) {
5070b57cec5SDimitry Andric StringRef Str = *this;
5080b57cec5SDimitry Andric
5090b57cec5SDimitry Andric // Autosense radix if not specified.
5100b57cec5SDimitry Andric if (Radix == 0)
5110b57cec5SDimitry Andric Radix = GetAutoSenseRadix(Str);
5120b57cec5SDimitry Andric
5130b57cec5SDimitry Andric assert(Radix > 1 && Radix <= 36);
5140b57cec5SDimitry Andric
5150b57cec5SDimitry Andric // Empty strings (after the radix autosense) are invalid.
5160b57cec5SDimitry Andric if (Str.empty()) return true;
5170b57cec5SDimitry Andric
5180b57cec5SDimitry Andric // Skip leading zeroes. This can be a significant improvement if
5190b57cec5SDimitry Andric // it means we don't need > 64 bits.
520*a58f00eaSDimitry Andric Str = Str.ltrim('0');
5210b57cec5SDimitry Andric
5220b57cec5SDimitry Andric // If it was nothing but zeroes....
5230b57cec5SDimitry Andric if (Str.empty()) {
5240b57cec5SDimitry Andric Result = APInt(64, 0);
525fe013be4SDimitry Andric *this = Str;
5260b57cec5SDimitry Andric return false;
5270b57cec5SDimitry Andric }
5280b57cec5SDimitry Andric
5290b57cec5SDimitry Andric // (Over-)estimate the required number of bits.
5300b57cec5SDimitry Andric unsigned Log2Radix = 0;
5310b57cec5SDimitry Andric while ((1U << Log2Radix) < Radix) Log2Radix++;
5320b57cec5SDimitry Andric bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
5330b57cec5SDimitry Andric
5340b57cec5SDimitry Andric unsigned BitWidth = Log2Radix * Str.size();
5350b57cec5SDimitry Andric if (BitWidth < Result.getBitWidth())
5360b57cec5SDimitry Andric BitWidth = Result.getBitWidth(); // don't shrink the result
5370b57cec5SDimitry Andric else if (BitWidth > Result.getBitWidth())
5380b57cec5SDimitry Andric Result = Result.zext(BitWidth);
5390b57cec5SDimitry Andric
5400b57cec5SDimitry Andric APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
5410b57cec5SDimitry Andric if (!IsPowerOf2Radix) {
5420b57cec5SDimitry Andric // These must have the same bit-width as Result.
5430b57cec5SDimitry Andric RadixAP = APInt(BitWidth, Radix);
5440b57cec5SDimitry Andric CharAP = APInt(BitWidth, 0);
5450b57cec5SDimitry Andric }
5460b57cec5SDimitry Andric
5470b57cec5SDimitry Andric // Parse all the bytes of the string given this radix.
5480b57cec5SDimitry Andric Result = 0;
5490b57cec5SDimitry Andric while (!Str.empty()) {
5500b57cec5SDimitry Andric unsigned CharVal;
5510b57cec5SDimitry Andric if (Str[0] >= '0' && Str[0] <= '9')
5520b57cec5SDimitry Andric CharVal = Str[0]-'0';
5530b57cec5SDimitry Andric else if (Str[0] >= 'a' && Str[0] <= 'z')
5540b57cec5SDimitry Andric CharVal = Str[0]-'a'+10;
5550b57cec5SDimitry Andric else if (Str[0] >= 'A' && Str[0] <= 'Z')
5560b57cec5SDimitry Andric CharVal = Str[0]-'A'+10;
5570b57cec5SDimitry Andric else
558fe013be4SDimitry Andric break;
5590b57cec5SDimitry Andric
5600b57cec5SDimitry Andric // If the parsed value is larger than the integer radix, the string is
5610b57cec5SDimitry Andric // invalid.
5620b57cec5SDimitry Andric if (CharVal >= Radix)
563fe013be4SDimitry Andric break;
5640b57cec5SDimitry Andric
5650b57cec5SDimitry Andric // Add in this character.
5660b57cec5SDimitry Andric if (IsPowerOf2Radix) {
5670b57cec5SDimitry Andric Result <<= Log2Radix;
5680b57cec5SDimitry Andric Result |= CharVal;
5690b57cec5SDimitry Andric } else {
5700b57cec5SDimitry Andric Result *= RadixAP;
5710b57cec5SDimitry Andric CharAP = CharVal;
5720b57cec5SDimitry Andric Result += CharAP;
5730b57cec5SDimitry Andric }
5740b57cec5SDimitry Andric
5750b57cec5SDimitry Andric Str = Str.substr(1);
5760b57cec5SDimitry Andric }
5770b57cec5SDimitry Andric
578fe013be4SDimitry Andric // We consider the operation a failure if no characters were consumed
579fe013be4SDimitry Andric // successfully.
580fe013be4SDimitry Andric if (size() == Str.size())
581fe013be4SDimitry Andric return true;
582fe013be4SDimitry Andric
583fe013be4SDimitry Andric *this = Str;
5840b57cec5SDimitry Andric return false;
5850b57cec5SDimitry Andric }
5860b57cec5SDimitry Andric
getAsInteger(unsigned Radix,APInt & Result) const587fe013be4SDimitry Andric bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
588fe013be4SDimitry Andric StringRef Str = *this;
589fe013be4SDimitry Andric if (Str.consumeInteger(Radix, Result))
590fe013be4SDimitry Andric return true;
591fe013be4SDimitry Andric
592fe013be4SDimitry Andric // For getAsInteger, we require the whole string to be consumed or else we
593fe013be4SDimitry Andric // consider it a failure.
594fe013be4SDimitry Andric return !Str.empty();
595fe013be4SDimitry Andric }
596fe013be4SDimitry Andric
getAsDouble(double & Result,bool AllowInexact) const5970b57cec5SDimitry Andric bool StringRef::getAsDouble(double &Result, bool AllowInexact) const {
5980b57cec5SDimitry Andric APFloat F(0.0);
599480093f4SDimitry Andric auto StatusOrErr = F.convertFromString(*this, APFloat::rmNearestTiesToEven);
600480093f4SDimitry Andric if (errorToBool(StatusOrErr.takeError()))
601480093f4SDimitry Andric return true;
602480093f4SDimitry Andric
603480093f4SDimitry Andric APFloat::opStatus Status = *StatusOrErr;
6040b57cec5SDimitry Andric if (Status != APFloat::opOK) {
6050b57cec5SDimitry Andric if (!AllowInexact || !(Status & APFloat::opInexact))
6060b57cec5SDimitry Andric return true;
6070b57cec5SDimitry Andric }
6080b57cec5SDimitry Andric
6090b57cec5SDimitry Andric Result = F.convertToDouble();
6100b57cec5SDimitry Andric return false;
6110b57cec5SDimitry Andric }
6120b57cec5SDimitry Andric
6130b57cec5SDimitry Andric // Implementation of StringRef hashing.
hash_value(StringRef S)6140b57cec5SDimitry Andric hash_code llvm::hash_value(StringRef S) {
6150b57cec5SDimitry Andric return hash_combine_range(S.begin(), S.end());
6160b57cec5SDimitry Andric }
61704eeddc0SDimitry Andric
getHashValue(StringRef Val)61804eeddc0SDimitry Andric unsigned DenseMapInfo<StringRef, void>::getHashValue(StringRef Val) {
61904eeddc0SDimitry Andric assert(Val.data() != getEmptyKey().data() &&
62004eeddc0SDimitry Andric "Cannot hash the empty key!");
62104eeddc0SDimitry Andric assert(Val.data() != getTombstoneKey().data() &&
62204eeddc0SDimitry Andric "Cannot hash the tombstone key!");
62304eeddc0SDimitry Andric return (unsigned)(hash_value(Val));
62404eeddc0SDimitry Andric }
625