180814287SRaphael Isemann //===-- StringExtractor.cpp -----------------------------------------------===//
230fdc8d8SChris Lattner //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
630fdc8d8SChris Lattner //
730fdc8d8SChris Lattner //===----------------------------------------------------------------------===//
830fdc8d8SChris Lattner 
9f805e190SPavel Labath #include "lldb/Utility/StringExtractor.h"
10*f5eaa2afSRaphael Isemann #include "llvm/ADT/StringExtras.h"
1130fdc8d8SChris Lattner 
1254695a33SZachary Turner #include <tuple>
134479ac15SZachary Turner 
14672d2c12SJonas Devlieghere #include <ctype.h>
154479ac15SZachary Turner #include <stdlib.h>
16672d2c12SJonas Devlieghere #include <string.h>
1730fdc8d8SChris Lattner 
// Map a single ASCII hex digit ('0'-'9', 'a'-'f', 'A'-'F') to its numeric
// value in the range [0, 15]. Any other character yields -1.
static inline int xdigit_to_sint(char ch) {
  const bool is_decimal = ch >= '0' && ch <= '9';
  if (is_decimal)
    return ch - '0';

  const bool is_lower_hex = ch >= 'a' && ch <= 'f';
  if (is_lower_hex)
    return (ch - 'a') + 10;

  const bool is_upper_hex = ch >= 'A' && ch <= 'F';
  if (is_upper_hex)
    return (ch - 'A') + 10;

  return -1;
}
2730fdc8d8SChris Lattner 
2830fdc8d8SChris Lattner // StringExtractor constructor
29b9c1b51eSKate Stone StringExtractor::StringExtractor() : m_packet(), m_index(0) {}
3030fdc8d8SChris Lattner 
31b9c1b51eSKate Stone StringExtractor::StringExtractor(llvm::StringRef packet_str)
32b9c1b51eSKate Stone     : m_packet(), m_index(0) {
3354695a33SZachary Turner   m_packet.assign(packet_str.begin(), packet_str.end());
3454695a33SZachary Turner }
3530fdc8d8SChris Lattner 
36b9c1b51eSKate Stone StringExtractor::StringExtractor(const char *packet_cstr)
37b9c1b51eSKate Stone     : m_packet(), m_index(0) {
3830fdc8d8SChris Lattner   if (packet_cstr)
3930fdc8d8SChris Lattner     m_packet.assign(packet_cstr);
4030fdc8d8SChris Lattner }
4130fdc8d8SChris Lattner 
4230fdc8d8SChris Lattner // Destructor
43b9c1b51eSKate Stone StringExtractor::~StringExtractor() {}
4430fdc8d8SChris Lattner 
45b9c1b51eSKate Stone char StringExtractor::GetChar(char fail_value) {
46b9c1b51eSKate Stone   if (m_index < m_packet.size()) {
4730fdc8d8SChris Lattner     char ch = m_packet[m_index];
4830fdc8d8SChris Lattner     ++m_index;
4930fdc8d8SChris Lattner     return ch;
5030fdc8d8SChris Lattner   }
51c7bece56SGreg Clayton   m_index = UINT64_MAX;
5230fdc8d8SChris Lattner   return fail_value;
5330fdc8d8SChris Lattner }
5430fdc8d8SChris Lattner 
5505097246SAdrian Prantl // If a pair of valid hex digits exist at the head of the StringExtractor they
5605097246SAdrian Prantl // are decoded into an unsigned byte and returned by this function
576eddf8dfSVince Harron //
586eddf8dfSVince Harron // If there is not a pair of valid hex digits at the head of the
596eddf8dfSVince Harron // StringExtractor, it is left unchanged and -1 is returned
60b9c1b51eSKate Stone int StringExtractor::DecodeHexU8() {
6115a2165dSFrancis Ricci   SkipSpaces();
62b9c1b51eSKate Stone   if (GetBytesLeft() < 2) {
636eddf8dfSVince Harron     return -1;
64b9739d40SPavel Labath   }
65b9739d40SPavel Labath   const int hi_nibble = xdigit_to_sint(m_packet[m_index]);
66b9739d40SPavel Labath   const int lo_nibble = xdigit_to_sint(m_packet[m_index + 1]);
67b9c1b51eSKate Stone   if (hi_nibble == -1 || lo_nibble == -1) {
686eddf8dfSVince Harron     return -1;
69b9739d40SPavel Labath   }
706eddf8dfSVince Harron   m_index += 2;
7124374aefSJonas Devlieghere   return static_cast<uint8_t>((hi_nibble << 4) + lo_nibble);
726eddf8dfSVince Harron }
736eddf8dfSVince Harron 
7405097246SAdrian Prantl // Extract an unsigned character from two hex ASCII chars in the packet string,
7505097246SAdrian Prantl // or return fail_value on failure
76b9c1b51eSKate Stone uint8_t StringExtractor::GetHexU8(uint8_t fail_value, bool set_eof_on_fail) {
7705097246SAdrian Prantl   // On success, fail_value will be overwritten with the next character in the
7805097246SAdrian Prantl   // stream
79554a8571SDawn Perchik   GetHexU8Ex(fail_value, set_eof_on_fail);
80554a8571SDawn Perchik   return fail_value;
81554a8571SDawn Perchik }
82554a8571SDawn Perchik 
83b9c1b51eSKate Stone bool StringExtractor::GetHexU8Ex(uint8_t &ch, bool set_eof_on_fail) {
846eddf8dfSVince Harron   int byte = DecodeHexU8();
85b9c1b51eSKate Stone   if (byte == -1) {
867b70be39SGreg Clayton     if (set_eof_on_fail || m_index >= m_packet.size())
87c7bece56SGreg Clayton       m_index = UINT64_MAX;
88554a8571SDawn Perchik     // ch should not be changed in case of failure
89554a8571SDawn Perchik     return false;
9030fdc8d8SChris Lattner   }
9124374aefSJonas Devlieghere   ch = static_cast<uint8_t>(byte);
92554a8571SDawn Perchik   return true;
936eddf8dfSVince Harron }
9430fdc8d8SChris Lattner 
95b9c1b51eSKate Stone uint32_t StringExtractor::GetU32(uint32_t fail_value, int base) {
96b9c1b51eSKate Stone   if (m_index < m_packet.size()) {
97d4612ad0SEd Maste     char *end = nullptr;
9832e0a750SGreg Clayton     const char *start = m_packet.c_str();
99e0f8f574SDaniel Malea     const char *cstr = start + m_index;
100f2d44ca8SEnrico Granata     uint32_t result = static_cast<uint32_t>(::strtoul(cstr, &end, base));
10132e0a750SGreg Clayton 
102b9c1b51eSKate Stone     if (end && end != cstr) {
103e0f8f574SDaniel Malea       m_index = end - start;
104e0f8f574SDaniel Malea       return result;
105e0f8f574SDaniel Malea     }
106e0f8f574SDaniel Malea   }
107e0f8f574SDaniel Malea   return fail_value;
108e0f8f574SDaniel Malea }
109e0f8f574SDaniel Malea 
110b9c1b51eSKate Stone int32_t StringExtractor::GetS32(int32_t fail_value, int base) {
111b9c1b51eSKate Stone   if (m_index < m_packet.size()) {
112d4612ad0SEd Maste     char *end = nullptr;
113e0f8f574SDaniel Malea     const char *start = m_packet.c_str();
114e0f8f574SDaniel Malea     const char *cstr = start + m_index;
115f2d44ca8SEnrico Granata     int32_t result = static_cast<int32_t>(::strtol(cstr, &end, base));
116e0f8f574SDaniel Malea 
117b9c1b51eSKate Stone     if (end && end != cstr) {
118e0f8f574SDaniel Malea       m_index = end - start;
119e0f8f574SDaniel Malea       return result;
120e0f8f574SDaniel Malea     }
121e0f8f574SDaniel Malea   }
122e0f8f574SDaniel Malea   return fail_value;
123e0f8f574SDaniel Malea }
124e0f8f574SDaniel Malea 
125b9c1b51eSKate Stone uint64_t StringExtractor::GetU64(uint64_t fail_value, int base) {
126b9c1b51eSKate Stone   if (m_index < m_packet.size()) {
127d4612ad0SEd Maste     char *end = nullptr;
128e0f8f574SDaniel Malea     const char *start = m_packet.c_str();
129e0f8f574SDaniel Malea     const char *cstr = start + m_index;
130e0f8f574SDaniel Malea     uint64_t result = ::strtoull(cstr, &end, base);
131e0f8f574SDaniel Malea 
132b9c1b51eSKate Stone     if (end && end != cstr) {
133e0f8f574SDaniel Malea       m_index = end - start;
134e0f8f574SDaniel Malea       return result;
135e0f8f574SDaniel Malea     }
136e0f8f574SDaniel Malea   }
137e0f8f574SDaniel Malea   return fail_value;
138e0f8f574SDaniel Malea }
139e0f8f574SDaniel Malea 
140b9c1b51eSKate Stone int64_t StringExtractor::GetS64(int64_t fail_value, int base) {
141b9c1b51eSKate Stone   if (m_index < m_packet.size()) {
142d4612ad0SEd Maste     char *end = nullptr;
143e0f8f574SDaniel Malea     const char *start = m_packet.c_str();
144e0f8f574SDaniel Malea     const char *cstr = start + m_index;
145e0f8f574SDaniel Malea     int64_t result = ::strtoll(cstr, &end, base);
146e0f8f574SDaniel Malea 
147b9c1b51eSKate Stone     if (end && end != cstr) {
14832e0a750SGreg Clayton       m_index = end - start;
14932e0a750SGreg Clayton       return result;
15032e0a750SGreg Clayton     }
15132e0a750SGreg Clayton   }
15232e0a750SGreg Clayton   return fail_value;
15332e0a750SGreg Clayton }
15432e0a750SGreg Clayton 
155b9c1b51eSKate Stone uint32_t StringExtractor::GetHexMaxU32(bool little_endian,
156b9c1b51eSKate Stone                                        uint32_t fail_value) {
157b9739d40SPavel Labath   uint32_t result = 0;
158b9739d40SPavel Labath   uint32_t nibble_count = 0;
159b9739d40SPavel Labath 
16015a2165dSFrancis Ricci   SkipSpaces();
161b9c1b51eSKate Stone   if (little_endian) {
162b9739d40SPavel Labath     uint32_t shift_amount = 0;
163b9c1b51eSKate Stone     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
164b9739d40SPavel Labath       // Make sure we don't exceed the size of a uint32_t...
165b9c1b51eSKate Stone       if (nibble_count >= (sizeof(uint32_t) * 2)) {
166b9739d40SPavel Labath         m_index = UINT64_MAX;
167b9739d40SPavel Labath         return fail_value;
168b9739d40SPavel Labath       }
169b9739d40SPavel Labath 
170b9739d40SPavel Labath       uint8_t nibble_lo;
171b9739d40SPavel Labath       uint8_t nibble_hi = xdigit_to_sint(m_packet[m_index]);
172b9739d40SPavel Labath       ++m_index;
173b9c1b51eSKate Stone       if (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
174b9739d40SPavel Labath         nibble_lo = xdigit_to_sint(m_packet[m_index]);
175b9739d40SPavel Labath         ++m_index;
17624374aefSJonas Devlieghere         result |= (static_cast<uint32_t>(nibble_hi) << (shift_amount + 4));
17724374aefSJonas Devlieghere         result |= (static_cast<uint32_t>(nibble_lo) << shift_amount);
178b9739d40SPavel Labath         nibble_count += 2;
179b9739d40SPavel Labath         shift_amount += 8;
180b9c1b51eSKate Stone       } else {
18124374aefSJonas Devlieghere         result |= (static_cast<uint32_t>(nibble_hi) << shift_amount);
182b9739d40SPavel Labath         nibble_count += 1;
183b9739d40SPavel Labath         shift_amount += 4;
18430fdc8d8SChris Lattner       }
185b9739d40SPavel Labath     }
186b9c1b51eSKate Stone   } else {
187b9c1b51eSKate Stone     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
188b9739d40SPavel Labath       // Make sure we don't exceed the size of a uint32_t...
189b9c1b51eSKate Stone       if (nibble_count >= (sizeof(uint32_t) * 2)) {
190b9739d40SPavel Labath         m_index = UINT64_MAX;
191b9739d40SPavel Labath         return fail_value;
192b9739d40SPavel Labath       }
193b9739d40SPavel Labath 
194b9739d40SPavel Labath       uint8_t nibble = xdigit_to_sint(m_packet[m_index]);
195b9739d40SPavel Labath       // Big Endian
196b9739d40SPavel Labath       result <<= 4;
197b9739d40SPavel Labath       result |= nibble;
198b9739d40SPavel Labath 
199b9739d40SPavel Labath       ++m_index;
200b9739d40SPavel Labath       ++nibble_count;
201b9739d40SPavel Labath     }
202b9739d40SPavel Labath   }
203b9739d40SPavel Labath   return result;
20430fdc8d8SChris Lattner }
20530fdc8d8SChris Lattner 
206b9c1b51eSKate Stone uint64_t StringExtractor::GetHexMaxU64(bool little_endian,
207b9c1b51eSKate Stone                                        uint64_t fail_value) {
208b9739d40SPavel Labath   uint64_t result = 0;
209b9739d40SPavel Labath   uint32_t nibble_count = 0;
210b9739d40SPavel Labath 
21115a2165dSFrancis Ricci   SkipSpaces();
212b9c1b51eSKate Stone   if (little_endian) {
213b9739d40SPavel Labath     uint32_t shift_amount = 0;
214b9c1b51eSKate Stone     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
215b9739d40SPavel Labath       // Make sure we don't exceed the size of a uint64_t...
216b9c1b51eSKate Stone       if (nibble_count >= (sizeof(uint64_t) * 2)) {
217b9739d40SPavel Labath         m_index = UINT64_MAX;
218b9739d40SPavel Labath         return fail_value;
219b9739d40SPavel Labath       }
220b9739d40SPavel Labath 
221b9739d40SPavel Labath       uint8_t nibble_lo;
222b9739d40SPavel Labath       uint8_t nibble_hi = xdigit_to_sint(m_packet[m_index]);
223b9739d40SPavel Labath       ++m_index;
224b9c1b51eSKate Stone       if (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
225b9739d40SPavel Labath         nibble_lo = xdigit_to_sint(m_packet[m_index]);
226b9739d40SPavel Labath         ++m_index;
22724374aefSJonas Devlieghere         result |= (static_cast<uint64_t>(nibble_hi) << (shift_amount + 4));
22824374aefSJonas Devlieghere         result |= (static_cast<uint64_t>(nibble_lo) << shift_amount);
229b9739d40SPavel Labath         nibble_count += 2;
230b9739d40SPavel Labath         shift_amount += 8;
231b9c1b51eSKate Stone       } else {
23224374aefSJonas Devlieghere         result |= (static_cast<uint64_t>(nibble_hi) << shift_amount);
233b9739d40SPavel Labath         nibble_count += 1;
234b9739d40SPavel Labath         shift_amount += 4;
23530fdc8d8SChris Lattner       }
236b9739d40SPavel Labath     }
237b9c1b51eSKate Stone   } else {
238b9c1b51eSKate Stone     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
239b9739d40SPavel Labath       // Make sure we don't exceed the size of a uint64_t...
240b9c1b51eSKate Stone       if (nibble_count >= (sizeof(uint64_t) * 2)) {
241b9739d40SPavel Labath         m_index = UINT64_MAX;
242b9739d40SPavel Labath         return fail_value;
243b9739d40SPavel Labath       }
244b9739d40SPavel Labath 
245b9739d40SPavel Labath       uint8_t nibble = xdigit_to_sint(m_packet[m_index]);
246b9739d40SPavel Labath       // Big Endian
247b9739d40SPavel Labath       result <<= 4;
248b9739d40SPavel Labath       result |= nibble;
249b9739d40SPavel Labath 
250b9739d40SPavel Labath       ++m_index;
251b9739d40SPavel Labath       ++nibble_count;
252b9739d40SPavel Labath     }
253b9739d40SPavel Labath   }
254b9739d40SPavel Labath   return result;
25530fdc8d8SChris Lattner }
25630fdc8d8SChris Lattner 
257e714c4f5SRavitheja Addepally bool StringExtractor::ConsumeFront(const llvm::StringRef &str) {
258e714c4f5SRavitheja Addepally   llvm::StringRef S = GetStringRef();
259e714c4f5SRavitheja Addepally   if (!S.startswith(str))
260e714c4f5SRavitheja Addepally     return false;
261e714c4f5SRavitheja Addepally   else
262e714c4f5SRavitheja Addepally     m_index += str.size();
263e714c4f5SRavitheja Addepally   return true;
264e714c4f5SRavitheja Addepally }
265e714c4f5SRavitheja Addepally 
266b9c1b51eSKate Stone size_t StringExtractor::GetHexBytes(llvm::MutableArrayRef<uint8_t> dest,
267b9c1b51eSKate Stone                                     uint8_t fail_fill_value) {
26830fdc8d8SChris Lattner   size_t bytes_extracted = 0;
269b9c1b51eSKate Stone   while (!dest.empty() && GetBytesLeft() > 0) {
270d08f09c1SZachary Turner     dest[0] = GetHexU8(fail_fill_value);
271d08f09c1SZachary Turner     if (!IsGood())
27230fdc8d8SChris Lattner       break;
273d08f09c1SZachary Turner     ++bytes_extracted;
274d08f09c1SZachary Turner     dest = dest.drop_front();
27530fdc8d8SChris Lattner   }
27630fdc8d8SChris Lattner 
277d08f09c1SZachary Turner   if (!dest.empty())
278d08f09c1SZachary Turner     ::memset(dest.data(), fail_fill_value, dest.size());
27930fdc8d8SChris Lattner 
28030fdc8d8SChris Lattner   return bytes_extracted;
28130fdc8d8SChris Lattner }
28230fdc8d8SChris Lattner 
28305097246SAdrian Prantl // Decodes all valid hex encoded bytes at the head of the StringExtractor,
28405097246SAdrian Prantl // limited by dst_len.
2856eddf8dfSVince Harron //
2866eddf8dfSVince Harron // Returns the number of bytes successfully decoded
287b9c1b51eSKate Stone size_t StringExtractor::GetHexBytesAvail(llvm::MutableArrayRef<uint8_t> dest) {
2886eddf8dfSVince Harron   size_t bytes_extracted = 0;
289b9c1b51eSKate Stone   while (!dest.empty()) {
2906eddf8dfSVince Harron     int decode = DecodeHexU8();
2916eddf8dfSVince Harron     if (decode == -1)
2926eddf8dfSVince Harron       break;
29324374aefSJonas Devlieghere     dest[0] = static_cast<uint8_t>(decode);
294d08f09c1SZachary Turner     dest = dest.drop_front();
295d08f09c1SZachary Turner     ++bytes_extracted;
2966eddf8dfSVince Harron   }
2976eddf8dfSVince Harron   return bytes_extracted;
2986eddf8dfSVince Harron }
29930fdc8d8SChris Lattner 
300b9c1b51eSKate Stone size_t StringExtractor::GetHexByteString(std::string &str) {
301de9d0494SGreg Clayton   str.clear();
3028c1b6bd7SPavel Labath   str.reserve(GetBytesLeft() / 2);
303de9d0494SGreg Clayton   char ch;
304de9d0494SGreg Clayton   while ((ch = GetHexU8()) != '\0')
305de9d0494SGreg Clayton     str.append(1, ch);
306de9d0494SGreg Clayton   return str.size();
307de9d0494SGreg Clayton }
308de9d0494SGreg Clayton 
309b9c1b51eSKate Stone size_t StringExtractor::GetHexByteStringFixedLength(std::string &str,
310b9c1b51eSKate Stone                                                     uint32_t nibble_length) {
311af245d11STodd Fiala   str.clear();
312b9739d40SPavel Labath 
313b9739d40SPavel Labath   uint32_t nibble_count = 0;
314b9c1b51eSKate Stone   for (const char *pch = Peek();
315b9c1b51eSKate Stone        (nibble_count < nibble_length) && (pch != nullptr);
316b9c1b51eSKate Stone        str.append(1, GetHexU8(0, false)), pch = Peek(), nibble_count += 2) {
317b9c1b51eSKate Stone   }
318b9739d40SPavel Labath 
319af245d11STodd Fiala   return str.size();
320af245d11STodd Fiala }
321af245d11STodd Fiala 
322b9c1b51eSKate Stone size_t StringExtractor::GetHexByteStringTerminatedBy(std::string &str,
323b9c1b51eSKate Stone                                                      char terminator) {
324e0f8f574SDaniel Malea   str.clear();
325e0f8f574SDaniel Malea   char ch;
326e0f8f574SDaniel Malea   while ((ch = GetHexU8(0, false)) != '\0')
327e0f8f574SDaniel Malea     str.append(1, ch);
328b9739d40SPavel Labath   if (Peek() && *Peek() == terminator)
329e0f8f574SDaniel Malea     return str.size();
330af245d11STodd Fiala 
331e0f8f574SDaniel Malea   str.clear();
332e0f8f574SDaniel Malea   return str.size();
333e0f8f574SDaniel Malea }
334e0f8f574SDaniel Malea 
335b9c1b51eSKate Stone bool StringExtractor::GetNameColonValue(llvm::StringRef &name,
336b9c1b51eSKate Stone                                         llvm::StringRef &value) {
33705097246SAdrian Prantl   // Read something in the form of NNNN:VVVV; where NNNN is any character that
33805097246SAdrian Prantl   // is not a colon, followed by a ':' character, then a value (one or more ';'
33905097246SAdrian Prantl   // chars), followed by a ';'
34054695a33SZachary Turner   if (m_index >= m_packet.size())
34154695a33SZachary Turner     return fail();
34254695a33SZachary Turner 
34354695a33SZachary Turner   llvm::StringRef view(m_packet);
34454695a33SZachary Turner   if (view.empty())
34554695a33SZachary Turner     return fail();
34654695a33SZachary Turner 
34754695a33SZachary Turner   llvm::StringRef a, b, c, d;
34854695a33SZachary Turner   view = view.substr(m_index);
34954695a33SZachary Turner   std::tie(a, b) = view.split(':');
35054695a33SZachary Turner   if (a.empty() || b.empty())
35154695a33SZachary Turner     return fail();
35254695a33SZachary Turner   std::tie(c, d) = b.split(';');
35354695a33SZachary Turner   if (b == c && d.empty())
35454695a33SZachary Turner     return fail();
35554695a33SZachary Turner 
35654695a33SZachary Turner   name = a;
35754695a33SZachary Turner   value = c;
35854695a33SZachary Turner   if (d.empty())
35954695a33SZachary Turner     m_index = m_packet.size();
360b9c1b51eSKate Stone   else {
36154695a33SZachary Turner     size_t bytes_consumed = d.data() - view.data();
36254695a33SZachary Turner     m_index += bytes_consumed;
36354695a33SZachary Turner   }
36430fdc8d8SChris Lattner   return true;
36530fdc8d8SChris Lattner }
36698424c44SGreg Clayton 
367b9c1b51eSKate Stone void StringExtractor::SkipSpaces() {
36898424c44SGreg Clayton   const size_t n = m_packet.size();
369*f5eaa2afSRaphael Isemann   while (m_index < n && llvm::isSpace(m_packet[m_index]))
37098424c44SGreg Clayton     ++m_index;
37198424c44SGreg Clayton }
372