1 //===-- StringExtractor.cpp -------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/StringExtractor.h"
10 
11 #include <tuple>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <string.h>
16 
// Map one ASCII hexadecimal digit to its numeric value.
//
// Returns 0-15 for characters in [0-9a-fA-F] and -1 for anything else.
static inline int xdigit_to_sint(char ch) {
  if ('0' <= ch && ch <= '9')
    return ch - '0';
  if ('a' <= ch && ch <= 'f')
    return (ch - 'a') + 10;
  if ('A' <= ch && ch <= 'F')
    return (ch - 'A') + 10;
  return -1;
}
26 
27 // StringExtractor constructor
28 StringExtractor::StringExtractor() : m_packet(), m_index(0) {}
29 
30 StringExtractor::StringExtractor(llvm::StringRef packet_str)
31     : m_packet(), m_index(0) {
32   m_packet.assign(packet_str.begin(), packet_str.end());
33 }
34 
35 StringExtractor::StringExtractor(const char *packet_cstr)
36     : m_packet(), m_index(0) {
37   if (packet_cstr)
38     m_packet.assign(packet_cstr);
39 }
40 
41 // StringExtractor copy constructor
42 StringExtractor::StringExtractor(const StringExtractor &rhs)
43     : m_packet(rhs.m_packet), m_index(rhs.m_index) {}
44 
45 // StringExtractor assignment operator
46 const StringExtractor &StringExtractor::operator=(const StringExtractor &rhs) {
47   if (this != &rhs) {
48     m_packet = rhs.m_packet;
49     m_index = rhs.m_index;
50   }
51   return *this;
52 }
53 
54 // Destructor
55 StringExtractor::~StringExtractor() {}
56 
57 char StringExtractor::GetChar(char fail_value) {
58   if (m_index < m_packet.size()) {
59     char ch = m_packet[m_index];
60     ++m_index;
61     return ch;
62   }
63   m_index = UINT64_MAX;
64   return fail_value;
65 }
66 
67 // If a pair of valid hex digits exist at the head of the StringExtractor they
68 // are decoded into an unsigned byte and returned by this function
69 //
70 // If there is not a pair of valid hex digits at the head of the
71 // StringExtractor, it is left unchanged and -1 is returned
72 int StringExtractor::DecodeHexU8() {
73   SkipSpaces();
74   if (GetBytesLeft() < 2) {
75     return -1;
76   }
77   const int hi_nibble = xdigit_to_sint(m_packet[m_index]);
78   const int lo_nibble = xdigit_to_sint(m_packet[m_index + 1]);
79   if (hi_nibble == -1 || lo_nibble == -1) {
80     return -1;
81   }
82   m_index += 2;
83   return (uint8_t)((hi_nibble << 4) + lo_nibble);
84 }
85 
86 // Extract an unsigned character from two hex ASCII chars in the packet string,
87 // or return fail_value on failure
88 uint8_t StringExtractor::GetHexU8(uint8_t fail_value, bool set_eof_on_fail) {
89   // On success, fail_value will be overwritten with the next character in the
90   // stream
91   GetHexU8Ex(fail_value, set_eof_on_fail);
92   return fail_value;
93 }
94 
95 bool StringExtractor::GetHexU8Ex(uint8_t &ch, bool set_eof_on_fail) {
96   int byte = DecodeHexU8();
97   if (byte == -1) {
98     if (set_eof_on_fail || m_index >= m_packet.size())
99       m_index = UINT64_MAX;
100     // ch should not be changed in case of failure
101     return false;
102   }
103   ch = (uint8_t)byte;
104   return true;
105 }
106 
107 uint32_t StringExtractor::GetU32(uint32_t fail_value, int base) {
108   if (m_index < m_packet.size()) {
109     char *end = nullptr;
110     const char *start = m_packet.c_str();
111     const char *cstr = start + m_index;
112     uint32_t result = static_cast<uint32_t>(::strtoul(cstr, &end, base));
113 
114     if (end && end != cstr) {
115       m_index = end - start;
116       return result;
117     }
118   }
119   return fail_value;
120 }
121 
122 int32_t StringExtractor::GetS32(int32_t fail_value, int base) {
123   if (m_index < m_packet.size()) {
124     char *end = nullptr;
125     const char *start = m_packet.c_str();
126     const char *cstr = start + m_index;
127     int32_t result = static_cast<int32_t>(::strtol(cstr, &end, base));
128 
129     if (end && end != cstr) {
130       m_index = end - start;
131       return result;
132     }
133   }
134   return fail_value;
135 }
136 
137 uint64_t StringExtractor::GetU64(uint64_t fail_value, int base) {
138   if (m_index < m_packet.size()) {
139     char *end = nullptr;
140     const char *start = m_packet.c_str();
141     const char *cstr = start + m_index;
142     uint64_t result = ::strtoull(cstr, &end, base);
143 
144     if (end && end != cstr) {
145       m_index = end - start;
146       return result;
147     }
148   }
149   return fail_value;
150 }
151 
152 int64_t StringExtractor::GetS64(int64_t fail_value, int base) {
153   if (m_index < m_packet.size()) {
154     char *end = nullptr;
155     const char *start = m_packet.c_str();
156     const char *cstr = start + m_index;
157     int64_t result = ::strtoll(cstr, &end, base);
158 
159     if (end && end != cstr) {
160       m_index = end - start;
161       return result;
162     }
163   }
164   return fail_value;
165 }
166 
167 uint32_t StringExtractor::GetHexMaxU32(bool little_endian,
168                                        uint32_t fail_value) {
169   uint32_t result = 0;
170   uint32_t nibble_count = 0;
171 
172   SkipSpaces();
173   if (little_endian) {
174     uint32_t shift_amount = 0;
175     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
176       // Make sure we don't exceed the size of a uint32_t...
177       if (nibble_count >= (sizeof(uint32_t) * 2)) {
178         m_index = UINT64_MAX;
179         return fail_value;
180       }
181 
182       uint8_t nibble_lo;
183       uint8_t nibble_hi = xdigit_to_sint(m_packet[m_index]);
184       ++m_index;
185       if (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
186         nibble_lo = xdigit_to_sint(m_packet[m_index]);
187         ++m_index;
188         result |= ((uint32_t)nibble_hi << (shift_amount + 4));
189         result |= ((uint32_t)nibble_lo << shift_amount);
190         nibble_count += 2;
191         shift_amount += 8;
192       } else {
193         result |= ((uint32_t)nibble_hi << shift_amount);
194         nibble_count += 1;
195         shift_amount += 4;
196       }
197     }
198   } else {
199     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
200       // Make sure we don't exceed the size of a uint32_t...
201       if (nibble_count >= (sizeof(uint32_t) * 2)) {
202         m_index = UINT64_MAX;
203         return fail_value;
204       }
205 
206       uint8_t nibble = xdigit_to_sint(m_packet[m_index]);
207       // Big Endian
208       result <<= 4;
209       result |= nibble;
210 
211       ++m_index;
212       ++nibble_count;
213     }
214   }
215   return result;
216 }
217 
218 uint64_t StringExtractor::GetHexMaxU64(bool little_endian,
219                                        uint64_t fail_value) {
220   uint64_t result = 0;
221   uint32_t nibble_count = 0;
222 
223   SkipSpaces();
224   if (little_endian) {
225     uint32_t shift_amount = 0;
226     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
227       // Make sure we don't exceed the size of a uint64_t...
228       if (nibble_count >= (sizeof(uint64_t) * 2)) {
229         m_index = UINT64_MAX;
230         return fail_value;
231       }
232 
233       uint8_t nibble_lo;
234       uint8_t nibble_hi = xdigit_to_sint(m_packet[m_index]);
235       ++m_index;
236       if (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
237         nibble_lo = xdigit_to_sint(m_packet[m_index]);
238         ++m_index;
239         result |= ((uint64_t)nibble_hi << (shift_amount + 4));
240         result |= ((uint64_t)nibble_lo << shift_amount);
241         nibble_count += 2;
242         shift_amount += 8;
243       } else {
244         result |= ((uint64_t)nibble_hi << shift_amount);
245         nibble_count += 1;
246         shift_amount += 4;
247       }
248     }
249   } else {
250     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
251       // Make sure we don't exceed the size of a uint64_t...
252       if (nibble_count >= (sizeof(uint64_t) * 2)) {
253         m_index = UINT64_MAX;
254         return fail_value;
255       }
256 
257       uint8_t nibble = xdigit_to_sint(m_packet[m_index]);
258       // Big Endian
259       result <<= 4;
260       result |= nibble;
261 
262       ++m_index;
263       ++nibble_count;
264     }
265   }
266   return result;
267 }
268 
269 bool StringExtractor::ConsumeFront(const llvm::StringRef &str) {
270   llvm::StringRef S = GetStringRef();
271   if (!S.startswith(str))
272     return false;
273   else
274     m_index += str.size();
275   return true;
276 }
277 
278 size_t StringExtractor::GetHexBytes(llvm::MutableArrayRef<uint8_t> dest,
279                                     uint8_t fail_fill_value) {
280   size_t bytes_extracted = 0;
281   while (!dest.empty() && GetBytesLeft() > 0) {
282     dest[0] = GetHexU8(fail_fill_value);
283     if (!IsGood())
284       break;
285     ++bytes_extracted;
286     dest = dest.drop_front();
287   }
288 
289   if (!dest.empty())
290     ::memset(dest.data(), fail_fill_value, dest.size());
291 
292   return bytes_extracted;
293 }
294 
295 // Decodes all valid hex encoded bytes at the head of the StringExtractor,
296 // limited by dst_len.
297 //
298 // Returns the number of bytes successfully decoded
299 size_t StringExtractor::GetHexBytesAvail(llvm::MutableArrayRef<uint8_t> dest) {
300   size_t bytes_extracted = 0;
301   while (!dest.empty()) {
302     int decode = DecodeHexU8();
303     if (decode == -1)
304       break;
305     dest[0] = (uint8_t)decode;
306     dest = dest.drop_front();
307     ++bytes_extracted;
308   }
309   return bytes_extracted;
310 }
311 
312 // Consume ASCII hex nibble character pairs until we have decoded byte_size
313 // bytes of data.
314 
315 uint64_t StringExtractor::GetHexWithFixedSize(uint32_t byte_size,
316                                               bool little_endian,
317                                               uint64_t fail_value) {
318   if (byte_size <= 8 && GetBytesLeft() >= byte_size * 2) {
319     uint64_t result = 0;
320     uint32_t i;
321     if (little_endian) {
322       // Little Endian
323       uint32_t shift_amount;
324       for (i = 0, shift_amount = 0; i < byte_size && IsGood();
325            ++i, shift_amount += 8) {
326         result |= ((uint64_t)GetHexU8() << shift_amount);
327       }
328     } else {
329       // Big Endian
330       for (i = 0; i < byte_size && IsGood(); ++i) {
331         result <<= 8;
332         result |= GetHexU8();
333       }
334     }
335   }
336   m_index = UINT64_MAX;
337   return fail_value;
338 }
339 
340 size_t StringExtractor::GetHexByteString(std::string &str) {
341   str.clear();
342   str.reserve(GetBytesLeft() / 2);
343   char ch;
344   while ((ch = GetHexU8()) != '\0')
345     str.append(1, ch);
346   return str.size();
347 }
348 
349 size_t StringExtractor::GetHexByteStringFixedLength(std::string &str,
350                                                     uint32_t nibble_length) {
351   str.clear();
352 
353   uint32_t nibble_count = 0;
354   for (const char *pch = Peek();
355        (nibble_count < nibble_length) && (pch != nullptr);
356        str.append(1, GetHexU8(0, false)), pch = Peek(), nibble_count += 2) {
357   }
358 
359   return str.size();
360 }
361 
362 size_t StringExtractor::GetHexByteStringTerminatedBy(std::string &str,
363                                                      char terminator) {
364   str.clear();
365   char ch;
366   while ((ch = GetHexU8(0, false)) != '\0')
367     str.append(1, ch);
368   if (Peek() && *Peek() == terminator)
369     return str.size();
370 
371   str.clear();
372   return str.size();
373 }
374 
375 bool StringExtractor::GetNameColonValue(llvm::StringRef &name,
376                                         llvm::StringRef &value) {
377   // Read something in the form of NNNN:VVVV; where NNNN is any character that
378   // is not a colon, followed by a ':' character, then a value (one or more ';'
379   // chars), followed by a ';'
380   if (m_index >= m_packet.size())
381     return fail();
382 
383   llvm::StringRef view(m_packet);
384   if (view.empty())
385     return fail();
386 
387   llvm::StringRef a, b, c, d;
388   view = view.substr(m_index);
389   std::tie(a, b) = view.split(':');
390   if (a.empty() || b.empty())
391     return fail();
392   std::tie(c, d) = b.split(';');
393   if (b == c && d.empty())
394     return fail();
395 
396   name = a;
397   value = c;
398   if (d.empty())
399     m_index = m_packet.size();
400   else {
401     size_t bytes_consumed = d.data() - view.data();
402     m_index += bytes_consumed;
403   }
404   return true;
405 }
406 
407 void StringExtractor::SkipSpaces() {
408   const size_t n = m_packet.size();
409   while (m_index < n && isspace(m_packet[m_index]))
410     ++m_index;
411 }
412