1 //===-- StringExtractor.cpp -------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "lldb/Utility/StringExtractor.h"
11 
12 // C Includes
13 #include <stdlib.h>
14 
15 // C++ Includes
16 #include <tuple>
17 // Other libraries and framework includes
18 // Project includes
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/Support/Endian.h"
21 
// Convert one ASCII hexadecimal digit into its numeric value (0-15).
// Both lower and upper case letters are accepted; any other character
// produces -1.
static inline int
xdigit_to_sint (char ch)
{
    if ('0' <= ch && ch <= '9')
        return ch - '0';
    if ('a' <= ch && ch <= 'f')
        return ch - 'a' + 10;
    if ('A' <= ch && ch <= 'F')
        return ch - 'A' + 10;
    return -1;
}
33 
34 //----------------------------------------------------------------------
35 // StringExtractor constructor
36 //----------------------------------------------------------------------
37 StringExtractor::StringExtractor() :
38     m_packet(),
39     m_index (0)
40 {
41 }
42 
43 StringExtractor::StringExtractor(llvm::StringRef packet_str) : m_packet(), m_index(0)
44 {
45     m_packet.assign(packet_str.begin(), packet_str.end());
46 }
47 
48 StringExtractor::StringExtractor(const char *packet_cstr) :
49     m_packet(),
50     m_index (0)
51 {
52     if (packet_cstr)
53         m_packet.assign (packet_cstr);
54 }
55 
56 
57 //----------------------------------------------------------------------
58 // StringExtractor copy constructor
59 //----------------------------------------------------------------------
60 StringExtractor::StringExtractor(const StringExtractor& rhs) :
61     m_packet (rhs.m_packet),
62     m_index (rhs.m_index)
63 {
64 
65 }
66 
67 //----------------------------------------------------------------------
68 // StringExtractor assignment operator
69 //----------------------------------------------------------------------
70 const StringExtractor&
71 StringExtractor::operator=(const StringExtractor& rhs)
72 {
73     if (this != &rhs)
74     {
75         m_packet = rhs.m_packet;
76         m_index = rhs.m_index;
77 
78     }
79     return *this;
80 }
81 
82 //----------------------------------------------------------------------
83 // Destructor
84 //----------------------------------------------------------------------
85 StringExtractor::~StringExtractor()
86 {
87 }
88 
89 
90 char
91 StringExtractor::GetChar (char fail_value)
92 {
93     if (m_index < m_packet.size())
94     {
95         char ch = m_packet[m_index];
96         ++m_index;
97         return ch;
98     }
99     m_index = UINT64_MAX;
100     return fail_value;
101 }
102 
103 static llvm::Optional<uint8_t>
104 translateHexChar(char ch1, char ch2)
105 {
106     const int hi_nibble = xdigit_to_sint(ch1);
107     const int lo_nibble = xdigit_to_sint(ch2);
108     if (hi_nibble == -1 || lo_nibble == -1)
109         return llvm::None;
110     return (uint8_t)((hi_nibble << 4) + lo_nibble);
111 }
112 
113 //----------------------------------------------------------------------
114 // If a pair of valid hex digits exist at the head of the
115 // StringExtractor they are decoded into an unsigned byte and returned
116 // by this function
117 //
118 // If there is not a pair of valid hex digits at the head of the
119 // StringExtractor, it is left unchanged and -1 is returned
120 //----------------------------------------------------------------------
121 int
122 StringExtractor::DecodeHexU8()
123 {
124     SkipSpaces();
125     if (GetBytesLeft() < 2)
126         return -1;
127     auto result = translateHexChar(m_packet[m_index], m_packet[m_index + 1]);
128     if (!result.hasValue())
129         return -1;
130     m_index += 2;
131     return *result;
132 }
133 
134 //----------------------------------------------------------------------
135 // Extract an unsigned character from two hex ASCII chars in the packet
136 // string, or return fail_value on failure
137 //----------------------------------------------------------------------
138 uint8_t
139 StringExtractor::GetHexU8 (uint8_t fail_value, bool set_eof_on_fail)
140 {
141     // On success, fail_value will be overwritten with the next
142     // character in the stream
143     GetHexU8Ex(fail_value, set_eof_on_fail);
144     return fail_value;
145 }
146 
147 bool
148 StringExtractor::GetHexU8Ex (uint8_t& ch, bool set_eof_on_fail)
149 {
150     int byte = DecodeHexU8();
151     if (byte == -1)
152     {
153         if (set_eof_on_fail || m_index >= m_packet.size())
154             m_index = UINT64_MAX;
155         // ch should not be changed in case of failure
156         return false;
157     }
158     ch = (uint8_t)byte;
159     return true;
160 }
161 
162 uint32_t
163 StringExtractor::GetU32 (uint32_t fail_value, int base)
164 {
165     if (m_index < m_packet.size())
166     {
167         char *end = nullptr;
168         const char *start = m_packet.c_str();
169         const char *cstr = start + m_index;
170         uint32_t result = static_cast<uint32_t>(::strtoul (cstr, &end, base));
171 
172         if (end && end != cstr)
173         {
174             m_index = end - start;
175             return result;
176         }
177     }
178     return fail_value;
179 }
180 
181 int32_t
182 StringExtractor::GetS32 (int32_t fail_value, int base)
183 {
184     if (m_index < m_packet.size())
185     {
186         char *end = nullptr;
187         const char *start = m_packet.c_str();
188         const char *cstr = start + m_index;
189         int32_t result = static_cast<int32_t>(::strtol (cstr, &end, base));
190 
191         if (end && end != cstr)
192         {
193             m_index = end - start;
194             return result;
195         }
196     }
197     return fail_value;
198 }
199 
200 
201 uint64_t
202 StringExtractor::GetU64 (uint64_t fail_value, int base)
203 {
204     if (m_index < m_packet.size())
205     {
206         char *end = nullptr;
207         const char *start = m_packet.c_str();
208         const char *cstr = start + m_index;
209         uint64_t result = ::strtoull (cstr, &end, base);
210 
211         if (end && end != cstr)
212         {
213             m_index = end - start;
214             return result;
215         }
216     }
217     return fail_value;
218 }
219 
220 int64_t
221 StringExtractor::GetS64 (int64_t fail_value, int base)
222 {
223     if (m_index < m_packet.size())
224     {
225         char *end = nullptr;
226         const char *start = m_packet.c_str();
227         const char *cstr = start + m_index;
228         int64_t result = ::strtoll (cstr, &end, base);
229 
230         if (end && end != cstr)
231         {
232             m_index = end - start;
233             return result;
234         }
235     }
236     return fail_value;
237 }
238 
239 uint32_t
240 StringExtractor::GetHexMaxU32 (bool little_endian, uint32_t fail_value)
241 {
242     SkipSpaces();
243 
244     // Allocate enough space for 2 uint32's.  In big endian, if the user writes
245     // "AB" then this should be treated as 0xAB, not 0xAB000000.  In order to
246     // do this, we decode into the second half of the array, and then shift the
247     // starting point of the big endian translation left by however many bytes
248     // of a uint32 were missing from the input.  We're essentially padding left
249     // with 0's.
250     uint8_t bytes[2 * sizeof(uint32_t) - 1] = {0};
251     llvm::MutableArrayRef<uint8_t> byte_array(bytes);
252     llvm::MutableArrayRef<uint8_t> decode_loc = byte_array.take_back(sizeof(uint32_t));
253     uint32_t bytes_decoded = GetHexBytesAvail(decode_loc);
254     if (bytes_decoded == sizeof(uint32_t) && ::isxdigit(PeekChar()))
255         return fail();
256 
257     using namespace llvm::support;
258     if (little_endian)
259         return endian::read<uint32_t, endianness::little>(decode_loc.data());
260     else
261     {
262         decode_loc = byte_array.drop_front(bytes_decoded - 1).take_front(sizeof(uint32_t));
263         return endian::read<uint32_t, endianness::big>(decode_loc.data());
264     }
265 }
266 
267 uint64_t
268 StringExtractor::GetHexMaxU64 (bool little_endian, uint64_t fail_value)
269 {
270     SkipSpaces();
271 
272     // Allocate enough space for 2 uint64's.  In big endian, if the user writes
273     // "AB" then this should be treated as 0x000000AB, not 0xAB000000.  In order
274     // to do this, we decode into the second half of the array, and then shift
275     // the starting point of the big endian translation left by however many bytes
276     // of a uint32 were missing from the input.  We're essentially padding left
277     // with 0's.
278     uint8_t bytes[2 * sizeof(uint64_t) - 1] = {0};
279     llvm::MutableArrayRef<uint8_t> byte_array(bytes);
280     llvm::MutableArrayRef<uint8_t> decode_loc = byte_array.take_back(sizeof(uint64_t));
281     uint32_t bytes_decoded = GetHexBytesAvail(decode_loc);
282     if (bytes_decoded == sizeof(uint64_t) && ::isxdigit(PeekChar()))
283         return fail();
284 
285     using namespace llvm::support;
286     if (little_endian)
287         return endian::read<uint64_t, endianness::little>(decode_loc.data());
288     else
289     {
290         decode_loc = byte_array.drop_front(bytes_decoded - 1).take_front(sizeof(uint64_t));
291         return endian::read<uint64_t, endianness::big>(decode_loc.data());
292     }
293 }
294 
295 size_t
296 StringExtractor::GetHexBytes (llvm::MutableArrayRef<uint8_t> dest, uint8_t fail_fill_value)
297 {
298     size_t bytes_extracted = 0;
299     while (!dest.empty() && GetBytesLeft() > 0)
300     {
301         dest[0] = GetHexU8 (fail_fill_value);
302         if (!IsGood())
303             break;
304         ++bytes_extracted;
305         dest = dest.drop_front();
306     }
307 
308     if (!dest.empty())
309         ::memset(dest.data(), fail_fill_value, dest.size());
310 
311     return bytes_extracted;
312 }
313 
314 //----------------------------------------------------------------------
315 // Decodes all valid hex encoded bytes at the head of the
316 // StringExtractor, limited by dst_len.
317 //
318 // Returns the number of bytes successfully decoded
319 //----------------------------------------------------------------------
320 size_t
321 StringExtractor::GetHexBytesAvail (llvm::MutableArrayRef<uint8_t> dest)
322 {
323     size_t bytes_extracted = 0;
324     while (!dest.empty())
325     {
326         int decode = DecodeHexU8();
327         if (decode == -1)
328             break;
329         dest[0] = (uint8_t)decode;
330         dest = dest.drop_front();
331         ++bytes_extracted;
332     }
333     return bytes_extracted;
334 }
335 
336 size_t
337 StringExtractor::GetHexByteString (std::string &str)
338 {
339     str.clear();
340     str.reserve(GetBytesLeft() / 2);
341     char ch;
342     while ((ch = GetHexU8()) != '\0')
343         str.append(1, ch);
344     return str.size();
345 }
346 
347 size_t
348 StringExtractor::GetHexByteStringFixedLength (std::string &str, uint32_t nibble_length)
349 {
350     str.clear();
351     llvm::StringRef nibs = Peek().take_front(nibble_length);
352     while (nibs.size() >= 2)
353     {
354         auto ch = translateHexChar(nibs[0], nibs[1]);
355         if (!ch.hasValue())
356             break;
357         str.push_back(*ch);
358         nibs = nibs.drop_front(2);
359     }
360     m_index += str.size() * 2;
361     return str.size();
362 }
363 
364 size_t
365 StringExtractor::GetHexByteStringTerminatedBy (std::string &str,
366                                                char terminator)
367 {
368     str.clear();
369     char ch;
370     while ((ch = GetHexU8(0,false)) != '\0')
371         str.append(1, ch);
372     if (GetBytesLeft() > 0 && PeekChar() == terminator)
373         return str.size();
374 
375     str.clear();
376     return str.size();
377 }
378 
379 bool
380 StringExtractor::GetNameColonValue(llvm::StringRef &name, llvm::StringRef &value)
381 {
382     // Read something in the form of NNNN:VVVV; where NNNN is any character
383     // that is not a colon, followed by a ':' character, then a value (one or
384     // more ';' chars), followed by a ';'
385     if (m_index >= m_packet.size())
386         return fail();
387 
388     llvm::StringRef view(m_packet);
389     if (view.empty())
390         return fail();
391 
392     llvm::StringRef a, b, c, d;
393     view = view.substr(m_index);
394     std::tie(a, b) = view.split(':');
395     if (a.empty() || b.empty())
396         return fail();
397     std::tie(c, d) = b.split(';');
398     if (b == c && d.empty())
399         return fail();
400 
401     name = a;
402     value = c;
403     if (d.empty())
404         m_index = m_packet.size();
405     else
406     {
407         size_t bytes_consumed = d.data() - view.data();
408         m_index += bytes_consumed;
409     }
410     return true;
411 }
412 
413 void
414 StringExtractor::SkipSpaces ()
415 {
416     const size_t n = m_packet.size();
417     while (m_index < n && isspace(m_packet[m_index]))
418         ++m_index;
419 }
420 
421