180814287SRaphael Isemann //===-- StringExtractor.cpp -----------------------------------------------===// 230fdc8d8SChris Lattner // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 630fdc8d8SChris Lattner // 730fdc8d8SChris Lattner //===----------------------------------------------------------------------===// 830fdc8d8SChris Lattner 9f805e190SPavel Labath #include "lldb/Utility/StringExtractor.h" 10*f5eaa2afSRaphael Isemann #include "llvm/ADT/StringExtras.h" 1130fdc8d8SChris Lattner 1254695a33SZachary Turner #include <tuple> 134479ac15SZachary Turner 14672d2c12SJonas Devlieghere #include <ctype.h> 154479ac15SZachary Turner #include <stdlib.h> 16672d2c12SJonas Devlieghere #include <string.h> 1730fdc8d8SChris Lattner 18b9c1b51eSKate Stone static inline int xdigit_to_sint(char ch) { 1930fdc8d8SChris Lattner if (ch >= 'a' && ch <= 'f') 2030fdc8d8SChris Lattner return 10 + ch - 'a'; 211e89cd80SBenjamin Kramer if (ch >= 'A' && ch <= 'F') 221e89cd80SBenjamin Kramer return 10 + ch - 'A'; 236eddf8dfSVince Harron if (ch >= '0' && ch <= '9') 2430fdc8d8SChris Lattner return ch - '0'; 256eddf8dfSVince Harron return -1; 2630fdc8d8SChris Lattner } 2730fdc8d8SChris Lattner 2830fdc8d8SChris Lattner // StringExtractor constructor 29b9c1b51eSKate Stone StringExtractor::StringExtractor() : m_packet(), m_index(0) {} 3030fdc8d8SChris Lattner 31b9c1b51eSKate Stone StringExtractor::StringExtractor(llvm::StringRef packet_str) 32b9c1b51eSKate Stone : m_packet(), m_index(0) { 3354695a33SZachary Turner m_packet.assign(packet_str.begin(), packet_str.end()); 3454695a33SZachary Turner } 3530fdc8d8SChris Lattner 36b9c1b51eSKate Stone StringExtractor::StringExtractor(const char *packet_cstr) 37b9c1b51eSKate Stone : m_packet(), m_index(0) { 3830fdc8d8SChris Lattner if (packet_cstr) 3930fdc8d8SChris Lattner m_packet.assign(packet_cstr); 4030fdc8d8SChris Lattner } 4130fdc8d8SChris Lattner 4230fdc8d8SChris Lattner // Destructor 43b9c1b51eSKate Stone StringExtractor::~StringExtractor() {} 4430fdc8d8SChris Lattner 45b9c1b51eSKate Stone char StringExtractor::GetChar(char fail_value) { 46b9c1b51eSKate Stone if (m_index < m_packet.size()) { 4730fdc8d8SChris Lattner char ch = m_packet[m_index]; 4830fdc8d8SChris Lattner ++m_index; 4930fdc8d8SChris Lattner return ch; 5030fdc8d8SChris Lattner } 51c7bece56SGreg Clayton m_index = UINT64_MAX; 5230fdc8d8SChris Lattner return fail_value; 5330fdc8d8SChris Lattner } 5430fdc8d8SChris Lattner 5505097246SAdrian Prantl // If a pair of valid hex digits exist at the head of the StringExtractor they 5605097246SAdrian Prantl // are decoded into an unsigned byte and returned by this function 576eddf8dfSVince Harron // 586eddf8dfSVince Harron // If there is not a pair of valid hex digits at the head of the 596eddf8dfSVince Harron // StringExtractor, it is left unchanged and -1 is returned 60b9c1b51eSKate Stone int StringExtractor::DecodeHexU8() { 6115a2165dSFrancis Ricci SkipSpaces(); 62b9c1b51eSKate Stone if (GetBytesLeft() < 2) { 636eddf8dfSVince Harron return -1; 64b9739d40SPavel Labath } 65b9739d40SPavel Labath const int hi_nibble = xdigit_to_sint(m_packet[m_index]); 66b9739d40SPavel Labath const int lo_nibble = xdigit_to_sint(m_packet[m_index + 1]); 67b9c1b51eSKate Stone if (hi_nibble == -1 || lo_nibble == -1) { 686eddf8dfSVince Harron return -1; 69b9739d40SPavel Labath } 706eddf8dfSVince Harron m_index += 2; 7124374aefSJonas Devlieghere return static_cast<uint8_t>((hi_nibble << 4) + lo_nibble); 726eddf8dfSVince Harron } 736eddf8dfSVince Harron 7405097246SAdrian Prantl // Extract an unsigned character from two hex ASCII chars in the packet string, 7505097246SAdrian Prantl // or return fail_value on failure 76b9c1b51eSKate Stone uint8_t StringExtractor::GetHexU8(uint8_t fail_value, bool set_eof_on_fail) { 7705097246SAdrian Prantl // On success, fail_value will be overwritten with the next character in the 7805097246SAdrian Prantl // stream 79554a8571SDawn Perchik GetHexU8Ex(fail_value, set_eof_on_fail); 80554a8571SDawn Perchik return fail_value; 81554a8571SDawn Perchik } 82554a8571SDawn Perchik 83b9c1b51eSKate Stone bool StringExtractor::GetHexU8Ex(uint8_t &ch, bool set_eof_on_fail) { 846eddf8dfSVince Harron int byte = DecodeHexU8(); 85b9c1b51eSKate Stone if (byte == -1) { 867b70be39SGreg Clayton if (set_eof_on_fail || m_index >= m_packet.size()) 87c7bece56SGreg Clayton m_index = UINT64_MAX; 88554a8571SDawn Perchik // ch should not be changed in case of failure 89554a8571SDawn Perchik return false; 9030fdc8d8SChris Lattner } 9124374aefSJonas Devlieghere ch = static_cast<uint8_t>(byte); 92554a8571SDawn Perchik return true; 936eddf8dfSVince Harron } 9430fdc8d8SChris Lattner 95b9c1b51eSKate Stone uint32_t StringExtractor::GetU32(uint32_t fail_value, int base) { 96b9c1b51eSKate Stone if (m_index < m_packet.size()) { 97d4612ad0SEd Maste char *end = nullptr; 9832e0a750SGreg Clayton const char *start = m_packet.c_str(); 99e0f8f574SDaniel Malea const char *cstr = start + m_index; 100f2d44ca8SEnrico Granata uint32_t result = static_cast<uint32_t>(::strtoul(cstr, &end, base)); 10132e0a750SGreg Clayton 102b9c1b51eSKate Stone if (end && end != cstr) { 103e0f8f574SDaniel Malea m_index = end - start; 104e0f8f574SDaniel Malea return result; 105e0f8f574SDaniel Malea } 106e0f8f574SDaniel Malea } 107e0f8f574SDaniel Malea return fail_value; 108e0f8f574SDaniel Malea } 109e0f8f574SDaniel Malea 110b9c1b51eSKate Stone int32_t StringExtractor::GetS32(int32_t fail_value, int base) { 111b9c1b51eSKate Stone if (m_index < m_packet.size()) { 112d4612ad0SEd Maste char *end = nullptr; 113e0f8f574SDaniel Malea const char *start = m_packet.c_str(); 114e0f8f574SDaniel Malea const char *cstr = start + m_index; 115f2d44ca8SEnrico Granata int32_t result = static_cast<int32_t>(::strtol(cstr, &end, base)); 116e0f8f574SDaniel Malea 117b9c1b51eSKate Stone if (end && end != cstr) { 118e0f8f574SDaniel Malea m_index = end - start; 119e0f8f574SDaniel Malea return result; 120e0f8f574SDaniel Malea } 121e0f8f574SDaniel Malea } 122e0f8f574SDaniel Malea return fail_value; 123e0f8f574SDaniel Malea } 124e0f8f574SDaniel Malea 125b9c1b51eSKate Stone uint64_t StringExtractor::GetU64(uint64_t fail_value, int base) { 126b9c1b51eSKate Stone if (m_index < m_packet.size()) { 127d4612ad0SEd Maste char *end = nullptr; 128e0f8f574SDaniel Malea const char *start = m_packet.c_str(); 129e0f8f574SDaniel Malea const char *cstr = start + m_index; 130e0f8f574SDaniel Malea uint64_t result = ::strtoull(cstr, &end, base); 131e0f8f574SDaniel Malea 132b9c1b51eSKate Stone if (end && end != cstr) { 133e0f8f574SDaniel Malea m_index = end - start; 134e0f8f574SDaniel Malea return result; 135e0f8f574SDaniel Malea } 136e0f8f574SDaniel Malea } 137e0f8f574SDaniel Malea return fail_value; 138e0f8f574SDaniel Malea } 139e0f8f574SDaniel Malea 140b9c1b51eSKate Stone int64_t StringExtractor::GetS64(int64_t fail_value, int base) { 141b9c1b51eSKate Stone if (m_index < m_packet.size()) { 142d4612ad0SEd Maste char *end = nullptr; 143e0f8f574SDaniel Malea const char *start = m_packet.c_str(); 144e0f8f574SDaniel Malea const char *cstr = start + m_index; 145e0f8f574SDaniel Malea int64_t result = ::strtoll(cstr, &end, base); 146e0f8f574SDaniel Malea 147b9c1b51eSKate Stone if (end && end != cstr) { 14832e0a750SGreg Clayton m_index = end - start; 14932e0a750SGreg Clayton return result; 15032e0a750SGreg Clayton } 15132e0a750SGreg Clayton } 15232e0a750SGreg Clayton return fail_value; 15332e0a750SGreg Clayton } 15432e0a750SGreg Clayton 155b9c1b51eSKate Stone uint32_t StringExtractor::GetHexMaxU32(bool little_endian, 156b9c1b51eSKate Stone uint32_t fail_value) { 157b9739d40SPavel Labath uint32_t result = 0; 158b9739d40SPavel Labath uint32_t nibble_count = 0; 159b9739d40SPavel Labath 16015a2165dSFrancis Ricci SkipSpaces(); 161b9c1b51eSKate Stone if (little_endian) { 162b9739d40SPavel Labath uint32_t shift_amount = 0; 163b9c1b51eSKate Stone while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) { 164b9739d40SPavel Labath // Make sure we don't exceed the size of a uint32_t... 165b9c1b51eSKate Stone if (nibble_count >= (sizeof(uint32_t) * 2)) { 166b9739d40SPavel Labath m_index = UINT64_MAX; 167b9739d40SPavel Labath return fail_value; 168b9739d40SPavel Labath } 169b9739d40SPavel Labath 170b9739d40SPavel Labath uint8_t nibble_lo; 171b9739d40SPavel Labath uint8_t nibble_hi = xdigit_to_sint(m_packet[m_index]); 172b9739d40SPavel Labath ++m_index; 173b9c1b51eSKate Stone if (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) { 174b9739d40SPavel Labath nibble_lo = xdigit_to_sint(m_packet[m_index]); 175b9739d40SPavel Labath ++m_index; 17624374aefSJonas Devlieghere result |= (static_cast<uint32_t>(nibble_hi) << (shift_amount + 4)); 17724374aefSJonas Devlieghere result |= (static_cast<uint32_t>(nibble_lo) << shift_amount); 178b9739d40SPavel Labath nibble_count += 2; 179b9739d40SPavel Labath shift_amount += 8; 180b9c1b51eSKate Stone } else { 18124374aefSJonas Devlieghere result |= (static_cast<uint32_t>(nibble_hi) << shift_amount); 182b9739d40SPavel Labath nibble_count += 1; 183b9739d40SPavel Labath shift_amount += 4; 18430fdc8d8SChris Lattner } 185b9739d40SPavel Labath } 186b9c1b51eSKate Stone } else { 187b9c1b51eSKate Stone while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) { 188b9739d40SPavel Labath // Make sure we don't exceed the size of a uint32_t... 189b9c1b51eSKate Stone if (nibble_count >= (sizeof(uint32_t) * 2)) { 190b9739d40SPavel Labath m_index = UINT64_MAX; 191b9739d40SPavel Labath return fail_value; 192b9739d40SPavel Labath } 193b9739d40SPavel Labath 194b9739d40SPavel Labath uint8_t nibble = xdigit_to_sint(m_packet[m_index]); 195b9739d40SPavel Labath // Big Endian 196b9739d40SPavel Labath result <<= 4; 197b9739d40SPavel Labath result |= nibble; 198b9739d40SPavel Labath 199b9739d40SPavel Labath ++m_index; 200b9739d40SPavel Labath ++nibble_count; 201b9739d40SPavel Labath } 202b9739d40SPavel Labath } 203b9739d40SPavel Labath return result; 20430fdc8d8SChris Lattner } 20530fdc8d8SChris Lattner 206b9c1b51eSKate Stone uint64_t StringExtractor::GetHexMaxU64(bool little_endian, 207b9c1b51eSKate Stone uint64_t fail_value) { 208b9739d40SPavel Labath uint64_t result = 0; 209b9739d40SPavel Labath uint32_t nibble_count = 0; 210b9739d40SPavel Labath 21115a2165dSFrancis Ricci SkipSpaces(); 212b9c1b51eSKate Stone if (little_endian) { 213b9739d40SPavel Labath uint32_t shift_amount = 0; 214b9c1b51eSKate Stone while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) { 215b9739d40SPavel Labath // Make sure we don't exceed the size of a uint64_t... 216b9c1b51eSKate Stone if (nibble_count >= (sizeof(uint64_t) * 2)) { 217b9739d40SPavel Labath m_index = UINT64_MAX; 218b9739d40SPavel Labath return fail_value; 219b9739d40SPavel Labath } 220b9739d40SPavel Labath 221b9739d40SPavel Labath uint8_t nibble_lo; 222b9739d40SPavel Labath uint8_t nibble_hi = xdigit_to_sint(m_packet[m_index]); 223b9739d40SPavel Labath ++m_index; 224b9c1b51eSKate Stone if (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) { 225b9739d40SPavel Labath nibble_lo = xdigit_to_sint(m_packet[m_index]); 226b9739d40SPavel Labath ++m_index; 22724374aefSJonas Devlieghere result |= (static_cast<uint64_t>(nibble_hi) << (shift_amount + 4)); 22824374aefSJonas Devlieghere result |= (static_cast<uint64_t>(nibble_lo) << shift_amount); 229b9739d40SPavel Labath nibble_count += 2; 230b9739d40SPavel Labath shift_amount += 8; 231b9c1b51eSKate Stone } else { 23224374aefSJonas Devlieghere result |= (static_cast<uint64_t>(nibble_hi) << shift_amount); 233b9739d40SPavel Labath nibble_count += 1; 234b9739d40SPavel Labath shift_amount += 4; 23530fdc8d8SChris Lattner } 236b9739d40SPavel Labath } 237b9c1b51eSKate Stone } else { 238b9c1b51eSKate Stone while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) { 239b9739d40SPavel Labath // Make sure we don't exceed the size of a uint64_t... 240b9c1b51eSKate Stone if (nibble_count >= (sizeof(uint64_t) * 2)) { 241b9739d40SPavel Labath m_index = UINT64_MAX; 242b9739d40SPavel Labath return fail_value; 243b9739d40SPavel Labath } 244b9739d40SPavel Labath 245b9739d40SPavel Labath uint8_t nibble = xdigit_to_sint(m_packet[m_index]); 246b9739d40SPavel Labath // Big Endian 247b9739d40SPavel Labath result <<= 4; 248b9739d40SPavel Labath result |= nibble; 249b9739d40SPavel Labath 250b9739d40SPavel Labath ++m_index; 251b9739d40SPavel Labath ++nibble_count; 252b9739d40SPavel Labath } 253b9739d40SPavel Labath } 254b9739d40SPavel Labath return result; 25530fdc8d8SChris Lattner } 25630fdc8d8SChris Lattner 257e714c4f5SRavitheja Addepally bool StringExtractor::ConsumeFront(const llvm::StringRef &str) { 258e714c4f5SRavitheja Addepally llvm::StringRef S = GetStringRef(); 259e714c4f5SRavitheja Addepally if (!S.startswith(str)) 260e714c4f5SRavitheja Addepally return false; 261e714c4f5SRavitheja Addepally else 262e714c4f5SRavitheja Addepally m_index += str.size(); 263e714c4f5SRavitheja Addepally return true; 264e714c4f5SRavitheja Addepally } 265e714c4f5SRavitheja Addepally 266b9c1b51eSKate Stone size_t StringExtractor::GetHexBytes(llvm::MutableArrayRef<uint8_t> dest, 267b9c1b51eSKate Stone uint8_t fail_fill_value) { 26830fdc8d8SChris Lattner size_t bytes_extracted = 0; 269b9c1b51eSKate Stone while (!dest.empty() && GetBytesLeft() > 0) { 270d08f09c1SZachary Turner dest[0] = GetHexU8(fail_fill_value); 271d08f09c1SZachary Turner if (!IsGood()) 27230fdc8d8SChris Lattner break; 273d08f09c1SZachary Turner ++bytes_extracted; 274d08f09c1SZachary Turner dest = dest.drop_front(); 27530fdc8d8SChris Lattner } 27630fdc8d8SChris Lattner 277d08f09c1SZachary Turner if (!dest.empty()) 278d08f09c1SZachary Turner ::memset(dest.data(), fail_fill_value, dest.size()); 27930fdc8d8SChris Lattner 28030fdc8d8SChris Lattner return bytes_extracted; 28130fdc8d8SChris Lattner } 28230fdc8d8SChris Lattner 28305097246SAdrian Prantl // Decodes all valid hex encoded bytes at the head of the StringExtractor, 28405097246SAdrian Prantl // limited by dst_len. 2856eddf8dfSVince Harron // 2866eddf8dfSVince Harron // Returns the number of bytes successfully decoded 287b9c1b51eSKate Stone size_t StringExtractor::GetHexBytesAvail(llvm::MutableArrayRef<uint8_t> dest) { 2886eddf8dfSVince Harron size_t bytes_extracted = 0; 289b9c1b51eSKate Stone while (!dest.empty()) { 2906eddf8dfSVince Harron int decode = DecodeHexU8(); 2916eddf8dfSVince Harron if (decode == -1) 2926eddf8dfSVince Harron break; 29324374aefSJonas Devlieghere dest[0] = static_cast<uint8_t>(decode); 294d08f09c1SZachary Turner dest = dest.drop_front(); 295d08f09c1SZachary Turner ++bytes_extracted; 2966eddf8dfSVince Harron } 2976eddf8dfSVince Harron return bytes_extracted; 2986eddf8dfSVince Harron } 29930fdc8d8SChris Lattner 300b9c1b51eSKate Stone size_t StringExtractor::GetHexByteString(std::string &str) { 301de9d0494SGreg Clayton str.clear(); 3028c1b6bd7SPavel Labath str.reserve(GetBytesLeft() / 2); 303de9d0494SGreg Clayton char ch; 304de9d0494SGreg Clayton while ((ch = GetHexU8()) != '\0') 305de9d0494SGreg Clayton str.append(1, ch); 306de9d0494SGreg Clayton return str.size(); 307de9d0494SGreg Clayton } 308de9d0494SGreg Clayton 309b9c1b51eSKate Stone size_t StringExtractor::GetHexByteStringFixedLength(std::string &str, 310b9c1b51eSKate Stone uint32_t nibble_length) { 311af245d11STodd Fiala str.clear(); 312b9739d40SPavel Labath 313b9739d40SPavel Labath uint32_t nibble_count = 0; 314b9c1b51eSKate Stone for (const char *pch = Peek(); 315b9c1b51eSKate Stone (nibble_count < nibble_length) && (pch != nullptr); 316b9c1b51eSKate Stone str.append(1, GetHexU8(0, false)), pch = Peek(), nibble_count += 2) { 317b9c1b51eSKate Stone } 318b9739d40SPavel Labath 319af245d11STodd Fiala return str.size(); 320af245d11STodd Fiala } 321af245d11STodd Fiala 322b9c1b51eSKate Stone size_t StringExtractor::GetHexByteStringTerminatedBy(std::string &str, 323b9c1b51eSKate Stone char terminator) { 324e0f8f574SDaniel Malea str.clear(); 325e0f8f574SDaniel Malea char ch; 326e0f8f574SDaniel Malea while ((ch = GetHexU8(0, false)) != '\0') 327e0f8f574SDaniel Malea str.append(1, ch); 328b9739d40SPavel Labath if (Peek() && *Peek() == terminator) 329e0f8f574SDaniel Malea return str.size(); 330af245d11STodd Fiala 331e0f8f574SDaniel Malea str.clear(); 332e0f8f574SDaniel Malea return str.size(); 333e0f8f574SDaniel Malea } 334e0f8f574SDaniel Malea 335b9c1b51eSKate Stone bool StringExtractor::GetNameColonValue(llvm::StringRef &name, 336b9c1b51eSKate Stone llvm::StringRef &value) { 33705097246SAdrian Prantl // Read something in the form of NNNN:VVVV; where NNNN is any character that 33805097246SAdrian Prantl // is not a colon, followed by a ':' character, then a value (one or more ';' 33905097246SAdrian Prantl // chars), followed by a ';' 34054695a33SZachary Turner if (m_index >= m_packet.size()) 34154695a33SZachary Turner return fail(); 34254695a33SZachary Turner 34354695a33SZachary Turner llvm::StringRef view(m_packet); 34454695a33SZachary Turner if (view.empty()) 34554695a33SZachary Turner return fail(); 34654695a33SZachary Turner 34754695a33SZachary Turner llvm::StringRef a, b, c, d; 34854695a33SZachary Turner view = view.substr(m_index); 34954695a33SZachary Turner std::tie(a, b) = view.split(':'); 35054695a33SZachary Turner if (a.empty() || b.empty()) 35154695a33SZachary Turner return fail(); 35254695a33SZachary Turner std::tie(c, d) = b.split(';'); 35354695a33SZachary Turner if (b == c && d.empty()) 35454695a33SZachary Turner return fail(); 35554695a33SZachary Turner 35654695a33SZachary Turner name = a; 35754695a33SZachary Turner value = c; 35854695a33SZachary Turner if (d.empty()) 35954695a33SZachary Turner m_index = m_packet.size(); 360b9c1b51eSKate Stone else { 36154695a33SZachary Turner size_t bytes_consumed = d.data() - view.data(); 36254695a33SZachary Turner m_index += bytes_consumed; 36354695a33SZachary Turner } 36430fdc8d8SChris Lattner return true; 36530fdc8d8SChris Lattner } 36698424c44SGreg Clayton 367b9c1b51eSKate Stone void StringExtractor::SkipSpaces() { 36898424c44SGreg Clayton const size_t n = m_packet.size(); 369*f5eaa2afSRaphael Isemann while (m_index < n && llvm::isSpace(m_packet[m_index])) 37098424c44SGreg Clayton ++m_index; 37198424c44SGreg Clayton } 372