//===-- StringPrinter.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "lldb/DataFormatters/StringPrinter.h"

#include "lldb/Core/Debugger.h"
#include "lldb/Core/ValueObject.h"
#include "lldb/Target/Language.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/Target.h"
#include "lldb/Utility/Status.h"

#include "llvm/Support/ConvertUTF.h"

#include <cassert>
#include <cstdio>
#include <ctype.h>
#include <locale>
#include <memory>

using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::formatters;

// we define this for all values of type but only implement it for those we
// care about that's good because we get linker errors for any unsupported type
template <lldb_private::formatters::StringPrinter::StringElementType type>
static StringPrinter::StringPrinterBufferPointer
GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next);

Granata 34ca6c8ee2SEnrico Granata // mimic isprint() for Unicode codepoints 35b9c1b51eSKate Stone static bool isprint(char32_t codepoint) { 36ca6c8ee2SEnrico Granata if (codepoint <= 0x1F || codepoint == 0x7F) // C0 37ca6c8ee2SEnrico Granata { 38ca6c8ee2SEnrico Granata return false; 39ca6c8ee2SEnrico Granata } 40ca6c8ee2SEnrico Granata if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 41ca6c8ee2SEnrico Granata { 42ca6c8ee2SEnrico Granata return false; 43ca6c8ee2SEnrico Granata } 44ca6c8ee2SEnrico Granata if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators 45ca6c8ee2SEnrico Granata { 46ca6c8ee2SEnrico Granata return false; 47ca6c8ee2SEnrico Granata } 48b9c1b51eSKate Stone if (codepoint == 0x200E || codepoint == 0x200F || 49b9c1b51eSKate Stone (codepoint >= 0x202A && 50b9c1b51eSKate Stone codepoint <= 0x202E)) // bidirectional text control 51ca6c8ee2SEnrico Granata { 52ca6c8ee2SEnrico Granata return false; 53ca6c8ee2SEnrico Granata } 54b9c1b51eSKate Stone if (codepoint >= 0xFFF9 && 55b9c1b51eSKate Stone codepoint <= 0xFFFF) // interlinears and generally specials 56ca6c8ee2SEnrico Granata { 57ca6c8ee2SEnrico Granata return false; 58ca6c8ee2SEnrico Granata } 59ca6c8ee2SEnrico Granata return true; 60ca6c8ee2SEnrico Granata } 61ca6c8ee2SEnrico Granata 62ca6c8ee2SEnrico Granata template <> 6363e65082SVedant Kumar StringPrinter::StringPrinterBufferPointer 64b9c1b51eSKate Stone GetPrintableImpl<StringPrinter::StringElementType::ASCII>(uint8_t *buffer, 65b9c1b51eSKate Stone uint8_t *buffer_end, 66b9c1b51eSKate Stone uint8_t *&next) { 6763e65082SVedant Kumar StringPrinter::StringPrinterBufferPointer retval = {nullptr}; 68ca6c8ee2SEnrico Granata 69b9c1b51eSKate Stone switch (*buffer) { 70da04fbb5SEnrico Granata case 0: 71da04fbb5SEnrico Granata retval = {"\\0", 2}; 72da04fbb5SEnrico Granata break; 73ca6c8ee2SEnrico Granata case '\a': 74ca6c8ee2SEnrico Granata retval = {"\\a", 2}; 75ca6c8ee2SEnrico Granata break; 76ca6c8ee2SEnrico Granata case '\b': 
77ca6c8ee2SEnrico Granata retval = {"\\b", 2}; 78ca6c8ee2SEnrico Granata break; 79ca6c8ee2SEnrico Granata case '\f': 80ca6c8ee2SEnrico Granata retval = {"\\f", 2}; 81ca6c8ee2SEnrico Granata break; 82ca6c8ee2SEnrico Granata case '\n': 83ca6c8ee2SEnrico Granata retval = {"\\n", 2}; 84ca6c8ee2SEnrico Granata break; 85ca6c8ee2SEnrico Granata case '\r': 86ca6c8ee2SEnrico Granata retval = {"\\r", 2}; 87ca6c8ee2SEnrico Granata break; 88ca6c8ee2SEnrico Granata case '\t': 89ca6c8ee2SEnrico Granata retval = {"\\t", 2}; 90ca6c8ee2SEnrico Granata break; 91ca6c8ee2SEnrico Granata case '\v': 92ca6c8ee2SEnrico Granata retval = {"\\v", 2}; 93ca6c8ee2SEnrico Granata break; 94ca6c8ee2SEnrico Granata case '\"': 95ca6c8ee2SEnrico Granata retval = {"\\\"", 2}; 96ca6c8ee2SEnrico Granata break; 97ca6c8ee2SEnrico Granata case '\\': 98ca6c8ee2SEnrico Granata retval = {"\\\\", 2}; 99ca6c8ee2SEnrico Granata break; 100ca6c8ee2SEnrico Granata default: 101ca6c8ee2SEnrico Granata if (isprint(*buffer)) 102ca6c8ee2SEnrico Granata retval = {buffer, 1}; 103b9c1b51eSKate Stone else { 104d7e6a4f2SVince Harron uint8_t *data = new uint8_t[5]; 105d7e6a4f2SVince Harron sprintf((char *)data, "\\x%02x", *buffer); 106d7e6a4f2SVince Harron retval = {data, 4, [](const uint8_t *c) { delete[] c; }}; 107ca6c8ee2SEnrico Granata break; 108ca6c8ee2SEnrico Granata } 109ca6c8ee2SEnrico Granata } 110ca6c8ee2SEnrico Granata 111ca6c8ee2SEnrico Granata next = buffer + 1; 112ca6c8ee2SEnrico Granata return retval; 113ca6c8ee2SEnrico Granata } 114ca6c8ee2SEnrico Granata 115b9c1b51eSKate Stone static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1) { 116ca6c8ee2SEnrico Granata return (c0 - 192) * 64 + (c1 - 128); 117ca6c8ee2SEnrico Granata } 118b9c1b51eSKate Stone static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1, 119b9c1b51eSKate Stone unsigned char c2) { 120ca6c8ee2SEnrico Granata return (c0 - 224) * 4096 + (c1 - 128) * 64 + (c2 - 128); 121ca6c8ee2SEnrico Granata } 122b9c1b51eSKate 
Stone static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1, 123b9c1b51eSKate Stone unsigned char c2, unsigned char c3) { 124ca6c8ee2SEnrico Granata return (c0 - 240) * 262144 + (c2 - 128) * 4096 + (c2 - 128) * 64 + (c3 - 128); 125ca6c8ee2SEnrico Granata } 126ca6c8ee2SEnrico Granata 127ca6c8ee2SEnrico Granata template <> 12863e65082SVedant Kumar StringPrinter::StringPrinterBufferPointer 129b9c1b51eSKate Stone GetPrintableImpl<StringPrinter::StringElementType::UTF8>(uint8_t *buffer, 130b9c1b51eSKate Stone uint8_t *buffer_end, 131b9c1b51eSKate Stone uint8_t *&next) { 13263e65082SVedant Kumar StringPrinter::StringPrinterBufferPointer retval{nullptr}; 133ca6c8ee2SEnrico Granata 1347aabad13SVedant Kumar const unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer); 135ca6c8ee2SEnrico Granata 1367aabad13SVedant Kumar // If the utf8 encoded length is invalid, or if there aren't enough bytes to 1377aabad13SVedant Kumar // print, this is some kind of corrupted string. 1387aabad13SVedant Kumar if (utf8_encoded_len == 0 || utf8_encoded_len > 4) 139ca6c8ee2SEnrico Granata return retval; 1407aabad13SVedant Kumar if ((buffer_end - buffer) < utf8_encoded_len) 1417aabad13SVedant Kumar // There's no room in the buffer for the utf8 sequence. 
1427aabad13SVedant Kumar return retval; 143ca6c8ee2SEnrico Granata 144ca6c8ee2SEnrico Granata char32_t codepoint = 0; 145b9c1b51eSKate Stone switch (utf8_encoded_len) { 146ca6c8ee2SEnrico Granata case 1: 147ca6c8ee2SEnrico Granata // this is just an ASCII byte - ask ASCII 148b9c1b51eSKate Stone return GetPrintableImpl<StringPrinter::StringElementType::ASCII>( 149b9c1b51eSKate Stone buffer, buffer_end, next); 150ca6c8ee2SEnrico Granata case 2: 151b9c1b51eSKate Stone codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, 152b9c1b51eSKate Stone (unsigned char)*(buffer + 1)); 153ca6c8ee2SEnrico Granata break; 154ca6c8ee2SEnrico Granata case 3: 155b9c1b51eSKate Stone codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, 156b9c1b51eSKate Stone (unsigned char)*(buffer + 1), 157b9c1b51eSKate Stone (unsigned char)*(buffer + 2)); 158ca6c8ee2SEnrico Granata break; 159ca6c8ee2SEnrico Granata case 4: 160b9c1b51eSKate Stone codepoint = ConvertUTF8ToCodePoint( 161b9c1b51eSKate Stone (unsigned char)*buffer, (unsigned char)*(buffer + 1), 162b9c1b51eSKate Stone (unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3)); 163ca6c8ee2SEnrico Granata break; 164ca6c8ee2SEnrico Granata } 165ca6c8ee2SEnrico Granata 166b9c1b51eSKate Stone if (codepoint) { 167b9c1b51eSKate Stone switch (codepoint) { 168da04fbb5SEnrico Granata case 0: 169da04fbb5SEnrico Granata retval = {"\\0", 2}; 170da04fbb5SEnrico Granata break; 171ca6c8ee2SEnrico Granata case '\a': 172ca6c8ee2SEnrico Granata retval = {"\\a", 2}; 173ca6c8ee2SEnrico Granata break; 174ca6c8ee2SEnrico Granata case '\b': 175ca6c8ee2SEnrico Granata retval = {"\\b", 2}; 176ca6c8ee2SEnrico Granata break; 177ca6c8ee2SEnrico Granata case '\f': 178ca6c8ee2SEnrico Granata retval = {"\\f", 2}; 179ca6c8ee2SEnrico Granata break; 180ca6c8ee2SEnrico Granata case '\n': 181ca6c8ee2SEnrico Granata retval = {"\\n", 2}; 182ca6c8ee2SEnrico Granata break; 183ca6c8ee2SEnrico Granata case '\r': 184ca6c8ee2SEnrico Granata retval = {"\\r", 2}; 
185ca6c8ee2SEnrico Granata break; 186ca6c8ee2SEnrico Granata case '\t': 187ca6c8ee2SEnrico Granata retval = {"\\t", 2}; 188ca6c8ee2SEnrico Granata break; 189ca6c8ee2SEnrico Granata case '\v': 190ca6c8ee2SEnrico Granata retval = {"\\v", 2}; 191ca6c8ee2SEnrico Granata break; 192ca6c8ee2SEnrico Granata case '\"': 193ca6c8ee2SEnrico Granata retval = {"\\\"", 2}; 194ca6c8ee2SEnrico Granata break; 195ca6c8ee2SEnrico Granata case '\\': 196ca6c8ee2SEnrico Granata retval = {"\\\\", 2}; 197ca6c8ee2SEnrico Granata break; 198ca6c8ee2SEnrico Granata default: 199ca6c8ee2SEnrico Granata if (isprint(codepoint)) 200ca6c8ee2SEnrico Granata retval = {buffer, utf8_encoded_len}; 201b9c1b51eSKate Stone else { 202d7e6a4f2SVince Harron uint8_t *data = new uint8_t[11]; 203a505be4eSZachary Turner sprintf((char *)data, "\\U%08x", (unsigned)codepoint); 204d7e6a4f2SVince Harron retval = {data, 10, [](const uint8_t *c) { delete[] c; }}; 205ca6c8ee2SEnrico Granata break; 206ca6c8ee2SEnrico Granata } 207ca6c8ee2SEnrico Granata } 208ca6c8ee2SEnrico Granata 209ca6c8ee2SEnrico Granata next = buffer + utf8_encoded_len; 210ca6c8ee2SEnrico Granata return retval; 211ca6c8ee2SEnrico Granata } 212ca6c8ee2SEnrico Granata 2137aabad13SVedant Kumar // We couldn't figure out how to print this string. 
214ca6c8ee2SEnrico Granata return retval; 215ca6c8ee2SEnrico Granata } 216ca6c8ee2SEnrico Granata 21705097246SAdrian Prantl // Given a sequence of bytes, this function returns: a sequence of bytes to 21805097246SAdrian Prantl // actually print out + a length the following unscanned position of the buffer 21905097246SAdrian Prantl // is in next 22063e65082SVedant Kumar static StringPrinter::StringPrinterBufferPointer 221b9c1b51eSKate Stone GetPrintable(StringPrinter::StringElementType type, uint8_t *buffer, 222b9c1b51eSKate Stone uint8_t *buffer_end, uint8_t *&next) { 2237aabad13SVedant Kumar if (!buffer || buffer >= buffer_end) 224ca6c8ee2SEnrico Granata return {nullptr}; 225ca6c8ee2SEnrico Granata 226b9c1b51eSKate Stone switch (type) { 227ac49453bSEnrico Granata case StringPrinter::StringElementType::ASCII: 228b9c1b51eSKate Stone return GetPrintableImpl<StringPrinter::StringElementType::ASCII>( 229b9c1b51eSKate Stone buffer, buffer_end, next); 230ac49453bSEnrico Granata case StringPrinter::StringElementType::UTF8: 231b9c1b51eSKate Stone return GetPrintableImpl<StringPrinter::StringElementType::UTF8>( 232b9c1b51eSKate Stone buffer, buffer_end, next); 233ca6c8ee2SEnrico Granata default: 234ca6c8ee2SEnrico Granata return {nullptr}; 235ca6c8ee2SEnrico Granata } 236ca6c8ee2SEnrico Granata } 237ca6c8ee2SEnrico Granata 238ac49453bSEnrico Granata StringPrinter::EscapingHelper 239b9c1b51eSKate Stone StringPrinter::GetDefaultEscapingHelper(GetPrintableElementType elem_type) { 240b9c1b51eSKate Stone switch (elem_type) { 241ac49453bSEnrico Granata case GetPrintableElementType::UTF8: 242b9c1b51eSKate Stone return [](uint8_t *buffer, uint8_t *buffer_end, 24363e65082SVedant Kumar uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer { 244b9c1b51eSKate Stone return GetPrintable(StringPrinter::StringElementType::UTF8, buffer, 245b9c1b51eSKate Stone buffer_end, next); 246ac49453bSEnrico Granata }; 247ac49453bSEnrico Granata case GetPrintableElementType::ASCII: 
248b9c1b51eSKate Stone return [](uint8_t *buffer, uint8_t *buffer_end, 24963e65082SVedant Kumar uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer { 250b9c1b51eSKate Stone return GetPrintable(StringPrinter::StringElementType::ASCII, buffer, 251b9c1b51eSKate Stone buffer_end, next); 252ac49453bSEnrico Granata }; 253ac49453bSEnrico Granata } 25443d3a7aeSSaleem Abdulrasool llvm_unreachable("bad element type"); 255ac49453bSEnrico Granata } 256ac49453bSEnrico Granata 257ca6c8ee2SEnrico Granata // use this call if you already have an LLDB-side buffer for the data 258ca6c8ee2SEnrico Granata template <typename SourceDataType> 259b9c1b51eSKate Stone static bool DumpUTFBufferToStream( 2609091055eSJustin Lebar llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, 2619091055eSJustin Lebar const SourceDataType *, 2629091055eSJustin Lebar llvm::UTF8 **, llvm::UTF8 *, 2639091055eSJustin Lebar llvm::ConversionFlags), 264b9c1b51eSKate Stone const StringPrinter::ReadBufferAndDumpToStreamOptions &dump_options) { 265d07f7550SEnrico Granata Stream &stream(*dump_options.GetStream()); 266248a1305SKonrad Kleine if (dump_options.GetPrefixToken() != nullptr) 267d54f7fb8SEnrico Granata stream.Printf("%s", dump_options.GetPrefixToken()); 268d07f7550SEnrico Granata if (dump_options.GetQuote() != 0) 269d07f7550SEnrico Granata stream.Printf("%c", dump_options.GetQuote()); 270d07f7550SEnrico Granata auto data(dump_options.GetData()); 271d07f7550SEnrico Granata auto source_size(dump_options.GetSourceSize()); 272b9c1b51eSKate Stone if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) { 273ca6c8ee2SEnrico Granata const int bufferSPSize = data.GetByteSize(); 274b9c1b51eSKate Stone if (dump_options.GetSourceSize() == 0) { 275ca6c8ee2SEnrico Granata const int origin_encoding = 8 * sizeof(SourceDataType); 276d07f7550SEnrico Granata source_size = bufferSPSize / (origin_encoding / 4); 277ca6c8ee2SEnrico Granata } 278ca6c8ee2SEnrico Granata 279b9c1b51eSKate Stone const 
SourceDataType *data_ptr = 280b9c1b51eSKate Stone (const SourceDataType *)data.GetDataStart(); 281d07f7550SEnrico Granata const SourceDataType *data_end_ptr = data_ptr + source_size; 282ca6c8ee2SEnrico Granata 283d07f7550SEnrico Granata const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator(); 284d07f7550SEnrico Granata 285b9c1b51eSKate Stone if (zero_is_terminator) { 286b9c1b51eSKate Stone while (data_ptr < data_end_ptr) { 287b9c1b51eSKate Stone if (!*data_ptr) { 288ca6c8ee2SEnrico Granata data_end_ptr = data_ptr; 289ca6c8ee2SEnrico Granata break; 290ca6c8ee2SEnrico Granata } 291ca6c8ee2SEnrico Granata data_ptr++; 292ca6c8ee2SEnrico Granata } 293ca6c8ee2SEnrico Granata 294d7e6a4f2SVince Harron data_ptr = (const SourceDataType *)data.GetDataStart(); 295d07f7550SEnrico Granata } 296ca6c8ee2SEnrico Granata 297ca6c8ee2SEnrico Granata lldb::DataBufferSP utf8_data_buffer_sp; 2989091055eSJustin Lebar llvm::UTF8 *utf8_data_ptr = nullptr; 2999091055eSJustin Lebar llvm::UTF8 *utf8_data_end_ptr = nullptr; 300ca6c8ee2SEnrico Granata 301b9c1b51eSKate Stone if (ConvertFunction) { 302796ac80bSJonas Devlieghere utf8_data_buffer_sp = 303796ac80bSJonas Devlieghere std::make_shared<DataBufferHeap>(4 * bufferSPSize, 0); 3049091055eSJustin Lebar utf8_data_ptr = (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes(); 305ca6c8ee2SEnrico Granata utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); 306b9c1b51eSKate Stone ConvertFunction(&data_ptr, data_end_ptr, &utf8_data_ptr, 3079091055eSJustin Lebar utf8_data_end_ptr, llvm::lenientConversion); 308a6682a41SJonas Devlieghere if (!zero_is_terminator) 3098101f570SEnrico Granata utf8_data_end_ptr = utf8_data_ptr; 3109091055eSJustin Lebar // needed because the ConvertFunction will change the value of the 3119091055eSJustin Lebar // data_ptr. 
312b9c1b51eSKate Stone utf8_data_ptr = 3139091055eSJustin Lebar (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes(); 314b9c1b51eSKate Stone } else { 315b9c1b51eSKate Stone // just copy the pointers - the cast is necessary to make the compiler 31605097246SAdrian Prantl // happy but this should only happen if we are reading UTF8 data 3179091055eSJustin Lebar utf8_data_ptr = const_cast<llvm::UTF8 *>( 3189091055eSJustin Lebar reinterpret_cast<const llvm::UTF8 *>(data_ptr)); 3199091055eSJustin Lebar utf8_data_end_ptr = const_cast<llvm::UTF8 *>( 3209091055eSJustin Lebar reinterpret_cast<const llvm::UTF8 *>(data_end_ptr)); 321ca6c8ee2SEnrico Granata } 322ca6c8ee2SEnrico Granata 323d07f7550SEnrico Granata const bool escape_non_printables = dump_options.GetEscapeNonPrintables(); 324ac49453bSEnrico Granata lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; 325b9c1b51eSKate Stone if (escape_non_printables) { 326ac49453bSEnrico Granata if (Language *language = Language::FindPlugin(dump_options.GetLanguage())) 327b9c1b51eSKate Stone escaping_callback = language->GetStringPrinterEscapingHelper( 328b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::GetPrintableElementType:: 329b9c1b51eSKate Stone UTF8); 330ac49453bSEnrico Granata else 331b9c1b51eSKate Stone escaping_callback = 332b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper( 333b9c1b51eSKate Stone lldb_private::formatters::StringPrinter:: 334b9c1b51eSKate Stone GetPrintableElementType::UTF8); 335ac49453bSEnrico Granata } 336d07f7550SEnrico Granata 337ca6c8ee2SEnrico Granata // since we tend to accept partial data (and even partially malformed data) 33805097246SAdrian Prantl // we might end up with no NULL terminator before the end_ptr hence we need 33905097246SAdrian Prantl // to take a slower route and ensure we stay within boundaries 340b9c1b51eSKate Stone for (; utf8_data_ptr < utf8_data_end_ptr;) { 341d07f7550SEnrico Granata if (zero_is_terminator && 
!*utf8_data_ptr) 342ca6c8ee2SEnrico Granata break; 343ca6c8ee2SEnrico Granata 344b9c1b51eSKate Stone if (escape_non_printables) { 345ca6c8ee2SEnrico Granata uint8_t *next_data = nullptr; 346b9c1b51eSKate Stone auto printable = 347b9c1b51eSKate Stone escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data); 348ca6c8ee2SEnrico Granata auto printable_bytes = printable.GetBytes(); 349ca6c8ee2SEnrico Granata auto printable_size = printable.GetSize(); 3507aabad13SVedant Kumar 3517aabad13SVedant Kumar // We failed to figure out how to print this string. 3527aabad13SVedant Kumar if (!printable_bytes || !next_data) 3537aabad13SVedant Kumar return false; 3547aabad13SVedant Kumar 3553acfe1a3SAndy Gibbs for (unsigned c = 0; c < printable_size; c++) 356ca6c8ee2SEnrico Granata stream.Printf("%c", *(printable_bytes + c)); 357ca6c8ee2SEnrico Granata utf8_data_ptr = (uint8_t *)next_data; 358b9c1b51eSKate Stone } else { 359ca6c8ee2SEnrico Granata stream.Printf("%c", *utf8_data_ptr); 360ca6c8ee2SEnrico Granata utf8_data_ptr++; 361ca6c8ee2SEnrico Granata } 362ca6c8ee2SEnrico Granata } 363ca6c8ee2SEnrico Granata } 364d07f7550SEnrico Granata if (dump_options.GetQuote() != 0) 365d07f7550SEnrico Granata stream.Printf("%c", dump_options.GetQuote()); 366248a1305SKonrad Kleine if (dump_options.GetSuffixToken() != nullptr) 367d54f7fb8SEnrico Granata stream.Printf("%s", dump_options.GetSuffixToken()); 368b7662929SEnrico Granata if (dump_options.GetIsTruncated()) 369b7662929SEnrico Granata stream.Printf("..."); 370ca6c8ee2SEnrico Granata return true; 371ca6c8ee2SEnrico Granata } 372ca6c8ee2SEnrico Granata 373b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions:: 374b9c1b51eSKate Stone ReadStringAndDumpToStreamOptions(ValueObject &valobj) 375b9c1b51eSKate Stone : ReadStringAndDumpToStreamOptions() { 376b9c1b51eSKate Stone SetEscapeNonPrintables( 377b9c1b51eSKate Stone valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 378ebdc1ac0SEnrico 
Granata } 379ebdc1ac0SEnrico Granata 380b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: 381b9c1b51eSKate Stone ReadBufferAndDumpToStreamOptions(ValueObject &valobj) 382b9c1b51eSKate Stone : ReadBufferAndDumpToStreamOptions() { 383b9c1b51eSKate Stone SetEscapeNonPrintables( 384b9c1b51eSKate Stone valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 385ebdc1ac0SEnrico Granata } 386ebdc1ac0SEnrico Granata 387b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: 388b9c1b51eSKate Stone ReadBufferAndDumpToStreamOptions( 389b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) 390b9c1b51eSKate Stone : ReadBufferAndDumpToStreamOptions() { 391d07f7550SEnrico Granata SetStream(options.GetStream()); 392d07f7550SEnrico Granata SetPrefixToken(options.GetPrefixToken()); 393d54f7fb8SEnrico Granata SetSuffixToken(options.GetSuffixToken()); 394d07f7550SEnrico Granata SetQuote(options.GetQuote()); 395d07f7550SEnrico Granata SetEscapeNonPrintables(options.GetEscapeNonPrintables()); 396d07f7550SEnrico Granata SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator()); 397ac49453bSEnrico Granata SetLanguage(options.GetLanguage()); 398d07f7550SEnrico Granata } 399d07f7550SEnrico Granata 400b9c1b51eSKate Stone namespace lldb_private { 401ebdc1ac0SEnrico Granata 402b9c1b51eSKate Stone namespace formatters { 403fd13743fSShawn Best 404fd13743fSShawn Best template <> 405b9c1b51eSKate Stone bool StringPrinter::ReadStringAndDumpToStream< 406b9c1b51eSKate Stone StringPrinter::StringElementType::ASCII>( 407b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) { 408fd13743fSShawn Best assert(options.GetStream() && "need a Stream to print the string to"); 40997206d57SZachary Turner Status my_error; 410fd13743fSShawn Best 411fd13743fSShawn Best ProcessSP process_sp(options.GetProcessSP()); 412fd13743fSShawn Best 413fd13743fSShawn Best if (process_sp.get() == nullptr 
|| options.GetLocation() == 0) 414fd13743fSShawn Best return false; 415fd13743fSShawn Best 416fd13743fSShawn Best size_t size; 417b7662929SEnrico Granata const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 418b7662929SEnrico Granata bool is_truncated = false; 419fd13743fSShawn Best 420fd13743fSShawn Best if (options.GetSourceSize() == 0) 421b7662929SEnrico Granata size = max_size; 422b9c1b51eSKate Stone else if (!options.GetIgnoreMaxLength()) { 423b7662929SEnrico Granata size = options.GetSourceSize(); 424b9c1b51eSKate Stone if (size > max_size) { 425b7662929SEnrico Granata size = max_size; 426b7662929SEnrico Granata is_truncated = true; 427b7662929SEnrico Granata } 428b9c1b51eSKate Stone } else 42934042212SEnrico Granata size = options.GetSourceSize(); 430fd13743fSShawn Best 431fd13743fSShawn Best lldb::DataBufferSP buffer_sp(new DataBufferHeap(size, 0)); 432fd13743fSShawn Best 433b9c1b51eSKate Stone process_sp->ReadCStringFromMemory( 434b9c1b51eSKate Stone options.GetLocation(), (char *)buffer_sp->GetBytes(), size, my_error); 435fd13743fSShawn Best 436fd13743fSShawn Best if (my_error.Fail()) 437fd13743fSShawn Best return false; 438fd13743fSShawn Best 439d54f7fb8SEnrico Granata const char *prefix_token = options.GetPrefixToken(); 440fd13743fSShawn Best char quote = options.GetQuote(); 441fd13743fSShawn Best 442248a1305SKonrad Kleine if (prefix_token != nullptr) 443d54f7fb8SEnrico Granata options.GetStream()->Printf("%s%c", prefix_token, quote); 444fd13743fSShawn Best else if (quote != 0) 445fd13743fSShawn Best options.GetStream()->Printf("%c", quote); 446fd13743fSShawn Best 447fd13743fSShawn Best uint8_t *data_end = buffer_sp->GetBytes() + buffer_sp->GetByteSize(); 448fd13743fSShawn Best 449ac49453bSEnrico Granata const bool escape_non_printables = options.GetEscapeNonPrintables(); 450ac49453bSEnrico Granata lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; 451b9c1b51eSKate Stone if (escape_non_printables) { 
452ac49453bSEnrico Granata if (Language *language = Language::FindPlugin(options.GetLanguage())) 453b9c1b51eSKate Stone escaping_callback = language->GetStringPrinterEscapingHelper( 454b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::GetPrintableElementType:: 455b9c1b51eSKate Stone ASCII); 456ac49453bSEnrico Granata else 457b9c1b51eSKate Stone escaping_callback = 458b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper( 459b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::GetPrintableElementType:: 460b9c1b51eSKate Stone ASCII); 461ac49453bSEnrico Granata } 462ac49453bSEnrico Granata 463fd13743fSShawn Best // since we tend to accept partial data (and even partially malformed data) 46405097246SAdrian Prantl // we might end up with no NULL terminator before the end_ptr hence we need 46505097246SAdrian Prantl // to take a slower route and ensure we stay within boundaries 466b9c1b51eSKate Stone for (uint8_t *data = buffer_sp->GetBytes(); *data && (data < data_end);) { 467b9c1b51eSKate Stone if (escape_non_printables) { 468fd13743fSShawn Best uint8_t *next_data = nullptr; 469ac49453bSEnrico Granata auto printable = escaping_callback(data, data_end, next_data); 470fd13743fSShawn Best auto printable_bytes = printable.GetBytes(); 471fd13743fSShawn Best auto printable_size = printable.GetSize(); 4727aabad13SVedant Kumar 4737aabad13SVedant Kumar // We failed to figure out how to print this string. 
4747aabad13SVedant Kumar if (!printable_bytes || !next_data) 4757aabad13SVedant Kumar return false; 4767aabad13SVedant Kumar 4773acfe1a3SAndy Gibbs for (unsigned c = 0; c < printable_size; c++) 478fd13743fSShawn Best options.GetStream()->Printf("%c", *(printable_bytes + c)); 479fd13743fSShawn Best data = (uint8_t *)next_data; 480b9c1b51eSKate Stone } else { 481fd13743fSShawn Best options.GetStream()->Printf("%c", *data); 482fd13743fSShawn Best data++; 483fd13743fSShawn Best } 484fd13743fSShawn Best } 485fd13743fSShawn Best 486d54f7fb8SEnrico Granata const char *suffix_token = options.GetSuffixToken(); 487d54f7fb8SEnrico Granata 488248a1305SKonrad Kleine if (suffix_token != nullptr) 489d54f7fb8SEnrico Granata options.GetStream()->Printf("%c%s", quote, suffix_token); 490d54f7fb8SEnrico Granata else if (quote != 0) 491fd13743fSShawn Best options.GetStream()->Printf("%c", quote); 492fd13743fSShawn Best 493b7662929SEnrico Granata if (is_truncated) 494b7662929SEnrico Granata options.GetStream()->Printf("..."); 495b7662929SEnrico Granata 496fd13743fSShawn Best return true; 497fd13743fSShawn Best } 498fd13743fSShawn Best 499ca6c8ee2SEnrico Granata template <typename SourceDataType> 500b9c1b51eSKate Stone static bool ReadUTFBufferAndDumpToStream( 501b9c1b51eSKate Stone const StringPrinter::ReadStringAndDumpToStreamOptions &options, 5029091055eSJustin Lebar llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, 5039091055eSJustin Lebar const SourceDataType *, 5049091055eSJustin Lebar llvm::UTF8 **, llvm::UTF8 *, 5059091055eSJustin Lebar llvm::ConversionFlags)) { 506ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 507ca6c8ee2SEnrico Granata 508b9c1b51eSKate Stone if (options.GetLocation() == 0 || 509b9c1b51eSKate Stone options.GetLocation() == LLDB_INVALID_ADDRESS) 510ca6c8ee2SEnrico Granata return false; 511ca6c8ee2SEnrico Granata 512ca6c8ee2SEnrico Granata lldb::ProcessSP process_sp(options.GetProcessSP()); 
513ca6c8ee2SEnrico Granata 514ca6c8ee2SEnrico Granata if (!process_sp) 515ca6c8ee2SEnrico Granata return false; 516ca6c8ee2SEnrico Granata 517ca6c8ee2SEnrico Granata const int type_width = sizeof(SourceDataType); 518ca6c8ee2SEnrico Granata const int origin_encoding = 8 * type_width; 519ca6c8ee2SEnrico Granata if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) 520ca6c8ee2SEnrico Granata return false; 521ca6c8ee2SEnrico Granata // if not UTF8, I need a conversion function to return proper UTF8 522ca6c8ee2SEnrico Granata if (origin_encoding != 8 && !ConvertFunction) 523ca6c8ee2SEnrico Granata return false; 524ca6c8ee2SEnrico Granata 525ca6c8ee2SEnrico Granata if (!options.GetStream()) 526ca6c8ee2SEnrico Granata return false; 527ca6c8ee2SEnrico Granata 528*7b244258SRaphael Isemann uint32_t sourceSize; 529ca6c8ee2SEnrico Granata bool needs_zero_terminator = options.GetNeedsZeroTermination(); 530ca6c8ee2SEnrico Granata 531b7662929SEnrico Granata bool is_truncated = false; 532b7662929SEnrico Granata const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 533b7662929SEnrico Granata 534*7b244258SRaphael Isemann if (options.HasSourceSize()) { 535*7b244258SRaphael Isemann sourceSize = options.GetSourceSize(); 536*7b244258SRaphael Isemann if (!options.GetIgnoreMaxLength()) { 537b9c1b51eSKate Stone if (sourceSize > max_size) { 538b7662929SEnrico Granata sourceSize = max_size; 539b7662929SEnrico Granata is_truncated = true; 540b7662929SEnrico Granata } 541b7662929SEnrico Granata } 542*7b244258SRaphael Isemann } else { 543*7b244258SRaphael Isemann sourceSize = max_size; 544*7b244258SRaphael Isemann needs_zero_terminator = true; 545*7b244258SRaphael Isemann } 546ca6c8ee2SEnrico Granata 547ca6c8ee2SEnrico Granata const int bufferSPSize = sourceSize * type_width; 548ca6c8ee2SEnrico Granata 549ca6c8ee2SEnrico Granata lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize, 0)); 550ca6c8ee2SEnrico Granata 551*7b244258SRaphael 
Isemann // Check if we got bytes. We never get any bytes if we have an empty 552*7b244258SRaphael Isemann // string, but we still continue so that we end up actually printing 553*7b244258SRaphael Isemann // an empty string (""). 554*7b244258SRaphael Isemann if (sourceSize != 0 && !buffer_sp->GetBytes()) 555ca6c8ee2SEnrico Granata return false; 556ca6c8ee2SEnrico Granata 55797206d57SZachary Turner Status error; 558ca6c8ee2SEnrico Granata char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); 559ca6c8ee2SEnrico Granata 560ca6c8ee2SEnrico Granata if (needs_zero_terminator) 561b9c1b51eSKate Stone process_sp->ReadStringFromMemory(options.GetLocation(), buffer, 562b9c1b51eSKate Stone bufferSPSize, error, type_width); 563ca6c8ee2SEnrico Granata else 564b9c1b51eSKate Stone process_sp->ReadMemoryFromInferior(options.GetLocation(), 565b9c1b51eSKate Stone (char *)buffer_sp->GetBytes(), 566b9c1b51eSKate Stone bufferSPSize, error); 567ca6c8ee2SEnrico Granata 568b9c1b51eSKate Stone if (error.Fail()) { 569ca6c8ee2SEnrico Granata options.GetStream()->Printf("unable to read data"); 570ca6c8ee2SEnrico Granata return true; 571ca6c8ee2SEnrico Granata } 572ca6c8ee2SEnrico Granata 573b9c1b51eSKate Stone DataExtractor data(buffer_sp, process_sp->GetByteOrder(), 574b9c1b51eSKate Stone process_sp->GetAddressByteSize()); 575ca6c8ee2SEnrico Granata 576ac49453bSEnrico Granata StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options); 577d07f7550SEnrico Granata dump_options.SetData(data); 578d07f7550SEnrico Granata dump_options.SetSourceSize(sourceSize); 579b7662929SEnrico Granata dump_options.SetIsTruncated(is_truncated); 580d07f7550SEnrico Granata 581d07f7550SEnrico Granata return DumpUTFBufferToStream(ConvertFunction, dump_options); 582ca6c8ee2SEnrico Granata } 583ca6c8ee2SEnrico Granata 584ca6c8ee2SEnrico Granata template <> 585b9c1b51eSKate Stone bool StringPrinter::ReadStringAndDumpToStream< 586b9c1b51eSKate Stone StringPrinter::StringElementType::UTF8>( 
587b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) { 5889091055eSJustin Lebar return ReadUTFBufferAndDumpToStream<llvm::UTF8>(options, nullptr); 589ca6c8ee2SEnrico Granata } 590ca6c8ee2SEnrico Granata 591ca6c8ee2SEnrico Granata template <> 592b9c1b51eSKate Stone bool StringPrinter::ReadStringAndDumpToStream< 593b9c1b51eSKate Stone StringPrinter::StringElementType::UTF16>( 594b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) { 5959091055eSJustin Lebar return ReadUTFBufferAndDumpToStream<llvm::UTF16>(options, 5969091055eSJustin Lebar llvm::ConvertUTF16toUTF8); 597ca6c8ee2SEnrico Granata } 598ca6c8ee2SEnrico Granata 599ca6c8ee2SEnrico Granata template <> 600b9c1b51eSKate Stone bool StringPrinter::ReadStringAndDumpToStream< 601b9c1b51eSKate Stone StringPrinter::StringElementType::UTF32>( 602b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) { 6039091055eSJustin Lebar return ReadUTFBufferAndDumpToStream<llvm::UTF32>(options, 6049091055eSJustin Lebar llvm::ConvertUTF32toUTF8); 605ca6c8ee2SEnrico Granata } 606ca6c8ee2SEnrico Granata 607ca6c8ee2SEnrico Granata template <> 608b9c1b51eSKate Stone bool StringPrinter::ReadBufferAndDumpToStream< 609b9c1b51eSKate Stone StringPrinter::StringElementType::UTF8>( 610b9c1b51eSKate Stone const ReadBufferAndDumpToStreamOptions &options) { 611ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 612ca6c8ee2SEnrico Granata 6139091055eSJustin Lebar return DumpUTFBufferToStream<llvm::UTF8>(nullptr, options); 614ca6c8ee2SEnrico Granata } 615ca6c8ee2SEnrico Granata 616ca6c8ee2SEnrico Granata template <> 617b9c1b51eSKate Stone bool StringPrinter::ReadBufferAndDumpToStream< 618b9c1b51eSKate Stone StringPrinter::StringElementType::ASCII>( 619b9c1b51eSKate Stone const ReadBufferAndDumpToStreamOptions &options) { 620ca6c8ee2SEnrico Granata // treat ASCII the same as UTF8 621ca6c8ee2SEnrico Granata // FIXME: can we optimize ASCII some more? 
622ca6c8ee2SEnrico Granata return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); 623ca6c8ee2SEnrico Granata } 624ca6c8ee2SEnrico Granata 625ca6c8ee2SEnrico Granata template <> 626b9c1b51eSKate Stone bool StringPrinter::ReadBufferAndDumpToStream< 627b9c1b51eSKate Stone StringPrinter::StringElementType::UTF16>( 628b9c1b51eSKate Stone const ReadBufferAndDumpToStreamOptions &options) { 629ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 630ca6c8ee2SEnrico Granata 6319091055eSJustin Lebar return DumpUTFBufferToStream(llvm::ConvertUTF16toUTF8, options); 632ca6c8ee2SEnrico Granata } 633ca6c8ee2SEnrico Granata 634ca6c8ee2SEnrico Granata template <> 635b9c1b51eSKate Stone bool StringPrinter::ReadBufferAndDumpToStream< 636b9c1b51eSKate Stone StringPrinter::StringElementType::UTF32>( 637b9c1b51eSKate Stone const ReadBufferAndDumpToStreamOptions &options) { 638ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 639ca6c8ee2SEnrico Granata 6409091055eSJustin Lebar return DumpUTFBufferToStream(llvm::ConvertUTF32toUTF8, options); 641ca6c8ee2SEnrico Granata } 642fd13743fSShawn Best 643fd13743fSShawn Best } // namespace formatters 644fd13743fSShawn Best 645fd13743fSShawn Best } // namespace lldb_private 646