//===-- StringPrinter.cpp ----------------------------------------*- C++
//-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "lldb/DataFormatters/StringPrinter.h"

#include "lldb/Core/Debugger.h"
#include "lldb/Core/ValueObject.h"
#include "lldb/Target/Language.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/Target.h"
#include "lldb/Utility/Status.h"

#include "llvm/Support/ConvertUTF.h"

#include <cstdio>
#include <ctype.h>
#include <locale>

using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::formatters;

// we define this for all values of type but only implement it for those we
// care about that's good because we get linker errors for any unsupported type
template <lldb_private::formatters::StringPrinter::StringElementType type>
static StringPrinter::StringPrinterBufferPointer<>
GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next);

34ca6c8ee2SEnrico Granata // mimic isprint() for Unicode codepoints 35b9c1b51eSKate Stone static bool isprint(char32_t codepoint) { 36ca6c8ee2SEnrico Granata if (codepoint <= 0x1F || codepoint == 0x7F) // C0 37ca6c8ee2SEnrico Granata { 38ca6c8ee2SEnrico Granata return false; 39ca6c8ee2SEnrico Granata } 40ca6c8ee2SEnrico Granata if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 41ca6c8ee2SEnrico Granata { 42ca6c8ee2SEnrico Granata return false; 43ca6c8ee2SEnrico Granata } 44ca6c8ee2SEnrico Granata if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators 45ca6c8ee2SEnrico Granata { 46ca6c8ee2SEnrico Granata return false; 47ca6c8ee2SEnrico Granata } 48b9c1b51eSKate Stone if (codepoint == 0x200E || codepoint == 0x200F || 49b9c1b51eSKate Stone (codepoint >= 0x202A && 50b9c1b51eSKate Stone codepoint <= 0x202E)) // bidirectional text control 51ca6c8ee2SEnrico Granata { 52ca6c8ee2SEnrico Granata return false; 53ca6c8ee2SEnrico Granata } 54b9c1b51eSKate Stone if (codepoint >= 0xFFF9 && 55b9c1b51eSKate Stone codepoint <= 0xFFFF) // interlinears and generally specials 56ca6c8ee2SEnrico Granata { 57ca6c8ee2SEnrico Granata return false; 58ca6c8ee2SEnrico Granata } 59ca6c8ee2SEnrico Granata return true; 60ca6c8ee2SEnrico Granata } 61ca6c8ee2SEnrico Granata 62ca6c8ee2SEnrico Granata template <> 63ad650a18SEnrico Granata StringPrinter::StringPrinterBufferPointer<> 64b9c1b51eSKate Stone GetPrintableImpl<StringPrinter::StringElementType::ASCII>(uint8_t *buffer, 65b9c1b51eSKate Stone uint8_t *buffer_end, 66b9c1b51eSKate Stone uint8_t *&next) { 67ad650a18SEnrico Granata StringPrinter::StringPrinterBufferPointer<> retval = {nullptr}; 68ca6c8ee2SEnrico Granata 69b9c1b51eSKate Stone switch (*buffer) { 70da04fbb5SEnrico Granata case 0: 71da04fbb5SEnrico Granata retval = {"\\0", 2}; 72da04fbb5SEnrico Granata break; 73ca6c8ee2SEnrico Granata case '\a': 74ca6c8ee2SEnrico Granata retval = {"\\a", 2}; 75ca6c8ee2SEnrico Granata break; 76ca6c8ee2SEnrico Granata case '\b': 
77ca6c8ee2SEnrico Granata retval = {"\\b", 2}; 78ca6c8ee2SEnrico Granata break; 79ca6c8ee2SEnrico Granata case '\f': 80ca6c8ee2SEnrico Granata retval = {"\\f", 2}; 81ca6c8ee2SEnrico Granata break; 82ca6c8ee2SEnrico Granata case '\n': 83ca6c8ee2SEnrico Granata retval = {"\\n", 2}; 84ca6c8ee2SEnrico Granata break; 85ca6c8ee2SEnrico Granata case '\r': 86ca6c8ee2SEnrico Granata retval = {"\\r", 2}; 87ca6c8ee2SEnrico Granata break; 88ca6c8ee2SEnrico Granata case '\t': 89ca6c8ee2SEnrico Granata retval = {"\\t", 2}; 90ca6c8ee2SEnrico Granata break; 91ca6c8ee2SEnrico Granata case '\v': 92ca6c8ee2SEnrico Granata retval = {"\\v", 2}; 93ca6c8ee2SEnrico Granata break; 94ca6c8ee2SEnrico Granata case '\"': 95ca6c8ee2SEnrico Granata retval = {"\\\"", 2}; 96ca6c8ee2SEnrico Granata break; 97ca6c8ee2SEnrico Granata case '\\': 98ca6c8ee2SEnrico Granata retval = {"\\\\", 2}; 99ca6c8ee2SEnrico Granata break; 100ca6c8ee2SEnrico Granata default: 101ca6c8ee2SEnrico Granata if (isprint(*buffer)) 102ca6c8ee2SEnrico Granata retval = {buffer, 1}; 103b9c1b51eSKate Stone else { 104d7e6a4f2SVince Harron uint8_t *data = new uint8_t[5]; 105d7e6a4f2SVince Harron sprintf((char *)data, "\\x%02x", *buffer); 106d7e6a4f2SVince Harron retval = {data, 4, [](const uint8_t *c) { delete[] c; }}; 107ca6c8ee2SEnrico Granata break; 108ca6c8ee2SEnrico Granata } 109ca6c8ee2SEnrico Granata } 110ca6c8ee2SEnrico Granata 111ca6c8ee2SEnrico Granata next = buffer + 1; 112ca6c8ee2SEnrico Granata return retval; 113ca6c8ee2SEnrico Granata } 114ca6c8ee2SEnrico Granata 115b9c1b51eSKate Stone static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1) { 116ca6c8ee2SEnrico Granata return (c0 - 192) * 64 + (c1 - 128); 117ca6c8ee2SEnrico Granata } 118b9c1b51eSKate Stone static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1, 119b9c1b51eSKate Stone unsigned char c2) { 120ca6c8ee2SEnrico Granata return (c0 - 224) * 4096 + (c1 - 128) * 64 + (c2 - 128); 121ca6c8ee2SEnrico Granata } 122b9c1b51eSKate 
Stone static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1, 123b9c1b51eSKate Stone unsigned char c2, unsigned char c3) { 124ca6c8ee2SEnrico Granata return (c0 - 240) * 262144 + (c2 - 128) * 4096 + (c2 - 128) * 64 + (c3 - 128); 125ca6c8ee2SEnrico Granata } 126ca6c8ee2SEnrico Granata 127ca6c8ee2SEnrico Granata template <> 128ad650a18SEnrico Granata StringPrinter::StringPrinterBufferPointer<> 129b9c1b51eSKate Stone GetPrintableImpl<StringPrinter::StringElementType::UTF8>(uint8_t *buffer, 130b9c1b51eSKate Stone uint8_t *buffer_end, 131b9c1b51eSKate Stone uint8_t *&next) { 132ad650a18SEnrico Granata StringPrinter::StringPrinterBufferPointer<> retval{nullptr}; 133ca6c8ee2SEnrico Granata 1349091055eSJustin Lebar unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer); 135ca6c8ee2SEnrico Granata 1365a8ad459SZachary Turner if (1u + std::distance(buffer, buffer_end) < utf8_encoded_len) { 137ca6c8ee2SEnrico Granata // I don't have enough bytes - print whatever I have left 138ca6c8ee2SEnrico Granata retval = {buffer, static_cast<size_t>(1 + buffer_end - buffer)}; 139ca6c8ee2SEnrico Granata next = buffer_end + 1; 140ca6c8ee2SEnrico Granata return retval; 141ca6c8ee2SEnrico Granata } 142ca6c8ee2SEnrico Granata 143ca6c8ee2SEnrico Granata char32_t codepoint = 0; 144b9c1b51eSKate Stone switch (utf8_encoded_len) { 145ca6c8ee2SEnrico Granata case 1: 146ca6c8ee2SEnrico Granata // this is just an ASCII byte - ask ASCII 147b9c1b51eSKate Stone return GetPrintableImpl<StringPrinter::StringElementType::ASCII>( 148b9c1b51eSKate Stone buffer, buffer_end, next); 149ca6c8ee2SEnrico Granata case 2: 150b9c1b51eSKate Stone codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, 151b9c1b51eSKate Stone (unsigned char)*(buffer + 1)); 152ca6c8ee2SEnrico Granata break; 153ca6c8ee2SEnrico Granata case 3: 154b9c1b51eSKate Stone codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, 155b9c1b51eSKate Stone (unsigned char)*(buffer + 1), 156b9c1b51eSKate Stone (unsigned 
char)*(buffer + 2)); 157ca6c8ee2SEnrico Granata break; 158ca6c8ee2SEnrico Granata case 4: 159b9c1b51eSKate Stone codepoint = ConvertUTF8ToCodePoint( 160b9c1b51eSKate Stone (unsigned char)*buffer, (unsigned char)*(buffer + 1), 161b9c1b51eSKate Stone (unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3)); 162ca6c8ee2SEnrico Granata break; 163ca6c8ee2SEnrico Granata default: 16405097246SAdrian Prantl // this is probably some bogus non-character thing just print it as-is and 16505097246SAdrian Prantl // hope to sync up again soon 166ca6c8ee2SEnrico Granata retval = {buffer, 1}; 167ca6c8ee2SEnrico Granata next = buffer + 1; 168ca6c8ee2SEnrico Granata return retval; 169ca6c8ee2SEnrico Granata } 170ca6c8ee2SEnrico Granata 171b9c1b51eSKate Stone if (codepoint) { 172b9c1b51eSKate Stone switch (codepoint) { 173da04fbb5SEnrico Granata case 0: 174da04fbb5SEnrico Granata retval = {"\\0", 2}; 175da04fbb5SEnrico Granata break; 176ca6c8ee2SEnrico Granata case '\a': 177ca6c8ee2SEnrico Granata retval = {"\\a", 2}; 178ca6c8ee2SEnrico Granata break; 179ca6c8ee2SEnrico Granata case '\b': 180ca6c8ee2SEnrico Granata retval = {"\\b", 2}; 181ca6c8ee2SEnrico Granata break; 182ca6c8ee2SEnrico Granata case '\f': 183ca6c8ee2SEnrico Granata retval = {"\\f", 2}; 184ca6c8ee2SEnrico Granata break; 185ca6c8ee2SEnrico Granata case '\n': 186ca6c8ee2SEnrico Granata retval = {"\\n", 2}; 187ca6c8ee2SEnrico Granata break; 188ca6c8ee2SEnrico Granata case '\r': 189ca6c8ee2SEnrico Granata retval = {"\\r", 2}; 190ca6c8ee2SEnrico Granata break; 191ca6c8ee2SEnrico Granata case '\t': 192ca6c8ee2SEnrico Granata retval = {"\\t", 2}; 193ca6c8ee2SEnrico Granata break; 194ca6c8ee2SEnrico Granata case '\v': 195ca6c8ee2SEnrico Granata retval = {"\\v", 2}; 196ca6c8ee2SEnrico Granata break; 197ca6c8ee2SEnrico Granata case '\"': 198ca6c8ee2SEnrico Granata retval = {"\\\"", 2}; 199ca6c8ee2SEnrico Granata break; 200ca6c8ee2SEnrico Granata case '\\': 201ca6c8ee2SEnrico Granata retval = {"\\\\", 2}; 202ca6c8ee2SEnrico 
Granata break; 203ca6c8ee2SEnrico Granata default: 204ca6c8ee2SEnrico Granata if (isprint(codepoint)) 205ca6c8ee2SEnrico Granata retval = {buffer, utf8_encoded_len}; 206b9c1b51eSKate Stone else { 207d7e6a4f2SVince Harron uint8_t *data = new uint8_t[11]; 208a505be4eSZachary Turner sprintf((char *)data, "\\U%08x", (unsigned)codepoint); 209d7e6a4f2SVince Harron retval = {data, 10, [](const uint8_t *c) { delete[] c; }}; 210ca6c8ee2SEnrico Granata break; 211ca6c8ee2SEnrico Granata } 212ca6c8ee2SEnrico Granata } 213ca6c8ee2SEnrico Granata 214ca6c8ee2SEnrico Granata next = buffer + utf8_encoded_len; 215ca6c8ee2SEnrico Granata return retval; 216ca6c8ee2SEnrico Granata } 217ca6c8ee2SEnrico Granata 218ca6c8ee2SEnrico Granata // this should not happen - but just in case.. try to resync at some point 219ca6c8ee2SEnrico Granata retval = {buffer, 1}; 220ca6c8ee2SEnrico Granata next = buffer + 1; 221ca6c8ee2SEnrico Granata return retval; 222ca6c8ee2SEnrico Granata } 223ca6c8ee2SEnrico Granata 22405097246SAdrian Prantl // Given a sequence of bytes, this function returns: a sequence of bytes to 22505097246SAdrian Prantl // actually print out + a length the following unscanned position of the buffer 22605097246SAdrian Prantl // is in next 227ad650a18SEnrico Granata static StringPrinter::StringPrinterBufferPointer<> 228b9c1b51eSKate Stone GetPrintable(StringPrinter::StringElementType type, uint8_t *buffer, 229b9c1b51eSKate Stone uint8_t *buffer_end, uint8_t *&next) { 230ca6c8ee2SEnrico Granata if (!buffer) 231ca6c8ee2SEnrico Granata return {nullptr}; 232ca6c8ee2SEnrico Granata 233b9c1b51eSKate Stone switch (type) { 234ac49453bSEnrico Granata case StringPrinter::StringElementType::ASCII: 235b9c1b51eSKate Stone return GetPrintableImpl<StringPrinter::StringElementType::ASCII>( 236b9c1b51eSKate Stone buffer, buffer_end, next); 237ac49453bSEnrico Granata case StringPrinter::StringElementType::UTF8: 238b9c1b51eSKate Stone return GetPrintableImpl<StringPrinter::StringElementType::UTF8>( 
239b9c1b51eSKate Stone buffer, buffer_end, next); 240ca6c8ee2SEnrico Granata default: 241ca6c8ee2SEnrico Granata return {nullptr}; 242ca6c8ee2SEnrico Granata } 243ca6c8ee2SEnrico Granata } 244ca6c8ee2SEnrico Granata 245ac49453bSEnrico Granata StringPrinter::EscapingHelper 246b9c1b51eSKate Stone StringPrinter::GetDefaultEscapingHelper(GetPrintableElementType elem_type) { 247b9c1b51eSKate Stone switch (elem_type) { 248ac49453bSEnrico Granata case GetPrintableElementType::UTF8: 249b9c1b51eSKate Stone return [](uint8_t *buffer, uint8_t *buffer_end, 250b9c1b51eSKate Stone uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer<> { 251b9c1b51eSKate Stone return GetPrintable(StringPrinter::StringElementType::UTF8, buffer, 252b9c1b51eSKate Stone buffer_end, next); 253ac49453bSEnrico Granata }; 254ac49453bSEnrico Granata case GetPrintableElementType::ASCII: 255b9c1b51eSKate Stone return [](uint8_t *buffer, uint8_t *buffer_end, 256b9c1b51eSKate Stone uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer<> { 257b9c1b51eSKate Stone return GetPrintable(StringPrinter::StringElementType::ASCII, buffer, 258b9c1b51eSKate Stone buffer_end, next); 259ac49453bSEnrico Granata }; 260ac49453bSEnrico Granata } 26143d3a7aeSSaleem Abdulrasool llvm_unreachable("bad element type"); 262ac49453bSEnrico Granata } 263ac49453bSEnrico Granata 264ca6c8ee2SEnrico Granata // use this call if you already have an LLDB-side buffer for the data 265ca6c8ee2SEnrico Granata template <typename SourceDataType> 266b9c1b51eSKate Stone static bool DumpUTFBufferToStream( 2679091055eSJustin Lebar llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, 2689091055eSJustin Lebar const SourceDataType *, 2699091055eSJustin Lebar llvm::UTF8 **, llvm::UTF8 *, 2709091055eSJustin Lebar llvm::ConversionFlags), 271b9c1b51eSKate Stone const StringPrinter::ReadBufferAndDumpToStreamOptions &dump_options) { 272d07f7550SEnrico Granata Stream &stream(*dump_options.GetStream()); 273d07f7550SEnrico Granata if 
(dump_options.GetPrefixToken() != 0) 274d54f7fb8SEnrico Granata stream.Printf("%s", dump_options.GetPrefixToken()); 275d07f7550SEnrico Granata if (dump_options.GetQuote() != 0) 276d07f7550SEnrico Granata stream.Printf("%c", dump_options.GetQuote()); 277d07f7550SEnrico Granata auto data(dump_options.GetData()); 278d07f7550SEnrico Granata auto source_size(dump_options.GetSourceSize()); 279b9c1b51eSKate Stone if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) { 280ca6c8ee2SEnrico Granata const int bufferSPSize = data.GetByteSize(); 281b9c1b51eSKate Stone if (dump_options.GetSourceSize() == 0) { 282ca6c8ee2SEnrico Granata const int origin_encoding = 8 * sizeof(SourceDataType); 283d07f7550SEnrico Granata source_size = bufferSPSize / (origin_encoding / 4); 284ca6c8ee2SEnrico Granata } 285ca6c8ee2SEnrico Granata 286b9c1b51eSKate Stone const SourceDataType *data_ptr = 287b9c1b51eSKate Stone (const SourceDataType *)data.GetDataStart(); 288d07f7550SEnrico Granata const SourceDataType *data_end_ptr = data_ptr + source_size; 289ca6c8ee2SEnrico Granata 290d07f7550SEnrico Granata const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator(); 291d07f7550SEnrico Granata 292b9c1b51eSKate Stone if (zero_is_terminator) { 293b9c1b51eSKate Stone while (data_ptr < data_end_ptr) { 294b9c1b51eSKate Stone if (!*data_ptr) { 295ca6c8ee2SEnrico Granata data_end_ptr = data_ptr; 296ca6c8ee2SEnrico Granata break; 297ca6c8ee2SEnrico Granata } 298ca6c8ee2SEnrico Granata data_ptr++; 299ca6c8ee2SEnrico Granata } 300ca6c8ee2SEnrico Granata 301d7e6a4f2SVince Harron data_ptr = (const SourceDataType *)data.GetDataStart(); 302d07f7550SEnrico Granata } 303ca6c8ee2SEnrico Granata 304ca6c8ee2SEnrico Granata lldb::DataBufferSP utf8_data_buffer_sp; 3059091055eSJustin Lebar llvm::UTF8 *utf8_data_ptr = nullptr; 3069091055eSJustin Lebar llvm::UTF8 *utf8_data_end_ptr = nullptr; 307ca6c8ee2SEnrico Granata 308b9c1b51eSKate Stone if (ConvertFunction) { 309ca6c8ee2SEnrico Granata 
utf8_data_buffer_sp.reset(new DataBufferHeap(4 * bufferSPSize, 0)); 3109091055eSJustin Lebar utf8_data_ptr = (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes(); 311ca6c8ee2SEnrico Granata utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); 312b9c1b51eSKate Stone ConvertFunction(&data_ptr, data_end_ptr, &utf8_data_ptr, 3139091055eSJustin Lebar utf8_data_end_ptr, llvm::lenientConversion); 314a6682a41SJonas Devlieghere if (!zero_is_terminator) 3158101f570SEnrico Granata utf8_data_end_ptr = utf8_data_ptr; 3169091055eSJustin Lebar // needed because the ConvertFunction will change the value of the 3179091055eSJustin Lebar // data_ptr. 318b9c1b51eSKate Stone utf8_data_ptr = 3199091055eSJustin Lebar (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes(); 320b9c1b51eSKate Stone } else { 321b9c1b51eSKate Stone // just copy the pointers - the cast is necessary to make the compiler 32205097246SAdrian Prantl // happy but this should only happen if we are reading UTF8 data 3239091055eSJustin Lebar utf8_data_ptr = const_cast<llvm::UTF8 *>( 3249091055eSJustin Lebar reinterpret_cast<const llvm::UTF8 *>(data_ptr)); 3259091055eSJustin Lebar utf8_data_end_ptr = const_cast<llvm::UTF8 *>( 3269091055eSJustin Lebar reinterpret_cast<const llvm::UTF8 *>(data_end_ptr)); 327ca6c8ee2SEnrico Granata } 328ca6c8ee2SEnrico Granata 329d07f7550SEnrico Granata const bool escape_non_printables = dump_options.GetEscapeNonPrintables(); 330ac49453bSEnrico Granata lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; 331b9c1b51eSKate Stone if (escape_non_printables) { 332ac49453bSEnrico Granata if (Language *language = Language::FindPlugin(dump_options.GetLanguage())) 333b9c1b51eSKate Stone escaping_callback = language->GetStringPrinterEscapingHelper( 334b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::GetPrintableElementType:: 335b9c1b51eSKate Stone UTF8); 336ac49453bSEnrico Granata else 337b9c1b51eSKate Stone escaping_callback = 338b9c1b51eSKate Stone 
lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper( 339b9c1b51eSKate Stone lldb_private::formatters::StringPrinter:: 340b9c1b51eSKate Stone GetPrintableElementType::UTF8); 341ac49453bSEnrico Granata } 342d07f7550SEnrico Granata 343ca6c8ee2SEnrico Granata // since we tend to accept partial data (and even partially malformed data) 34405097246SAdrian Prantl // we might end up with no NULL terminator before the end_ptr hence we need 34505097246SAdrian Prantl // to take a slower route and ensure we stay within boundaries 346b9c1b51eSKate Stone for (; utf8_data_ptr < utf8_data_end_ptr;) { 347d07f7550SEnrico Granata if (zero_is_terminator && !*utf8_data_ptr) 348ca6c8ee2SEnrico Granata break; 349ca6c8ee2SEnrico Granata 350b9c1b51eSKate Stone if (escape_non_printables) { 351ca6c8ee2SEnrico Granata uint8_t *next_data = nullptr; 352b9c1b51eSKate Stone auto printable = 353b9c1b51eSKate Stone escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data); 354ca6c8ee2SEnrico Granata auto printable_bytes = printable.GetBytes(); 355ca6c8ee2SEnrico Granata auto printable_size = printable.GetSize(); 356b9c1b51eSKate Stone if (!printable_bytes || !next_data) { 357b9c1b51eSKate Stone // GetPrintable() failed on us - print one byte in a desperate resync 358b9c1b51eSKate Stone // attempt 359ca6c8ee2SEnrico Granata printable_bytes = utf8_data_ptr; 360ca6c8ee2SEnrico Granata printable_size = 1; 361ca6c8ee2SEnrico Granata next_data = utf8_data_ptr + 1; 362ca6c8ee2SEnrico Granata } 3633acfe1a3SAndy Gibbs for (unsigned c = 0; c < printable_size; c++) 364ca6c8ee2SEnrico Granata stream.Printf("%c", *(printable_bytes + c)); 365ca6c8ee2SEnrico Granata utf8_data_ptr = (uint8_t *)next_data; 366b9c1b51eSKate Stone } else { 367ca6c8ee2SEnrico Granata stream.Printf("%c", *utf8_data_ptr); 368ca6c8ee2SEnrico Granata utf8_data_ptr++; 369ca6c8ee2SEnrico Granata } 370ca6c8ee2SEnrico Granata } 371ca6c8ee2SEnrico Granata } 372d07f7550SEnrico Granata if (dump_options.GetQuote() != 0) 
373d07f7550SEnrico Granata stream.Printf("%c", dump_options.GetQuote()); 374d54f7fb8SEnrico Granata if (dump_options.GetSuffixToken() != 0) 375d54f7fb8SEnrico Granata stream.Printf("%s", dump_options.GetSuffixToken()); 376b7662929SEnrico Granata if (dump_options.GetIsTruncated()) 377b7662929SEnrico Granata stream.Printf("..."); 378ca6c8ee2SEnrico Granata return true; 379ca6c8ee2SEnrico Granata } 380ca6c8ee2SEnrico Granata 381b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions:: 382b9c1b51eSKate Stone ReadStringAndDumpToStreamOptions(ValueObject &valobj) 383b9c1b51eSKate Stone : ReadStringAndDumpToStreamOptions() { 384b9c1b51eSKate Stone SetEscapeNonPrintables( 385b9c1b51eSKate Stone valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 386ebdc1ac0SEnrico Granata } 387ebdc1ac0SEnrico Granata 388b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: 389b9c1b51eSKate Stone ReadBufferAndDumpToStreamOptions(ValueObject &valobj) 390b9c1b51eSKate Stone : ReadBufferAndDumpToStreamOptions() { 391b9c1b51eSKate Stone SetEscapeNonPrintables( 392b9c1b51eSKate Stone valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 393ebdc1ac0SEnrico Granata } 394ebdc1ac0SEnrico Granata 395b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: 396b9c1b51eSKate Stone ReadBufferAndDumpToStreamOptions( 397b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) 398b9c1b51eSKate Stone : ReadBufferAndDumpToStreamOptions() { 399d07f7550SEnrico Granata SetStream(options.GetStream()); 400d07f7550SEnrico Granata SetPrefixToken(options.GetPrefixToken()); 401d54f7fb8SEnrico Granata SetSuffixToken(options.GetSuffixToken()); 402d07f7550SEnrico Granata SetQuote(options.GetQuote()); 403d07f7550SEnrico Granata SetEscapeNonPrintables(options.GetEscapeNonPrintables()); 404d07f7550SEnrico Granata SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator()); 
405ac49453bSEnrico Granata SetLanguage(options.GetLanguage()); 406d07f7550SEnrico Granata } 407d07f7550SEnrico Granata 408b9c1b51eSKate Stone namespace lldb_private { 409ebdc1ac0SEnrico Granata 410b9c1b51eSKate Stone namespace formatters { 411fd13743fSShawn Best 412fd13743fSShawn Best template <> 413b9c1b51eSKate Stone bool StringPrinter::ReadStringAndDumpToStream< 414b9c1b51eSKate Stone StringPrinter::StringElementType::ASCII>( 415b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) { 416fd13743fSShawn Best assert(options.GetStream() && "need a Stream to print the string to"); 41797206d57SZachary Turner Status my_error; 418fd13743fSShawn Best 419fd13743fSShawn Best ProcessSP process_sp(options.GetProcessSP()); 420fd13743fSShawn Best 421fd13743fSShawn Best if (process_sp.get() == nullptr || options.GetLocation() == 0) 422fd13743fSShawn Best return false; 423fd13743fSShawn Best 424fd13743fSShawn Best size_t size; 425b7662929SEnrico Granata const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 426b7662929SEnrico Granata bool is_truncated = false; 427fd13743fSShawn Best 428fd13743fSShawn Best if (options.GetSourceSize() == 0) 429b7662929SEnrico Granata size = max_size; 430b9c1b51eSKate Stone else if (!options.GetIgnoreMaxLength()) { 431b7662929SEnrico Granata size = options.GetSourceSize(); 432b9c1b51eSKate Stone if (size > max_size) { 433b7662929SEnrico Granata size = max_size; 434b7662929SEnrico Granata is_truncated = true; 435b7662929SEnrico Granata } 436b9c1b51eSKate Stone } else 43734042212SEnrico Granata size = options.GetSourceSize(); 438fd13743fSShawn Best 439fd13743fSShawn Best lldb::DataBufferSP buffer_sp(new DataBufferHeap(size, 0)); 440fd13743fSShawn Best 441b9c1b51eSKate Stone process_sp->ReadCStringFromMemory( 442b9c1b51eSKate Stone options.GetLocation(), (char *)buffer_sp->GetBytes(), size, my_error); 443fd13743fSShawn Best 444fd13743fSShawn Best if (my_error.Fail()) 445fd13743fSShawn Best return false; 
446fd13743fSShawn Best 447d54f7fb8SEnrico Granata const char *prefix_token = options.GetPrefixToken(); 448fd13743fSShawn Best char quote = options.GetQuote(); 449fd13743fSShawn Best 450fd13743fSShawn Best if (prefix_token != 0) 451d54f7fb8SEnrico Granata options.GetStream()->Printf("%s%c", prefix_token, quote); 452fd13743fSShawn Best else if (quote != 0) 453fd13743fSShawn Best options.GetStream()->Printf("%c", quote); 454fd13743fSShawn Best 455fd13743fSShawn Best uint8_t *data_end = buffer_sp->GetBytes() + buffer_sp->GetByteSize(); 456fd13743fSShawn Best 457ac49453bSEnrico Granata const bool escape_non_printables = options.GetEscapeNonPrintables(); 458ac49453bSEnrico Granata lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; 459b9c1b51eSKate Stone if (escape_non_printables) { 460ac49453bSEnrico Granata if (Language *language = Language::FindPlugin(options.GetLanguage())) 461b9c1b51eSKate Stone escaping_callback = language->GetStringPrinterEscapingHelper( 462b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::GetPrintableElementType:: 463b9c1b51eSKate Stone ASCII); 464ac49453bSEnrico Granata else 465b9c1b51eSKate Stone escaping_callback = 466b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper( 467b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::GetPrintableElementType:: 468b9c1b51eSKate Stone ASCII); 469ac49453bSEnrico Granata } 470ac49453bSEnrico Granata 471fd13743fSShawn Best // since we tend to accept partial data (and even partially malformed data) 47205097246SAdrian Prantl // we might end up with no NULL terminator before the end_ptr hence we need 47305097246SAdrian Prantl // to take a slower route and ensure we stay within boundaries 474b9c1b51eSKate Stone for (uint8_t *data = buffer_sp->GetBytes(); *data && (data < data_end);) { 475b9c1b51eSKate Stone if (escape_non_printables) { 476fd13743fSShawn Best uint8_t *next_data = nullptr; 477ac49453bSEnrico Granata auto printable = 
escaping_callback(data, data_end, next_data); 478fd13743fSShawn Best auto printable_bytes = printable.GetBytes(); 479fd13743fSShawn Best auto printable_size = printable.GetSize(); 480b9c1b51eSKate Stone if (!printable_bytes || !next_data) { 481b9c1b51eSKate Stone // GetPrintable() failed on us - print one byte in a desperate resync 482b9c1b51eSKate Stone // attempt 483fd13743fSShawn Best printable_bytes = data; 484fd13743fSShawn Best printable_size = 1; 485fd13743fSShawn Best next_data = data + 1; 486fd13743fSShawn Best } 4873acfe1a3SAndy Gibbs for (unsigned c = 0; c < printable_size; c++) 488fd13743fSShawn Best options.GetStream()->Printf("%c", *(printable_bytes + c)); 489fd13743fSShawn Best data = (uint8_t *)next_data; 490b9c1b51eSKate Stone } else { 491fd13743fSShawn Best options.GetStream()->Printf("%c", *data); 492fd13743fSShawn Best data++; 493fd13743fSShawn Best } 494fd13743fSShawn Best } 495fd13743fSShawn Best 496d54f7fb8SEnrico Granata const char *suffix_token = options.GetSuffixToken(); 497d54f7fb8SEnrico Granata 498d54f7fb8SEnrico Granata if (suffix_token != 0) 499d54f7fb8SEnrico Granata options.GetStream()->Printf("%c%s", quote, suffix_token); 500d54f7fb8SEnrico Granata else if (quote != 0) 501fd13743fSShawn Best options.GetStream()->Printf("%c", quote); 502fd13743fSShawn Best 503b7662929SEnrico Granata if (is_truncated) 504b7662929SEnrico Granata options.GetStream()->Printf("..."); 505b7662929SEnrico Granata 506fd13743fSShawn Best return true; 507fd13743fSShawn Best } 508fd13743fSShawn Best 509ca6c8ee2SEnrico Granata template <typename SourceDataType> 510b9c1b51eSKate Stone static bool ReadUTFBufferAndDumpToStream( 511b9c1b51eSKate Stone const StringPrinter::ReadStringAndDumpToStreamOptions &options, 5129091055eSJustin Lebar llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, 5139091055eSJustin Lebar const SourceDataType *, 5149091055eSJustin Lebar llvm::UTF8 **, llvm::UTF8 *, 5159091055eSJustin Lebar llvm::ConversionFlags)) { 
516ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 517ca6c8ee2SEnrico Granata 518b9c1b51eSKate Stone if (options.GetLocation() == 0 || 519b9c1b51eSKate Stone options.GetLocation() == LLDB_INVALID_ADDRESS) 520ca6c8ee2SEnrico Granata return false; 521ca6c8ee2SEnrico Granata 522ca6c8ee2SEnrico Granata lldb::ProcessSP process_sp(options.GetProcessSP()); 523ca6c8ee2SEnrico Granata 524ca6c8ee2SEnrico Granata if (!process_sp) 525ca6c8ee2SEnrico Granata return false; 526ca6c8ee2SEnrico Granata 527ca6c8ee2SEnrico Granata const int type_width = sizeof(SourceDataType); 528ca6c8ee2SEnrico Granata const int origin_encoding = 8 * type_width; 529ca6c8ee2SEnrico Granata if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) 530ca6c8ee2SEnrico Granata return false; 531ca6c8ee2SEnrico Granata // if not UTF8, I need a conversion function to return proper UTF8 532ca6c8ee2SEnrico Granata if (origin_encoding != 8 && !ConvertFunction) 533ca6c8ee2SEnrico Granata return false; 534ca6c8ee2SEnrico Granata 535ca6c8ee2SEnrico Granata if (!options.GetStream()) 536ca6c8ee2SEnrico Granata return false; 537ca6c8ee2SEnrico Granata 538ca6c8ee2SEnrico Granata uint32_t sourceSize = options.GetSourceSize(); 539ca6c8ee2SEnrico Granata bool needs_zero_terminator = options.GetNeedsZeroTermination(); 540ca6c8ee2SEnrico Granata 541b7662929SEnrico Granata bool is_truncated = false; 542b7662929SEnrico Granata const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 543b7662929SEnrico Granata 544b9c1b51eSKate Stone if (!sourceSize) { 545b7662929SEnrico Granata sourceSize = max_size; 546ca6c8ee2SEnrico Granata needs_zero_terminator = true; 547b9c1b51eSKate Stone } else if (!options.GetIgnoreMaxLength()) { 548b9c1b51eSKate Stone if (sourceSize > max_size) { 549b7662929SEnrico Granata sourceSize = max_size; 550b7662929SEnrico Granata is_truncated = true; 551b7662929SEnrico Granata } 552b7662929SEnrico Granata } 
553ca6c8ee2SEnrico Granata 554ca6c8ee2SEnrico Granata const int bufferSPSize = sourceSize * type_width; 555ca6c8ee2SEnrico Granata 556ca6c8ee2SEnrico Granata lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize, 0)); 557ca6c8ee2SEnrico Granata 558ca6c8ee2SEnrico Granata if (!buffer_sp->GetBytes()) 559ca6c8ee2SEnrico Granata return false; 560ca6c8ee2SEnrico Granata 56197206d57SZachary Turner Status error; 562ca6c8ee2SEnrico Granata char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); 563ca6c8ee2SEnrico Granata 564ca6c8ee2SEnrico Granata if (needs_zero_terminator) 565b9c1b51eSKate Stone process_sp->ReadStringFromMemory(options.GetLocation(), buffer, 566b9c1b51eSKate Stone bufferSPSize, error, type_width); 567ca6c8ee2SEnrico Granata else 568b9c1b51eSKate Stone process_sp->ReadMemoryFromInferior(options.GetLocation(), 569b9c1b51eSKate Stone (char *)buffer_sp->GetBytes(), 570b9c1b51eSKate Stone bufferSPSize, error); 571ca6c8ee2SEnrico Granata 572b9c1b51eSKate Stone if (error.Fail()) { 573ca6c8ee2SEnrico Granata options.GetStream()->Printf("unable to read data"); 574ca6c8ee2SEnrico Granata return true; 575ca6c8ee2SEnrico Granata } 576ca6c8ee2SEnrico Granata 577b9c1b51eSKate Stone DataExtractor data(buffer_sp, process_sp->GetByteOrder(), 578b9c1b51eSKate Stone process_sp->GetAddressByteSize()); 579ca6c8ee2SEnrico Granata 580ac49453bSEnrico Granata StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options); 581d07f7550SEnrico Granata dump_options.SetData(data); 582d07f7550SEnrico Granata dump_options.SetSourceSize(sourceSize); 583b7662929SEnrico Granata dump_options.SetIsTruncated(is_truncated); 584d07f7550SEnrico Granata 585d07f7550SEnrico Granata return DumpUTFBufferToStream(ConvertFunction, dump_options); 586ca6c8ee2SEnrico Granata } 587ca6c8ee2SEnrico Granata 588ca6c8ee2SEnrico Granata template <> 589b9c1b51eSKate Stone bool StringPrinter::ReadStringAndDumpToStream< 590b9c1b51eSKate Stone StringPrinter::StringElementType::UTF8>( 
591b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) { 5929091055eSJustin Lebar return ReadUTFBufferAndDumpToStream<llvm::UTF8>(options, nullptr); 593ca6c8ee2SEnrico Granata } 594ca6c8ee2SEnrico Granata 595ca6c8ee2SEnrico Granata template <> 596b9c1b51eSKate Stone bool StringPrinter::ReadStringAndDumpToStream< 597b9c1b51eSKate Stone StringPrinter::StringElementType::UTF16>( 598b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) { 5999091055eSJustin Lebar return ReadUTFBufferAndDumpToStream<llvm::UTF16>(options, 6009091055eSJustin Lebar llvm::ConvertUTF16toUTF8); 601ca6c8ee2SEnrico Granata } 602ca6c8ee2SEnrico Granata 603ca6c8ee2SEnrico Granata template <> 604b9c1b51eSKate Stone bool StringPrinter::ReadStringAndDumpToStream< 605b9c1b51eSKate Stone StringPrinter::StringElementType::UTF32>( 606b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) { 6079091055eSJustin Lebar return ReadUTFBufferAndDumpToStream<llvm::UTF32>(options, 6089091055eSJustin Lebar llvm::ConvertUTF32toUTF8); 609ca6c8ee2SEnrico Granata } 610ca6c8ee2SEnrico Granata 611ca6c8ee2SEnrico Granata template <> 612b9c1b51eSKate Stone bool StringPrinter::ReadBufferAndDumpToStream< 613b9c1b51eSKate Stone StringPrinter::StringElementType::UTF8>( 614b9c1b51eSKate Stone const ReadBufferAndDumpToStreamOptions &options) { 615ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 616ca6c8ee2SEnrico Granata 6179091055eSJustin Lebar return DumpUTFBufferToStream<llvm::UTF8>(nullptr, options); 618ca6c8ee2SEnrico Granata } 619ca6c8ee2SEnrico Granata 620ca6c8ee2SEnrico Granata template <> 621b9c1b51eSKate Stone bool StringPrinter::ReadBufferAndDumpToStream< 622b9c1b51eSKate Stone StringPrinter::StringElementType::ASCII>( 623b9c1b51eSKate Stone const ReadBufferAndDumpToStreamOptions &options) { 624ca6c8ee2SEnrico Granata // treat ASCII the same as UTF8 625ca6c8ee2SEnrico Granata // FIXME: can we optimize ASCII some more? 
626ca6c8ee2SEnrico Granata return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); 627ca6c8ee2SEnrico Granata } 628ca6c8ee2SEnrico Granata 629ca6c8ee2SEnrico Granata template <> 630b9c1b51eSKate Stone bool StringPrinter::ReadBufferAndDumpToStream< 631b9c1b51eSKate Stone StringPrinter::StringElementType::UTF16>( 632b9c1b51eSKate Stone const ReadBufferAndDumpToStreamOptions &options) { 633ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 634ca6c8ee2SEnrico Granata 6359091055eSJustin Lebar return DumpUTFBufferToStream(llvm::ConvertUTF16toUTF8, options); 636ca6c8ee2SEnrico Granata } 637ca6c8ee2SEnrico Granata 638ca6c8ee2SEnrico Granata template <> 639b9c1b51eSKate Stone bool StringPrinter::ReadBufferAndDumpToStream< 640b9c1b51eSKate Stone StringPrinter::StringElementType::UTF32>( 641b9c1b51eSKate Stone const ReadBufferAndDumpToStreamOptions &options) { 642ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 643ca6c8ee2SEnrico Granata 6449091055eSJustin Lebar return DumpUTFBufferToStream(llvm::ConvertUTF32toUTF8, options); 645ca6c8ee2SEnrico Granata } 646fd13743fSShawn Best 647fd13743fSShawn Best } // namespace formatters 648fd13743fSShawn Best 649fd13743fSShawn Best } // namespace lldb_private 650