1b9c1b51eSKate Stone //===-- StringPrinter.cpp ----------------------------------------*- C++ 2b9c1b51eSKate Stone //-*-===// 3ca6c8ee2SEnrico Granata // 4ca6c8ee2SEnrico Granata // The LLVM Compiler Infrastructure 5ca6c8ee2SEnrico Granata // 6ca6c8ee2SEnrico Granata // This file is distributed under the University of Illinois Open Source 7ca6c8ee2SEnrico Granata // License. See LICENSE.TXT for details. 8ca6c8ee2SEnrico Granata // 9ca6c8ee2SEnrico Granata //===----------------------------------------------------------------------===// 10ca6c8ee2SEnrico Granata 11ca6c8ee2SEnrico Granata #include "lldb/DataFormatters/StringPrinter.h" 12ca6c8ee2SEnrico Granata 13ebdc1ac0SEnrico Granata #include "lldb/Core/Debugger.h" 14ca6c8ee2SEnrico Granata #include "lldb/Core/Error.h" 15ebdc1ac0SEnrico Granata #include "lldb/Core/ValueObject.h" 16ac49453bSEnrico Granata #include "lldb/Target/Language.h" 17ca6c8ee2SEnrico Granata #include "lldb/Target/Process.h" 18ca6c8ee2SEnrico Granata #include "lldb/Target/Target.h" 19ca6c8ee2SEnrico Granata 20ca6c8ee2SEnrico Granata #include "llvm/Support/ConvertUTF.h" 21ca6c8ee2SEnrico Granata 22ca6c8ee2SEnrico Granata #include <ctype.h> 23ca6c8ee2SEnrico Granata #include <locale> 24ca6c8ee2SEnrico Granata 25ca6c8ee2SEnrico Granata using namespace lldb; 26ca6c8ee2SEnrico Granata using namespace lldb_private; 27ca6c8ee2SEnrico Granata using namespace lldb_private::formatters; 28ca6c8ee2SEnrico Granata 29b9c1b51eSKate Stone // we define this for all values of type but only implement it for those we care 30b9c1b51eSKate Stone // about 31ca6c8ee2SEnrico Granata // that's good because we get linker errors for any unsupported type 32ac49453bSEnrico Granata template <lldb_private::formatters::StringPrinter::StringElementType type> 33ad650a18SEnrico Granata static StringPrinter::StringPrinterBufferPointer<> 34ca6c8ee2SEnrico Granata GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next); 35ca6c8ee2SEnrico Granata 36ca6c8ee2SEnrico Granata // mimic isprint() for Unicode codepoints 37b9c1b51eSKate Stone static bool isprint(char32_t codepoint) { 38ca6c8ee2SEnrico Granata if (codepoint <= 0x1F || codepoint == 0x7F) // C0 39ca6c8ee2SEnrico Granata { 40ca6c8ee2SEnrico Granata return false; 41ca6c8ee2SEnrico Granata } 42ca6c8ee2SEnrico Granata if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 43ca6c8ee2SEnrico Granata { 44ca6c8ee2SEnrico Granata return false; 45ca6c8ee2SEnrico Granata } 46ca6c8ee2SEnrico Granata if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators 47ca6c8ee2SEnrico Granata { 48ca6c8ee2SEnrico Granata return false; 49ca6c8ee2SEnrico Granata } 50b9c1b51eSKate Stone if (codepoint == 0x200E || codepoint == 0x200F || 51b9c1b51eSKate Stone (codepoint >= 0x202A && 52b9c1b51eSKate Stone codepoint <= 0x202E)) // bidirectional text control 53ca6c8ee2SEnrico Granata { 54ca6c8ee2SEnrico Granata return false; 55ca6c8ee2SEnrico Granata } 56b9c1b51eSKate Stone if (codepoint >= 0xFFF9 && 57b9c1b51eSKate Stone codepoint <= 0xFFFF) // interlinears and generally specials 58ca6c8ee2SEnrico Granata { 59ca6c8ee2SEnrico Granata return false; 60ca6c8ee2SEnrico Granata } 61ca6c8ee2SEnrico Granata return true; 62ca6c8ee2SEnrico Granata } 63ca6c8ee2SEnrico Granata 64ca6c8ee2SEnrico Granata template <> 65ad650a18SEnrico Granata StringPrinter::StringPrinterBufferPointer<> 66b9c1b51eSKate Stone GetPrintableImpl<StringPrinter::StringElementType::ASCII>(uint8_t *buffer, 67b9c1b51eSKate Stone uint8_t *buffer_end, 68b9c1b51eSKate Stone uint8_t *&next) { 69ad650a18SEnrico Granata StringPrinter::StringPrinterBufferPointer<> retval = {nullptr}; 70ca6c8ee2SEnrico Granata 71b9c1b51eSKate Stone switch (*buffer) { 72da04fbb5SEnrico Granata case 0: 73da04fbb5SEnrico Granata retval = {"\\0", 2}; 74da04fbb5SEnrico Granata break; 75ca6c8ee2SEnrico Granata case '\a': 76ca6c8ee2SEnrico Granata retval = {"\\a", 2}; 77ca6c8ee2SEnrico Granata break; 78ca6c8ee2SEnrico Granata case '\b': 79ca6c8ee2SEnrico Granata retval = {"\\b", 2}; 80ca6c8ee2SEnrico Granata break; 81ca6c8ee2SEnrico Granata case '\f': 82ca6c8ee2SEnrico Granata retval = {"\\f", 2}; 83ca6c8ee2SEnrico Granata break; 84ca6c8ee2SEnrico Granata case '\n': 85ca6c8ee2SEnrico Granata retval = {"\\n", 2}; 86ca6c8ee2SEnrico Granata break; 87ca6c8ee2SEnrico Granata case '\r': 88ca6c8ee2SEnrico Granata retval = {"\\r", 2}; 89ca6c8ee2SEnrico Granata break; 90ca6c8ee2SEnrico Granata case '\t': 91ca6c8ee2SEnrico Granata retval = {"\\t", 2}; 92ca6c8ee2SEnrico Granata break; 93ca6c8ee2SEnrico Granata case '\v': 94ca6c8ee2SEnrico Granata retval = {"\\v", 2}; 95ca6c8ee2SEnrico Granata break; 96ca6c8ee2SEnrico Granata case '\"': 97ca6c8ee2SEnrico Granata retval = {"\\\"", 2}; 98ca6c8ee2SEnrico Granata break; 99ca6c8ee2SEnrico Granata case '\\': 100ca6c8ee2SEnrico Granata retval = {"\\\\", 2}; 101ca6c8ee2SEnrico Granata break; 102ca6c8ee2SEnrico Granata default: 103ca6c8ee2SEnrico Granata if (isprint(*buffer)) 104ca6c8ee2SEnrico Granata retval = {buffer, 1}; 105b9c1b51eSKate Stone else { 106d7e6a4f2SVince Harron uint8_t *data = new uint8_t[5]; 107d7e6a4f2SVince Harron sprintf((char *)data, "\\x%02x", *buffer); 108d7e6a4f2SVince Harron retval = {data, 4, [](const uint8_t *c) { delete[] c; }}; 109ca6c8ee2SEnrico Granata break; 110ca6c8ee2SEnrico Granata } 111ca6c8ee2SEnrico Granata } 112ca6c8ee2SEnrico Granata 113ca6c8ee2SEnrico Granata next = buffer + 1; 114ca6c8ee2SEnrico Granata return retval; 115ca6c8ee2SEnrico Granata } 116ca6c8ee2SEnrico Granata 117b9c1b51eSKate Stone static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1) { 118ca6c8ee2SEnrico Granata return (c0 - 192) * 64 + (c1 - 128); 119ca6c8ee2SEnrico Granata } 120b9c1b51eSKate Stone static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1, 121b9c1b51eSKate Stone unsigned char c2) { 122ca6c8ee2SEnrico Granata return (c0 - 224) * 4096 + (c1 - 128) * 64 + (c2 - 128); 123ca6c8ee2SEnrico Granata } 124b9c1b51eSKate Stone static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1, 125b9c1b51eSKate Stone unsigned char c2, unsigned char c3) { 126ca6c8ee2SEnrico Granata return (c0 - 240) * 262144 + (c2 - 128) * 4096 + (c2 - 128) * 64 + (c3 - 128); 127ca6c8ee2SEnrico Granata } 128ca6c8ee2SEnrico Granata 129ca6c8ee2SEnrico Granata template <> 130ad650a18SEnrico Granata StringPrinter::StringPrinterBufferPointer<> 131b9c1b51eSKate Stone GetPrintableImpl<StringPrinter::StringElementType::UTF8>(uint8_t *buffer, 132b9c1b51eSKate Stone uint8_t *buffer_end, 133b9c1b51eSKate Stone uint8_t *&next) { 134ad650a18SEnrico Granata StringPrinter::StringPrinterBufferPointer<> retval{nullptr}; 135ca6c8ee2SEnrico Granata 1369091055eSJustin Lebar unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer); 137ca6c8ee2SEnrico Granata 138*5a8ad459SZachary Turner if (1u + std::distance(buffer, buffer_end) < utf8_encoded_len) { 139ca6c8ee2SEnrico Granata // I don't have enough bytes - print whatever I have left 140ca6c8ee2SEnrico Granata retval = {buffer, static_cast<size_t>(1 + buffer_end - buffer)}; 141ca6c8ee2SEnrico Granata next = buffer_end + 1; 142ca6c8ee2SEnrico Granata return retval; 143ca6c8ee2SEnrico Granata } 144ca6c8ee2SEnrico Granata 145ca6c8ee2SEnrico Granata char32_t codepoint = 0; 146b9c1b51eSKate Stone switch (utf8_encoded_len) { 147ca6c8ee2SEnrico Granata case 1: 148ca6c8ee2SEnrico Granata // this is just an ASCII byte - ask ASCII 149b9c1b51eSKate Stone return GetPrintableImpl<StringPrinter::StringElementType::ASCII>( 150b9c1b51eSKate Stone buffer, buffer_end, next); 151ca6c8ee2SEnrico Granata case 2: 152b9c1b51eSKate Stone codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, 153b9c1b51eSKate Stone (unsigned char)*(buffer + 1)); 154ca6c8ee2SEnrico Granata break; 155ca6c8ee2SEnrico Granata case 3: 156b9c1b51eSKate Stone codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, 157b9c1b51eSKate Stone (unsigned char)*(buffer + 1), 158b9c1b51eSKate Stone (unsigned char)*(buffer + 2)); 159ca6c8ee2SEnrico Granata break; 160ca6c8ee2SEnrico Granata case 4: 161b9c1b51eSKate Stone codepoint = ConvertUTF8ToCodePoint( 162b9c1b51eSKate Stone (unsigned char)*buffer, (unsigned char)*(buffer + 1), 163b9c1b51eSKate Stone (unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3)); 164ca6c8ee2SEnrico Granata break; 165ca6c8ee2SEnrico Granata default: 166ca6c8ee2SEnrico Granata // this is probably some bogus non-character thing 167ca6c8ee2SEnrico Granata // just print it as-is and hope to sync up again soon 168ca6c8ee2SEnrico Granata retval = {buffer, 1}; 169ca6c8ee2SEnrico Granata next = buffer + 1; 170ca6c8ee2SEnrico Granata return retval; 171ca6c8ee2SEnrico Granata } 172ca6c8ee2SEnrico Granata 173b9c1b51eSKate Stone if (codepoint) { 174b9c1b51eSKate Stone switch (codepoint) { 175da04fbb5SEnrico Granata case 0: 176da04fbb5SEnrico Granata retval = {"\\0", 2}; 177da04fbb5SEnrico Granata break; 178ca6c8ee2SEnrico Granata case '\a': 179ca6c8ee2SEnrico Granata retval = {"\\a", 2}; 180ca6c8ee2SEnrico Granata break; 181ca6c8ee2SEnrico Granata case '\b': 182ca6c8ee2SEnrico Granata retval = {"\\b", 2}; 183ca6c8ee2SEnrico Granata break; 184ca6c8ee2SEnrico Granata case '\f': 185ca6c8ee2SEnrico Granata retval = {"\\f", 2}; 186ca6c8ee2SEnrico Granata break; 187ca6c8ee2SEnrico Granata case '\n': 188ca6c8ee2SEnrico Granata retval = {"\\n", 2}; 189ca6c8ee2SEnrico Granata break; 190ca6c8ee2SEnrico Granata case '\r': 191ca6c8ee2SEnrico Granata retval = {"\\r", 2}; 192ca6c8ee2SEnrico Granata break; 193ca6c8ee2SEnrico Granata case '\t': 194ca6c8ee2SEnrico Granata retval = {"\\t", 2}; 195ca6c8ee2SEnrico Granata break; 196ca6c8ee2SEnrico Granata case '\v': 197ca6c8ee2SEnrico Granata retval = {"\\v", 2}; 198ca6c8ee2SEnrico Granata break; 199ca6c8ee2SEnrico Granata case '\"': 200ca6c8ee2SEnrico Granata retval = {"\\\"", 2}; 201ca6c8ee2SEnrico Granata break; 202ca6c8ee2SEnrico Granata case '\\': 203ca6c8ee2SEnrico Granata retval = {"\\\\", 2}; 204ca6c8ee2SEnrico Granata break; 205ca6c8ee2SEnrico Granata default: 206ca6c8ee2SEnrico Granata if (isprint(codepoint)) 207ca6c8ee2SEnrico Granata retval = {buffer, utf8_encoded_len}; 208b9c1b51eSKate Stone else { 209d7e6a4f2SVince Harron uint8_t *data = new uint8_t[11]; 210a505be4eSZachary Turner sprintf((char *)data, "\\U%08x", (unsigned)codepoint); 211d7e6a4f2SVince Harron retval = {data, 10, [](const uint8_t *c) { delete[] c; }}; 212ca6c8ee2SEnrico Granata break; 213ca6c8ee2SEnrico Granata } 214ca6c8ee2SEnrico Granata } 215ca6c8ee2SEnrico Granata 216ca6c8ee2SEnrico Granata next = buffer + utf8_encoded_len; 217ca6c8ee2SEnrico Granata return retval; 218ca6c8ee2SEnrico Granata } 219ca6c8ee2SEnrico Granata 220ca6c8ee2SEnrico Granata // this should not happen - but just in case.. try to resync at some point 221ca6c8ee2SEnrico Granata retval = {buffer, 1}; 222ca6c8ee2SEnrico Granata next = buffer + 1; 223ca6c8ee2SEnrico Granata return retval; 224ca6c8ee2SEnrico Granata } 225ca6c8ee2SEnrico Granata 226ca6c8ee2SEnrico Granata // Given a sequence of bytes, this function returns: 227ca6c8ee2SEnrico Granata // a sequence of bytes to actually print out + a length 228ca6c8ee2SEnrico Granata // the following unscanned position of the buffer is in next 229ad650a18SEnrico Granata static StringPrinter::StringPrinterBufferPointer<> 230b9c1b51eSKate Stone GetPrintable(StringPrinter::StringElementType type, uint8_t *buffer, 231b9c1b51eSKate Stone uint8_t *buffer_end, uint8_t *&next) { 232ca6c8ee2SEnrico Granata if (!buffer) 233ca6c8ee2SEnrico Granata return {nullptr}; 234ca6c8ee2SEnrico Granata 235b9c1b51eSKate Stone switch (type) { 236ac49453bSEnrico Granata case StringPrinter::StringElementType::ASCII: 237b9c1b51eSKate Stone return GetPrintableImpl<StringPrinter::StringElementType::ASCII>( 238b9c1b51eSKate Stone buffer, buffer_end, next); 239ac49453bSEnrico Granata case StringPrinter::StringElementType::UTF8: 240b9c1b51eSKate Stone return GetPrintableImpl<StringPrinter::StringElementType::UTF8>( 241b9c1b51eSKate Stone buffer, buffer_end, next); 242ca6c8ee2SEnrico Granata default: 243ca6c8ee2SEnrico Granata return {nullptr}; 244ca6c8ee2SEnrico Granata } 245ca6c8ee2SEnrico Granata } 246ca6c8ee2SEnrico Granata 247ac49453bSEnrico Granata StringPrinter::EscapingHelper 248b9c1b51eSKate Stone StringPrinter::GetDefaultEscapingHelper(GetPrintableElementType elem_type) { 249b9c1b51eSKate Stone switch (elem_type) { 250ac49453bSEnrico Granata case GetPrintableElementType::UTF8: 251b9c1b51eSKate Stone return [](uint8_t *buffer, uint8_t *buffer_end, 252b9c1b51eSKate Stone uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer<> { 253b9c1b51eSKate Stone return GetPrintable(StringPrinter::StringElementType::UTF8, buffer, 254b9c1b51eSKate Stone buffer_end, next); 255ac49453bSEnrico Granata }; 256ac49453bSEnrico Granata case GetPrintableElementType::ASCII: 257b9c1b51eSKate Stone return [](uint8_t *buffer, uint8_t *buffer_end, 258b9c1b51eSKate Stone uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer<> { 259b9c1b51eSKate Stone return GetPrintable(StringPrinter::StringElementType::ASCII, buffer, 260b9c1b51eSKate Stone buffer_end, next); 261ac49453bSEnrico Granata }; 262ac49453bSEnrico Granata } 26343d3a7aeSSaleem Abdulrasool llvm_unreachable("bad element type"); 264ac49453bSEnrico Granata } 265ac49453bSEnrico Granata 266ca6c8ee2SEnrico Granata // use this call if you already have an LLDB-side buffer for the data 267ca6c8ee2SEnrico Granata template <typename SourceDataType> 268b9c1b51eSKate Stone static bool DumpUTFBufferToStream( 2699091055eSJustin Lebar llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, 2709091055eSJustin Lebar const SourceDataType *, 2719091055eSJustin Lebar llvm::UTF8 **, llvm::UTF8 *, 2729091055eSJustin Lebar llvm::ConversionFlags), 273b9c1b51eSKate Stone const StringPrinter::ReadBufferAndDumpToStreamOptions &dump_options) { 274d07f7550SEnrico Granata Stream &stream(*dump_options.GetStream()); 275d07f7550SEnrico Granata if (dump_options.GetPrefixToken() != 0) 276d54f7fb8SEnrico Granata stream.Printf("%s", dump_options.GetPrefixToken()); 277d07f7550SEnrico Granata if (dump_options.GetQuote() != 0) 278d07f7550SEnrico Granata stream.Printf("%c", dump_options.GetQuote()); 279d07f7550SEnrico Granata auto data(dump_options.GetData()); 280d07f7550SEnrico Granata auto source_size(dump_options.GetSourceSize()); 281b9c1b51eSKate Stone if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) { 282ca6c8ee2SEnrico Granata const int bufferSPSize = data.GetByteSize(); 283b9c1b51eSKate Stone if (dump_options.GetSourceSize() == 0) { 284ca6c8ee2SEnrico Granata const int origin_encoding = 8 * sizeof(SourceDataType); 285d07f7550SEnrico Granata source_size = bufferSPSize / (origin_encoding / 4); 286ca6c8ee2SEnrico Granata } 287ca6c8ee2SEnrico Granata 288b9c1b51eSKate Stone const SourceDataType *data_ptr = 289b9c1b51eSKate Stone (const SourceDataType *)data.GetDataStart(); 290d07f7550SEnrico Granata const SourceDataType *data_end_ptr = data_ptr + source_size; 291ca6c8ee2SEnrico Granata 292d07f7550SEnrico Granata const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator(); 293d07f7550SEnrico Granata 294b9c1b51eSKate Stone if (zero_is_terminator) { 295b9c1b51eSKate Stone while (data_ptr < data_end_ptr) { 296b9c1b51eSKate Stone if (!*data_ptr) { 297ca6c8ee2SEnrico Granata data_end_ptr = data_ptr; 298ca6c8ee2SEnrico Granata break; 299ca6c8ee2SEnrico Granata } 300ca6c8ee2SEnrico Granata data_ptr++; 301ca6c8ee2SEnrico Granata } 302ca6c8ee2SEnrico Granata 303d7e6a4f2SVince Harron data_ptr = (const SourceDataType *)data.GetDataStart(); 304d07f7550SEnrico Granata } 305ca6c8ee2SEnrico Granata 306ca6c8ee2SEnrico Granata lldb::DataBufferSP utf8_data_buffer_sp; 3079091055eSJustin Lebar llvm::UTF8 *utf8_data_ptr = nullptr; 3089091055eSJustin Lebar llvm::UTF8 *utf8_data_end_ptr = nullptr; 309ca6c8ee2SEnrico Granata 310b9c1b51eSKate Stone if (ConvertFunction) { 311ca6c8ee2SEnrico Granata utf8_data_buffer_sp.reset(new DataBufferHeap(4 * bufferSPSize, 0)); 3129091055eSJustin Lebar utf8_data_ptr = (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes(); 313ca6c8ee2SEnrico Granata utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); 314b9c1b51eSKate Stone ConvertFunction(&data_ptr, data_end_ptr, &utf8_data_ptr, 3159091055eSJustin Lebar utf8_data_end_ptr, llvm::lenientConversion); 3168101f570SEnrico Granata if (false == zero_is_terminator) 3178101f570SEnrico Granata utf8_data_end_ptr = utf8_data_ptr; 3189091055eSJustin Lebar // needed because the ConvertFunction will change the value of the 3199091055eSJustin Lebar // data_ptr. 320b9c1b51eSKate Stone utf8_data_ptr = 3219091055eSJustin Lebar (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes(); 322b9c1b51eSKate Stone } else { 323b9c1b51eSKate Stone // just copy the pointers - the cast is necessary to make the compiler 324b9c1b51eSKate Stone // happy 325ca6c8ee2SEnrico Granata // but this should only happen if we are reading UTF8 data 3269091055eSJustin Lebar utf8_data_ptr = const_cast<llvm::UTF8 *>( 3279091055eSJustin Lebar reinterpret_cast<const llvm::UTF8 *>(data_ptr)); 3289091055eSJustin Lebar utf8_data_end_ptr = const_cast<llvm::UTF8 *>( 3299091055eSJustin Lebar reinterpret_cast<const llvm::UTF8 *>(data_end_ptr)); 330ca6c8ee2SEnrico Granata } 331ca6c8ee2SEnrico Granata 332d07f7550SEnrico Granata const bool escape_non_printables = dump_options.GetEscapeNonPrintables(); 333ac49453bSEnrico Granata lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; 334b9c1b51eSKate Stone if (escape_non_printables) { 335ac49453bSEnrico Granata if (Language *language = Language::FindPlugin(dump_options.GetLanguage())) 336b9c1b51eSKate Stone escaping_callback = language->GetStringPrinterEscapingHelper( 337b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::GetPrintableElementType:: 338b9c1b51eSKate Stone UTF8); 339ac49453bSEnrico Granata else 340b9c1b51eSKate Stone escaping_callback = 341b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper( 342b9c1b51eSKate Stone lldb_private::formatters::StringPrinter:: 343b9c1b51eSKate Stone GetPrintableElementType::UTF8); 344ac49453bSEnrico Granata } 345d07f7550SEnrico Granata 346ca6c8ee2SEnrico Granata // since we tend to accept partial data (and even partially malformed data) 347ca6c8ee2SEnrico Granata // we might end up with no NULL terminator before the end_ptr 348ca6c8ee2SEnrico Granata // hence we need to take a slower route and ensure we stay within boundaries 349b9c1b51eSKate Stone for (; utf8_data_ptr < utf8_data_end_ptr;) { 350d07f7550SEnrico Granata if (zero_is_terminator && !*utf8_data_ptr) 351ca6c8ee2SEnrico Granata break; 352ca6c8ee2SEnrico Granata 353b9c1b51eSKate Stone if (escape_non_printables) { 354ca6c8ee2SEnrico Granata uint8_t *next_data = nullptr; 355b9c1b51eSKate Stone auto printable = 356b9c1b51eSKate Stone escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data); 357ca6c8ee2SEnrico Granata auto printable_bytes = printable.GetBytes(); 358ca6c8ee2SEnrico Granata auto printable_size = printable.GetSize(); 359b9c1b51eSKate Stone if (!printable_bytes || !next_data) { 360b9c1b51eSKate Stone // GetPrintable() failed on us - print one byte in a desperate resync 361b9c1b51eSKate Stone // attempt 362ca6c8ee2SEnrico Granata printable_bytes = utf8_data_ptr; 363ca6c8ee2SEnrico Granata printable_size = 1; 364ca6c8ee2SEnrico Granata next_data = utf8_data_ptr + 1; 365ca6c8ee2SEnrico Granata } 3663acfe1a3SAndy Gibbs for (unsigned c = 0; c < printable_size; c++) 367ca6c8ee2SEnrico Granata stream.Printf("%c", *(printable_bytes + c)); 368ca6c8ee2SEnrico Granata utf8_data_ptr = (uint8_t *)next_data; 369b9c1b51eSKate Stone } else { 370ca6c8ee2SEnrico Granata stream.Printf("%c", *utf8_data_ptr); 371ca6c8ee2SEnrico Granata utf8_data_ptr++; 372ca6c8ee2SEnrico Granata } 373ca6c8ee2SEnrico Granata } 374ca6c8ee2SEnrico Granata } 375d07f7550SEnrico Granata if (dump_options.GetQuote() != 0) 376d07f7550SEnrico Granata stream.Printf("%c", dump_options.GetQuote()); 377d54f7fb8SEnrico Granata if (dump_options.GetSuffixToken() != 0) 378d54f7fb8SEnrico Granata stream.Printf("%s", dump_options.GetSuffixToken()); 379b7662929SEnrico Granata if (dump_options.GetIsTruncated()) 380b7662929SEnrico Granata stream.Printf("..."); 381ca6c8ee2SEnrico Granata return true; 382ca6c8ee2SEnrico Granata } 383ca6c8ee2SEnrico Granata 384b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions:: 385b9c1b51eSKate Stone ReadStringAndDumpToStreamOptions(ValueObject &valobj) 386b9c1b51eSKate Stone : ReadStringAndDumpToStreamOptions() { 387b9c1b51eSKate Stone SetEscapeNonPrintables( 388b9c1b51eSKate Stone valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 389ebdc1ac0SEnrico Granata } 390ebdc1ac0SEnrico Granata 391b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: 392b9c1b51eSKate Stone ReadBufferAndDumpToStreamOptions(ValueObject &valobj) 393b9c1b51eSKate Stone : ReadBufferAndDumpToStreamOptions() { 394b9c1b51eSKate Stone SetEscapeNonPrintables( 395b9c1b51eSKate Stone valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 396ebdc1ac0SEnrico Granata } 397ebdc1ac0SEnrico Granata 398b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: 399b9c1b51eSKate Stone ReadBufferAndDumpToStreamOptions( 400b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) 401b9c1b51eSKate Stone : ReadBufferAndDumpToStreamOptions() { 402d07f7550SEnrico Granata SetStream(options.GetStream()); 403d07f7550SEnrico Granata SetPrefixToken(options.GetPrefixToken()); 404d54f7fb8SEnrico Granata SetSuffixToken(options.GetSuffixToken()); 405d07f7550SEnrico Granata SetQuote(options.GetQuote()); 406d07f7550SEnrico Granata SetEscapeNonPrintables(options.GetEscapeNonPrintables()); 407d07f7550SEnrico Granata SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator()); 408ac49453bSEnrico Granata SetLanguage(options.GetLanguage()); 409d07f7550SEnrico Granata } 410d07f7550SEnrico Granata 411b9c1b51eSKate Stone namespace lldb_private { 412ebdc1ac0SEnrico Granata 413b9c1b51eSKate Stone namespace formatters { 414fd13743fSShawn Best 415fd13743fSShawn Best template <> 416b9c1b51eSKate Stone bool StringPrinter::ReadStringAndDumpToStream< 417b9c1b51eSKate Stone StringPrinter::StringElementType::ASCII>( 418b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) { 419fd13743fSShawn Best assert(options.GetStream() && "need a Stream to print the string to"); 420fd13743fSShawn Best Error my_error; 421fd13743fSShawn Best 422fd13743fSShawn Best ProcessSP process_sp(options.GetProcessSP()); 423fd13743fSShawn Best 424fd13743fSShawn Best if (process_sp.get() == nullptr || options.GetLocation() == 0) 425fd13743fSShawn Best return false; 426fd13743fSShawn Best 427fd13743fSShawn Best size_t size; 428b7662929SEnrico Granata const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 429b7662929SEnrico Granata bool is_truncated = false; 430fd13743fSShawn Best 431fd13743fSShawn Best if (options.GetSourceSize() == 0) 432b7662929SEnrico Granata size = max_size; 433b9c1b51eSKate Stone else if (!options.GetIgnoreMaxLength()) { 434b7662929SEnrico Granata size = options.GetSourceSize(); 435b9c1b51eSKate Stone if (size > max_size) { 436b7662929SEnrico Granata size = max_size; 437b7662929SEnrico Granata is_truncated = true; 438b7662929SEnrico Granata } 439b9c1b51eSKate Stone } else 44034042212SEnrico Granata size = options.GetSourceSize(); 441fd13743fSShawn Best 442fd13743fSShawn Best lldb::DataBufferSP buffer_sp(new DataBufferHeap(size, 0)); 443fd13743fSShawn Best 444b9c1b51eSKate Stone process_sp->ReadCStringFromMemory( 445b9c1b51eSKate Stone options.GetLocation(), (char *)buffer_sp->GetBytes(), size, my_error); 446fd13743fSShawn Best 447fd13743fSShawn Best if (my_error.Fail()) 448fd13743fSShawn Best return false; 449fd13743fSShawn Best 450d54f7fb8SEnrico Granata const char *prefix_token = options.GetPrefixToken(); 451fd13743fSShawn Best char quote = options.GetQuote(); 452fd13743fSShawn Best 453fd13743fSShawn Best if (prefix_token != 0) 454d54f7fb8SEnrico Granata options.GetStream()->Printf("%s%c", prefix_token, quote); 455fd13743fSShawn Best else if (quote != 0) 456fd13743fSShawn Best options.GetStream()->Printf("%c", quote); 457fd13743fSShawn Best 458fd13743fSShawn Best uint8_t *data_end = buffer_sp->GetBytes() + buffer_sp->GetByteSize(); 459fd13743fSShawn Best 460ac49453bSEnrico Granata const bool escape_non_printables = options.GetEscapeNonPrintables(); 461ac49453bSEnrico Granata lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; 462b9c1b51eSKate Stone if (escape_non_printables) { 463ac49453bSEnrico Granata if (Language *language = Language::FindPlugin(options.GetLanguage())) 464b9c1b51eSKate Stone escaping_callback = language->GetStringPrinterEscapingHelper( 465b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::GetPrintableElementType:: 466b9c1b51eSKate Stone ASCII); 467ac49453bSEnrico Granata else 468b9c1b51eSKate Stone escaping_callback = 469b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper( 470b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::GetPrintableElementType:: 471b9c1b51eSKate Stone ASCII); 472ac49453bSEnrico Granata } 473ac49453bSEnrico Granata 474fd13743fSShawn Best // since we tend to accept partial data (and even partially malformed data) 475fd13743fSShawn Best // we might end up with no NULL terminator before the end_ptr 476fd13743fSShawn Best // hence we need to take a slower route and ensure we stay within boundaries 477b9c1b51eSKate Stone for (uint8_t *data = buffer_sp->GetBytes(); *data && (data < data_end);) { 478b9c1b51eSKate Stone if (escape_non_printables) { 479fd13743fSShawn Best uint8_t *next_data = nullptr; 480ac49453bSEnrico Granata auto printable = escaping_callback(data, data_end, next_data); 481fd13743fSShawn Best auto printable_bytes = printable.GetBytes(); 482fd13743fSShawn Best auto printable_size = printable.GetSize(); 483b9c1b51eSKate Stone if (!printable_bytes || !next_data) { 484b9c1b51eSKate Stone // GetPrintable() failed on us - print one byte in a desperate resync 485b9c1b51eSKate Stone // attempt 486fd13743fSShawn Best printable_bytes = data; 487fd13743fSShawn Best printable_size = 1; 488fd13743fSShawn Best next_data = data + 1; 489fd13743fSShawn Best } 4903acfe1a3SAndy Gibbs for (unsigned c = 0; c < printable_size; c++) 491fd13743fSShawn Best options.GetStream()->Printf("%c", *(printable_bytes + c)); 492fd13743fSShawn Best data = (uint8_t *)next_data; 493b9c1b51eSKate Stone } else { 494fd13743fSShawn Best options.GetStream()->Printf("%c", *data); 495fd13743fSShawn Best data++; 496fd13743fSShawn Best } 497fd13743fSShawn Best } 498fd13743fSShawn Best 499d54f7fb8SEnrico Granata const char *suffix_token = options.GetSuffixToken(); 500d54f7fb8SEnrico Granata 501d54f7fb8SEnrico Granata if (suffix_token != 0) 502d54f7fb8SEnrico Granata options.GetStream()->Printf("%c%s", quote, suffix_token); 503d54f7fb8SEnrico Granata else if (quote != 0) 504fd13743fSShawn Best options.GetStream()->Printf("%c", quote); 505fd13743fSShawn Best 506b7662929SEnrico Granata if (is_truncated) 507b7662929SEnrico Granata options.GetStream()->Printf("..."); 508b7662929SEnrico Granata 509fd13743fSShawn Best return true; 510fd13743fSShawn Best } 511fd13743fSShawn Best 512ca6c8ee2SEnrico Granata template <typename SourceDataType> 513b9c1b51eSKate Stone static bool ReadUTFBufferAndDumpToStream( 514b9c1b51eSKate Stone const StringPrinter::ReadStringAndDumpToStreamOptions &options, 5159091055eSJustin Lebar llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, 5169091055eSJustin Lebar const SourceDataType *, 5179091055eSJustin Lebar llvm::UTF8 **, llvm::UTF8 *, 5189091055eSJustin Lebar llvm::ConversionFlags)) { 519ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 520ca6c8ee2SEnrico Granata 521b9c1b51eSKate Stone if (options.GetLocation() == 0 || 522b9c1b51eSKate Stone options.GetLocation() == LLDB_INVALID_ADDRESS) 523ca6c8ee2SEnrico Granata return false; 524ca6c8ee2SEnrico Granata 525ca6c8ee2SEnrico Granata lldb::ProcessSP process_sp(options.GetProcessSP()); 526ca6c8ee2SEnrico Granata 527ca6c8ee2SEnrico Granata if (!process_sp) 528ca6c8ee2SEnrico Granata return false; 529ca6c8ee2SEnrico Granata 530ca6c8ee2SEnrico Granata const int type_width = sizeof(SourceDataType); 531ca6c8ee2SEnrico Granata const int origin_encoding = 8 * type_width; 532ca6c8ee2SEnrico Granata if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) 533ca6c8ee2SEnrico Granata return false; 534ca6c8ee2SEnrico Granata // if not UTF8, I need a conversion function to return proper UTF8 535ca6c8ee2SEnrico Granata if (origin_encoding != 8 && !ConvertFunction) 536ca6c8ee2SEnrico Granata return false; 537ca6c8ee2SEnrico Granata 538ca6c8ee2SEnrico Granata if (!options.GetStream()) 539ca6c8ee2SEnrico Granata return false; 540ca6c8ee2SEnrico Granata 541ca6c8ee2SEnrico Granata uint32_t sourceSize = options.GetSourceSize(); 542ca6c8ee2SEnrico Granata bool needs_zero_terminator = options.GetNeedsZeroTermination(); 543ca6c8ee2SEnrico Granata 544b7662929SEnrico Granata bool is_truncated = false; 545b7662929SEnrico Granata const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 546b7662929SEnrico Granata 547b9c1b51eSKate Stone if (!sourceSize) { 548b7662929SEnrico Granata sourceSize = max_size; 549ca6c8ee2SEnrico Granata needs_zero_terminator = true; 550b9c1b51eSKate Stone } else if (!options.GetIgnoreMaxLength()) { 551b9c1b51eSKate Stone if (sourceSize > max_size) { 552b7662929SEnrico Granata sourceSize = max_size; 553b7662929SEnrico Granata is_truncated = true; 554b7662929SEnrico Granata } 555b7662929SEnrico Granata } 556ca6c8ee2SEnrico Granata 557ca6c8ee2SEnrico Granata const int bufferSPSize = sourceSize * type_width; 558ca6c8ee2SEnrico Granata 559ca6c8ee2SEnrico Granata lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize, 0)); 560ca6c8ee2SEnrico Granata 561ca6c8ee2SEnrico Granata if (!buffer_sp->GetBytes()) 562ca6c8ee2SEnrico Granata return false; 563ca6c8ee2SEnrico Granata 564ca6c8ee2SEnrico Granata Error error; 565ca6c8ee2SEnrico Granata char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); 566ca6c8ee2SEnrico Granata 567ca6c8ee2SEnrico Granata if (needs_zero_terminator) 568b9c1b51eSKate Stone process_sp->ReadStringFromMemory(options.GetLocation(), buffer, 569b9c1b51eSKate Stone bufferSPSize, error, type_width); 570ca6c8ee2SEnrico Granata else 571b9c1b51eSKate Stone process_sp->ReadMemoryFromInferior(options.GetLocation(), 572b9c1b51eSKate Stone (char *)buffer_sp->GetBytes(), 573b9c1b51eSKate Stone bufferSPSize, error); 574ca6c8ee2SEnrico Granata 575b9c1b51eSKate Stone if (error.Fail()) { 576ca6c8ee2SEnrico Granata options.GetStream()->Printf("unable to read data"); 577ca6c8ee2SEnrico Granata return true; 578ca6c8ee2SEnrico Granata } 579ca6c8ee2SEnrico Granata 580b9c1b51eSKate Stone DataExtractor data(buffer_sp, process_sp->GetByteOrder(), 581b9c1b51eSKate Stone process_sp->GetAddressByteSize()); 582ca6c8ee2SEnrico Granata 583ac49453bSEnrico Granata StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options); 584d07f7550SEnrico Granata dump_options.SetData(data); 585d07f7550SEnrico Granata dump_options.SetSourceSize(sourceSize); 586b7662929SEnrico Granata dump_options.SetIsTruncated(is_truncated); 587d07f7550SEnrico Granata 588d07f7550SEnrico Granata return DumpUTFBufferToStream(ConvertFunction, dump_options); 589ca6c8ee2SEnrico Granata } 590ca6c8ee2SEnrico Granata 591ca6c8ee2SEnrico Granata template <> 592b9c1b51eSKate Stone bool StringPrinter::ReadStringAndDumpToStream< 593b9c1b51eSKate Stone StringPrinter::StringElementType::UTF8>( 594b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) { 5959091055eSJustin Lebar return ReadUTFBufferAndDumpToStream<llvm::UTF8>(options, nullptr); 596ca6c8ee2SEnrico Granata } 597ca6c8ee2SEnrico Granata 598ca6c8ee2SEnrico Granata template <> 599b9c1b51eSKate Stone bool StringPrinter::ReadStringAndDumpToStream< 600b9c1b51eSKate Stone StringPrinter::StringElementType::UTF16>( 601b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) { 6029091055eSJustin Lebar return ReadUTFBufferAndDumpToStream<llvm::UTF16>(options, 6039091055eSJustin Lebar llvm::ConvertUTF16toUTF8); 604ca6c8ee2SEnrico Granata } 605ca6c8ee2SEnrico Granata 606ca6c8ee2SEnrico Granata template <> 607b9c1b51eSKate Stone bool StringPrinter::ReadStringAndDumpToStream< 608b9c1b51eSKate Stone StringPrinter::StringElementType::UTF32>( 609b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) { 6109091055eSJustin Lebar return ReadUTFBufferAndDumpToStream<llvm::UTF32>(options, 6119091055eSJustin Lebar llvm::ConvertUTF32toUTF8); 612ca6c8ee2SEnrico Granata } 613ca6c8ee2SEnrico Granata 614ca6c8ee2SEnrico Granata template <> 615b9c1b51eSKate Stone bool StringPrinter::ReadBufferAndDumpToStream< 616b9c1b51eSKate Stone StringPrinter::StringElementType::UTF8>( 617b9c1b51eSKate Stone const ReadBufferAndDumpToStreamOptions &options) { 618ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 619ca6c8ee2SEnrico Granata 6209091055eSJustin Lebar return DumpUTFBufferToStream<llvm::UTF8>(nullptr, options); 621ca6c8ee2SEnrico Granata } 622ca6c8ee2SEnrico Granata 623ca6c8ee2SEnrico Granata template <> 624b9c1b51eSKate Stone bool StringPrinter::ReadBufferAndDumpToStream< 625b9c1b51eSKate Stone StringPrinter::StringElementType::ASCII>( 626b9c1b51eSKate Stone const ReadBufferAndDumpToStreamOptions &options) { 627ca6c8ee2SEnrico Granata // treat ASCII the same as UTF8 628ca6c8ee2SEnrico Granata // FIXME: can we optimize ASCII some more? 629ca6c8ee2SEnrico Granata return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); 630ca6c8ee2SEnrico Granata } 631ca6c8ee2SEnrico Granata 632ca6c8ee2SEnrico Granata template <> 633b9c1b51eSKate Stone bool StringPrinter::ReadBufferAndDumpToStream< 634b9c1b51eSKate Stone StringPrinter::StringElementType::UTF16>( 635b9c1b51eSKate Stone const ReadBufferAndDumpToStreamOptions &options) { 636ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 637ca6c8ee2SEnrico Granata 6389091055eSJustin Lebar return DumpUTFBufferToStream(llvm::ConvertUTF16toUTF8, options); 639ca6c8ee2SEnrico Granata } 640ca6c8ee2SEnrico Granata 641ca6c8ee2SEnrico Granata template <> 642b9c1b51eSKate Stone bool StringPrinter::ReadBufferAndDumpToStream< 643b9c1b51eSKate Stone StringPrinter::StringElementType::UTF32>( 644b9c1b51eSKate Stone const ReadBufferAndDumpToStreamOptions &options) { 645ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 646ca6c8ee2SEnrico Granata 6479091055eSJustin Lebar return DumpUTFBufferToStream(llvm::ConvertUTF32toUTF8, options); 648ca6c8ee2SEnrico Granata } 649fd13743fSShawn Best 650fd13743fSShawn Best } // namespace formatters 651fd13743fSShawn Best 652fd13743fSShawn Best } // namespace lldb_private 653