180814287SRaphael Isemann //===-- StringPrinter.cpp -------------------------------------------------===// 2ca6c8ee2SEnrico Granata // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6ca6c8ee2SEnrico Granata // 7ca6c8ee2SEnrico Granata //===----------------------------------------------------------------------===// 8ca6c8ee2SEnrico Granata 9ca6c8ee2SEnrico Granata #include "lldb/DataFormatters/StringPrinter.h" 10ca6c8ee2SEnrico Granata 11ebdc1ac0SEnrico Granata #include "lldb/Core/Debugger.h" 12ebdc1ac0SEnrico Granata #include "lldb/Core/ValueObject.h" 13ac49453bSEnrico Granata #include "lldb/Target/Language.h" 14ca6c8ee2SEnrico Granata #include "lldb/Target/Process.h" 15ca6c8ee2SEnrico Granata #include "lldb/Target/Target.h" 1697206d57SZachary Turner #include "lldb/Utility/Status.h" 17ca6c8ee2SEnrico Granata 18ca6c8ee2SEnrico Granata #include "llvm/Support/ConvertUTF.h" 19ca6c8ee2SEnrico Granata 20ca6c8ee2SEnrico Granata #include <ctype.h> 21ca6c8ee2SEnrico Granata #include <locale> 22796ac80bSJonas Devlieghere #include <memory> 23ca6c8ee2SEnrico Granata 24ca6c8ee2SEnrico Granata using namespace lldb; 25ca6c8ee2SEnrico Granata using namespace lldb_private; 26ca6c8ee2SEnrico Granata using namespace lldb_private::formatters; 27*a37caebcSVedant Kumar using GetPrintableElementType = StringPrinter::GetPrintableElementType; 28*a37caebcSVedant Kumar using StringElementType = StringPrinter::StringElementType; 29*a37caebcSVedant Kumar 30*a37caebcSVedant Kumar /// DecodedCharBuffer stores the decoded contents of a single character. It 31*a37caebcSVedant Kumar /// avoids managing memory on the heap by copying decoded bytes into an in-line 32*a37caebcSVedant Kumar /// buffer. 33*a37caebcSVedant Kumar struct DecodedCharBuffer { 34*a37caebcSVedant Kumar static constexpr unsigned MaxLength = 16; 35*a37caebcSVedant Kumar 36*a37caebcSVedant Kumar public: 37*a37caebcSVedant Kumar DecodedCharBuffer(std::nullptr_t) {} 38*a37caebcSVedant Kumar 39*a37caebcSVedant Kumar DecodedCharBuffer(const uint8_t *bytes, size_t size) : m_size(size) { 40*a37caebcSVedant Kumar if (size > MaxLength) 41*a37caebcSVedant Kumar llvm_unreachable("unsupported length"); 42*a37caebcSVedant Kumar memcpy(m_data, bytes, size); 43*a37caebcSVedant Kumar } 44*a37caebcSVedant Kumar 45*a37caebcSVedant Kumar DecodedCharBuffer(const char *bytes, size_t size) 46*a37caebcSVedant Kumar : DecodedCharBuffer(reinterpret_cast<const uint8_t *>(bytes), size) {} 47*a37caebcSVedant Kumar 48*a37caebcSVedant Kumar const uint8_t *GetBytes() const { return m_data; } 49*a37caebcSVedant Kumar 50*a37caebcSVedant Kumar size_t GetSize() const { return m_size; } 51*a37caebcSVedant Kumar 52*a37caebcSVedant Kumar private: 53*a37caebcSVedant Kumar size_t m_size = 0; 54*a37caebcSVedant Kumar uint8_t m_data[MaxLength] = {0}; 55*a37caebcSVedant Kumar }; 56*a37caebcSVedant Kumar 57*a37caebcSVedant Kumar using EscapingHelper = 58*a37caebcSVedant Kumar std::function<DecodedCharBuffer(uint8_t *, uint8_t *, uint8_t *&)>; 59ca6c8ee2SEnrico Granata 6005097246SAdrian Prantl // we define this for all values of type but only implement it for those we 6105097246SAdrian Prantl // care about that's good because we get linker errors for any unsupported type 62*a37caebcSVedant Kumar template <StringElementType type> 63*a37caebcSVedant Kumar static DecodedCharBuffer 64*a37caebcSVedant Kumar GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next, 65*a37caebcSVedant Kumar StringPrinter::EscapeStyle escape_style); 66ca6c8ee2SEnrico Granata 67*a37caebcSVedant Kumar // Mimic isprint() for Unicode codepoints. 68*a37caebcSVedant Kumar static bool isprint32(char32_t codepoint) { 69ca6c8ee2SEnrico Granata if (codepoint <= 0x1F || codepoint == 0x7F) // C0 70ca6c8ee2SEnrico Granata { 71ca6c8ee2SEnrico Granata return false; 72ca6c8ee2SEnrico Granata } 73ca6c8ee2SEnrico Granata if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 74ca6c8ee2SEnrico Granata { 75ca6c8ee2SEnrico Granata return false; 76ca6c8ee2SEnrico Granata } 77ca6c8ee2SEnrico Granata if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators 78ca6c8ee2SEnrico Granata { 79ca6c8ee2SEnrico Granata return false; 80ca6c8ee2SEnrico Granata } 81b9c1b51eSKate Stone if (codepoint == 0x200E || codepoint == 0x200F || 82b9c1b51eSKate Stone (codepoint >= 0x202A && 83b9c1b51eSKate Stone codepoint <= 0x202E)) // bidirectional text control 84ca6c8ee2SEnrico Granata { 85ca6c8ee2SEnrico Granata return false; 86ca6c8ee2SEnrico Granata } 87b9c1b51eSKate Stone if (codepoint >= 0xFFF9 && 88b9c1b51eSKate Stone codepoint <= 0xFFFF) // interlinears and generally specials 89ca6c8ee2SEnrico Granata { 90ca6c8ee2SEnrico Granata return false; 91ca6c8ee2SEnrico Granata } 92ca6c8ee2SEnrico Granata return true; 93ca6c8ee2SEnrico Granata } 94ca6c8ee2SEnrico Granata 95*a37caebcSVedant Kumar DecodedCharBuffer attemptASCIIEscape(char32_t c, 96*a37caebcSVedant Kumar StringPrinter::EscapeStyle escape_style) { 97*a37caebcSVedant Kumar const bool is_swift_escape_style = 98*a37caebcSVedant Kumar escape_style == StringPrinter::EscapeStyle::Swift; 99*a37caebcSVedant Kumar switch (c) { 100da04fbb5SEnrico Granata case 0: 101*a37caebcSVedant Kumar return {"\\0", 2}; 102ca6c8ee2SEnrico Granata case '\a': 103*a37caebcSVedant Kumar return {"\\a", 2}; 104ca6c8ee2SEnrico Granata case '\b': 105*a37caebcSVedant Kumar if (is_swift_escape_style) 106*a37caebcSVedant Kumar return nullptr; 107*a37caebcSVedant Kumar return {"\\b", 2}; 108ca6c8ee2SEnrico Granata case '\f': 109*a37caebcSVedant Kumar if (is_swift_escape_style) 110*a37caebcSVedant Kumar return nullptr; 111*a37caebcSVedant Kumar return {"\\f", 2}; 112ca6c8ee2SEnrico Granata case '\n': 113*a37caebcSVedant Kumar return {"\\n", 2}; 114ca6c8ee2SEnrico Granata case '\r': 115*a37caebcSVedant Kumar return {"\\r", 2}; 116ca6c8ee2SEnrico Granata case '\t': 117*a37caebcSVedant Kumar return {"\\t", 2}; 118ca6c8ee2SEnrico Granata case '\v': 119*a37caebcSVedant Kumar if (is_swift_escape_style) 120*a37caebcSVedant Kumar return nullptr; 121*a37caebcSVedant Kumar return {"\\v", 2}; 122ca6c8ee2SEnrico Granata case '\"': 123*a37caebcSVedant Kumar return {"\\\"", 2}; 124*a37caebcSVedant Kumar case '\'': 125*a37caebcSVedant Kumar if (is_swift_escape_style) 126*a37caebcSVedant Kumar return {"\\'", 2}; 127*a37caebcSVedant Kumar return nullptr; 128ca6c8ee2SEnrico Granata case '\\': 129*a37caebcSVedant Kumar return {"\\\\", 2}; 130ca6c8ee2SEnrico Granata } 131*a37caebcSVedant Kumar return nullptr; 132ca6c8ee2SEnrico Granata } 133ca6c8ee2SEnrico Granata 134*a37caebcSVedant Kumar template <> 135*a37caebcSVedant Kumar DecodedCharBuffer GetPrintableImpl<StringElementType::ASCII>( 136*a37caebcSVedant Kumar uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next, 137*a37caebcSVedant Kumar StringPrinter::EscapeStyle escape_style) { 138*a37caebcSVedant Kumar // The ASCII helper always advances 1 byte at a time. 139ca6c8ee2SEnrico Granata next = buffer + 1; 140*a37caebcSVedant Kumar 141*a37caebcSVedant Kumar DecodedCharBuffer retval = attemptASCIIEscape(*buffer, escape_style); 142*a37caebcSVedant Kumar if (retval.GetSize()) 143ca6c8ee2SEnrico Granata return retval; 144*a37caebcSVedant Kumar if (isprint(*buffer)) 145*a37caebcSVedant Kumar return {buffer, 1}; 146*a37caebcSVedant Kumar 147*a37caebcSVedant Kumar unsigned escaped_len; 148*a37caebcSVedant Kumar constexpr unsigned max_buffer_size = 7; 149*a37caebcSVedant Kumar uint8_t data[max_buffer_size]; 150*a37caebcSVedant Kumar switch (escape_style) { 151*a37caebcSVedant Kumar case StringPrinter::EscapeStyle::CXX: 152*a37caebcSVedant Kumar // Prints 4 characters, then a \0 terminator. 153*a37caebcSVedant Kumar escaped_len = sprintf((char *)data, "\\x%02x", *buffer); 154*a37caebcSVedant Kumar break; 155*a37caebcSVedant Kumar case StringPrinter::EscapeStyle::Swift: 156*a37caebcSVedant Kumar // Prints up to 6 characters, then a \0 terminator. 157*a37caebcSVedant Kumar escaped_len = sprintf((char *)data, "\\u{%x}", *buffer); 158*a37caebcSVedant Kumar break; 159*a37caebcSVedant Kumar } 160*a37caebcSVedant Kumar lldbassert(escaped_len > 0 && "unknown string escape style"); 161*a37caebcSVedant Kumar return {data, escaped_len}; 162ca6c8ee2SEnrico Granata } 163ca6c8ee2SEnrico Granata 164b9c1b51eSKate Stone static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1) { 165ca6c8ee2SEnrico Granata return (c0 - 192) * 64 + (c1 - 128); 166ca6c8ee2SEnrico Granata } 167b9c1b51eSKate Stone static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1, 168b9c1b51eSKate Stone unsigned char c2) { 169ca6c8ee2SEnrico Granata return (c0 - 224) * 4096 + (c1 - 128) * 64 + (c2 - 128); 170ca6c8ee2SEnrico Granata } 171b9c1b51eSKate Stone static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1, 172b9c1b51eSKate Stone unsigned char c2, unsigned char c3) { 173ca6c8ee2SEnrico Granata return (c0 - 240) * 262144 + (c2 - 128) * 4096 + (c2 - 128) * 64 + (c3 - 128); 174ca6c8ee2SEnrico Granata } 175ca6c8ee2SEnrico Granata 176ca6c8ee2SEnrico Granata template <> 177*a37caebcSVedant Kumar DecodedCharBuffer GetPrintableImpl<StringElementType::UTF8>( 178*a37caebcSVedant Kumar uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next, 179*a37caebcSVedant Kumar StringPrinter::EscapeStyle escape_style) { 1807aabad13SVedant Kumar const unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer); 181ca6c8ee2SEnrico Granata 1827aabad13SVedant Kumar // If the utf8 encoded length is invalid, or if there aren't enough bytes to 1837aabad13SVedant Kumar // print, this is some kind of corrupted string. 1847aabad13SVedant Kumar if (utf8_encoded_len == 0 || utf8_encoded_len > 4) 185*a37caebcSVedant Kumar return nullptr; 1867aabad13SVedant Kumar if ((buffer_end - buffer) < utf8_encoded_len) 1877aabad13SVedant Kumar // There's no room in the buffer for the utf8 sequence. 188*a37caebcSVedant Kumar return nullptr; 189ca6c8ee2SEnrico Granata 190ca6c8ee2SEnrico Granata char32_t codepoint = 0; 191b9c1b51eSKate Stone switch (utf8_encoded_len) { 192ca6c8ee2SEnrico Granata case 1: 193ca6c8ee2SEnrico Granata // this is just an ASCII byte - ask ASCII 194*a37caebcSVedant Kumar return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next, 195*a37caebcSVedant Kumar escape_style); 196ca6c8ee2SEnrico Granata case 2: 197b9c1b51eSKate Stone codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, 198b9c1b51eSKate Stone (unsigned char)*(buffer + 1)); 199ca6c8ee2SEnrico Granata break; 200ca6c8ee2SEnrico Granata case 3: 201b9c1b51eSKate Stone codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, 202b9c1b51eSKate Stone (unsigned char)*(buffer + 1), 203b9c1b51eSKate Stone (unsigned char)*(buffer + 2)); 204ca6c8ee2SEnrico Granata break; 205ca6c8ee2SEnrico Granata case 4: 206b9c1b51eSKate Stone codepoint = ConvertUTF8ToCodePoint( 207b9c1b51eSKate Stone (unsigned char)*buffer, (unsigned char)*(buffer + 1), 208b9c1b51eSKate Stone (unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3)); 209ca6c8ee2SEnrico Granata break; 210ca6c8ee2SEnrico Granata } 211ca6c8ee2SEnrico Granata 212*a37caebcSVedant Kumar // We couldn't figure out how to print this codepoint. 213*a37caebcSVedant Kumar if (!codepoint) 214*a37caebcSVedant Kumar return nullptr; 215ca6c8ee2SEnrico Granata 216*a37caebcSVedant Kumar // The UTF8 helper always advances by the utf8 encoded length. 217ca6c8ee2SEnrico Granata next = buffer + utf8_encoded_len; 218*a37caebcSVedant Kumar DecodedCharBuffer retval = attemptASCIIEscape(codepoint, escape_style); 219*a37caebcSVedant Kumar if (retval.GetSize()) 220ca6c8ee2SEnrico Granata return retval; 221*a37caebcSVedant Kumar if (isprint32(codepoint)) 222*a37caebcSVedant Kumar return {buffer, utf8_encoded_len}; 223ca6c8ee2SEnrico Granata 224*a37caebcSVedant Kumar unsigned escaped_len; 225*a37caebcSVedant Kumar constexpr unsigned max_buffer_size = 13; 226*a37caebcSVedant Kumar uint8_t data[max_buffer_size]; 227*a37caebcSVedant Kumar switch (escape_style) { 228*a37caebcSVedant Kumar case StringPrinter::EscapeStyle::CXX: 229*a37caebcSVedant Kumar // Prints 10 characters, then a \0 terminator. 230*a37caebcSVedant Kumar escaped_len = sprintf((char *)data, "\\U%08x", (unsigned)codepoint); 231*a37caebcSVedant Kumar break; 232*a37caebcSVedant Kumar case StringPrinter::EscapeStyle::Swift: 233*a37caebcSVedant Kumar // Prints up to 12 characters, then a \0 terminator. 234*a37caebcSVedant Kumar escaped_len = sprintf((char *)data, "\\u{%x}", (unsigned)codepoint); 235*a37caebcSVedant Kumar break; 236*a37caebcSVedant Kumar } 237*a37caebcSVedant Kumar lldbassert(escaped_len > 0 && "unknown string escape style"); 238*a37caebcSVedant Kumar return {data, escaped_len}; 239ca6c8ee2SEnrico Granata } 240ca6c8ee2SEnrico Granata 24105097246SAdrian Prantl // Given a sequence of bytes, this function returns: a sequence of bytes to 24205097246SAdrian Prantl // actually print out + a length the following unscanned position of the buffer 24305097246SAdrian Prantl // is in next 244*a37caebcSVedant Kumar static DecodedCharBuffer GetPrintable(StringElementType type, uint8_t *buffer, 245*a37caebcSVedant Kumar uint8_t *buffer_end, uint8_t *&next, 246*a37caebcSVedant Kumar StringPrinter::EscapeStyle escape_style) { 2477aabad13SVedant Kumar if (!buffer || buffer >= buffer_end) 248ca6c8ee2SEnrico Granata return {nullptr}; 249ca6c8ee2SEnrico Granata 250b9c1b51eSKate Stone switch (type) { 251*a37caebcSVedant Kumar case StringElementType::ASCII: 252*a37caebcSVedant Kumar return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next, 253*a37caebcSVedant Kumar escape_style); 254*a37caebcSVedant Kumar case StringElementType::UTF8: 255*a37caebcSVedant Kumar return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next, 256*a37caebcSVedant Kumar escape_style); 257ca6c8ee2SEnrico Granata default: 258ca6c8ee2SEnrico Granata return {nullptr}; 259ca6c8ee2SEnrico Granata } 260ca6c8ee2SEnrico Granata } 261ca6c8ee2SEnrico Granata 262*a37caebcSVedant Kumar static EscapingHelper 263*a37caebcSVedant Kumar GetDefaultEscapingHelper(GetPrintableElementType elem_type, 264*a37caebcSVedant Kumar StringPrinter::EscapeStyle escape_style) { 265b9c1b51eSKate Stone switch (elem_type) { 266ac49453bSEnrico Granata case GetPrintableElementType::UTF8: 267ac49453bSEnrico Granata case GetPrintableElementType::ASCII: 268*a37caebcSVedant Kumar return [escape_style, elem_type](uint8_t *buffer, uint8_t *buffer_end, 269*a37caebcSVedant Kumar uint8_t *&next) -> DecodedCharBuffer { 270*a37caebcSVedant Kumar return GetPrintable(elem_type == GetPrintableElementType::UTF8 271*a37caebcSVedant Kumar ? StringElementType::UTF8 272*a37caebcSVedant Kumar : StringElementType::ASCII, 273*a37caebcSVedant Kumar buffer, buffer_end, next, escape_style); 274ac49453bSEnrico Granata }; 275ac49453bSEnrico Granata } 27643d3a7aeSSaleem Abdulrasool llvm_unreachable("bad element type"); 277ac49453bSEnrico Granata } 278ac49453bSEnrico Granata 279*a37caebcSVedant Kumar /// Read a string encoded in accordance with \tparam SourceDataType from a 280*a37caebcSVedant Kumar /// host-side LLDB buffer, then pretty-print it to a stream using \p style. 281ca6c8ee2SEnrico Granata template <typename SourceDataType> 282*a37caebcSVedant Kumar static bool DumpEncodedBufferToStream( 283*a37caebcSVedant Kumar GetPrintableElementType style, 2849091055eSJustin Lebar llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, 2859091055eSJustin Lebar const SourceDataType *, 2869091055eSJustin Lebar llvm::UTF8 **, llvm::UTF8 *, 2879091055eSJustin Lebar llvm::ConversionFlags), 288b9c1b51eSKate Stone const StringPrinter::ReadBufferAndDumpToStreamOptions &dump_options) { 289*a37caebcSVedant Kumar assert(dump_options.GetStream() && "need a Stream to print the string to"); 290d07f7550SEnrico Granata Stream &stream(*dump_options.GetStream()); 291248a1305SKonrad Kleine if (dump_options.GetPrefixToken() != nullptr) 292d54f7fb8SEnrico Granata stream.Printf("%s", dump_options.GetPrefixToken()); 293d07f7550SEnrico Granata if (dump_options.GetQuote() != 0) 294d07f7550SEnrico Granata stream.Printf("%c", dump_options.GetQuote()); 295d07f7550SEnrico Granata auto data(dump_options.GetData()); 296d07f7550SEnrico Granata auto source_size(dump_options.GetSourceSize()); 297b9c1b51eSKate Stone if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) { 298ca6c8ee2SEnrico Granata const int bufferSPSize = data.GetByteSize(); 299b9c1b51eSKate Stone if (dump_options.GetSourceSize() == 0) { 300ca6c8ee2SEnrico Granata const int origin_encoding = 8 * sizeof(SourceDataType); 301d07f7550SEnrico Granata source_size = bufferSPSize / (origin_encoding / 4); 302ca6c8ee2SEnrico Granata } 303ca6c8ee2SEnrico Granata 304b9c1b51eSKate Stone const SourceDataType *data_ptr = 305b9c1b51eSKate Stone (const SourceDataType *)data.GetDataStart(); 306d07f7550SEnrico Granata const SourceDataType *data_end_ptr = data_ptr + source_size; 307ca6c8ee2SEnrico Granata 308d07f7550SEnrico Granata const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator(); 309d07f7550SEnrico Granata 310b9c1b51eSKate Stone if (zero_is_terminator) { 311b9c1b51eSKate Stone while (data_ptr < data_end_ptr) { 312b9c1b51eSKate Stone if (!*data_ptr) { 313ca6c8ee2SEnrico Granata data_end_ptr = data_ptr; 314ca6c8ee2SEnrico Granata break; 315ca6c8ee2SEnrico Granata } 316ca6c8ee2SEnrico Granata data_ptr++; 317ca6c8ee2SEnrico Granata } 318ca6c8ee2SEnrico Granata 319d7e6a4f2SVince Harron data_ptr = (const SourceDataType *)data.GetDataStart(); 320d07f7550SEnrico Granata } 321ca6c8ee2SEnrico Granata 322ca6c8ee2SEnrico Granata lldb::DataBufferSP utf8_data_buffer_sp; 3239091055eSJustin Lebar llvm::UTF8 *utf8_data_ptr = nullptr; 3249091055eSJustin Lebar llvm::UTF8 *utf8_data_end_ptr = nullptr; 325ca6c8ee2SEnrico Granata 326b9c1b51eSKate Stone if (ConvertFunction) { 327796ac80bSJonas Devlieghere utf8_data_buffer_sp = 328796ac80bSJonas Devlieghere std::make_shared<DataBufferHeap>(4 * bufferSPSize, 0); 3299091055eSJustin Lebar utf8_data_ptr = (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes(); 330ca6c8ee2SEnrico Granata utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); 331b9c1b51eSKate Stone ConvertFunction(&data_ptr, data_end_ptr, &utf8_data_ptr, 3329091055eSJustin Lebar utf8_data_end_ptr, llvm::lenientConversion); 333a6682a41SJonas Devlieghere if (!zero_is_terminator) 3348101f570SEnrico Granata utf8_data_end_ptr = utf8_data_ptr; 3359091055eSJustin Lebar // needed because the ConvertFunction will change the value of the 3369091055eSJustin Lebar // data_ptr. 337b9c1b51eSKate Stone utf8_data_ptr = 3389091055eSJustin Lebar (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes(); 339b9c1b51eSKate Stone } else { 340b9c1b51eSKate Stone // just copy the pointers - the cast is necessary to make the compiler 34105097246SAdrian Prantl // happy but this should only happen if we are reading UTF8 data 3429091055eSJustin Lebar utf8_data_ptr = const_cast<llvm::UTF8 *>( 3439091055eSJustin Lebar reinterpret_cast<const llvm::UTF8 *>(data_ptr)); 3449091055eSJustin Lebar utf8_data_end_ptr = const_cast<llvm::UTF8 *>( 3459091055eSJustin Lebar reinterpret_cast<const llvm::UTF8 *>(data_end_ptr)); 346ca6c8ee2SEnrico Granata } 347ca6c8ee2SEnrico Granata 348d07f7550SEnrico Granata const bool escape_non_printables = dump_options.GetEscapeNonPrintables(); 349*a37caebcSVedant Kumar EscapingHelper escaping_callback; 350*a37caebcSVedant Kumar if (escape_non_printables) 351b9c1b51eSKate Stone escaping_callback = 352*a37caebcSVedant Kumar GetDefaultEscapingHelper(style, dump_options.GetEscapeStyle()); 353d07f7550SEnrico Granata 354ca6c8ee2SEnrico Granata // since we tend to accept partial data (and even partially malformed data) 35505097246SAdrian Prantl // we might end up with no NULL terminator before the end_ptr hence we need 35605097246SAdrian Prantl // to take a slower route and ensure we stay within boundaries 357b9c1b51eSKate Stone for (; utf8_data_ptr < utf8_data_end_ptr;) { 358d07f7550SEnrico Granata if (zero_is_terminator && !*utf8_data_ptr) 359ca6c8ee2SEnrico Granata break; 360ca6c8ee2SEnrico Granata 361b9c1b51eSKate Stone if (escape_non_printables) { 362ca6c8ee2SEnrico Granata uint8_t *next_data = nullptr; 363b9c1b51eSKate Stone auto printable = 364b9c1b51eSKate Stone escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data); 365ca6c8ee2SEnrico Granata auto printable_bytes = printable.GetBytes(); 366ca6c8ee2SEnrico Granata auto printable_size = printable.GetSize(); 3677aabad13SVedant Kumar 3687aabad13SVedant Kumar // We failed to figure out how to print this string. 3697aabad13SVedant Kumar if (!printable_bytes || !next_data) 3707aabad13SVedant Kumar return false; 3717aabad13SVedant Kumar 3723acfe1a3SAndy Gibbs for (unsigned c = 0; c < printable_size; c++) 373ca6c8ee2SEnrico Granata stream.Printf("%c", *(printable_bytes + c)); 374ca6c8ee2SEnrico Granata utf8_data_ptr = (uint8_t *)next_data; 375b9c1b51eSKate Stone } else { 376ca6c8ee2SEnrico Granata stream.Printf("%c", *utf8_data_ptr); 377ca6c8ee2SEnrico Granata utf8_data_ptr++; 378ca6c8ee2SEnrico Granata } 379ca6c8ee2SEnrico Granata } 380ca6c8ee2SEnrico Granata } 381d07f7550SEnrico Granata if (dump_options.GetQuote() != 0) 382d07f7550SEnrico Granata stream.Printf("%c", dump_options.GetQuote()); 383248a1305SKonrad Kleine if (dump_options.GetSuffixToken() != nullptr) 384d54f7fb8SEnrico Granata stream.Printf("%s", dump_options.GetSuffixToken()); 385b7662929SEnrico Granata if (dump_options.GetIsTruncated()) 386b7662929SEnrico Granata stream.Printf("..."); 387ca6c8ee2SEnrico Granata return true; 388ca6c8ee2SEnrico Granata } 389ca6c8ee2SEnrico Granata 390b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions:: 391b9c1b51eSKate Stone ReadStringAndDumpToStreamOptions(ValueObject &valobj) 392b9c1b51eSKate Stone : ReadStringAndDumpToStreamOptions() { 393b9c1b51eSKate Stone SetEscapeNonPrintables( 394b9c1b51eSKate Stone valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 395ebdc1ac0SEnrico Granata } 396ebdc1ac0SEnrico Granata 397b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: 398b9c1b51eSKate Stone ReadBufferAndDumpToStreamOptions(ValueObject &valobj) 399b9c1b51eSKate Stone : ReadBufferAndDumpToStreamOptions() { 400b9c1b51eSKate Stone SetEscapeNonPrintables( 401b9c1b51eSKate Stone valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 402ebdc1ac0SEnrico Granata } 403ebdc1ac0SEnrico Granata 404b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: 405b9c1b51eSKate Stone ReadBufferAndDumpToStreamOptions( 406b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) 407b9c1b51eSKate Stone : ReadBufferAndDumpToStreamOptions() { 408d07f7550SEnrico Granata SetStream(options.GetStream()); 409d07f7550SEnrico Granata SetPrefixToken(options.GetPrefixToken()); 410d54f7fb8SEnrico Granata SetSuffixToken(options.GetSuffixToken()); 411d07f7550SEnrico Granata SetQuote(options.GetQuote()); 412d07f7550SEnrico Granata SetEscapeNonPrintables(options.GetEscapeNonPrintables()); 413d07f7550SEnrico Granata SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator()); 414*a37caebcSVedant Kumar SetEscapeStyle(options.GetEscapeStyle()); 415d07f7550SEnrico Granata } 416d07f7550SEnrico Granata 417b9c1b51eSKate Stone namespace lldb_private { 418ebdc1ac0SEnrico Granata 419b9c1b51eSKate Stone namespace formatters { 420fd13743fSShawn Best 421ca6c8ee2SEnrico Granata template <typename SourceDataType> 422*a37caebcSVedant Kumar static bool ReadEncodedBufferAndDumpToStream( 423*a37caebcSVedant Kumar StringElementType elem_type, 424b9c1b51eSKate Stone const StringPrinter::ReadStringAndDumpToStreamOptions &options, 4259091055eSJustin Lebar llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, 4269091055eSJustin Lebar const SourceDataType *, 4279091055eSJustin Lebar llvm::UTF8 **, llvm::UTF8 *, 4289091055eSJustin Lebar llvm::ConversionFlags)) { 429ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 430*a37caebcSVedant Kumar if (!options.GetStream()) 431*a37caebcSVedant Kumar return false; 432ca6c8ee2SEnrico Granata 433b9c1b51eSKate Stone if (options.GetLocation() == 0 || 434b9c1b51eSKate Stone options.GetLocation() == LLDB_INVALID_ADDRESS) 435ca6c8ee2SEnrico Granata return false; 436ca6c8ee2SEnrico Granata 437ca6c8ee2SEnrico Granata lldb::ProcessSP process_sp(options.GetProcessSP()); 438ca6c8ee2SEnrico Granata if (!process_sp) 439ca6c8ee2SEnrico Granata return false; 440ca6c8ee2SEnrico Granata 441*a37caebcSVedant Kumar constexpr int type_width = sizeof(SourceDataType); 442*a37caebcSVedant Kumar constexpr int origin_encoding = 8 * type_width; 443ca6c8ee2SEnrico Granata if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) 444ca6c8ee2SEnrico Granata return false; 445*a37caebcSVedant Kumar // If not UTF8 or ASCII, conversion to UTF8 is necessary. 446ca6c8ee2SEnrico Granata if (origin_encoding != 8 && !ConvertFunction) 447ca6c8ee2SEnrico Granata return false; 448ca6c8ee2SEnrico Granata 449ca6c8ee2SEnrico Granata bool needs_zero_terminator = options.GetNeedsZeroTermination(); 450ca6c8ee2SEnrico Granata 451b7662929SEnrico Granata bool is_truncated = false; 452b7662929SEnrico Granata const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 453b7662929SEnrico Granata 454*a37caebcSVedant Kumar uint32_t sourceSize; 455*a37caebcSVedant Kumar if (elem_type == StringElementType::ASCII && !options.GetSourceSize()) { 456*a37caebcSVedant Kumar // FIXME: The NSString formatter sets HasSourceSize(true) when the size is 457*a37caebcSVedant Kumar // actually unknown, as well as SetBinaryZeroIsTerminator(false). IIUC the 458*a37caebcSVedant Kumar // C++ formatter also sets SetBinaryZeroIsTerminator(false) when it doesn't 459*a37caebcSVedant Kumar // mean to. I don't see how this makes sense: we should fix the formatters. 460*a37caebcSVedant Kumar // 461*a37caebcSVedant Kumar // Until then, the behavior that's expected for ASCII strings with unknown 462*a37caebcSVedant Kumar // lengths is to read up to the max size and then null-terminate. Do that. 463*a37caebcSVedant Kumar sourceSize = max_size; 464*a37caebcSVedant Kumar needs_zero_terminator = true; 465*a37caebcSVedant Kumar } else if (options.HasSourceSize()) { 4667b244258SRaphael Isemann sourceSize = options.GetSourceSize(); 4677b244258SRaphael Isemann if (!options.GetIgnoreMaxLength()) { 468b9c1b51eSKate Stone if (sourceSize > max_size) { 469b7662929SEnrico Granata sourceSize = max_size; 470b7662929SEnrico Granata is_truncated = true; 471b7662929SEnrico Granata } 472b7662929SEnrico Granata } 4737b244258SRaphael Isemann } else { 4747b244258SRaphael Isemann sourceSize = max_size; 4757b244258SRaphael Isemann needs_zero_terminator = true; 4767b244258SRaphael Isemann } 477ca6c8ee2SEnrico Granata 478ca6c8ee2SEnrico Granata const int bufferSPSize = sourceSize * type_width; 479ca6c8ee2SEnrico Granata lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize, 0)); 480ca6c8ee2SEnrico Granata 4817b244258SRaphael Isemann // Check if we got bytes. We never get any bytes if we have an empty 4827b244258SRaphael Isemann // string, but we still continue so that we end up actually printing 4837b244258SRaphael Isemann // an empty string (""). 4847b244258SRaphael Isemann if (sourceSize != 0 && !buffer_sp->GetBytes()) 485ca6c8ee2SEnrico Granata return false; 486ca6c8ee2SEnrico Granata 48797206d57SZachary Turner Status error; 488ca6c8ee2SEnrico Granata char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); 489ca6c8ee2SEnrico Granata 490*a37caebcSVedant Kumar if (elem_type == StringElementType::ASCII) 491*a37caebcSVedant Kumar process_sp->ReadCStringFromMemory(options.GetLocation(), buffer, 492*a37caebcSVedant Kumar bufferSPSize, error); 493*a37caebcSVedant Kumar else if (needs_zero_terminator) 494b9c1b51eSKate Stone process_sp->ReadStringFromMemory(options.GetLocation(), buffer, 495b9c1b51eSKate Stone bufferSPSize, error, type_width); 496ca6c8ee2SEnrico Granata else 497*a37caebcSVedant Kumar process_sp->ReadMemoryFromInferior(options.GetLocation(), buffer, 498b9c1b51eSKate Stone bufferSPSize, error); 499b9c1b51eSKate Stone if (error.Fail()) { 500ca6c8ee2SEnrico Granata options.GetStream()->Printf("unable to read data"); 501ca6c8ee2SEnrico Granata return true; 502ca6c8ee2SEnrico Granata } 503ca6c8ee2SEnrico Granata 504b9c1b51eSKate Stone DataExtractor data(buffer_sp, process_sp->GetByteOrder(), 505b9c1b51eSKate Stone process_sp->GetAddressByteSize()); 506ca6c8ee2SEnrico Granata 507ac49453bSEnrico Granata StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options); 508d07f7550SEnrico Granata dump_options.SetData(data); 509d07f7550SEnrico Granata dump_options.SetSourceSize(sourceSize); 510b7662929SEnrico Granata dump_options.SetIsTruncated(is_truncated); 511*a37caebcSVedant Kumar dump_options.SetNeedsZeroTermination(needs_zero_terminator); 512*a37caebcSVedant Kumar if (needs_zero_terminator) 513*a37caebcSVedant Kumar dump_options.SetBinaryZeroIsTerminator(true); 514d07f7550SEnrico Granata 515*a37caebcSVedant Kumar GetPrintableElementType print_style = (elem_type == StringElementType::ASCII) 516*a37caebcSVedant Kumar ? GetPrintableElementType::ASCII 517*a37caebcSVedant Kumar : GetPrintableElementType::UTF8; 518*a37caebcSVedant Kumar return DumpEncodedBufferToStream(print_style, ConvertFunction, dump_options); 519ca6c8ee2SEnrico Granata } 520ca6c8ee2SEnrico Granata 521ca6c8ee2SEnrico Granata template <> 522*a37caebcSVedant Kumar bool StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF8>( 523b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) { 524*a37caebcSVedant Kumar return ReadEncodedBufferAndDumpToStream<llvm::UTF8>(StringElementType::UTF8, 525*a37caebcSVedant Kumar options, nullptr); 526ca6c8ee2SEnrico Granata } 527ca6c8ee2SEnrico Granata 528ca6c8ee2SEnrico Granata template <> 529*a37caebcSVedant Kumar bool StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF16>( 530b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) { 531*a37caebcSVedant Kumar return ReadEncodedBufferAndDumpToStream<llvm::UTF16>( 532*a37caebcSVedant Kumar StringElementType::UTF16, options, llvm::ConvertUTF16toUTF8); 533ca6c8ee2SEnrico Granata } 534ca6c8ee2SEnrico Granata 535ca6c8ee2SEnrico Granata template <> 536*a37caebcSVedant Kumar bool StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF32>( 537b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) { 538*a37caebcSVedant Kumar return ReadEncodedBufferAndDumpToStream<llvm::UTF32>( 539*a37caebcSVedant Kumar StringElementType::UTF32, options, llvm::ConvertUTF32toUTF8); 540ca6c8ee2SEnrico Granata } 541ca6c8ee2SEnrico Granata 542ca6c8ee2SEnrico Granata template <> 543*a37caebcSVedant Kumar bool StringPrinter::ReadStringAndDumpToStream<StringElementType::ASCII>( 544*a37caebcSVedant Kumar const ReadStringAndDumpToStreamOptions &options) { 545*a37caebcSVedant Kumar return ReadEncodedBufferAndDumpToStream<char>(StringElementType::ASCII, 546*a37caebcSVedant Kumar options, nullptr); 547*a37caebcSVedant Kumar } 548*a37caebcSVedant Kumar 549*a37caebcSVedant Kumar template <> 550*a37caebcSVedant Kumar bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF8>( 551b9c1b51eSKate Stone const ReadBufferAndDumpToStreamOptions &options) { 552*a37caebcSVedant Kumar return DumpEncodedBufferToStream<llvm::UTF8>(GetPrintableElementType::UTF8, 553*a37caebcSVedant Kumar nullptr, options); 554ca6c8ee2SEnrico Granata } 555ca6c8ee2SEnrico Granata 556ca6c8ee2SEnrico Granata template <> 557*a37caebcSVedant Kumar bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF16>( 558b9c1b51eSKate Stone const ReadBufferAndDumpToStreamOptions &options) { 559*a37caebcSVedant Kumar return DumpEncodedBufferToStream(GetPrintableElementType::UTF8, 560*a37caebcSVedant Kumar llvm::ConvertUTF16toUTF8, options); 561*a37caebcSVedant Kumar } 562*a37caebcSVedant Kumar 563*a37caebcSVedant Kumar template <> 564*a37caebcSVedant Kumar bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF32>( 565*a37caebcSVedant Kumar const ReadBufferAndDumpToStreamOptions &options) { 566*a37caebcSVedant Kumar return DumpEncodedBufferToStream(GetPrintableElementType::UTF8, 567*a37caebcSVedant Kumar llvm::ConvertUTF32toUTF8, options); 568*a37caebcSVedant Kumar } 569*a37caebcSVedant Kumar 570*a37caebcSVedant Kumar template <> 571*a37caebcSVedant Kumar bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::ASCII>( 572*a37caebcSVedant Kumar const ReadBufferAndDumpToStreamOptions &options) { 573*a37caebcSVedant Kumar // Treat ASCII the same as UTF8. 574*a37caebcSVedant Kumar // 575*a37caebcSVedant Kumar // FIXME: This is probably not the right thing to do (well, it's debatable). 576*a37caebcSVedant Kumar // If an ASCII-encoded string happens to contain a sequence of invalid bytes 577*a37caebcSVedant Kumar // that forms a valid UTF8 character, we'll print out that character. This is 578*a37caebcSVedant Kumar // good if you're playing fast and loose with encodings (probably good for 579*a37caebcSVedant Kumar // std::string users), but maybe not so good if you care about your string 580*a37caebcSVedant Kumar // formatter respecting the semantics of your selected string encoding. In 581*a37caebcSVedant Kumar // the latter case you'd want to see the character byte sequence ('\x..'), not 582*a37caebcSVedant Kumar // the UTF8 character itself. 583ca6c8ee2SEnrico Granata return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); 584ca6c8ee2SEnrico Granata } 585ca6c8ee2SEnrico Granata 586fd13743fSShawn Best } // namespace formatters 587fd13743fSShawn Best 588fd13743fSShawn Best } // namespace lldb_private 589