//===-- StringPrinter.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "lldb/DataFormatters/StringPrinter.h"

#include "lldb/Core/Debugger.h"
#include "lldb/Core/ValueObject.h"
#include "lldb/Target/Language.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/Target.h"
#include "lldb/Utility/Status.h"

#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ConvertUTF.h"

#include <cstdio>
#include <ctype.h>
#include <locale>
#include <memory>

using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::formatters;
using GetPrintableElementType = StringPrinter::GetPrintableElementType;
using StringElementType = StringPrinter::StringElementType;

/// DecodedCharBuffer stores the decoded contents of a single character. It
/// avoids managing memory on the heap by copying decoded bytes into an in-line
/// buffer.
347822b8a8SVedant Kumar class DecodedCharBuffer { 35a37caebcSVedant Kumar public: 36a37caebcSVedant Kumar DecodedCharBuffer(std::nullptr_t) {} 37a37caebcSVedant Kumar 38a37caebcSVedant Kumar DecodedCharBuffer(const uint8_t *bytes, size_t size) : m_size(size) { 39a37caebcSVedant Kumar if (size > MaxLength) 40a37caebcSVedant Kumar llvm_unreachable("unsupported length"); 41a37caebcSVedant Kumar memcpy(m_data, bytes, size); 42a37caebcSVedant Kumar } 43a37caebcSVedant Kumar 44a37caebcSVedant Kumar DecodedCharBuffer(const char *bytes, size_t size) 45a37caebcSVedant Kumar : DecodedCharBuffer(reinterpret_cast<const uint8_t *>(bytes), size) {} 46a37caebcSVedant Kumar 47a37caebcSVedant Kumar const uint8_t *GetBytes() const { return m_data; } 48a37caebcSVedant Kumar 49a37caebcSVedant Kumar size_t GetSize() const { return m_size; } 50a37caebcSVedant Kumar 51a37caebcSVedant Kumar private: 527822b8a8SVedant Kumar static constexpr unsigned MaxLength = 16; 537822b8a8SVedant Kumar 54a37caebcSVedant Kumar size_t m_size = 0; 55a37caebcSVedant Kumar uint8_t m_data[MaxLength] = {0}; 56a37caebcSVedant Kumar }; 57a37caebcSVedant Kumar 58a37caebcSVedant Kumar using EscapingHelper = 59a37caebcSVedant Kumar std::function<DecodedCharBuffer(uint8_t *, uint8_t *, uint8_t *&)>; 60ca6c8ee2SEnrico Granata 6105097246SAdrian Prantl // we define this for all values of type but only implement it for those we 6205097246SAdrian Prantl // care about that's good because we get linker errors for any unsupported type 63a37caebcSVedant Kumar template <StringElementType type> 64a37caebcSVedant Kumar static DecodedCharBuffer 65a37caebcSVedant Kumar GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next, 66a37caebcSVedant Kumar StringPrinter::EscapeStyle escape_style); 67ca6c8ee2SEnrico Granata 68a37caebcSVedant Kumar // Mimic isprint() for Unicode codepoints. 
69a37caebcSVedant Kumar static bool isprint32(char32_t codepoint) { 70ca6c8ee2SEnrico Granata if (codepoint <= 0x1F || codepoint == 0x7F) // C0 71ca6c8ee2SEnrico Granata { 72ca6c8ee2SEnrico Granata return false; 73ca6c8ee2SEnrico Granata } 74ca6c8ee2SEnrico Granata if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 75ca6c8ee2SEnrico Granata { 76ca6c8ee2SEnrico Granata return false; 77ca6c8ee2SEnrico Granata } 78ca6c8ee2SEnrico Granata if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators 79ca6c8ee2SEnrico Granata { 80ca6c8ee2SEnrico Granata return false; 81ca6c8ee2SEnrico Granata } 82b9c1b51eSKate Stone if (codepoint == 0x200E || codepoint == 0x200F || 83b9c1b51eSKate Stone (codepoint >= 0x202A && 84b9c1b51eSKate Stone codepoint <= 0x202E)) // bidirectional text control 85ca6c8ee2SEnrico Granata { 86ca6c8ee2SEnrico Granata return false; 87ca6c8ee2SEnrico Granata } 88b9c1b51eSKate Stone if (codepoint >= 0xFFF9 && 89b9c1b51eSKate Stone codepoint <= 0xFFFF) // interlinears and generally specials 90ca6c8ee2SEnrico Granata { 91ca6c8ee2SEnrico Granata return false; 92ca6c8ee2SEnrico Granata } 93ca6c8ee2SEnrico Granata return true; 94ca6c8ee2SEnrico Granata } 95ca6c8ee2SEnrico Granata 96*4699a7e2SVedant Kumar DecodedCharBuffer attemptASCIIEscape(llvm::UTF32 c, 97a37caebcSVedant Kumar StringPrinter::EscapeStyle escape_style) { 98a37caebcSVedant Kumar const bool is_swift_escape_style = 99a37caebcSVedant Kumar escape_style == StringPrinter::EscapeStyle::Swift; 100a37caebcSVedant Kumar switch (c) { 101da04fbb5SEnrico Granata case 0: 102a37caebcSVedant Kumar return {"\\0", 2}; 103ca6c8ee2SEnrico Granata case '\a': 104a37caebcSVedant Kumar return {"\\a", 2}; 105ca6c8ee2SEnrico Granata case '\b': 106a37caebcSVedant Kumar if (is_swift_escape_style) 107a37caebcSVedant Kumar return nullptr; 108a37caebcSVedant Kumar return {"\\b", 2}; 109ca6c8ee2SEnrico Granata case '\f': 110a37caebcSVedant Kumar if (is_swift_escape_style) 111a37caebcSVedant Kumar return 
nullptr; 112a37caebcSVedant Kumar return {"\\f", 2}; 113ca6c8ee2SEnrico Granata case '\n': 114a37caebcSVedant Kumar return {"\\n", 2}; 115ca6c8ee2SEnrico Granata case '\r': 116a37caebcSVedant Kumar return {"\\r", 2}; 117ca6c8ee2SEnrico Granata case '\t': 118a37caebcSVedant Kumar return {"\\t", 2}; 119ca6c8ee2SEnrico Granata case '\v': 120a37caebcSVedant Kumar if (is_swift_escape_style) 121a37caebcSVedant Kumar return nullptr; 122a37caebcSVedant Kumar return {"\\v", 2}; 123ca6c8ee2SEnrico Granata case '\"': 124a37caebcSVedant Kumar return {"\\\"", 2}; 125a37caebcSVedant Kumar case '\'': 126a37caebcSVedant Kumar if (is_swift_escape_style) 127a37caebcSVedant Kumar return {"\\'", 2}; 128a37caebcSVedant Kumar return nullptr; 129ca6c8ee2SEnrico Granata case '\\': 130a37caebcSVedant Kumar return {"\\\\", 2}; 131ca6c8ee2SEnrico Granata } 132a37caebcSVedant Kumar return nullptr; 133ca6c8ee2SEnrico Granata } 134ca6c8ee2SEnrico Granata 135a37caebcSVedant Kumar template <> 136a37caebcSVedant Kumar DecodedCharBuffer GetPrintableImpl<StringElementType::ASCII>( 137a37caebcSVedant Kumar uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next, 138a37caebcSVedant Kumar StringPrinter::EscapeStyle escape_style) { 139a37caebcSVedant Kumar // The ASCII helper always advances 1 byte at a time. 140ca6c8ee2SEnrico Granata next = buffer + 1; 141a37caebcSVedant Kumar 142a37caebcSVedant Kumar DecodedCharBuffer retval = attemptASCIIEscape(*buffer, escape_style); 143a37caebcSVedant Kumar if (retval.GetSize()) 144ca6c8ee2SEnrico Granata return retval; 145*4699a7e2SVedant Kumar 146*4699a7e2SVedant Kumar // Use llvm's locale-independent isPrint(char), instead of the libc 147*4699a7e2SVedant Kumar // implementation which may give different results on different platforms. 
148*4699a7e2SVedant Kumar if (llvm::isPrint(*buffer)) 149a37caebcSVedant Kumar return {buffer, 1}; 150a37caebcSVedant Kumar 151a37caebcSVedant Kumar unsigned escaped_len; 152a37caebcSVedant Kumar constexpr unsigned max_buffer_size = 7; 153a37caebcSVedant Kumar uint8_t data[max_buffer_size]; 154a37caebcSVedant Kumar switch (escape_style) { 155a37caebcSVedant Kumar case StringPrinter::EscapeStyle::CXX: 156a37caebcSVedant Kumar // Prints 4 characters, then a \0 terminator. 157a37caebcSVedant Kumar escaped_len = sprintf((char *)data, "\\x%02x", *buffer); 158a37caebcSVedant Kumar break; 159a37caebcSVedant Kumar case StringPrinter::EscapeStyle::Swift: 160a37caebcSVedant Kumar // Prints up to 6 characters, then a \0 terminator. 161a37caebcSVedant Kumar escaped_len = sprintf((char *)data, "\\u{%x}", *buffer); 162a37caebcSVedant Kumar break; 163a37caebcSVedant Kumar } 164a37caebcSVedant Kumar lldbassert(escaped_len > 0 && "unknown string escape style"); 165a37caebcSVedant Kumar return {data, escaped_len}; 166ca6c8ee2SEnrico Granata } 167ca6c8ee2SEnrico Granata 168ca6c8ee2SEnrico Granata template <> 169a37caebcSVedant Kumar DecodedCharBuffer GetPrintableImpl<StringElementType::UTF8>( 170a37caebcSVedant Kumar uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next, 171a37caebcSVedant Kumar StringPrinter::EscapeStyle escape_style) { 172*4699a7e2SVedant Kumar // If the utf8 encoded length is invalid (i.e., not in the closed interval 173*4699a7e2SVedant Kumar // [1;4]), or if there aren't enough bytes to print, or if the subsequence 174*4699a7e2SVedant Kumar // isn't valid utf8, fall back to printing an ASCII-escaped subsequence. 175*4699a7e2SVedant Kumar if (!llvm::isLegalUTF8Sequence(buffer, buffer_end)) 176a37caebcSVedant Kumar return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next, 177a37caebcSVedant Kumar escape_style); 178ca6c8ee2SEnrico Granata 179*4699a7e2SVedant Kumar // Convert the valid utf8 sequence to a utf32 codepoint. This cannot fail. 
180*4699a7e2SVedant Kumar llvm::UTF32 codepoint = 0; 181*4699a7e2SVedant Kumar const llvm::UTF8 *buffer_for_conversion = buffer; 182*4699a7e2SVedant Kumar llvm::ConversionResult result = llvm::convertUTF8Sequence( 183*4699a7e2SVedant Kumar &buffer_for_conversion, buffer_end, &codepoint, llvm::strictConversion); 184*4699a7e2SVedant Kumar assert(result == llvm::conversionOK && 185*4699a7e2SVedant Kumar "Failed to convert legal utf8 sequence"); 186*4699a7e2SVedant Kumar (void)result; 187ca6c8ee2SEnrico Granata 188a37caebcSVedant Kumar // The UTF8 helper always advances by the utf8 encoded length. 189*4699a7e2SVedant Kumar const unsigned utf8_encoded_len = buffer_for_conversion - buffer; 190ca6c8ee2SEnrico Granata next = buffer + utf8_encoded_len; 191*4699a7e2SVedant Kumar 192a37caebcSVedant Kumar DecodedCharBuffer retval = attemptASCIIEscape(codepoint, escape_style); 193a37caebcSVedant Kumar if (retval.GetSize()) 194ca6c8ee2SEnrico Granata return retval; 195a37caebcSVedant Kumar if (isprint32(codepoint)) 196a37caebcSVedant Kumar return {buffer, utf8_encoded_len}; 197ca6c8ee2SEnrico Granata 198a37caebcSVedant Kumar unsigned escaped_len; 199a37caebcSVedant Kumar constexpr unsigned max_buffer_size = 13; 200a37caebcSVedant Kumar uint8_t data[max_buffer_size]; 201a37caebcSVedant Kumar switch (escape_style) { 202a37caebcSVedant Kumar case StringPrinter::EscapeStyle::CXX: 203a37caebcSVedant Kumar // Prints 10 characters, then a \0 terminator. 204*4699a7e2SVedant Kumar escaped_len = sprintf((char *)data, "\\U%08x", codepoint); 205a37caebcSVedant Kumar break; 206a37caebcSVedant Kumar case StringPrinter::EscapeStyle::Swift: 207a37caebcSVedant Kumar // Prints up to 12 characters, then a \0 terminator. 
208*4699a7e2SVedant Kumar escaped_len = sprintf((char *)data, "\\u{%x}", codepoint); 209a37caebcSVedant Kumar break; 210a37caebcSVedant Kumar } 211a37caebcSVedant Kumar lldbassert(escaped_len > 0 && "unknown string escape style"); 212a37caebcSVedant Kumar return {data, escaped_len}; 213ca6c8ee2SEnrico Granata } 214ca6c8ee2SEnrico Granata 21505097246SAdrian Prantl // Given a sequence of bytes, this function returns: a sequence of bytes to 21605097246SAdrian Prantl // actually print out + a length the following unscanned position of the buffer 21705097246SAdrian Prantl // is in next 218a37caebcSVedant Kumar static DecodedCharBuffer GetPrintable(StringElementType type, uint8_t *buffer, 219a37caebcSVedant Kumar uint8_t *buffer_end, uint8_t *&next, 220a37caebcSVedant Kumar StringPrinter::EscapeStyle escape_style) { 2217aabad13SVedant Kumar if (!buffer || buffer >= buffer_end) 222ca6c8ee2SEnrico Granata return {nullptr}; 223ca6c8ee2SEnrico Granata 224b9c1b51eSKate Stone switch (type) { 225a37caebcSVedant Kumar case StringElementType::ASCII: 226a37caebcSVedant Kumar return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next, 227a37caebcSVedant Kumar escape_style); 228a37caebcSVedant Kumar case StringElementType::UTF8: 229a37caebcSVedant Kumar return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next, 230a37caebcSVedant Kumar escape_style); 231ca6c8ee2SEnrico Granata default: 232ca6c8ee2SEnrico Granata return {nullptr}; 233ca6c8ee2SEnrico Granata } 234ca6c8ee2SEnrico Granata } 235ca6c8ee2SEnrico Granata 236a37caebcSVedant Kumar static EscapingHelper 237a37caebcSVedant Kumar GetDefaultEscapingHelper(GetPrintableElementType elem_type, 238a37caebcSVedant Kumar StringPrinter::EscapeStyle escape_style) { 239b9c1b51eSKate Stone switch (elem_type) { 240ac49453bSEnrico Granata case GetPrintableElementType::UTF8: 241ac49453bSEnrico Granata case GetPrintableElementType::ASCII: 242a37caebcSVedant Kumar return [escape_style, elem_type](uint8_t *buffer, 
uint8_t *buffer_end, 243a37caebcSVedant Kumar uint8_t *&next) -> DecodedCharBuffer { 244a37caebcSVedant Kumar return GetPrintable(elem_type == GetPrintableElementType::UTF8 245a37caebcSVedant Kumar ? StringElementType::UTF8 246a37caebcSVedant Kumar : StringElementType::ASCII, 247a37caebcSVedant Kumar buffer, buffer_end, next, escape_style); 248ac49453bSEnrico Granata }; 249ac49453bSEnrico Granata } 25043d3a7aeSSaleem Abdulrasool llvm_unreachable("bad element type"); 251ac49453bSEnrico Granata } 252ac49453bSEnrico Granata 253a37caebcSVedant Kumar /// Read a string encoded in accordance with \tparam SourceDataType from a 254a37caebcSVedant Kumar /// host-side LLDB buffer, then pretty-print it to a stream using \p style. 255ca6c8ee2SEnrico Granata template <typename SourceDataType> 256a37caebcSVedant Kumar static bool DumpEncodedBufferToStream( 257a37caebcSVedant Kumar GetPrintableElementType style, 2589091055eSJustin Lebar llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, 2599091055eSJustin Lebar const SourceDataType *, 2609091055eSJustin Lebar llvm::UTF8 **, llvm::UTF8 *, 2619091055eSJustin Lebar llvm::ConversionFlags), 262b9c1b51eSKate Stone const StringPrinter::ReadBufferAndDumpToStreamOptions &dump_options) { 263a37caebcSVedant Kumar assert(dump_options.GetStream() && "need a Stream to print the string to"); 264d07f7550SEnrico Granata Stream &stream(*dump_options.GetStream()); 265248a1305SKonrad Kleine if (dump_options.GetPrefixToken() != nullptr) 266d54f7fb8SEnrico Granata stream.Printf("%s", dump_options.GetPrefixToken()); 267d07f7550SEnrico Granata if (dump_options.GetQuote() != 0) 268d07f7550SEnrico Granata stream.Printf("%c", dump_options.GetQuote()); 269d07f7550SEnrico Granata auto data(dump_options.GetData()); 270d07f7550SEnrico Granata auto source_size(dump_options.GetSourceSize()); 271b9c1b51eSKate Stone if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) { 272ca6c8ee2SEnrico Granata const int bufferSPSize = 
data.GetByteSize(); 273b9c1b51eSKate Stone if (dump_options.GetSourceSize() == 0) { 274ca6c8ee2SEnrico Granata const int origin_encoding = 8 * sizeof(SourceDataType); 275d07f7550SEnrico Granata source_size = bufferSPSize / (origin_encoding / 4); 276ca6c8ee2SEnrico Granata } 277ca6c8ee2SEnrico Granata 278b9c1b51eSKate Stone const SourceDataType *data_ptr = 279b9c1b51eSKate Stone (const SourceDataType *)data.GetDataStart(); 280d07f7550SEnrico Granata const SourceDataType *data_end_ptr = data_ptr + source_size; 281ca6c8ee2SEnrico Granata 282d07f7550SEnrico Granata const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator(); 283d07f7550SEnrico Granata 284b9c1b51eSKate Stone if (zero_is_terminator) { 285b9c1b51eSKate Stone while (data_ptr < data_end_ptr) { 286b9c1b51eSKate Stone if (!*data_ptr) { 287ca6c8ee2SEnrico Granata data_end_ptr = data_ptr; 288ca6c8ee2SEnrico Granata break; 289ca6c8ee2SEnrico Granata } 290ca6c8ee2SEnrico Granata data_ptr++; 291ca6c8ee2SEnrico Granata } 292ca6c8ee2SEnrico Granata 293d7e6a4f2SVince Harron data_ptr = (const SourceDataType *)data.GetDataStart(); 294d07f7550SEnrico Granata } 295ca6c8ee2SEnrico Granata 296ca6c8ee2SEnrico Granata lldb::DataBufferSP utf8_data_buffer_sp; 2979091055eSJustin Lebar llvm::UTF8 *utf8_data_ptr = nullptr; 2989091055eSJustin Lebar llvm::UTF8 *utf8_data_end_ptr = nullptr; 299ca6c8ee2SEnrico Granata 300b9c1b51eSKate Stone if (ConvertFunction) { 301796ac80bSJonas Devlieghere utf8_data_buffer_sp = 302796ac80bSJonas Devlieghere std::make_shared<DataBufferHeap>(4 * bufferSPSize, 0); 3039091055eSJustin Lebar utf8_data_ptr = (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes(); 304ca6c8ee2SEnrico Granata utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); 305b9c1b51eSKate Stone ConvertFunction(&data_ptr, data_end_ptr, &utf8_data_ptr, 3069091055eSJustin Lebar utf8_data_end_ptr, llvm::lenientConversion); 307a6682a41SJonas Devlieghere if (!zero_is_terminator) 3088101f570SEnrico Granata 
utf8_data_end_ptr = utf8_data_ptr; 3099091055eSJustin Lebar // needed because the ConvertFunction will change the value of the 3109091055eSJustin Lebar // data_ptr. 311b9c1b51eSKate Stone utf8_data_ptr = 3129091055eSJustin Lebar (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes(); 313b9c1b51eSKate Stone } else { 314b9c1b51eSKate Stone // just copy the pointers - the cast is necessary to make the compiler 31505097246SAdrian Prantl // happy but this should only happen if we are reading UTF8 data 3169091055eSJustin Lebar utf8_data_ptr = const_cast<llvm::UTF8 *>( 3179091055eSJustin Lebar reinterpret_cast<const llvm::UTF8 *>(data_ptr)); 3189091055eSJustin Lebar utf8_data_end_ptr = const_cast<llvm::UTF8 *>( 3199091055eSJustin Lebar reinterpret_cast<const llvm::UTF8 *>(data_end_ptr)); 320ca6c8ee2SEnrico Granata } 321ca6c8ee2SEnrico Granata 322d07f7550SEnrico Granata const bool escape_non_printables = dump_options.GetEscapeNonPrintables(); 323a37caebcSVedant Kumar EscapingHelper escaping_callback; 324a37caebcSVedant Kumar if (escape_non_printables) 325b9c1b51eSKate Stone escaping_callback = 326a37caebcSVedant Kumar GetDefaultEscapingHelper(style, dump_options.GetEscapeStyle()); 327d07f7550SEnrico Granata 328ca6c8ee2SEnrico Granata // since we tend to accept partial data (and even partially malformed data) 32905097246SAdrian Prantl // we might end up with no NULL terminator before the end_ptr hence we need 33005097246SAdrian Prantl // to take a slower route and ensure we stay within boundaries 331b9c1b51eSKate Stone for (; utf8_data_ptr < utf8_data_end_ptr;) { 332d07f7550SEnrico Granata if (zero_is_terminator && !*utf8_data_ptr) 333ca6c8ee2SEnrico Granata break; 334ca6c8ee2SEnrico Granata 335b9c1b51eSKate Stone if (escape_non_printables) { 336ca6c8ee2SEnrico Granata uint8_t *next_data = nullptr; 337b9c1b51eSKate Stone auto printable = 338b9c1b51eSKate Stone escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data); 339ca6c8ee2SEnrico Granata auto printable_bytes = 
printable.GetBytes(); 340ca6c8ee2SEnrico Granata auto printable_size = printable.GetSize(); 3417aabad13SVedant Kumar 3427aabad13SVedant Kumar // We failed to figure out how to print this string. 3437aabad13SVedant Kumar if (!printable_bytes || !next_data) 3447aabad13SVedant Kumar return false; 3457aabad13SVedant Kumar 3463acfe1a3SAndy Gibbs for (unsigned c = 0; c < printable_size; c++) 347ca6c8ee2SEnrico Granata stream.Printf("%c", *(printable_bytes + c)); 348ca6c8ee2SEnrico Granata utf8_data_ptr = (uint8_t *)next_data; 349b9c1b51eSKate Stone } else { 350ca6c8ee2SEnrico Granata stream.Printf("%c", *utf8_data_ptr); 351ca6c8ee2SEnrico Granata utf8_data_ptr++; 352ca6c8ee2SEnrico Granata } 353ca6c8ee2SEnrico Granata } 354ca6c8ee2SEnrico Granata } 355d07f7550SEnrico Granata if (dump_options.GetQuote() != 0) 356d07f7550SEnrico Granata stream.Printf("%c", dump_options.GetQuote()); 357248a1305SKonrad Kleine if (dump_options.GetSuffixToken() != nullptr) 358d54f7fb8SEnrico Granata stream.Printf("%s", dump_options.GetSuffixToken()); 359b7662929SEnrico Granata if (dump_options.GetIsTruncated()) 360b7662929SEnrico Granata stream.Printf("..."); 361ca6c8ee2SEnrico Granata return true; 362ca6c8ee2SEnrico Granata } 363ca6c8ee2SEnrico Granata 364b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions:: 365b9c1b51eSKate Stone ReadStringAndDumpToStreamOptions(ValueObject &valobj) 366b9c1b51eSKate Stone : ReadStringAndDumpToStreamOptions() { 367b9c1b51eSKate Stone SetEscapeNonPrintables( 368b9c1b51eSKate Stone valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 369ebdc1ac0SEnrico Granata } 370ebdc1ac0SEnrico Granata 371b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: 372b9c1b51eSKate Stone ReadBufferAndDumpToStreamOptions(ValueObject &valobj) 373b9c1b51eSKate Stone : ReadBufferAndDumpToStreamOptions() { 374b9c1b51eSKate Stone SetEscapeNonPrintables( 375b9c1b51eSKate Stone 
valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 376ebdc1ac0SEnrico Granata } 377ebdc1ac0SEnrico Granata 378b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: 379b9c1b51eSKate Stone ReadBufferAndDumpToStreamOptions( 380b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) 381b9c1b51eSKate Stone : ReadBufferAndDumpToStreamOptions() { 382d07f7550SEnrico Granata SetStream(options.GetStream()); 383d07f7550SEnrico Granata SetPrefixToken(options.GetPrefixToken()); 384d54f7fb8SEnrico Granata SetSuffixToken(options.GetSuffixToken()); 385d07f7550SEnrico Granata SetQuote(options.GetQuote()); 386d07f7550SEnrico Granata SetEscapeNonPrintables(options.GetEscapeNonPrintables()); 387d07f7550SEnrico Granata SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator()); 388a37caebcSVedant Kumar SetEscapeStyle(options.GetEscapeStyle()); 389d07f7550SEnrico Granata } 390d07f7550SEnrico Granata 391b9c1b51eSKate Stone namespace lldb_private { 392ebdc1ac0SEnrico Granata 393b9c1b51eSKate Stone namespace formatters { 394fd13743fSShawn Best 395ca6c8ee2SEnrico Granata template <typename SourceDataType> 396a37caebcSVedant Kumar static bool ReadEncodedBufferAndDumpToStream( 397a37caebcSVedant Kumar StringElementType elem_type, 398b9c1b51eSKate Stone const StringPrinter::ReadStringAndDumpToStreamOptions &options, 3999091055eSJustin Lebar llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, 4009091055eSJustin Lebar const SourceDataType *, 4019091055eSJustin Lebar llvm::UTF8 **, llvm::UTF8 *, 4029091055eSJustin Lebar llvm::ConversionFlags)) { 403ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 404a37caebcSVedant Kumar if (!options.GetStream()) 405a37caebcSVedant Kumar return false; 406ca6c8ee2SEnrico Granata 407b9c1b51eSKate Stone if (options.GetLocation() == 0 || 408b9c1b51eSKate Stone options.GetLocation() == LLDB_INVALID_ADDRESS) 409ca6c8ee2SEnrico Granata 
return false; 410ca6c8ee2SEnrico Granata 411ca6c8ee2SEnrico Granata lldb::ProcessSP process_sp(options.GetProcessSP()); 412ca6c8ee2SEnrico Granata if (!process_sp) 413ca6c8ee2SEnrico Granata return false; 414ca6c8ee2SEnrico Granata 415a37caebcSVedant Kumar constexpr int type_width = sizeof(SourceDataType); 416a37caebcSVedant Kumar constexpr int origin_encoding = 8 * type_width; 417ca6c8ee2SEnrico Granata if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) 418ca6c8ee2SEnrico Granata return false; 419a37caebcSVedant Kumar // If not UTF8 or ASCII, conversion to UTF8 is necessary. 420ca6c8ee2SEnrico Granata if (origin_encoding != 8 && !ConvertFunction) 421ca6c8ee2SEnrico Granata return false; 422ca6c8ee2SEnrico Granata 423ca6c8ee2SEnrico Granata bool needs_zero_terminator = options.GetNeedsZeroTermination(); 424ca6c8ee2SEnrico Granata 425b7662929SEnrico Granata bool is_truncated = false; 426b7662929SEnrico Granata const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 427b7662929SEnrico Granata 428a37caebcSVedant Kumar uint32_t sourceSize; 429a37caebcSVedant Kumar if (elem_type == StringElementType::ASCII && !options.GetSourceSize()) { 430a37caebcSVedant Kumar // FIXME: The NSString formatter sets HasSourceSize(true) when the size is 431a37caebcSVedant Kumar // actually unknown, as well as SetBinaryZeroIsTerminator(false). IIUC the 432a37caebcSVedant Kumar // C++ formatter also sets SetBinaryZeroIsTerminator(false) when it doesn't 433a37caebcSVedant Kumar // mean to. I don't see how this makes sense: we should fix the formatters. 434a37caebcSVedant Kumar // 435a37caebcSVedant Kumar // Until then, the behavior that's expected for ASCII strings with unknown 436a37caebcSVedant Kumar // lengths is to read up to the max size and then null-terminate. Do that. 
437a37caebcSVedant Kumar sourceSize = max_size; 438a37caebcSVedant Kumar needs_zero_terminator = true; 439a37caebcSVedant Kumar } else if (options.HasSourceSize()) { 4407b244258SRaphael Isemann sourceSize = options.GetSourceSize(); 4417b244258SRaphael Isemann if (!options.GetIgnoreMaxLength()) { 442b9c1b51eSKate Stone if (sourceSize > max_size) { 443b7662929SEnrico Granata sourceSize = max_size; 444b7662929SEnrico Granata is_truncated = true; 445b7662929SEnrico Granata } 446b7662929SEnrico Granata } 4477b244258SRaphael Isemann } else { 4487b244258SRaphael Isemann sourceSize = max_size; 4497b244258SRaphael Isemann needs_zero_terminator = true; 4507b244258SRaphael Isemann } 451ca6c8ee2SEnrico Granata 452ca6c8ee2SEnrico Granata const int bufferSPSize = sourceSize * type_width; 453ca6c8ee2SEnrico Granata lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize, 0)); 454ca6c8ee2SEnrico Granata 4557b244258SRaphael Isemann // Check if we got bytes. We never get any bytes if we have an empty 4567b244258SRaphael Isemann // string, but we still continue so that we end up actually printing 4577b244258SRaphael Isemann // an empty string (""). 
4587b244258SRaphael Isemann if (sourceSize != 0 && !buffer_sp->GetBytes()) 459ca6c8ee2SEnrico Granata return false; 460ca6c8ee2SEnrico Granata 46197206d57SZachary Turner Status error; 462ca6c8ee2SEnrico Granata char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); 463ca6c8ee2SEnrico Granata 464a37caebcSVedant Kumar if (elem_type == StringElementType::ASCII) 465a37caebcSVedant Kumar process_sp->ReadCStringFromMemory(options.GetLocation(), buffer, 466a37caebcSVedant Kumar bufferSPSize, error); 467a37caebcSVedant Kumar else if (needs_zero_terminator) 468b9c1b51eSKate Stone process_sp->ReadStringFromMemory(options.GetLocation(), buffer, 469b9c1b51eSKate Stone bufferSPSize, error, type_width); 470ca6c8ee2SEnrico Granata else 471a37caebcSVedant Kumar process_sp->ReadMemoryFromInferior(options.GetLocation(), buffer, 472b9c1b51eSKate Stone bufferSPSize, error); 473b9c1b51eSKate Stone if (error.Fail()) { 474ca6c8ee2SEnrico Granata options.GetStream()->Printf("unable to read data"); 475ca6c8ee2SEnrico Granata return true; 476ca6c8ee2SEnrico Granata } 477ca6c8ee2SEnrico Granata 478b9c1b51eSKate Stone DataExtractor data(buffer_sp, process_sp->GetByteOrder(), 479b9c1b51eSKate Stone process_sp->GetAddressByteSize()); 480ca6c8ee2SEnrico Granata 481ac49453bSEnrico Granata StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options); 482d07f7550SEnrico Granata dump_options.SetData(data); 483d07f7550SEnrico Granata dump_options.SetSourceSize(sourceSize); 484b7662929SEnrico Granata dump_options.SetIsTruncated(is_truncated); 485a37caebcSVedant Kumar dump_options.SetNeedsZeroTermination(needs_zero_terminator); 486a37caebcSVedant Kumar if (needs_zero_terminator) 487a37caebcSVedant Kumar dump_options.SetBinaryZeroIsTerminator(true); 488d07f7550SEnrico Granata 489a37caebcSVedant Kumar GetPrintableElementType print_style = (elem_type == StringElementType::ASCII) 490a37caebcSVedant Kumar ? 
GetPrintableElementType::ASCII 491a37caebcSVedant Kumar : GetPrintableElementType::UTF8; 492a37caebcSVedant Kumar return DumpEncodedBufferToStream(print_style, ConvertFunction, dump_options); 493ca6c8ee2SEnrico Granata } 494ca6c8ee2SEnrico Granata 495ca6c8ee2SEnrico Granata template <> 496a37caebcSVedant Kumar bool StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF8>( 497b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) { 498a37caebcSVedant Kumar return ReadEncodedBufferAndDumpToStream<llvm::UTF8>(StringElementType::UTF8, 499a37caebcSVedant Kumar options, nullptr); 500ca6c8ee2SEnrico Granata } 501ca6c8ee2SEnrico Granata 502ca6c8ee2SEnrico Granata template <> 503a37caebcSVedant Kumar bool StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF16>( 504b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) { 505a37caebcSVedant Kumar return ReadEncodedBufferAndDumpToStream<llvm::UTF16>( 506a37caebcSVedant Kumar StringElementType::UTF16, options, llvm::ConvertUTF16toUTF8); 507ca6c8ee2SEnrico Granata } 508ca6c8ee2SEnrico Granata 509ca6c8ee2SEnrico Granata template <> 510a37caebcSVedant Kumar bool StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF32>( 511b9c1b51eSKate Stone const ReadStringAndDumpToStreamOptions &options) { 512a37caebcSVedant Kumar return ReadEncodedBufferAndDumpToStream<llvm::UTF32>( 513a37caebcSVedant Kumar StringElementType::UTF32, options, llvm::ConvertUTF32toUTF8); 514ca6c8ee2SEnrico Granata } 515ca6c8ee2SEnrico Granata 516ca6c8ee2SEnrico Granata template <> 517a37caebcSVedant Kumar bool StringPrinter::ReadStringAndDumpToStream<StringElementType::ASCII>( 518a37caebcSVedant Kumar const ReadStringAndDumpToStreamOptions &options) { 519a37caebcSVedant Kumar return ReadEncodedBufferAndDumpToStream<char>(StringElementType::ASCII, 520a37caebcSVedant Kumar options, nullptr); 521a37caebcSVedant Kumar } 522a37caebcSVedant Kumar 523a37caebcSVedant Kumar template <> 524a37caebcSVedant Kumar bool 
StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF8>( 525b9c1b51eSKate Stone const ReadBufferAndDumpToStreamOptions &options) { 526a37caebcSVedant Kumar return DumpEncodedBufferToStream<llvm::UTF8>(GetPrintableElementType::UTF8, 527a37caebcSVedant Kumar nullptr, options); 528ca6c8ee2SEnrico Granata } 529ca6c8ee2SEnrico Granata 530ca6c8ee2SEnrico Granata template <> 531a37caebcSVedant Kumar bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF16>( 532b9c1b51eSKate Stone const ReadBufferAndDumpToStreamOptions &options) { 533a37caebcSVedant Kumar return DumpEncodedBufferToStream(GetPrintableElementType::UTF8, 534a37caebcSVedant Kumar llvm::ConvertUTF16toUTF8, options); 535a37caebcSVedant Kumar } 536a37caebcSVedant Kumar 537a37caebcSVedant Kumar template <> 538a37caebcSVedant Kumar bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF32>( 539a37caebcSVedant Kumar const ReadBufferAndDumpToStreamOptions &options) { 540a37caebcSVedant Kumar return DumpEncodedBufferToStream(GetPrintableElementType::UTF8, 541a37caebcSVedant Kumar llvm::ConvertUTF32toUTF8, options); 542a37caebcSVedant Kumar } 543a37caebcSVedant Kumar 544a37caebcSVedant Kumar template <> 545a37caebcSVedant Kumar bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::ASCII>( 546a37caebcSVedant Kumar const ReadBufferAndDumpToStreamOptions &options) { 547a37caebcSVedant Kumar // Treat ASCII the same as UTF8. 548a37caebcSVedant Kumar // 549a37caebcSVedant Kumar // FIXME: This is probably not the right thing to do (well, it's debatable). 550a37caebcSVedant Kumar // If an ASCII-encoded string happens to contain a sequence of invalid bytes 551a37caebcSVedant Kumar // that forms a valid UTF8 character, we'll print out that character. 
This is 552a37caebcSVedant Kumar // good if you're playing fast and loose with encodings (probably good for 553a37caebcSVedant Kumar // std::string users), but maybe not so good if you care about your string 554a37caebcSVedant Kumar // formatter respecting the semantics of your selected string encoding. In 555a37caebcSVedant Kumar // the latter case you'd want to see the character byte sequence ('\x..'), not 556a37caebcSVedant Kumar // the UTF8 character itself. 557ca6c8ee2SEnrico Granata return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); 558ca6c8ee2SEnrico Granata } 559ca6c8ee2SEnrico Granata 560fd13743fSShawn Best } // namespace formatters 561fd13743fSShawn Best 562fd13743fSShawn Best } // namespace lldb_private 563