1ca6c8ee2SEnrico Granata //===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===// 2ca6c8ee2SEnrico Granata // 3ca6c8ee2SEnrico Granata // The LLVM Compiler Infrastructure 4ca6c8ee2SEnrico Granata // 5ca6c8ee2SEnrico Granata // This file is distributed under the University of Illinois Open Source 6ca6c8ee2SEnrico Granata // License. See LICENSE.TXT for details. 7ca6c8ee2SEnrico Granata // 8ca6c8ee2SEnrico Granata //===----------------------------------------------------------------------===// 9ca6c8ee2SEnrico Granata 10ca6c8ee2SEnrico Granata #include "lldb/DataFormatters/StringPrinter.h" 11ca6c8ee2SEnrico Granata 12ca6c8ee2SEnrico Granata #include "lldb/Core/DataExtractor.h" 13ebdc1ac0SEnrico Granata #include "lldb/Core/Debugger.h" 14ca6c8ee2SEnrico Granata #include "lldb/Core/Error.h" 15ebdc1ac0SEnrico Granata #include "lldb/Core/ValueObject.h" 16ac49453bSEnrico Granata #include "lldb/Target/Language.h" 17ca6c8ee2SEnrico Granata #include "lldb/Target/Process.h" 18ca6c8ee2SEnrico Granata #include "lldb/Target/Target.h" 19ca6c8ee2SEnrico Granata 20ca6c8ee2SEnrico Granata #include "llvm/Support/ConvertUTF.h" 21ca6c8ee2SEnrico Granata 22ca6c8ee2SEnrico Granata #include <ctype.h> 23ca6c8ee2SEnrico Granata #include <locale> 24ca6c8ee2SEnrico Granata 25ca6c8ee2SEnrico Granata using namespace lldb; 26ca6c8ee2SEnrico Granata using namespace lldb_private; 27ca6c8ee2SEnrico Granata using namespace lldb_private::formatters; 28ca6c8ee2SEnrico Granata 29ca6c8ee2SEnrico Granata // we define this for all values of type but only implement it for those we care about 30ca6c8ee2SEnrico Granata // that's good because we get linker errors for any unsupported type 31ac49453bSEnrico Granata template <lldb_private::formatters::StringPrinter::StringElementType type> 32ad650a18SEnrico Granata static StringPrinter::StringPrinterBufferPointer<> 33ca6c8ee2SEnrico Granata GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next); 34ca6c8ee2SEnrico Granata 35ca6c8ee2SEnrico Granata // mimic isprint() for Unicode codepoints 36ca6c8ee2SEnrico Granata static bool 37ca6c8ee2SEnrico Granata isprint(char32_t codepoint) 38ca6c8ee2SEnrico Granata { 39ca6c8ee2SEnrico Granata if (codepoint <= 0x1F || codepoint == 0x7F) // C0 40ca6c8ee2SEnrico Granata { 41ca6c8ee2SEnrico Granata return false; 42ca6c8ee2SEnrico Granata } 43ca6c8ee2SEnrico Granata if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 44ca6c8ee2SEnrico Granata { 45ca6c8ee2SEnrico Granata return false; 46ca6c8ee2SEnrico Granata } 47ca6c8ee2SEnrico Granata if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators 48ca6c8ee2SEnrico Granata { 49ca6c8ee2SEnrico Granata return false; 50ca6c8ee2SEnrico Granata } 51ca6c8ee2SEnrico Granata if (codepoint == 0x200E || codepoint == 0x200F || (codepoint >= 0x202A && codepoint <= 0x202E)) // bidirectional text control 52ca6c8ee2SEnrico Granata { 53ca6c8ee2SEnrico Granata return false; 54ca6c8ee2SEnrico Granata } 55ca6c8ee2SEnrico Granata if (codepoint >= 0xFFF9 && codepoint <= 0xFFFF) // interlinears and generally specials 56ca6c8ee2SEnrico Granata { 57ca6c8ee2SEnrico Granata return false; 58ca6c8ee2SEnrico Granata } 59ca6c8ee2SEnrico Granata return true; 60ca6c8ee2SEnrico Granata } 61ca6c8ee2SEnrico Granata 62ca6c8ee2SEnrico Granata template <> 63ad650a18SEnrico Granata StringPrinter::StringPrinterBufferPointer<> 64ac49453bSEnrico Granata GetPrintableImpl<StringPrinter::StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 65ca6c8ee2SEnrico Granata { 66ad650a18SEnrico Granata StringPrinter::StringPrinterBufferPointer<> retval = {nullptr}; 67ca6c8ee2SEnrico Granata 68ca6c8ee2SEnrico Granata switch (*buffer) 69ca6c8ee2SEnrico Granata { 70da04fbb5SEnrico Granata case 0: 71da04fbb5SEnrico Granata retval = {"\\0",2}; 72da04fbb5SEnrico Granata break; 73ca6c8ee2SEnrico Granata case '\a': 74ca6c8ee2SEnrico Granata retval = {"\\a",2}; 75ca6c8ee2SEnrico Granata break; 76ca6c8ee2SEnrico Granata case '\b': 77ca6c8ee2SEnrico Granata retval = {"\\b",2}; 78ca6c8ee2SEnrico Granata break; 79ca6c8ee2SEnrico Granata case '\f': 80ca6c8ee2SEnrico Granata retval = {"\\f",2}; 81ca6c8ee2SEnrico Granata break; 82ca6c8ee2SEnrico Granata case '\n': 83ca6c8ee2SEnrico Granata retval = {"\\n",2}; 84ca6c8ee2SEnrico Granata break; 85ca6c8ee2SEnrico Granata case '\r': 86ca6c8ee2SEnrico Granata retval = {"\\r",2}; 87ca6c8ee2SEnrico Granata break; 88ca6c8ee2SEnrico Granata case '\t': 89ca6c8ee2SEnrico Granata retval = {"\\t",2}; 90ca6c8ee2SEnrico Granata break; 91ca6c8ee2SEnrico Granata case '\v': 92ca6c8ee2SEnrico Granata retval = {"\\v",2}; 93ca6c8ee2SEnrico Granata break; 94ca6c8ee2SEnrico Granata case '\"': 95ca6c8ee2SEnrico Granata retval = {"\\\"",2}; 96ca6c8ee2SEnrico Granata break; 97ca6c8ee2SEnrico Granata case '\\': 98ca6c8ee2SEnrico Granata retval = {"\\\\",2}; 99ca6c8ee2SEnrico Granata break; 100ca6c8ee2SEnrico Granata default: 101ca6c8ee2SEnrico Granata if (isprint(*buffer)) 102ca6c8ee2SEnrico Granata retval = {buffer,1}; 103ca6c8ee2SEnrico Granata else 104ca6c8ee2SEnrico Granata { 105d7e6a4f2SVince Harron uint8_t* data = new uint8_t[5]; 106d7e6a4f2SVince Harron sprintf((char*)data,"\\x%02x",*buffer); 107d7e6a4f2SVince Harron retval = {data, 4, [] (const uint8_t* c) {delete[] c;} }; 108ca6c8ee2SEnrico Granata break; 109ca6c8ee2SEnrico Granata } 110ca6c8ee2SEnrico Granata } 111ca6c8ee2SEnrico Granata 112ca6c8ee2SEnrico Granata next = buffer + 1; 113ca6c8ee2SEnrico Granata return retval; 114ca6c8ee2SEnrico Granata } 115ca6c8ee2SEnrico Granata 116ca6c8ee2SEnrico Granata static char32_t 117ca6c8ee2SEnrico Granata ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1) 118ca6c8ee2SEnrico Granata { 119ca6c8ee2SEnrico Granata return (c0-192)*64+(c1-128); 120ca6c8ee2SEnrico Granata } 121ca6c8ee2SEnrico Granata static char32_t 122ca6c8ee2SEnrico Granata ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2) 123ca6c8ee2SEnrico Granata { 124ca6c8ee2SEnrico Granata return (c0-224)*4096+(c1-128)*64+(c2-128); 125ca6c8ee2SEnrico Granata } 126ca6c8ee2SEnrico Granata static char32_t 127ca6c8ee2SEnrico Granata ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3) 128ca6c8ee2SEnrico Granata { 129ca6c8ee2SEnrico Granata return (c0-240)*262144+(c2-128)*4096+(c2-128)*64+(c3-128); 130ca6c8ee2SEnrico Granata } 131ca6c8ee2SEnrico Granata 132ca6c8ee2SEnrico Granata template <> 133ad650a18SEnrico Granata StringPrinter::StringPrinterBufferPointer<> 134ac49453bSEnrico Granata GetPrintableImpl<StringPrinter::StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 135ca6c8ee2SEnrico Granata { 136ad650a18SEnrico Granata StringPrinter::StringPrinterBufferPointer<> retval {nullptr}; 137ca6c8ee2SEnrico Granata 138ca6c8ee2SEnrico Granata unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer); 139ca6c8ee2SEnrico Granata 140ca6c8ee2SEnrico Granata if (1+buffer_end-buffer < utf8_encoded_len) 141ca6c8ee2SEnrico Granata { 142ca6c8ee2SEnrico Granata // I don't have enough bytes - print whatever I have left 143ca6c8ee2SEnrico Granata retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)}; 144ca6c8ee2SEnrico Granata next = buffer_end+1; 145ca6c8ee2SEnrico Granata return retval; 146ca6c8ee2SEnrico Granata } 147ca6c8ee2SEnrico Granata 148ca6c8ee2SEnrico Granata char32_t codepoint = 0; 149ca6c8ee2SEnrico Granata switch (utf8_encoded_len) 150ca6c8ee2SEnrico Granata { 151ca6c8ee2SEnrico Granata case 1: 152ca6c8ee2SEnrico Granata // this is just an ASCII byte - ask ASCII 153ac49453bSEnrico Granata return GetPrintableImpl<StringPrinter::StringElementType::ASCII>(buffer, buffer_end, next); 154ca6c8ee2SEnrico Granata case 2: 155ca6c8ee2SEnrico Granata codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1)); 156ca6c8ee2SEnrico Granata break; 157ca6c8ee2SEnrico Granata case 3: 158ca6c8ee2SEnrico Granata codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2)); 159ca6c8ee2SEnrico Granata break; 160ca6c8ee2SEnrico Granata case 4: 161ca6c8ee2SEnrico Granata codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3)); 162ca6c8ee2SEnrico Granata break; 163ca6c8ee2SEnrico Granata default: 164ca6c8ee2SEnrico Granata // this is probably some bogus non-character thing 165ca6c8ee2SEnrico Granata // just print it as-is and hope to sync up again soon 166ca6c8ee2SEnrico Granata retval = {buffer,1}; 167ca6c8ee2SEnrico Granata next = buffer+1; 168ca6c8ee2SEnrico Granata return retval; 169ca6c8ee2SEnrico Granata } 170ca6c8ee2SEnrico Granata 171ca6c8ee2SEnrico Granata if (codepoint) 172ca6c8ee2SEnrico Granata { 173ca6c8ee2SEnrico Granata switch (codepoint) 174ca6c8ee2SEnrico Granata { 175da04fbb5SEnrico Granata case 0: 176da04fbb5SEnrico Granata retval = {"\\0",2}; 177da04fbb5SEnrico Granata break; 178ca6c8ee2SEnrico Granata case '\a': 179ca6c8ee2SEnrico Granata retval = {"\\a",2}; 180ca6c8ee2SEnrico Granata break; 181ca6c8ee2SEnrico Granata case '\b': 182ca6c8ee2SEnrico Granata retval = {"\\b",2}; 183ca6c8ee2SEnrico Granata break; 184ca6c8ee2SEnrico Granata case '\f': 185ca6c8ee2SEnrico Granata retval = {"\\f",2}; 186ca6c8ee2SEnrico Granata break; 187ca6c8ee2SEnrico Granata case '\n': 188ca6c8ee2SEnrico Granata retval = {"\\n",2}; 189ca6c8ee2SEnrico Granata break; 190ca6c8ee2SEnrico Granata case '\r': 191ca6c8ee2SEnrico Granata retval = {"\\r",2}; 192ca6c8ee2SEnrico Granata break; 193ca6c8ee2SEnrico Granata case '\t': 194ca6c8ee2SEnrico Granata retval = {"\\t",2}; 195ca6c8ee2SEnrico Granata break; 196ca6c8ee2SEnrico Granata case '\v': 197ca6c8ee2SEnrico Granata retval = {"\\v",2}; 198ca6c8ee2SEnrico Granata break; 199ca6c8ee2SEnrico Granata case '\"': 200ca6c8ee2SEnrico Granata retval = {"\\\"",2}; 201ca6c8ee2SEnrico Granata break; 202ca6c8ee2SEnrico Granata case '\\': 203ca6c8ee2SEnrico Granata retval = {"\\\\",2}; 204ca6c8ee2SEnrico Granata break; 205ca6c8ee2SEnrico Granata default: 206ca6c8ee2SEnrico Granata if (isprint(codepoint)) 207ca6c8ee2SEnrico Granata retval = {buffer,utf8_encoded_len}; 208ca6c8ee2SEnrico Granata else 209ca6c8ee2SEnrico Granata { 210d7e6a4f2SVince Harron uint8_t* data = new uint8_t[11]; 211d7e6a4f2SVince Harron sprintf((char*)data,"\\U%08x",codepoint); 212d7e6a4f2SVince Harron retval = { data,10,[] (const uint8_t* c) {delete[] c;} }; 213ca6c8ee2SEnrico Granata break; 214ca6c8ee2SEnrico Granata } 215ca6c8ee2SEnrico Granata } 216ca6c8ee2SEnrico Granata 217ca6c8ee2SEnrico Granata next = buffer + utf8_encoded_len; 218ca6c8ee2SEnrico Granata return retval; 219ca6c8ee2SEnrico Granata } 220ca6c8ee2SEnrico Granata 221ca6c8ee2SEnrico Granata // this should not happen - but just in case.. try to resync at some point 222ca6c8ee2SEnrico Granata retval = {buffer,1}; 223ca6c8ee2SEnrico Granata next = buffer+1; 224ca6c8ee2SEnrico Granata return retval; 225ca6c8ee2SEnrico Granata } 226ca6c8ee2SEnrico Granata 227ca6c8ee2SEnrico Granata // Given a sequence of bytes, this function returns: 228ca6c8ee2SEnrico Granata // a sequence of bytes to actually print out + a length 229ca6c8ee2SEnrico Granata // the following unscanned position of the buffer is in next 230ad650a18SEnrico Granata static StringPrinter::StringPrinterBufferPointer<> 231ac49453bSEnrico Granata GetPrintable(StringPrinter::StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 232ca6c8ee2SEnrico Granata { 233ca6c8ee2SEnrico Granata if (!buffer) 234ca6c8ee2SEnrico Granata return {nullptr}; 235ca6c8ee2SEnrico Granata 236ca6c8ee2SEnrico Granata switch (type) 237ca6c8ee2SEnrico Granata { 238ac49453bSEnrico Granata case StringPrinter::StringElementType::ASCII: 239ac49453bSEnrico Granata return GetPrintableImpl<StringPrinter::StringElementType::ASCII>(buffer, buffer_end, next); 240ac49453bSEnrico Granata case StringPrinter::StringElementType::UTF8: 241ac49453bSEnrico Granata return GetPrintableImpl<StringPrinter::StringElementType::UTF8>(buffer, buffer_end, next); 242ca6c8ee2SEnrico Granata default: 243ca6c8ee2SEnrico Granata return {nullptr}; 244ca6c8ee2SEnrico Granata } 245ca6c8ee2SEnrico Granata } 246ca6c8ee2SEnrico Granata 247ac49453bSEnrico Granata StringPrinter::EscapingHelper 248ac49453bSEnrico Granata StringPrinter::GetDefaultEscapingHelper (GetPrintableElementType elem_type) 249ac49453bSEnrico Granata { 250ac49453bSEnrico Granata switch (elem_type) 251ac49453bSEnrico Granata { 252ac49453bSEnrico Granata case GetPrintableElementType::UTF8: 253ac49453bSEnrico Granata return [] (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) -> StringPrinter::StringPrinterBufferPointer<> { 254ac49453bSEnrico Granata return GetPrintable(StringPrinter::StringElementType::UTF8, buffer, buffer_end, next); 255ac49453bSEnrico Granata }; 256ac49453bSEnrico Granata case GetPrintableElementType::ASCII: 257ac49453bSEnrico Granata return [] (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) -> StringPrinter::StringPrinterBufferPointer<> { 258ac49453bSEnrico Granata return GetPrintable(StringPrinter::StringElementType::ASCII, buffer, buffer_end, next); 259ac49453bSEnrico Granata }; 260ac49453bSEnrico Granata } 261*43d3a7aeSSaleem Abdulrasool llvm_unreachable("bad element type"); 262ac49453bSEnrico Granata } 263ac49453bSEnrico Granata 264ca6c8ee2SEnrico Granata // use this call if you already have an LLDB-side buffer for the data 265ca6c8ee2SEnrico Granata template<typename SourceDataType> 266ca6c8ee2SEnrico Granata static bool 267ca6c8ee2SEnrico Granata DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**, 268ca6c8ee2SEnrico Granata const SourceDataType*, 269ca6c8ee2SEnrico Granata UTF8**, 270ca6c8ee2SEnrico Granata UTF8*, 271ca6c8ee2SEnrico Granata ConversionFlags), 272ac49453bSEnrico Granata const StringPrinter::ReadBufferAndDumpToStreamOptions& dump_options) 273ca6c8ee2SEnrico Granata { 274d07f7550SEnrico Granata Stream &stream(*dump_options.GetStream()); 275d07f7550SEnrico Granata if (dump_options.GetPrefixToken() != 0) 276d54f7fb8SEnrico Granata stream.Printf("%s",dump_options.GetPrefixToken()); 277d07f7550SEnrico Granata if (dump_options.GetQuote() != 0) 278d07f7550SEnrico Granata stream.Printf("%c",dump_options.GetQuote()); 279d07f7550SEnrico Granata auto data(dump_options.GetData()); 280d07f7550SEnrico Granata auto source_size(dump_options.GetSourceSize()); 281ca6c8ee2SEnrico Granata if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) 282ca6c8ee2SEnrico Granata { 283ca6c8ee2SEnrico Granata const int bufferSPSize = data.GetByteSize(); 284d07f7550SEnrico Granata if (dump_options.GetSourceSize() == 0) 285ca6c8ee2SEnrico Granata { 286ca6c8ee2SEnrico Granata const int origin_encoding = 8*sizeof(SourceDataType); 287d07f7550SEnrico Granata source_size = bufferSPSize/(origin_encoding / 4); 288ca6c8ee2SEnrico Granata } 289ca6c8ee2SEnrico Granata 290d7e6a4f2SVince Harron const SourceDataType *data_ptr = (const SourceDataType*)data.GetDataStart(); 291d07f7550SEnrico Granata const SourceDataType *data_end_ptr = data_ptr + source_size; 292ca6c8ee2SEnrico Granata 293d07f7550SEnrico Granata const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator(); 294d07f7550SEnrico Granata 295d07f7550SEnrico Granata if (zero_is_terminator) 296d07f7550SEnrico Granata { 297ca6c8ee2SEnrico Granata while (data_ptr < data_end_ptr) 298ca6c8ee2SEnrico Granata { 299ca6c8ee2SEnrico Granata if (!*data_ptr) 300ca6c8ee2SEnrico Granata { 301ca6c8ee2SEnrico Granata data_end_ptr = data_ptr; 302ca6c8ee2SEnrico Granata break; 303ca6c8ee2SEnrico Granata } 304ca6c8ee2SEnrico Granata data_ptr++; 305ca6c8ee2SEnrico Granata } 306ca6c8ee2SEnrico Granata 307d7e6a4f2SVince Harron data_ptr = (const SourceDataType*)data.GetDataStart(); 308d07f7550SEnrico Granata } 309ca6c8ee2SEnrico Granata 310ca6c8ee2SEnrico Granata lldb::DataBufferSP utf8_data_buffer_sp; 311ca6c8ee2SEnrico Granata UTF8* utf8_data_ptr = nullptr; 312ca6c8ee2SEnrico Granata UTF8* utf8_data_end_ptr = nullptr; 313ca6c8ee2SEnrico Granata 314ca6c8ee2SEnrico Granata if (ConvertFunction) 315ca6c8ee2SEnrico Granata { 316ca6c8ee2SEnrico Granata utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0)); 317ca6c8ee2SEnrico Granata utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); 318ca6c8ee2SEnrico Granata utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); 319d7e6a4f2SVince Harron ConvertFunction ( &data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion ); 3208101f570SEnrico Granata if (false == zero_is_terminator) 3218101f570SEnrico Granata utf8_data_end_ptr = utf8_data_ptr; 322ca6c8ee2SEnrico Granata utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr 323ca6c8ee2SEnrico Granata } 324ca6c8ee2SEnrico Granata else 325ca6c8ee2SEnrico Granata { 326ca6c8ee2SEnrico Granata // just copy the pointers - the cast is necessary to make the compiler happy 327ca6c8ee2SEnrico Granata // but this should only happen if we are reading UTF8 data 328ba507b04SSaleem Abdulrasool utf8_data_ptr = const_cast<UTF8 *>(reinterpret_cast<const UTF8*>(data_ptr)); 329ba507b04SSaleem Abdulrasool utf8_data_end_ptr = const_cast<UTF8 *>(reinterpret_cast<const UTF8*>(data_end_ptr)); 330ca6c8ee2SEnrico Granata } 331ca6c8ee2SEnrico Granata 332d07f7550SEnrico Granata const bool escape_non_printables = dump_options.GetEscapeNonPrintables(); 333ac49453bSEnrico Granata lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; 334ac49453bSEnrico Granata if (escape_non_printables) 335ac49453bSEnrico Granata { 336ac49453bSEnrico Granata if (Language *language = Language::FindPlugin(dump_options.GetLanguage())) 337ac49453bSEnrico Granata escaping_callback = language->GetStringPrinterEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::UTF8); 338ac49453bSEnrico Granata else 339ac49453bSEnrico Granata escaping_callback = lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::UTF8); 340ac49453bSEnrico Granata } 341d07f7550SEnrico Granata 342ca6c8ee2SEnrico Granata // since we tend to accept partial data (and even partially malformed data) 343ca6c8ee2SEnrico Granata // we might end up with no NULL terminator before the end_ptr 344ca6c8ee2SEnrico Granata // hence we need to take a slower route and ensure we stay within boundaries 345ca6c8ee2SEnrico Granata for (;utf8_data_ptr < utf8_data_end_ptr;) 346ca6c8ee2SEnrico Granata { 347d07f7550SEnrico Granata if (zero_is_terminator && !*utf8_data_ptr) 348ca6c8ee2SEnrico Granata break; 349ca6c8ee2SEnrico Granata 350d07f7550SEnrico Granata if (escape_non_printables) 351ca6c8ee2SEnrico Granata { 352ca6c8ee2SEnrico Granata uint8_t* next_data = nullptr; 353ac49453bSEnrico Granata auto printable = escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data); 354ca6c8ee2SEnrico Granata auto printable_bytes = printable.GetBytes(); 355ca6c8ee2SEnrico Granata auto printable_size = printable.GetSize(); 356ca6c8ee2SEnrico Granata if (!printable_bytes || !next_data) 357ca6c8ee2SEnrico Granata { 358ca6c8ee2SEnrico Granata // GetPrintable() failed on us - print one byte in a desperate resync attempt 359ca6c8ee2SEnrico Granata printable_bytes = utf8_data_ptr; 360ca6c8ee2SEnrico Granata printable_size = 1; 361ca6c8ee2SEnrico Granata next_data = utf8_data_ptr+1; 362ca6c8ee2SEnrico Granata } 3633acfe1a3SAndy Gibbs for (unsigned c = 0; c < printable_size; c++) 364ca6c8ee2SEnrico Granata stream.Printf("%c", *(printable_bytes+c)); 365ca6c8ee2SEnrico Granata utf8_data_ptr = (uint8_t*)next_data; 366ca6c8ee2SEnrico Granata } 367ca6c8ee2SEnrico Granata else 368ca6c8ee2SEnrico Granata { 369ca6c8ee2SEnrico Granata stream.Printf("%c",*utf8_data_ptr); 370ca6c8ee2SEnrico Granata utf8_data_ptr++; 371ca6c8ee2SEnrico Granata } 372ca6c8ee2SEnrico Granata } 373ca6c8ee2SEnrico Granata } 374d07f7550SEnrico Granata if (dump_options.GetQuote() != 0) 375d07f7550SEnrico Granata stream.Printf("%c",dump_options.GetQuote()); 376d54f7fb8SEnrico Granata if (dump_options.GetSuffixToken() != 0) 377d54f7fb8SEnrico Granata stream.Printf("%s",dump_options.GetSuffixToken()); 378ca6c8ee2SEnrico Granata return true; 379ca6c8ee2SEnrico Granata } 380ca6c8ee2SEnrico Granata 381ac49453bSEnrico Granata lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions::ReadStringAndDumpToStreamOptions (ValueObject& valobj) : 382ebdc1ac0SEnrico Granata ReadStringAndDumpToStreamOptions() 383ebdc1ac0SEnrico Granata { 384ebdc1ac0SEnrico Granata SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 385ebdc1ac0SEnrico Granata } 386ebdc1ac0SEnrico Granata 387ac49453bSEnrico Granata lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (ValueObject& valobj) : 388ebdc1ac0SEnrico Granata ReadBufferAndDumpToStreamOptions() 389ebdc1ac0SEnrico Granata { 390ebdc1ac0SEnrico Granata SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 391ebdc1ac0SEnrico Granata } 392ebdc1ac0SEnrico Granata 393ac49453bSEnrico Granata lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (const ReadStringAndDumpToStreamOptions& options) : 394d07f7550SEnrico Granata ReadBufferAndDumpToStreamOptions() 395d07f7550SEnrico Granata { 396d07f7550SEnrico Granata SetStream(options.GetStream()); 397d07f7550SEnrico Granata SetPrefixToken(options.GetPrefixToken()); 398d54f7fb8SEnrico Granata SetSuffixToken(options.GetSuffixToken()); 399d07f7550SEnrico Granata SetQuote(options.GetQuote()); 400d07f7550SEnrico Granata SetEscapeNonPrintables(options.GetEscapeNonPrintables()); 401d07f7550SEnrico Granata SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator()); 402ac49453bSEnrico Granata SetLanguage(options.GetLanguage()); 403d07f7550SEnrico Granata } 404d07f7550SEnrico Granata 405ebdc1ac0SEnrico Granata 406fd13743fSShawn Best namespace lldb_private 407fd13743fSShawn Best { 408fd13743fSShawn Best 409fd13743fSShawn Best namespace formatters 410fd13743fSShawn Best { 411fd13743fSShawn Best 412fd13743fSShawn Best template <> 413fd13743fSShawn Best bool 414ac49453bSEnrico Granata StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::ASCII> (const ReadStringAndDumpToStreamOptions& options) 415fd13743fSShawn Best { 416fd13743fSShawn Best assert(options.GetStream() && "need a Stream to print the string to"); 417fd13743fSShawn Best Error my_error; 418fd13743fSShawn Best 419fd13743fSShawn Best ProcessSP process_sp(options.GetProcessSP()); 420fd13743fSShawn Best 421fd13743fSShawn Best if (process_sp.get() == nullptr || options.GetLocation() == 0) 422fd13743fSShawn Best return false; 423fd13743fSShawn Best 424fd13743fSShawn Best size_t size; 425fd13743fSShawn Best 426fd13743fSShawn Best if (options.GetSourceSize() == 0) 427fd13743fSShawn Best size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 42834042212SEnrico Granata else if (!options.GetIgnoreMaxLength()) 429fd13743fSShawn Best size = std::min(options.GetSourceSize(),process_sp->GetTarget().GetMaximumSizeOfStringSummary()); 43034042212SEnrico Granata else 43134042212SEnrico Granata size = options.GetSourceSize(); 432fd13743fSShawn Best 433fd13743fSShawn Best lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0)); 434fd13743fSShawn Best 435d7e6a4f2SVince Harron process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error); 436fd13743fSShawn Best 437fd13743fSShawn Best if (my_error.Fail()) 438fd13743fSShawn Best return false; 439fd13743fSShawn Best 440d54f7fb8SEnrico Granata const char* prefix_token = options.GetPrefixToken(); 441fd13743fSShawn Best char quote = options.GetQuote(); 442fd13743fSShawn Best 443fd13743fSShawn Best if (prefix_token != 0) 444d54f7fb8SEnrico Granata options.GetStream()->Printf("%s%c",prefix_token,quote); 445fd13743fSShawn Best else if (quote != 0) 446fd13743fSShawn Best options.GetStream()->Printf("%c",quote); 447fd13743fSShawn Best 448fd13743fSShawn Best uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize(); 449fd13743fSShawn Best 450ac49453bSEnrico Granata const bool escape_non_printables = options.GetEscapeNonPrintables(); 451ac49453bSEnrico Granata lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; 452ac49453bSEnrico Granata if (escape_non_printables) 453ac49453bSEnrico Granata { 454ac49453bSEnrico Granata if (Language *language = Language::FindPlugin(options.GetLanguage())) 455ac49453bSEnrico Granata escaping_callback = language->GetStringPrinterEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::ASCII); 456ac49453bSEnrico Granata else 457ac49453bSEnrico Granata escaping_callback = lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::ASCII); 458ac49453bSEnrico Granata } 459ac49453bSEnrico Granata 460fd13743fSShawn Best // since we tend to accept partial data (and even partially malformed data) 461fd13743fSShawn Best // we might end up with no NULL terminator before the end_ptr 462fd13743fSShawn Best // hence we need to take a slower route and ensure we stay within boundaries 463fd13743fSShawn Best for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);) 464fd13743fSShawn Best { 465ac49453bSEnrico Granata if (escape_non_printables) 466fd13743fSShawn Best { 467fd13743fSShawn Best uint8_t* next_data = nullptr; 468ac49453bSEnrico Granata auto printable = escaping_callback(data, data_end, next_data); 469fd13743fSShawn Best auto printable_bytes = printable.GetBytes(); 470fd13743fSShawn Best auto printable_size = printable.GetSize(); 471fd13743fSShawn Best if (!printable_bytes || !next_data) 472fd13743fSShawn Best { 473fd13743fSShawn Best // GetPrintable() failed on us - print one byte in a desperate resync attempt 474fd13743fSShawn Best printable_bytes = data; 475fd13743fSShawn Best printable_size = 1; 476fd13743fSShawn Best next_data = data+1; 477fd13743fSShawn Best } 4783acfe1a3SAndy Gibbs for (unsigned c = 0; c < printable_size; c++) 479fd13743fSShawn Best options.GetStream()->Printf("%c", *(printable_bytes+c)); 480fd13743fSShawn Best data = (uint8_t*)next_data; 481fd13743fSShawn Best } 482fd13743fSShawn Best else 483fd13743fSShawn Best { 484fd13743fSShawn Best options.GetStream()->Printf("%c",*data); 485fd13743fSShawn Best data++; 486fd13743fSShawn Best } 487fd13743fSShawn Best } 488fd13743fSShawn Best 489d54f7fb8SEnrico Granata const char* suffix_token = options.GetSuffixToken(); 490d54f7fb8SEnrico Granata 491d54f7fb8SEnrico Granata if (suffix_token != 0) 492d54f7fb8SEnrico Granata options.GetStream()->Printf("%c%s",quote, suffix_token); 493d54f7fb8SEnrico Granata else if (quote != 0) 494fd13743fSShawn Best options.GetStream()->Printf("%c",quote); 495fd13743fSShawn Best 496fd13743fSShawn Best return true; 497fd13743fSShawn Best } 498fd13743fSShawn Best 499ca6c8ee2SEnrico Granata template<typename SourceDataType> 500ca6c8ee2SEnrico Granata static bool 501ac49453bSEnrico Granata ReadUTFBufferAndDumpToStream (const StringPrinter::ReadStringAndDumpToStreamOptions& options, 502ca6c8ee2SEnrico Granata ConversionResult (*ConvertFunction) (const SourceDataType**, 503ca6c8ee2SEnrico Granata const SourceDataType*, 504ca6c8ee2SEnrico Granata UTF8**, 505ca6c8ee2SEnrico Granata UTF8*, 506ca6c8ee2SEnrico Granata ConversionFlags)) 507ca6c8ee2SEnrico Granata { 508ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 509ca6c8ee2SEnrico Granata 510ca6c8ee2SEnrico Granata if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS) 511ca6c8ee2SEnrico Granata return false; 512ca6c8ee2SEnrico Granata 513ca6c8ee2SEnrico Granata lldb::ProcessSP process_sp(options.GetProcessSP()); 514ca6c8ee2SEnrico Granata 515ca6c8ee2SEnrico Granata if (!process_sp) 516ca6c8ee2SEnrico Granata return false; 517ca6c8ee2SEnrico Granata 518ca6c8ee2SEnrico Granata const int type_width = sizeof(SourceDataType); 519ca6c8ee2SEnrico Granata const int origin_encoding = 8 * type_width ; 520ca6c8ee2SEnrico Granata if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) 521ca6c8ee2SEnrico Granata return false; 522ca6c8ee2SEnrico Granata // if not UTF8, I need a conversion function to return proper UTF8 523ca6c8ee2SEnrico Granata if (origin_encoding != 8 && !ConvertFunction) 524ca6c8ee2SEnrico Granata return false; 525ca6c8ee2SEnrico Granata 526ca6c8ee2SEnrico Granata if (!options.GetStream()) 527ca6c8ee2SEnrico Granata return false; 528ca6c8ee2SEnrico Granata 529ca6c8ee2SEnrico Granata uint32_t sourceSize = options.GetSourceSize(); 530ca6c8ee2SEnrico Granata bool needs_zero_terminator = options.GetNeedsZeroTermination(); 531ca6c8ee2SEnrico Granata 532ca6c8ee2SEnrico Granata if (!sourceSize) 533ca6c8ee2SEnrico Granata { 534ca6c8ee2SEnrico Granata sourceSize = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 535ca6c8ee2SEnrico Granata needs_zero_terminator = true; 536ca6c8ee2SEnrico Granata } 537b0e8a55dSEnrico Granata else if (!options.GetIgnoreMaxLength()) 538ca6c8ee2SEnrico Granata sourceSize = std::min(sourceSize,process_sp->GetTarget().GetMaximumSizeOfStringSummary()); 539ca6c8ee2SEnrico Granata 540ca6c8ee2SEnrico Granata const int bufferSPSize = sourceSize * type_width; 541ca6c8ee2SEnrico Granata 542ca6c8ee2SEnrico Granata lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0)); 543ca6c8ee2SEnrico Granata 544ca6c8ee2SEnrico Granata if (!buffer_sp->GetBytes()) 545ca6c8ee2SEnrico Granata return false; 546ca6c8ee2SEnrico Granata 547ca6c8ee2SEnrico Granata Error error; 548ca6c8ee2SEnrico Granata char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); 549ca6c8ee2SEnrico Granata 550ca6c8ee2SEnrico Granata if (needs_zero_terminator) 551d7e6a4f2SVince Harron process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width); 552ca6c8ee2SEnrico Granata else 553d7e6a4f2SVince Harron process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error); 554ca6c8ee2SEnrico Granata 555099263b4SEnrico Granata if (error.Fail()) 556ca6c8ee2SEnrico Granata { 557ca6c8ee2SEnrico Granata options.GetStream()->Printf("unable to read data"); 558ca6c8ee2SEnrico Granata return true; 559ca6c8ee2SEnrico Granata } 560ca6c8ee2SEnrico Granata 561ca6c8ee2SEnrico Granata DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize()); 562ca6c8ee2SEnrico Granata 563ac49453bSEnrico Granata StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options); 564d07f7550SEnrico Granata dump_options.SetData(data); 565d07f7550SEnrico Granata dump_options.SetSourceSize(sourceSize); 566d07f7550SEnrico Granata 567d07f7550SEnrico Granata return DumpUTFBufferToStream(ConvertFunction, dump_options); 568ca6c8ee2SEnrico Granata } 569ca6c8ee2SEnrico Granata 570ca6c8ee2SEnrico Granata template <> 571ca6c8ee2SEnrico Granata bool 572ac49453bSEnrico Granata StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::UTF8> (const ReadStringAndDumpToStreamOptions& options) 573ca6c8ee2SEnrico Granata { 574ca6c8ee2SEnrico Granata return ReadUTFBufferAndDumpToStream<UTF8>(options, 575ca6c8ee2SEnrico Granata nullptr); 576ca6c8ee2SEnrico Granata } 577ca6c8ee2SEnrico Granata 578ca6c8ee2SEnrico Granata template <> 579ca6c8ee2SEnrico Granata bool 580ac49453bSEnrico Granata StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::UTF16> (const ReadStringAndDumpToStreamOptions& options) 581ca6c8ee2SEnrico Granata { 582ca6c8ee2SEnrico Granata return ReadUTFBufferAndDumpToStream<UTF16>(options, 583ca6c8ee2SEnrico Granata ConvertUTF16toUTF8); 584ca6c8ee2SEnrico Granata } 585ca6c8ee2SEnrico Granata 586ca6c8ee2SEnrico Granata template <> 587ca6c8ee2SEnrico Granata bool 588ac49453bSEnrico Granata StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::UTF32> (const ReadStringAndDumpToStreamOptions& options) 589ca6c8ee2SEnrico Granata { 590ca6c8ee2SEnrico Granata return ReadUTFBufferAndDumpToStream<UTF32>(options, 591ca6c8ee2SEnrico Granata ConvertUTF32toUTF8); 592ca6c8ee2SEnrico Granata } 593ca6c8ee2SEnrico Granata 594ca6c8ee2SEnrico Granata template <> 595ca6c8ee2SEnrico Granata bool 596ac49453bSEnrico Granata StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::UTF8> (const ReadBufferAndDumpToStreamOptions& options) 597ca6c8ee2SEnrico Granata { 598ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 599ca6c8ee2SEnrico Granata 600d07f7550SEnrico Granata return DumpUTFBufferToStream<UTF8>(nullptr, options); 601ca6c8ee2SEnrico Granata } 602ca6c8ee2SEnrico Granata 603ca6c8ee2SEnrico Granata template <> 604ca6c8ee2SEnrico Granata bool 605ac49453bSEnrico Granata StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::ASCII> (const ReadBufferAndDumpToStreamOptions& options) 606ca6c8ee2SEnrico Granata { 607ca6c8ee2SEnrico Granata // treat ASCII the same as UTF8 608ca6c8ee2SEnrico Granata // FIXME: can we optimize ASCII some more? 609ca6c8ee2SEnrico Granata return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); 610ca6c8ee2SEnrico Granata } 611ca6c8ee2SEnrico Granata 612ca6c8ee2SEnrico Granata template <> 613ca6c8ee2SEnrico Granata bool 614ac49453bSEnrico Granata StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::UTF16> (const ReadBufferAndDumpToStreamOptions& options) 615ca6c8ee2SEnrico Granata { 616ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 617ca6c8ee2SEnrico Granata 618d07f7550SEnrico Granata return DumpUTFBufferToStream(ConvertUTF16toUTF8, options); 619ca6c8ee2SEnrico Granata } 620ca6c8ee2SEnrico Granata 621ca6c8ee2SEnrico Granata template <> 622ca6c8ee2SEnrico Granata bool 623ac49453bSEnrico Granata StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::UTF32> (const ReadBufferAndDumpToStreamOptions& options) 624ca6c8ee2SEnrico Granata { 625ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 626ca6c8ee2SEnrico Granata 627d07f7550SEnrico Granata return DumpUTFBufferToStream(ConvertUTF32toUTF8, options); 628ca6c8ee2SEnrico Granata } 629fd13743fSShawn Best 630fd13743fSShawn Best } // namespace formatters 631fd13743fSShawn Best 632fd13743fSShawn Best } // namespace lldb_private 633