1ca6c8ee2SEnrico Granata //===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===// 2ca6c8ee2SEnrico Granata // 3ca6c8ee2SEnrico Granata // The LLVM Compiler Infrastructure 4ca6c8ee2SEnrico Granata // 5ca6c8ee2SEnrico Granata // This file is distributed under the University of Illinois Open Source 6ca6c8ee2SEnrico Granata // License. See LICENSE.TXT for details. 7ca6c8ee2SEnrico Granata // 8ca6c8ee2SEnrico Granata //===----------------------------------------------------------------------===// 9ca6c8ee2SEnrico Granata 10ca6c8ee2SEnrico Granata #include "lldb/DataFormatters/StringPrinter.h" 11ca6c8ee2SEnrico Granata 12ca6c8ee2SEnrico Granata #include "lldb/Core/DataExtractor.h" 13ebdc1ac0SEnrico Granata #include "lldb/Core/Debugger.h" 14ca6c8ee2SEnrico Granata #include "lldb/Core/Error.h" 15ebdc1ac0SEnrico Granata #include "lldb/Core/ValueObject.h" 16ca6c8ee2SEnrico Granata #include "lldb/Target/Process.h" 17ca6c8ee2SEnrico Granata #include "lldb/Target/Target.h" 18ca6c8ee2SEnrico Granata 19ca6c8ee2SEnrico Granata #include "llvm/Support/ConvertUTF.h" 20ca6c8ee2SEnrico Granata 21ca6c8ee2SEnrico Granata #include <ctype.h> 22ca6c8ee2SEnrico Granata #include <functional> 23ca6c8ee2SEnrico Granata #include <locale> 24ca6c8ee2SEnrico Granata 25ca6c8ee2SEnrico Granata using namespace lldb; 26ca6c8ee2SEnrico Granata using namespace lldb_private; 27ca6c8ee2SEnrico Granata using namespace lldb_private::formatters; 28ca6c8ee2SEnrico Granata 29ca6c8ee2SEnrico Granata // I can't use a std::unique_ptr for this because the Deleter is a template argument there 30ca6c8ee2SEnrico Granata // and I want the same type to represent both pointers I want to free and pointers I don't need 31ca6c8ee2SEnrico Granata // to free - which is what this class essentially is 32ca6c8ee2SEnrico Granata // It's very specialized to the needs of this file, and not suggested for general use 33ca6c8ee2SEnrico Granata template <typename T = uint8_t, typename U = char, typename S = size_t> 34ca6c8ee2SEnrico Granata struct StringPrinterBufferPointer 35ca6c8ee2SEnrico Granata { 36ca6c8ee2SEnrico Granata public: 37ca6c8ee2SEnrico Granata 38ca6c8ee2SEnrico Granata typedef std::function<void(const T*)> Deleter; 39ca6c8ee2SEnrico Granata 40ca6c8ee2SEnrico Granata StringPrinterBufferPointer (std::nullptr_t ptr) : 41ca6c8ee2SEnrico Granata m_data(nullptr), 42ca6c8ee2SEnrico Granata m_size(0), 43ca6c8ee2SEnrico Granata m_deleter() 44ca6c8ee2SEnrico Granata {} 45ca6c8ee2SEnrico Granata 46ca6c8ee2SEnrico Granata StringPrinterBufferPointer(const T* bytes, S size, Deleter deleter = nullptr) : 47ca6c8ee2SEnrico Granata m_data(bytes), 48ca6c8ee2SEnrico Granata m_size(size), 49ca6c8ee2SEnrico Granata m_deleter(deleter) 50ca6c8ee2SEnrico Granata {} 51ca6c8ee2SEnrico Granata 52ca6c8ee2SEnrico Granata StringPrinterBufferPointer(const U* bytes, S size, Deleter deleter = nullptr) : 53ca6c8ee2SEnrico Granata m_data((T*)bytes), 54ca6c8ee2SEnrico Granata m_size(size), 55ca6c8ee2SEnrico Granata m_deleter(deleter) 56ca6c8ee2SEnrico Granata {} 57ca6c8ee2SEnrico Granata 58ca6c8ee2SEnrico Granata StringPrinterBufferPointer(StringPrinterBufferPointer&& rhs) : 59ca6c8ee2SEnrico Granata m_data(rhs.m_data), 60ca6c8ee2SEnrico Granata m_size(rhs.m_size), 61ca6c8ee2SEnrico Granata m_deleter(rhs.m_deleter) 62ca6c8ee2SEnrico Granata { 63ca6c8ee2SEnrico Granata rhs.m_data = nullptr; 64ca6c8ee2SEnrico Granata } 65ca6c8ee2SEnrico Granata 66ca6c8ee2SEnrico Granata StringPrinterBufferPointer(const StringPrinterBufferPointer& rhs) : 67ca6c8ee2SEnrico Granata m_data(rhs.m_data), 68ca6c8ee2SEnrico Granata m_size(rhs.m_size), 69ca6c8ee2SEnrico Granata m_deleter(rhs.m_deleter) 70ca6c8ee2SEnrico Granata { 71ca6c8ee2SEnrico Granata rhs.m_data = nullptr; // this is why m_data has to be mutable 72ca6c8ee2SEnrico Granata } 73ca6c8ee2SEnrico Granata 74ca6c8ee2SEnrico Granata const T* 75ca6c8ee2SEnrico Granata GetBytes () const 76ca6c8ee2SEnrico Granata { 77ca6c8ee2SEnrico Granata return m_data; 78ca6c8ee2SEnrico Granata } 79ca6c8ee2SEnrico Granata 80ca6c8ee2SEnrico Granata const S 81ca6c8ee2SEnrico Granata GetSize () const 82ca6c8ee2SEnrico Granata { 83ca6c8ee2SEnrico Granata return m_size; 84ca6c8ee2SEnrico Granata } 85ca6c8ee2SEnrico Granata 86ca6c8ee2SEnrico Granata ~StringPrinterBufferPointer () 87ca6c8ee2SEnrico Granata { 88ca6c8ee2SEnrico Granata if (m_data && m_deleter) 89ca6c8ee2SEnrico Granata m_deleter(m_data); 90ca6c8ee2SEnrico Granata m_data = nullptr; 91ca6c8ee2SEnrico Granata } 92ca6c8ee2SEnrico Granata 93ca6c8ee2SEnrico Granata StringPrinterBufferPointer& 94ca6c8ee2SEnrico Granata operator = (const StringPrinterBufferPointer& rhs) 95ca6c8ee2SEnrico Granata { 96ca6c8ee2SEnrico Granata if (m_data && m_deleter) 97ca6c8ee2SEnrico Granata m_deleter(m_data); 98ca6c8ee2SEnrico Granata m_data = rhs.m_data; 99ca6c8ee2SEnrico Granata m_size = rhs.m_size; 100ca6c8ee2SEnrico Granata m_deleter = rhs.m_deleter; 101ca6c8ee2SEnrico Granata rhs.m_data = nullptr; 102ca6c8ee2SEnrico Granata return *this; 103ca6c8ee2SEnrico Granata } 104ca6c8ee2SEnrico Granata 105ca6c8ee2SEnrico Granata private: 106ca6c8ee2SEnrico Granata mutable const T* m_data; 107ca6c8ee2SEnrico Granata size_t m_size; 108ca6c8ee2SEnrico Granata Deleter m_deleter; 109ca6c8ee2SEnrico Granata }; 110ca6c8ee2SEnrico Granata 111ca6c8ee2SEnrico Granata // we define this for all values of type but only implement it for those we care about 112ca6c8ee2SEnrico Granata // that's good because we get linker errors for any unsupported type 113ca6c8ee2SEnrico Granata template <StringElementType type> 114ca6c8ee2SEnrico Granata static StringPrinterBufferPointer<> 115ca6c8ee2SEnrico Granata GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next); 116ca6c8ee2SEnrico Granata 117ca6c8ee2SEnrico Granata // mimic isprint() for Unicode codepoints 118ca6c8ee2SEnrico Granata static bool 119ca6c8ee2SEnrico Granata isprint(char32_t codepoint) 120ca6c8ee2SEnrico Granata { 121ca6c8ee2SEnrico Granata if (codepoint <= 0x1F || codepoint == 0x7F) // C0 122ca6c8ee2SEnrico Granata { 123ca6c8ee2SEnrico Granata return false; 124ca6c8ee2SEnrico Granata } 125ca6c8ee2SEnrico Granata if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 126ca6c8ee2SEnrico Granata { 127ca6c8ee2SEnrico Granata return false; 128ca6c8ee2SEnrico Granata } 129ca6c8ee2SEnrico Granata if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators 130ca6c8ee2SEnrico Granata { 131ca6c8ee2SEnrico Granata return false; 132ca6c8ee2SEnrico Granata } 133ca6c8ee2SEnrico Granata if (codepoint == 0x200E || codepoint == 0x200F || (codepoint >= 0x202A && codepoint <= 0x202E)) // bidirectional text control 134ca6c8ee2SEnrico Granata { 135ca6c8ee2SEnrico Granata return false; 136ca6c8ee2SEnrico Granata } 137ca6c8ee2SEnrico Granata if (codepoint >= 0xFFF9 && codepoint <= 0xFFFF) // interlinears and generally specials 138ca6c8ee2SEnrico Granata { 139ca6c8ee2SEnrico Granata return false; 140ca6c8ee2SEnrico Granata } 141ca6c8ee2SEnrico Granata return true; 142ca6c8ee2SEnrico Granata } 143ca6c8ee2SEnrico Granata 144ca6c8ee2SEnrico Granata template <> 145ca6c8ee2SEnrico Granata StringPrinterBufferPointer<> 146ca6c8ee2SEnrico Granata GetPrintableImpl<StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 147ca6c8ee2SEnrico Granata { 148ca6c8ee2SEnrico Granata StringPrinterBufferPointer<> retval = {nullptr}; 149ca6c8ee2SEnrico Granata 150ca6c8ee2SEnrico Granata switch (*buffer) 151ca6c8ee2SEnrico Granata { 152da04fbb5SEnrico Granata case 0: 153da04fbb5SEnrico Granata retval = {"\\0",2}; 154da04fbb5SEnrico Granata break; 155ca6c8ee2SEnrico Granata case '\a': 156ca6c8ee2SEnrico Granata retval = {"\\a",2}; 157ca6c8ee2SEnrico Granata break; 158ca6c8ee2SEnrico Granata case '\b': 159ca6c8ee2SEnrico Granata retval = {"\\b",2}; 160ca6c8ee2SEnrico Granata break; 161ca6c8ee2SEnrico Granata case '\f': 162ca6c8ee2SEnrico Granata retval = {"\\f",2}; 163ca6c8ee2SEnrico Granata break; 164ca6c8ee2SEnrico Granata case '\n': 165ca6c8ee2SEnrico Granata retval = {"\\n",2}; 166ca6c8ee2SEnrico Granata break; 167ca6c8ee2SEnrico Granata case '\r': 168ca6c8ee2SEnrico Granata retval = {"\\r",2}; 169ca6c8ee2SEnrico Granata break; 170ca6c8ee2SEnrico Granata case '\t': 171ca6c8ee2SEnrico Granata retval = {"\\t",2}; 172ca6c8ee2SEnrico Granata break; 173ca6c8ee2SEnrico Granata case '\v': 174ca6c8ee2SEnrico Granata retval = {"\\v",2}; 175ca6c8ee2SEnrico Granata break; 176ca6c8ee2SEnrico Granata case '\"': 177ca6c8ee2SEnrico Granata retval = {"\\\"",2}; 178ca6c8ee2SEnrico Granata break; 179ca6c8ee2SEnrico Granata case '\\': 180ca6c8ee2SEnrico Granata retval = {"\\\\",2}; 181ca6c8ee2SEnrico Granata break; 182ca6c8ee2SEnrico Granata default: 183ca6c8ee2SEnrico Granata if (isprint(*buffer)) 184ca6c8ee2SEnrico Granata retval = {buffer,1}; 185ca6c8ee2SEnrico Granata else 186ca6c8ee2SEnrico Granata { 187d7e6a4f2SVince Harron uint8_t* data = new uint8_t[5]; 188d7e6a4f2SVince Harron sprintf((char*)data,"\\x%02x",*buffer); 189d7e6a4f2SVince Harron retval = {data, 4, [] (const uint8_t* c) {delete[] c;} }; 190ca6c8ee2SEnrico Granata break; 191ca6c8ee2SEnrico Granata } 192ca6c8ee2SEnrico Granata } 193ca6c8ee2SEnrico Granata 194ca6c8ee2SEnrico Granata next = buffer + 1; 195ca6c8ee2SEnrico Granata return retval; 196ca6c8ee2SEnrico Granata } 197ca6c8ee2SEnrico Granata 198ca6c8ee2SEnrico Granata static char32_t 199ca6c8ee2SEnrico Granata ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1) 200ca6c8ee2SEnrico Granata { 201ca6c8ee2SEnrico Granata return (c0-192)*64+(c1-128); 202ca6c8ee2SEnrico Granata } 203ca6c8ee2SEnrico Granata static char32_t 204ca6c8ee2SEnrico Granata ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2) 205ca6c8ee2SEnrico Granata { 206ca6c8ee2SEnrico Granata return (c0-224)*4096+(c1-128)*64+(c2-128); 207ca6c8ee2SEnrico Granata } 208ca6c8ee2SEnrico Granata static char32_t 209ca6c8ee2SEnrico Granata ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3) 210ca6c8ee2SEnrico Granata { 211ca6c8ee2SEnrico Granata return (c0-240)*262144+(c2-128)*4096+(c2-128)*64+(c3-128); 212ca6c8ee2SEnrico Granata } 213ca6c8ee2SEnrico Granata 214ca6c8ee2SEnrico Granata template <> 215ca6c8ee2SEnrico Granata StringPrinterBufferPointer<> 216ca6c8ee2SEnrico Granata GetPrintableImpl<StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 217ca6c8ee2SEnrico Granata { 218ca6c8ee2SEnrico Granata StringPrinterBufferPointer<> retval {nullptr}; 219ca6c8ee2SEnrico Granata 220ca6c8ee2SEnrico Granata unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer); 221ca6c8ee2SEnrico Granata 222ca6c8ee2SEnrico Granata if (1+buffer_end-buffer < utf8_encoded_len) 223ca6c8ee2SEnrico Granata { 224ca6c8ee2SEnrico Granata // I don't have enough bytes - print whatever I have left 225ca6c8ee2SEnrico Granata retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)}; 226ca6c8ee2SEnrico Granata next = buffer_end+1; 227ca6c8ee2SEnrico Granata return retval; 228ca6c8ee2SEnrico Granata } 229ca6c8ee2SEnrico Granata 230ca6c8ee2SEnrico Granata char32_t codepoint = 0; 231ca6c8ee2SEnrico Granata switch (utf8_encoded_len) 232ca6c8ee2SEnrico Granata { 233ca6c8ee2SEnrico Granata case 1: 234ca6c8ee2SEnrico Granata // this is just an ASCII byte - ask ASCII 235ca6c8ee2SEnrico Granata return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next); 236ca6c8ee2SEnrico Granata case 2: 237ca6c8ee2SEnrico Granata codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1)); 238ca6c8ee2SEnrico Granata break; 239ca6c8ee2SEnrico Granata case 3: 240ca6c8ee2SEnrico Granata codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2)); 241ca6c8ee2SEnrico Granata break; 242ca6c8ee2SEnrico Granata case 4: 243ca6c8ee2SEnrico Granata codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3)); 244ca6c8ee2SEnrico Granata break; 245ca6c8ee2SEnrico Granata default: 246ca6c8ee2SEnrico Granata // this is probably some bogus non-character thing 247ca6c8ee2SEnrico Granata // just print it as-is and hope to sync up again soon 248ca6c8ee2SEnrico Granata retval = {buffer,1}; 249ca6c8ee2SEnrico Granata next = buffer+1; 250ca6c8ee2SEnrico Granata return retval; 251ca6c8ee2SEnrico Granata } 252ca6c8ee2SEnrico Granata 253ca6c8ee2SEnrico Granata if (codepoint) 254ca6c8ee2SEnrico Granata { 255ca6c8ee2SEnrico Granata switch (codepoint) 256ca6c8ee2SEnrico Granata { 257da04fbb5SEnrico Granata case 0: 258da04fbb5SEnrico Granata retval = {"\\0",2}; 259da04fbb5SEnrico Granata break; 260ca6c8ee2SEnrico Granata case '\a': 261ca6c8ee2SEnrico Granata retval = {"\\a",2}; 262ca6c8ee2SEnrico Granata break; 263ca6c8ee2SEnrico Granata case '\b': 264ca6c8ee2SEnrico Granata retval = {"\\b",2}; 265ca6c8ee2SEnrico Granata break; 266ca6c8ee2SEnrico Granata case '\f': 267ca6c8ee2SEnrico Granata retval = {"\\f",2}; 268ca6c8ee2SEnrico Granata break; 269ca6c8ee2SEnrico Granata case '\n': 270ca6c8ee2SEnrico Granata retval = {"\\n",2}; 271ca6c8ee2SEnrico Granata break; 272ca6c8ee2SEnrico Granata case '\r': 273ca6c8ee2SEnrico Granata retval = {"\\r",2}; 274ca6c8ee2SEnrico Granata break; 275ca6c8ee2SEnrico Granata case '\t': 276ca6c8ee2SEnrico Granata retval = {"\\t",2}; 277ca6c8ee2SEnrico Granata break; 278ca6c8ee2SEnrico Granata case '\v': 279ca6c8ee2SEnrico Granata retval = {"\\v",2}; 280ca6c8ee2SEnrico Granata break; 281ca6c8ee2SEnrico Granata case '\"': 282ca6c8ee2SEnrico Granata retval = {"\\\"",2}; 283ca6c8ee2SEnrico Granata break; 284ca6c8ee2SEnrico Granata case '\\': 285ca6c8ee2SEnrico Granata retval = {"\\\\",2}; 286ca6c8ee2SEnrico Granata break; 287ca6c8ee2SEnrico Granata default: 288ca6c8ee2SEnrico Granata if (isprint(codepoint)) 289ca6c8ee2SEnrico Granata retval = {buffer,utf8_encoded_len}; 290ca6c8ee2SEnrico Granata else 291ca6c8ee2SEnrico Granata { 292d7e6a4f2SVince Harron uint8_t* data = new uint8_t[11]; 293d7e6a4f2SVince Harron sprintf((char*)data,"\\U%08x",codepoint); 294d7e6a4f2SVince Harron retval = { data,10,[] (const uint8_t* c) {delete[] c;} }; 295ca6c8ee2SEnrico Granata break; 296ca6c8ee2SEnrico Granata } 297ca6c8ee2SEnrico Granata } 298ca6c8ee2SEnrico Granata 299ca6c8ee2SEnrico Granata next = buffer + utf8_encoded_len; 300ca6c8ee2SEnrico Granata return retval; 301ca6c8ee2SEnrico Granata } 302ca6c8ee2SEnrico Granata 303ca6c8ee2SEnrico Granata // this should not happen - but just in case.. try to resync at some point 304ca6c8ee2SEnrico Granata retval = {buffer,1}; 305ca6c8ee2SEnrico Granata next = buffer+1; 306ca6c8ee2SEnrico Granata return retval; 307ca6c8ee2SEnrico Granata } 308ca6c8ee2SEnrico Granata 309ca6c8ee2SEnrico Granata // Given a sequence of bytes, this function returns: 310ca6c8ee2SEnrico Granata // a sequence of bytes to actually print out + a length 311ca6c8ee2SEnrico Granata // the following unscanned position of the buffer is in next 312ca6c8ee2SEnrico Granata static StringPrinterBufferPointer<> 313ca6c8ee2SEnrico Granata GetPrintable(StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 314ca6c8ee2SEnrico Granata { 315ca6c8ee2SEnrico Granata if (!buffer) 316ca6c8ee2SEnrico Granata return {nullptr}; 317ca6c8ee2SEnrico Granata 318ca6c8ee2SEnrico Granata switch (type) 319ca6c8ee2SEnrico Granata { 320ca6c8ee2SEnrico Granata case StringElementType::ASCII: 321ca6c8ee2SEnrico Granata return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next); 322ca6c8ee2SEnrico Granata case StringElementType::UTF8: 323ca6c8ee2SEnrico Granata return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next); 324ca6c8ee2SEnrico Granata default: 325ca6c8ee2SEnrico Granata return {nullptr}; 326ca6c8ee2SEnrico Granata } 327ca6c8ee2SEnrico Granata } 328ca6c8ee2SEnrico Granata 329ca6c8ee2SEnrico Granata // use this call if you already have an LLDB-side buffer for the data 330ca6c8ee2SEnrico Granata template<typename SourceDataType> 331ca6c8ee2SEnrico Granata static bool 332ca6c8ee2SEnrico Granata DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**, 333ca6c8ee2SEnrico Granata const SourceDataType*, 334ca6c8ee2SEnrico Granata UTF8**, 335ca6c8ee2SEnrico Granata UTF8*, 336ca6c8ee2SEnrico Granata ConversionFlags), 337d07f7550SEnrico Granata const ReadBufferAndDumpToStreamOptions& dump_options) 338ca6c8ee2SEnrico Granata { 339d07f7550SEnrico Granata Stream &stream(*dump_options.GetStream()); 340d07f7550SEnrico Granata if (dump_options.GetPrefixToken() != 0) 341d07f7550SEnrico Granata stream.Printf("%c",dump_options.GetPrefixToken()); 342d07f7550SEnrico Granata if (dump_options.GetQuote() != 0) 343d07f7550SEnrico Granata stream.Printf("%c",dump_options.GetQuote()); 344d07f7550SEnrico Granata auto data(dump_options.GetData()); 345d07f7550SEnrico Granata auto source_size(dump_options.GetSourceSize()); 346ca6c8ee2SEnrico Granata if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) 347ca6c8ee2SEnrico Granata { 348ca6c8ee2SEnrico Granata const int bufferSPSize = data.GetByteSize(); 349d07f7550SEnrico Granata if (dump_options.GetSourceSize() == 0) 350ca6c8ee2SEnrico Granata { 351ca6c8ee2SEnrico Granata const int origin_encoding = 8*sizeof(SourceDataType); 352d07f7550SEnrico Granata source_size = bufferSPSize/(origin_encoding / 4); 353ca6c8ee2SEnrico Granata } 354ca6c8ee2SEnrico Granata 355d7e6a4f2SVince Harron const SourceDataType *data_ptr = (const SourceDataType*)data.GetDataStart(); 356d07f7550SEnrico Granata const SourceDataType *data_end_ptr = data_ptr + source_size; 357ca6c8ee2SEnrico Granata 358d07f7550SEnrico Granata const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator(); 359d07f7550SEnrico Granata 360d07f7550SEnrico Granata if (zero_is_terminator) 361d07f7550SEnrico Granata { 362ca6c8ee2SEnrico Granata while (data_ptr < data_end_ptr) 363ca6c8ee2SEnrico Granata { 364ca6c8ee2SEnrico Granata if (!*data_ptr) 365ca6c8ee2SEnrico Granata { 366ca6c8ee2SEnrico Granata data_end_ptr = data_ptr; 367ca6c8ee2SEnrico Granata break; 368ca6c8ee2SEnrico Granata } 369ca6c8ee2SEnrico Granata data_ptr++; 370ca6c8ee2SEnrico Granata } 371ca6c8ee2SEnrico Granata 372d7e6a4f2SVince Harron data_ptr = (const SourceDataType*)data.GetDataStart(); 373d07f7550SEnrico Granata } 374ca6c8ee2SEnrico Granata 375ca6c8ee2SEnrico Granata lldb::DataBufferSP utf8_data_buffer_sp; 376ca6c8ee2SEnrico Granata UTF8* utf8_data_ptr = nullptr; 377ca6c8ee2SEnrico Granata UTF8* utf8_data_end_ptr = nullptr; 378ca6c8ee2SEnrico Granata 379ca6c8ee2SEnrico Granata if (ConvertFunction) 380ca6c8ee2SEnrico Granata { 381ca6c8ee2SEnrico Granata utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0)); 382ca6c8ee2SEnrico Granata utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); 383ca6c8ee2SEnrico Granata utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); 384d7e6a4f2SVince Harron ConvertFunction ( &data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion ); 385*8101f570SEnrico Granata if (false == zero_is_terminator) 386*8101f570SEnrico Granata utf8_data_end_ptr = utf8_data_ptr; 387ca6c8ee2SEnrico Granata utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr 388ca6c8ee2SEnrico Granata } 389ca6c8ee2SEnrico Granata else 390ca6c8ee2SEnrico Granata { 391ca6c8ee2SEnrico Granata // just copy the pointers - the cast is necessary to make the compiler happy 392ca6c8ee2SEnrico Granata // but this should only happen if we are reading UTF8 data 393ca6c8ee2SEnrico Granata utf8_data_ptr = (UTF8*)data_ptr; 394ca6c8ee2SEnrico Granata utf8_data_end_ptr = (UTF8*)data_end_ptr; 395ca6c8ee2SEnrico Granata } 396ca6c8ee2SEnrico Granata 397d07f7550SEnrico Granata const bool escape_non_printables = dump_options.GetEscapeNonPrintables(); 398d07f7550SEnrico Granata 399ca6c8ee2SEnrico Granata // since we tend to accept partial data (and even partially malformed data) 400ca6c8ee2SEnrico Granata // we might end up with no NULL terminator before the end_ptr 401ca6c8ee2SEnrico Granata // hence we need to take a slower route and ensure we stay within boundaries 402ca6c8ee2SEnrico Granata for (;utf8_data_ptr < utf8_data_end_ptr;) 403ca6c8ee2SEnrico Granata { 404d07f7550SEnrico Granata if (zero_is_terminator && !*utf8_data_ptr) 405ca6c8ee2SEnrico Granata break; 406ca6c8ee2SEnrico Granata 407d07f7550SEnrico Granata if (escape_non_printables) 408ca6c8ee2SEnrico Granata { 409ca6c8ee2SEnrico Granata uint8_t* next_data = nullptr; 410ca6c8ee2SEnrico Granata auto printable = GetPrintable(StringElementType::UTF8, utf8_data_ptr, utf8_data_end_ptr, next_data); 411ca6c8ee2SEnrico Granata auto printable_bytes = printable.GetBytes(); 412ca6c8ee2SEnrico Granata auto printable_size = printable.GetSize(); 413ca6c8ee2SEnrico Granata if (!printable_bytes || !next_data) 414ca6c8ee2SEnrico Granata { 415ca6c8ee2SEnrico Granata // GetPrintable() failed on us - print one byte in a desperate resync attempt 416ca6c8ee2SEnrico Granata printable_bytes = utf8_data_ptr; 417ca6c8ee2SEnrico Granata printable_size = 1; 418ca6c8ee2SEnrico Granata next_data = utf8_data_ptr+1; 419ca6c8ee2SEnrico Granata } 4203acfe1a3SAndy Gibbs for (unsigned c = 0; c < printable_size; c++) 421ca6c8ee2SEnrico Granata stream.Printf("%c", *(printable_bytes+c)); 422ca6c8ee2SEnrico Granata utf8_data_ptr = (uint8_t*)next_data; 423ca6c8ee2SEnrico Granata } 424ca6c8ee2SEnrico Granata else 425ca6c8ee2SEnrico Granata { 426ca6c8ee2SEnrico Granata stream.Printf("%c",*utf8_data_ptr); 427ca6c8ee2SEnrico Granata utf8_data_ptr++; 428ca6c8ee2SEnrico Granata } 429ca6c8ee2SEnrico Granata } 430ca6c8ee2SEnrico Granata } 431d07f7550SEnrico Granata if (dump_options.GetQuote() != 0) 432d07f7550SEnrico Granata stream.Printf("%c",dump_options.GetQuote()); 433ca6c8ee2SEnrico Granata return true; 434ca6c8ee2SEnrico Granata } 435ca6c8ee2SEnrico Granata 436ebdc1ac0SEnrico Granata lldb_private::formatters::ReadStringAndDumpToStreamOptions::ReadStringAndDumpToStreamOptions (ValueObject& valobj) : 437ebdc1ac0SEnrico Granata ReadStringAndDumpToStreamOptions() 438ebdc1ac0SEnrico Granata { 439ebdc1ac0SEnrico Granata SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 440ebdc1ac0SEnrico Granata } 441ebdc1ac0SEnrico Granata 442ebdc1ac0SEnrico Granata lldb_private::formatters::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (ValueObject& valobj) : 443ebdc1ac0SEnrico Granata ReadBufferAndDumpToStreamOptions() 444ebdc1ac0SEnrico Granata { 445ebdc1ac0SEnrico Granata SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 446ebdc1ac0SEnrico Granata } 447ebdc1ac0SEnrico Granata 448d07f7550SEnrico Granata lldb_private::formatters::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (const lldb_private::formatters::ReadStringAndDumpToStreamOptions& options) : 449d07f7550SEnrico Granata ReadBufferAndDumpToStreamOptions() 450d07f7550SEnrico Granata { 451d07f7550SEnrico Granata SetStream(options.GetStream()); 452d07f7550SEnrico Granata SetPrefixToken(options.GetPrefixToken()); 453d07f7550SEnrico Granata SetQuote(options.GetQuote()); 454d07f7550SEnrico Granata SetEscapeNonPrintables(options.GetEscapeNonPrintables()); 455d07f7550SEnrico Granata SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator()); 456d07f7550SEnrico Granata } 457d07f7550SEnrico Granata 458ebdc1ac0SEnrico Granata 459fd13743fSShawn Best namespace lldb_private 460fd13743fSShawn Best { 461fd13743fSShawn Best 462fd13743fSShawn Best namespace formatters 463fd13743fSShawn Best { 464fd13743fSShawn Best 465fd13743fSShawn Best template <> 466fd13743fSShawn Best bool 467d07f7550SEnrico Granata ReadStringAndDumpToStream<StringElementType::ASCII> (const ReadStringAndDumpToStreamOptions& options) 468fd13743fSShawn Best { 469fd13743fSShawn Best assert(options.GetStream() && "need a Stream to print the string to"); 470fd13743fSShawn Best Error my_error; 471fd13743fSShawn Best 472fd13743fSShawn Best ProcessSP process_sp(options.GetProcessSP()); 473fd13743fSShawn Best 474fd13743fSShawn Best if (process_sp.get() == nullptr || options.GetLocation() == 0) 475fd13743fSShawn Best return false; 476fd13743fSShawn Best 477fd13743fSShawn Best size_t size; 478fd13743fSShawn Best 479fd13743fSShawn Best if (options.GetSourceSize() == 0) 480fd13743fSShawn Best size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 48134042212SEnrico Granata else if (!options.GetIgnoreMaxLength()) 482fd13743fSShawn Best size = std::min(options.GetSourceSize(),process_sp->GetTarget().GetMaximumSizeOfStringSummary()); 48334042212SEnrico Granata else 48434042212SEnrico Granata size = options.GetSourceSize(); 485fd13743fSShawn Best 486fd13743fSShawn Best lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0)); 487fd13743fSShawn Best 488d7e6a4f2SVince Harron process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error); 489fd13743fSShawn Best 490fd13743fSShawn Best if (my_error.Fail()) 491fd13743fSShawn Best return false; 492fd13743fSShawn Best 493fd13743fSShawn Best char prefix_token = options.GetPrefixToken(); 494fd13743fSShawn Best char quote = options.GetQuote(); 495fd13743fSShawn Best 496fd13743fSShawn Best if (prefix_token != 0) 497fd13743fSShawn Best options.GetStream()->Printf("%c%c",prefix_token,quote); 498fd13743fSShawn Best else if (quote != 0) 499fd13743fSShawn Best options.GetStream()->Printf("%c",quote); 500fd13743fSShawn Best 501fd13743fSShawn Best uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize(); 502fd13743fSShawn Best 503fd13743fSShawn Best // since we tend to accept partial data (and even partially malformed data) 504fd13743fSShawn Best // we might end up with no NULL terminator before the end_ptr 505fd13743fSShawn Best // hence we need to take a slower route and ensure we stay within boundaries 506fd13743fSShawn Best for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);) 507fd13743fSShawn Best { 508fd13743fSShawn Best if (options.GetEscapeNonPrintables()) 509fd13743fSShawn Best { 510fd13743fSShawn Best uint8_t* next_data = nullptr; 511fd13743fSShawn Best auto printable = GetPrintable(StringElementType::ASCII, data, data_end, next_data); 512fd13743fSShawn Best auto printable_bytes = printable.GetBytes(); 513fd13743fSShawn Best auto printable_size = printable.GetSize(); 514fd13743fSShawn Best if (!printable_bytes || !next_data) 515fd13743fSShawn Best { 516fd13743fSShawn Best // GetPrintable() failed on us - print one byte in a desperate resync attempt 517fd13743fSShawn Best printable_bytes = data; 518fd13743fSShawn Best printable_size = 1; 519fd13743fSShawn Best next_data = data+1; 520fd13743fSShawn Best } 5213acfe1a3SAndy Gibbs for (unsigned c = 0; c < printable_size; c++) 522fd13743fSShawn Best options.GetStream()->Printf("%c", *(printable_bytes+c)); 523fd13743fSShawn Best data = (uint8_t*)next_data; 524fd13743fSShawn Best } 525fd13743fSShawn Best else 526fd13743fSShawn Best { 527fd13743fSShawn Best options.GetStream()->Printf("%c",*data); 528fd13743fSShawn Best data++; 529fd13743fSShawn Best } 530fd13743fSShawn Best } 531fd13743fSShawn Best 532fd13743fSShawn Best if (quote != 0) 533fd13743fSShawn Best options.GetStream()->Printf("%c",quote); 534fd13743fSShawn Best 535fd13743fSShawn Best return true; 536fd13743fSShawn Best } 537fd13743fSShawn Best 538ca6c8ee2SEnrico Granata template<typename SourceDataType> 539ca6c8ee2SEnrico Granata static bool 540ca6c8ee2SEnrico Granata ReadUTFBufferAndDumpToStream (const ReadStringAndDumpToStreamOptions& options, 541ca6c8ee2SEnrico Granata ConversionResult (*ConvertFunction) (const SourceDataType**, 542ca6c8ee2SEnrico Granata const SourceDataType*, 543ca6c8ee2SEnrico Granata UTF8**, 544ca6c8ee2SEnrico Granata UTF8*, 545ca6c8ee2SEnrico Granata ConversionFlags)) 546ca6c8ee2SEnrico Granata { 547ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 548ca6c8ee2SEnrico Granata 549ca6c8ee2SEnrico Granata if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS) 550ca6c8ee2SEnrico Granata return false; 551ca6c8ee2SEnrico Granata 552ca6c8ee2SEnrico Granata lldb::ProcessSP process_sp(options.GetProcessSP()); 553ca6c8ee2SEnrico Granata 554ca6c8ee2SEnrico Granata if (!process_sp) 555ca6c8ee2SEnrico Granata return false; 556ca6c8ee2SEnrico Granata 557ca6c8ee2SEnrico Granata const int type_width = sizeof(SourceDataType); 558ca6c8ee2SEnrico Granata const int origin_encoding = 8 * type_width ; 559ca6c8ee2SEnrico Granata if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) 560ca6c8ee2SEnrico Granata return false; 561ca6c8ee2SEnrico Granata // if not UTF8, I need a conversion function to return proper UTF8 562ca6c8ee2SEnrico Granata if (origin_encoding != 8 && !ConvertFunction) 563ca6c8ee2SEnrico Granata return false; 564ca6c8ee2SEnrico Granata 565ca6c8ee2SEnrico Granata if (!options.GetStream()) 566ca6c8ee2SEnrico Granata return false; 567ca6c8ee2SEnrico Granata 568ca6c8ee2SEnrico Granata uint32_t sourceSize = options.GetSourceSize(); 569ca6c8ee2SEnrico Granata bool needs_zero_terminator = options.GetNeedsZeroTermination(); 570ca6c8ee2SEnrico Granata 571ca6c8ee2SEnrico Granata if (!sourceSize) 572ca6c8ee2SEnrico Granata { 573ca6c8ee2SEnrico Granata sourceSize = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 574ca6c8ee2SEnrico Granata needs_zero_terminator = true; 575ca6c8ee2SEnrico Granata } 576b0e8a55dSEnrico Granata else if (!options.GetIgnoreMaxLength()) 577ca6c8ee2SEnrico Granata sourceSize = std::min(sourceSize,process_sp->GetTarget().GetMaximumSizeOfStringSummary()); 578ca6c8ee2SEnrico Granata 579ca6c8ee2SEnrico Granata const int bufferSPSize = sourceSize * type_width; 580ca6c8ee2SEnrico Granata 581ca6c8ee2SEnrico Granata lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0)); 582ca6c8ee2SEnrico Granata 583ca6c8ee2SEnrico Granata if (!buffer_sp->GetBytes()) 584ca6c8ee2SEnrico Granata return false; 585ca6c8ee2SEnrico Granata 586ca6c8ee2SEnrico Granata Error error; 587ca6c8ee2SEnrico Granata char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); 588ca6c8ee2SEnrico Granata 589ca6c8ee2SEnrico Granata if (needs_zero_terminator) 590d7e6a4f2SVince Harron process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width); 591ca6c8ee2SEnrico Granata else 592d7e6a4f2SVince Harron process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error); 593ca6c8ee2SEnrico Granata 594099263b4SEnrico Granata if (error.Fail()) 595ca6c8ee2SEnrico Granata { 596ca6c8ee2SEnrico Granata options.GetStream()->Printf("unable to read data"); 597ca6c8ee2SEnrico Granata return true; 598ca6c8ee2SEnrico Granata } 599ca6c8ee2SEnrico Granata 600ca6c8ee2SEnrico Granata DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize()); 601ca6c8ee2SEnrico Granata 602d07f7550SEnrico Granata ReadBufferAndDumpToStreamOptions dump_options(options); 603d07f7550SEnrico Granata dump_options.SetData(data); 604d07f7550SEnrico Granata dump_options.SetSourceSize(sourceSize); 605d07f7550SEnrico Granata 606d07f7550SEnrico Granata return DumpUTFBufferToStream(ConvertFunction, dump_options); 607ca6c8ee2SEnrico Granata } 608ca6c8ee2SEnrico Granata 609ca6c8ee2SEnrico Granata template <> 610ca6c8ee2SEnrico Granata bool 611d07f7550SEnrico Granata ReadStringAndDumpToStream<StringElementType::UTF8> (const ReadStringAndDumpToStreamOptions& options) 612ca6c8ee2SEnrico Granata { 613ca6c8ee2SEnrico Granata return ReadUTFBufferAndDumpToStream<UTF8>(options, 614ca6c8ee2SEnrico Granata nullptr); 615ca6c8ee2SEnrico Granata } 616ca6c8ee2SEnrico Granata 617ca6c8ee2SEnrico Granata template <> 618ca6c8ee2SEnrico Granata bool 619d07f7550SEnrico Granata ReadStringAndDumpToStream<StringElementType::UTF16> (const ReadStringAndDumpToStreamOptions& options) 620ca6c8ee2SEnrico Granata { 621ca6c8ee2SEnrico Granata return ReadUTFBufferAndDumpToStream<UTF16>(options, 622ca6c8ee2SEnrico Granata ConvertUTF16toUTF8); 623ca6c8ee2SEnrico Granata } 624ca6c8ee2SEnrico Granata 625ca6c8ee2SEnrico Granata template <> 626ca6c8ee2SEnrico Granata bool 627d07f7550SEnrico Granata ReadStringAndDumpToStream<StringElementType::UTF32> (const ReadStringAndDumpToStreamOptions& options) 628ca6c8ee2SEnrico Granata { 629ca6c8ee2SEnrico Granata return ReadUTFBufferAndDumpToStream<UTF32>(options, 630ca6c8ee2SEnrico Granata ConvertUTF32toUTF8); 631ca6c8ee2SEnrico Granata } 632ca6c8ee2SEnrico Granata 633ca6c8ee2SEnrico Granata template <> 634ca6c8ee2SEnrico Granata bool 635d07f7550SEnrico Granata ReadBufferAndDumpToStream<StringElementType::UTF8> (const ReadBufferAndDumpToStreamOptions& options) 636ca6c8ee2SEnrico Granata { 637ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 638ca6c8ee2SEnrico Granata 639d07f7550SEnrico Granata return DumpUTFBufferToStream<UTF8>(nullptr, options); 640ca6c8ee2SEnrico Granata } 641ca6c8ee2SEnrico Granata 642ca6c8ee2SEnrico Granata template <> 643ca6c8ee2SEnrico Granata bool 644d07f7550SEnrico Granata ReadBufferAndDumpToStream<StringElementType::ASCII> (const ReadBufferAndDumpToStreamOptions& options) 645ca6c8ee2SEnrico Granata { 646ca6c8ee2SEnrico Granata // treat ASCII the same as UTF8 647ca6c8ee2SEnrico Granata // FIXME: can we optimize ASCII some more? 648ca6c8ee2SEnrico Granata return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); 649ca6c8ee2SEnrico Granata } 650ca6c8ee2SEnrico Granata 651ca6c8ee2SEnrico Granata template <> 652ca6c8ee2SEnrico Granata bool 653d07f7550SEnrico Granata ReadBufferAndDumpToStream<StringElementType::UTF16> (const ReadBufferAndDumpToStreamOptions& options) 654ca6c8ee2SEnrico Granata { 655ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 656ca6c8ee2SEnrico Granata 657d07f7550SEnrico Granata return DumpUTFBufferToStream(ConvertUTF16toUTF8, options); 658ca6c8ee2SEnrico Granata } 659ca6c8ee2SEnrico Granata 660ca6c8ee2SEnrico Granata template <> 661ca6c8ee2SEnrico Granata bool 662d07f7550SEnrico Granata ReadBufferAndDumpToStream<StringElementType::UTF32> (const ReadBufferAndDumpToStreamOptions& options) 663ca6c8ee2SEnrico Granata { 664ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 665ca6c8ee2SEnrico Granata 666d07f7550SEnrico Granata return DumpUTFBufferToStream(ConvertUTF32toUTF8, options); 667ca6c8ee2SEnrico Granata } 668fd13743fSShawn Best 669fd13743fSShawn Best } // namespace formatters 670fd13743fSShawn Best 671fd13743fSShawn Best } // namespace lldb_private 672