1*ca6c8ee2SEnrico Granata //===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===// 2*ca6c8ee2SEnrico Granata // 3*ca6c8ee2SEnrico Granata // The LLVM Compiler Infrastructure 4*ca6c8ee2SEnrico Granata // 5*ca6c8ee2SEnrico Granata // This file is distributed under the University of Illinois Open Source 6*ca6c8ee2SEnrico Granata // License. See LICENSE.TXT for details. 7*ca6c8ee2SEnrico Granata // 8*ca6c8ee2SEnrico Granata //===----------------------------------------------------------------------===// 9*ca6c8ee2SEnrico Granata 10*ca6c8ee2SEnrico Granata #include "lldb/DataFormatters/StringPrinter.h" 11*ca6c8ee2SEnrico Granata 12*ca6c8ee2SEnrico Granata #include "lldb/Core/DataExtractor.h" 13*ca6c8ee2SEnrico Granata #include "lldb/Core/Error.h" 14*ca6c8ee2SEnrico Granata #include "lldb/Target/Process.h" 15*ca6c8ee2SEnrico Granata #include "lldb/Target/Target.h" 16*ca6c8ee2SEnrico Granata 17*ca6c8ee2SEnrico Granata #include "llvm/Support/ConvertUTF.h" 18*ca6c8ee2SEnrico Granata 19*ca6c8ee2SEnrico Granata #include <codecvt> 20*ca6c8ee2SEnrico Granata #include <ctype.h> 21*ca6c8ee2SEnrico Granata #include <functional> 22*ca6c8ee2SEnrico Granata #include <locale> 23*ca6c8ee2SEnrico Granata 24*ca6c8ee2SEnrico Granata using namespace lldb; 25*ca6c8ee2SEnrico Granata using namespace lldb_private; 26*ca6c8ee2SEnrico Granata using namespace lldb_private::formatters; 27*ca6c8ee2SEnrico Granata 28*ca6c8ee2SEnrico Granata // I can't use a std::unique_ptr for this because the Deleter is a template argument there 29*ca6c8ee2SEnrico Granata // and I want the same type to represent both pointers I want to free and pointers I don't need 30*ca6c8ee2SEnrico Granata // to free - which is what this class essentially is 31*ca6c8ee2SEnrico Granata // It's very specialized to the needs of this file, and not suggested for general use 32*ca6c8ee2SEnrico Granata template <typename T = uint8_t, typename U = char, typename S = size_t> 33*ca6c8ee2SEnrico Granata struct StringPrinterBufferPointer 34*ca6c8ee2SEnrico Granata { 35*ca6c8ee2SEnrico Granata public: 36*ca6c8ee2SEnrico Granata 37*ca6c8ee2SEnrico Granata typedef std::function<void(const T*)> Deleter; 38*ca6c8ee2SEnrico Granata 39*ca6c8ee2SEnrico Granata StringPrinterBufferPointer (std::nullptr_t ptr) : 40*ca6c8ee2SEnrico Granata m_data(nullptr), 41*ca6c8ee2SEnrico Granata m_size(0), 42*ca6c8ee2SEnrico Granata m_deleter() 43*ca6c8ee2SEnrico Granata {} 44*ca6c8ee2SEnrico Granata 45*ca6c8ee2SEnrico Granata StringPrinterBufferPointer(const T* bytes, S size, Deleter deleter = nullptr) : 46*ca6c8ee2SEnrico Granata m_data(bytes), 47*ca6c8ee2SEnrico Granata m_size(size), 48*ca6c8ee2SEnrico Granata m_deleter(deleter) 49*ca6c8ee2SEnrico Granata {} 50*ca6c8ee2SEnrico Granata 51*ca6c8ee2SEnrico Granata StringPrinterBufferPointer(const U* bytes, S size, Deleter deleter = nullptr) : 52*ca6c8ee2SEnrico Granata m_data((T*)bytes), 53*ca6c8ee2SEnrico Granata m_size(size), 54*ca6c8ee2SEnrico Granata m_deleter(deleter) 55*ca6c8ee2SEnrico Granata {} 56*ca6c8ee2SEnrico Granata 57*ca6c8ee2SEnrico Granata StringPrinterBufferPointer(StringPrinterBufferPointer&& rhs) : 58*ca6c8ee2SEnrico Granata m_data(rhs.m_data), 59*ca6c8ee2SEnrico Granata m_size(rhs.m_size), 60*ca6c8ee2SEnrico Granata m_deleter(rhs.m_deleter) 61*ca6c8ee2SEnrico Granata { 62*ca6c8ee2SEnrico Granata rhs.m_data = nullptr; 63*ca6c8ee2SEnrico Granata } 64*ca6c8ee2SEnrico Granata 65*ca6c8ee2SEnrico Granata StringPrinterBufferPointer(const StringPrinterBufferPointer& rhs) : 66*ca6c8ee2SEnrico Granata m_data(rhs.m_data), 67*ca6c8ee2SEnrico Granata m_size(rhs.m_size), 68*ca6c8ee2SEnrico Granata m_deleter(rhs.m_deleter) 69*ca6c8ee2SEnrico Granata { 70*ca6c8ee2SEnrico Granata rhs.m_data = nullptr; // this is why m_data has to be mutable 71*ca6c8ee2SEnrico Granata } 72*ca6c8ee2SEnrico Granata 73*ca6c8ee2SEnrico Granata const T* 74*ca6c8ee2SEnrico Granata GetBytes () const 75*ca6c8ee2SEnrico Granata { 76*ca6c8ee2SEnrico Granata return m_data; 77*ca6c8ee2SEnrico Granata } 78*ca6c8ee2SEnrico Granata 79*ca6c8ee2SEnrico Granata const S 80*ca6c8ee2SEnrico Granata GetSize () const 81*ca6c8ee2SEnrico Granata { 82*ca6c8ee2SEnrico Granata return m_size; 83*ca6c8ee2SEnrico Granata } 84*ca6c8ee2SEnrico Granata 85*ca6c8ee2SEnrico Granata ~StringPrinterBufferPointer () 86*ca6c8ee2SEnrico Granata { 87*ca6c8ee2SEnrico Granata if (m_data && m_deleter) 88*ca6c8ee2SEnrico Granata m_deleter(m_data); 89*ca6c8ee2SEnrico Granata m_data = nullptr; 90*ca6c8ee2SEnrico Granata } 91*ca6c8ee2SEnrico Granata 92*ca6c8ee2SEnrico Granata StringPrinterBufferPointer& 93*ca6c8ee2SEnrico Granata operator = (const StringPrinterBufferPointer& rhs) 94*ca6c8ee2SEnrico Granata { 95*ca6c8ee2SEnrico Granata if (m_data && m_deleter) 96*ca6c8ee2SEnrico Granata m_deleter(m_data); 97*ca6c8ee2SEnrico Granata m_data = rhs.m_data; 98*ca6c8ee2SEnrico Granata m_size = rhs.m_size; 99*ca6c8ee2SEnrico Granata m_deleter = rhs.m_deleter; 100*ca6c8ee2SEnrico Granata rhs.m_data = nullptr; 101*ca6c8ee2SEnrico Granata return *this; 102*ca6c8ee2SEnrico Granata } 103*ca6c8ee2SEnrico Granata 104*ca6c8ee2SEnrico Granata private: 105*ca6c8ee2SEnrico Granata mutable const T* m_data; 106*ca6c8ee2SEnrico Granata size_t m_size; 107*ca6c8ee2SEnrico Granata Deleter m_deleter; 108*ca6c8ee2SEnrico Granata }; 109*ca6c8ee2SEnrico Granata 110*ca6c8ee2SEnrico Granata // we define this for all values of type but only implement it for those we care about 111*ca6c8ee2SEnrico Granata // that's good because we get linker errors for any unsupported type 112*ca6c8ee2SEnrico Granata template <StringElementType type> 113*ca6c8ee2SEnrico Granata static StringPrinterBufferPointer<> 114*ca6c8ee2SEnrico Granata GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next); 115*ca6c8ee2SEnrico Granata 116*ca6c8ee2SEnrico Granata // mimic isprint() for Unicode codepoints 117*ca6c8ee2SEnrico Granata static bool 118*ca6c8ee2SEnrico Granata isprint(char32_t codepoint) 119*ca6c8ee2SEnrico Granata { 120*ca6c8ee2SEnrico Granata if (codepoint <= 0x1F || codepoint == 0x7F) // C0 121*ca6c8ee2SEnrico Granata { 122*ca6c8ee2SEnrico Granata return false; 123*ca6c8ee2SEnrico Granata } 124*ca6c8ee2SEnrico Granata if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 125*ca6c8ee2SEnrico Granata { 126*ca6c8ee2SEnrico Granata return false; 127*ca6c8ee2SEnrico Granata } 128*ca6c8ee2SEnrico Granata if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators 129*ca6c8ee2SEnrico Granata { 130*ca6c8ee2SEnrico Granata return false; 131*ca6c8ee2SEnrico Granata } 132*ca6c8ee2SEnrico Granata if (codepoint == 0x200E || codepoint == 0x200F || (codepoint >= 0x202A && codepoint <= 0x202E)) // bidirectional text control 133*ca6c8ee2SEnrico Granata { 134*ca6c8ee2SEnrico Granata return false; 135*ca6c8ee2SEnrico Granata } 136*ca6c8ee2SEnrico Granata if (codepoint >= 0xFFF9 && codepoint <= 0xFFFF) // interlinears and generally specials 137*ca6c8ee2SEnrico Granata { 138*ca6c8ee2SEnrico Granata return false; 139*ca6c8ee2SEnrico Granata } 140*ca6c8ee2SEnrico Granata return true; 141*ca6c8ee2SEnrico Granata } 142*ca6c8ee2SEnrico Granata 143*ca6c8ee2SEnrico Granata template <> 144*ca6c8ee2SEnrico Granata StringPrinterBufferPointer<> 145*ca6c8ee2SEnrico Granata GetPrintableImpl<StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 146*ca6c8ee2SEnrico Granata { 147*ca6c8ee2SEnrico Granata StringPrinterBufferPointer<> retval = {nullptr}; 148*ca6c8ee2SEnrico Granata 149*ca6c8ee2SEnrico Granata switch (*buffer) 150*ca6c8ee2SEnrico Granata { 151*ca6c8ee2SEnrico Granata case '\a': 152*ca6c8ee2SEnrico Granata retval = {"\\a",2}; 153*ca6c8ee2SEnrico Granata break; 154*ca6c8ee2SEnrico Granata case '\b': 155*ca6c8ee2SEnrico Granata retval = {"\\b",2}; 156*ca6c8ee2SEnrico Granata break; 157*ca6c8ee2SEnrico Granata case '\f': 158*ca6c8ee2SEnrico Granata retval = {"\\f",2}; 159*ca6c8ee2SEnrico Granata break; 160*ca6c8ee2SEnrico Granata case '\n': 161*ca6c8ee2SEnrico Granata retval = {"\\n",2}; 162*ca6c8ee2SEnrico Granata break; 163*ca6c8ee2SEnrico Granata case '\r': 164*ca6c8ee2SEnrico Granata retval = {"\\r",2}; 165*ca6c8ee2SEnrico Granata break; 166*ca6c8ee2SEnrico Granata case '\t': 167*ca6c8ee2SEnrico Granata retval = {"\\t",2}; 168*ca6c8ee2SEnrico Granata break; 169*ca6c8ee2SEnrico Granata case '\v': 170*ca6c8ee2SEnrico Granata retval = {"\\v",2}; 171*ca6c8ee2SEnrico Granata break; 172*ca6c8ee2SEnrico Granata case '\"': 173*ca6c8ee2SEnrico Granata retval = {"\\\"",2}; 174*ca6c8ee2SEnrico Granata break; 175*ca6c8ee2SEnrico Granata case '\\': 176*ca6c8ee2SEnrico Granata retval = {"\\\\",2}; 177*ca6c8ee2SEnrico Granata break; 178*ca6c8ee2SEnrico Granata default: 179*ca6c8ee2SEnrico Granata if (isprint(*buffer)) 180*ca6c8ee2SEnrico Granata retval = {buffer,1}; 181*ca6c8ee2SEnrico Granata else 182*ca6c8ee2SEnrico Granata { 183*ca6c8ee2SEnrico Granata retval = { new uint8_t[5],4,[] (const uint8_t* c) {delete[] c;} }; 184*ca6c8ee2SEnrico Granata sprintf((char*)retval.GetBytes(),"\\x%02x",*buffer); 185*ca6c8ee2SEnrico Granata break; 186*ca6c8ee2SEnrico Granata } 187*ca6c8ee2SEnrico Granata } 188*ca6c8ee2SEnrico Granata 189*ca6c8ee2SEnrico Granata next = buffer + 1; 190*ca6c8ee2SEnrico Granata return retval; 191*ca6c8ee2SEnrico Granata } 192*ca6c8ee2SEnrico Granata 193*ca6c8ee2SEnrico Granata static char32_t 194*ca6c8ee2SEnrico Granata ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1) 195*ca6c8ee2SEnrico Granata { 196*ca6c8ee2SEnrico Granata return (c0-192)*64+(c1-128); 197*ca6c8ee2SEnrico Granata } 198*ca6c8ee2SEnrico Granata static char32_t 199*ca6c8ee2SEnrico Granata ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2) 200*ca6c8ee2SEnrico Granata { 201*ca6c8ee2SEnrico Granata return (c0-224)*4096+(c1-128)*64+(c2-128); 202*ca6c8ee2SEnrico Granata } 203*ca6c8ee2SEnrico Granata static char32_t 204*ca6c8ee2SEnrico Granata ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3) 205*ca6c8ee2SEnrico Granata { 206*ca6c8ee2SEnrico Granata return (c0-240)*262144+(c2-128)*4096+(c2-128)*64+(c3-128); 207*ca6c8ee2SEnrico Granata } 208*ca6c8ee2SEnrico Granata 209*ca6c8ee2SEnrico Granata template <> 210*ca6c8ee2SEnrico Granata StringPrinterBufferPointer<> 211*ca6c8ee2SEnrico Granata GetPrintableImpl<StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 212*ca6c8ee2SEnrico Granata { 213*ca6c8ee2SEnrico Granata StringPrinterBufferPointer<> retval {nullptr}; 214*ca6c8ee2SEnrico Granata 215*ca6c8ee2SEnrico Granata unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer); 216*ca6c8ee2SEnrico Granata 217*ca6c8ee2SEnrico Granata if (1+buffer_end-buffer < utf8_encoded_len) 218*ca6c8ee2SEnrico Granata { 219*ca6c8ee2SEnrico Granata // I don't have enough bytes - print whatever I have left 220*ca6c8ee2SEnrico Granata retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)}; 221*ca6c8ee2SEnrico Granata next = buffer_end+1; 222*ca6c8ee2SEnrico Granata return retval; 223*ca6c8ee2SEnrico Granata } 224*ca6c8ee2SEnrico Granata 225*ca6c8ee2SEnrico Granata char32_t codepoint = 0; 226*ca6c8ee2SEnrico Granata switch (utf8_encoded_len) 227*ca6c8ee2SEnrico Granata { 228*ca6c8ee2SEnrico Granata case 1: 229*ca6c8ee2SEnrico Granata // this is just an ASCII byte - ask ASCII 230*ca6c8ee2SEnrico Granata return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next); 231*ca6c8ee2SEnrico Granata case 2: 232*ca6c8ee2SEnrico Granata codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1)); 233*ca6c8ee2SEnrico Granata break; 234*ca6c8ee2SEnrico Granata case 3: 235*ca6c8ee2SEnrico Granata codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2)); 236*ca6c8ee2SEnrico Granata break; 237*ca6c8ee2SEnrico Granata case 4: 238*ca6c8ee2SEnrico Granata codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3)); 239*ca6c8ee2SEnrico Granata break; 240*ca6c8ee2SEnrico Granata default: 241*ca6c8ee2SEnrico Granata // this is probably some bogus non-character thing 242*ca6c8ee2SEnrico Granata // just print it as-is and hope to sync up again soon 243*ca6c8ee2SEnrico Granata retval = {buffer,1}; 244*ca6c8ee2SEnrico Granata next = buffer+1; 245*ca6c8ee2SEnrico Granata return retval; 246*ca6c8ee2SEnrico Granata } 247*ca6c8ee2SEnrico Granata 248*ca6c8ee2SEnrico Granata if (codepoint) 249*ca6c8ee2SEnrico Granata { 250*ca6c8ee2SEnrico Granata switch (codepoint) 251*ca6c8ee2SEnrico Granata { 252*ca6c8ee2SEnrico Granata case '\a': 253*ca6c8ee2SEnrico Granata retval = {"\\a",2}; 254*ca6c8ee2SEnrico Granata break; 255*ca6c8ee2SEnrico Granata case '\b': 256*ca6c8ee2SEnrico Granata retval = {"\\b",2}; 257*ca6c8ee2SEnrico Granata break; 258*ca6c8ee2SEnrico Granata case '\f': 259*ca6c8ee2SEnrico Granata retval = {"\\f",2}; 260*ca6c8ee2SEnrico Granata break; 261*ca6c8ee2SEnrico Granata case '\n': 262*ca6c8ee2SEnrico Granata retval = {"\\n",2}; 263*ca6c8ee2SEnrico Granata break; 264*ca6c8ee2SEnrico Granata case '\r': 265*ca6c8ee2SEnrico Granata retval = {"\\r",2}; 266*ca6c8ee2SEnrico Granata break; 267*ca6c8ee2SEnrico Granata case '\t': 268*ca6c8ee2SEnrico Granata retval = {"\\t",2}; 269*ca6c8ee2SEnrico Granata break; 270*ca6c8ee2SEnrico Granata case '\v': 271*ca6c8ee2SEnrico Granata retval = {"\\v",2}; 272*ca6c8ee2SEnrico Granata break; 273*ca6c8ee2SEnrico Granata case '\"': 274*ca6c8ee2SEnrico Granata retval = {"\\\"",2}; 275*ca6c8ee2SEnrico Granata break; 276*ca6c8ee2SEnrico Granata case '\\': 277*ca6c8ee2SEnrico Granata retval = {"\\\\",2}; 278*ca6c8ee2SEnrico Granata break; 279*ca6c8ee2SEnrico Granata default: 280*ca6c8ee2SEnrico Granata if (isprint(codepoint)) 281*ca6c8ee2SEnrico Granata retval = {buffer,utf8_encoded_len}; 282*ca6c8ee2SEnrico Granata else 283*ca6c8ee2SEnrico Granata { 284*ca6c8ee2SEnrico Granata retval = { new uint8_t[11],10,[] (const uint8_t* c) {delete[] c;} }; 285*ca6c8ee2SEnrico Granata sprintf((char*)retval.GetBytes(),"\\U%08x",codepoint); 286*ca6c8ee2SEnrico Granata break; 287*ca6c8ee2SEnrico Granata } 288*ca6c8ee2SEnrico Granata } 289*ca6c8ee2SEnrico Granata 290*ca6c8ee2SEnrico Granata next = buffer + utf8_encoded_len; 291*ca6c8ee2SEnrico Granata return retval; 292*ca6c8ee2SEnrico Granata } 293*ca6c8ee2SEnrico Granata 294*ca6c8ee2SEnrico Granata // this should not happen - but just in case.. try to resync at some point 295*ca6c8ee2SEnrico Granata retval = {buffer,1}; 296*ca6c8ee2SEnrico Granata next = buffer+1; 297*ca6c8ee2SEnrico Granata return retval; 298*ca6c8ee2SEnrico Granata } 299*ca6c8ee2SEnrico Granata 300*ca6c8ee2SEnrico Granata // Given a sequence of bytes, this function returns: 301*ca6c8ee2SEnrico Granata // a sequence of bytes to actually print out + a length 302*ca6c8ee2SEnrico Granata // the following unscanned position of the buffer is in next 303*ca6c8ee2SEnrico Granata static StringPrinterBufferPointer<> 304*ca6c8ee2SEnrico Granata GetPrintable(StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 305*ca6c8ee2SEnrico Granata { 306*ca6c8ee2SEnrico Granata if (!buffer) 307*ca6c8ee2SEnrico Granata return {nullptr}; 308*ca6c8ee2SEnrico Granata 309*ca6c8ee2SEnrico Granata switch (type) 310*ca6c8ee2SEnrico Granata { 311*ca6c8ee2SEnrico Granata case StringElementType::ASCII: 312*ca6c8ee2SEnrico Granata return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next); 313*ca6c8ee2SEnrico Granata case StringElementType::UTF8: 314*ca6c8ee2SEnrico Granata return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next); 315*ca6c8ee2SEnrico Granata default: 316*ca6c8ee2SEnrico Granata return {nullptr}; 317*ca6c8ee2SEnrico Granata } 318*ca6c8ee2SEnrico Granata } 319*ca6c8ee2SEnrico Granata 320*ca6c8ee2SEnrico Granata template <> 321*ca6c8ee2SEnrico Granata bool 322*ca6c8ee2SEnrico Granata lldb_private::formatters::ReadStringAndDumpToStream<StringElementType::ASCII> (ReadStringAndDumpToStreamOptions options) 323*ca6c8ee2SEnrico Granata { 324*ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 325*ca6c8ee2SEnrico Granata Error my_error; 326*ca6c8ee2SEnrico Granata size_t my_data_read; 327*ca6c8ee2SEnrico Granata 328*ca6c8ee2SEnrico Granata ProcessSP process_sp(options.GetProcessSP()); 329*ca6c8ee2SEnrico Granata 330*ca6c8ee2SEnrico Granata if (process_sp.get() == nullptr || options.GetLocation() == 0) 331*ca6c8ee2SEnrico Granata return false; 332*ca6c8ee2SEnrico Granata 333*ca6c8ee2SEnrico Granata size_t size; 334*ca6c8ee2SEnrico Granata 335*ca6c8ee2SEnrico Granata if (options.GetSourceSize() == 0) 336*ca6c8ee2SEnrico Granata size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 337*ca6c8ee2SEnrico Granata else 338*ca6c8ee2SEnrico Granata size = std::min(options.GetSourceSize(),process_sp->GetTarget().GetMaximumSizeOfStringSummary()); 339*ca6c8ee2SEnrico Granata 340*ca6c8ee2SEnrico Granata lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0)); 341*ca6c8ee2SEnrico Granata 342*ca6c8ee2SEnrico Granata my_data_read = process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error); 343*ca6c8ee2SEnrico Granata 344*ca6c8ee2SEnrico Granata if (my_error.Fail()) 345*ca6c8ee2SEnrico Granata return false; 346*ca6c8ee2SEnrico Granata 347*ca6c8ee2SEnrico Granata char prefix_token = options.GetPrefixToken(); 348*ca6c8ee2SEnrico Granata char quote = options.GetQuote(); 349*ca6c8ee2SEnrico Granata 350*ca6c8ee2SEnrico Granata if (prefix_token != 0) 351*ca6c8ee2SEnrico Granata options.GetStream()->Printf("%c%c",prefix_token,quote); 352*ca6c8ee2SEnrico Granata else if (quote != 0) 353*ca6c8ee2SEnrico Granata options.GetStream()->Printf("%c",quote); 354*ca6c8ee2SEnrico Granata 355*ca6c8ee2SEnrico Granata uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize(); 356*ca6c8ee2SEnrico Granata 357*ca6c8ee2SEnrico Granata // since we tend to accept partial data (and even partially malformed data) 358*ca6c8ee2SEnrico Granata // we might end up with no NULL terminator before the end_ptr 359*ca6c8ee2SEnrico Granata // hence we need to take a slower route and ensure we stay within boundaries 360*ca6c8ee2SEnrico Granata for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);) 361*ca6c8ee2SEnrico Granata { 362*ca6c8ee2SEnrico Granata if (options.GetEscapeNonPrintables()) 363*ca6c8ee2SEnrico Granata { 364*ca6c8ee2SEnrico Granata uint8_t* next_data = nullptr; 365*ca6c8ee2SEnrico Granata auto printable = GetPrintable(StringElementType::ASCII, data, data_end, next_data); 366*ca6c8ee2SEnrico Granata auto printable_bytes = printable.GetBytes(); 367*ca6c8ee2SEnrico Granata auto printable_size = printable.GetSize(); 368*ca6c8ee2SEnrico Granata if (!printable_bytes || !next_data) 369*ca6c8ee2SEnrico Granata { 370*ca6c8ee2SEnrico Granata // GetPrintable() failed on us - print one byte in a desperate resync attempt 371*ca6c8ee2SEnrico Granata printable_bytes = data; 372*ca6c8ee2SEnrico Granata printable_size = 1; 373*ca6c8ee2SEnrico Granata next_data = data+1; 374*ca6c8ee2SEnrico Granata } 375*ca6c8ee2SEnrico Granata for (int c = 0; c < printable_size; c++) 376*ca6c8ee2SEnrico Granata options.GetStream()->Printf("%c", *(printable_bytes+c)); 377*ca6c8ee2SEnrico Granata data = (uint8_t*)next_data; 378*ca6c8ee2SEnrico Granata } 379*ca6c8ee2SEnrico Granata else 380*ca6c8ee2SEnrico Granata { 381*ca6c8ee2SEnrico Granata options.GetStream()->Printf("%c",*data); 382*ca6c8ee2SEnrico Granata data++; 383*ca6c8ee2SEnrico Granata } 384*ca6c8ee2SEnrico Granata } 385*ca6c8ee2SEnrico Granata 386*ca6c8ee2SEnrico Granata if (quote != 0) 387*ca6c8ee2SEnrico Granata options.GetStream()->Printf("%c",quote); 388*ca6c8ee2SEnrico Granata 389*ca6c8ee2SEnrico Granata return true; 390*ca6c8ee2SEnrico Granata } 391*ca6c8ee2SEnrico Granata 392*ca6c8ee2SEnrico Granata // use this call if you already have an LLDB-side buffer for the data 393*ca6c8ee2SEnrico Granata template<typename SourceDataType> 394*ca6c8ee2SEnrico Granata static bool 395*ca6c8ee2SEnrico Granata DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**, 396*ca6c8ee2SEnrico Granata const SourceDataType*, 397*ca6c8ee2SEnrico Granata UTF8**, 398*ca6c8ee2SEnrico Granata UTF8*, 399*ca6c8ee2SEnrico Granata ConversionFlags), 400*ca6c8ee2SEnrico Granata const DataExtractor& data, 401*ca6c8ee2SEnrico Granata Stream& stream, 402*ca6c8ee2SEnrico Granata char prefix_token, 403*ca6c8ee2SEnrico Granata char quote, 404*ca6c8ee2SEnrico Granata uint32_t sourceSize, 405*ca6c8ee2SEnrico Granata bool escapeNonPrintables) 406*ca6c8ee2SEnrico Granata { 407*ca6c8ee2SEnrico Granata if (prefix_token != 0) 408*ca6c8ee2SEnrico Granata stream.Printf("%c",prefix_token); 409*ca6c8ee2SEnrico Granata if (quote != 0) 410*ca6c8ee2SEnrico Granata stream.Printf("%c",quote); 411*ca6c8ee2SEnrico Granata if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) 412*ca6c8ee2SEnrico Granata { 413*ca6c8ee2SEnrico Granata const int bufferSPSize = data.GetByteSize(); 414*ca6c8ee2SEnrico Granata if (sourceSize == 0) 415*ca6c8ee2SEnrico Granata { 416*ca6c8ee2SEnrico Granata const int origin_encoding = 8*sizeof(SourceDataType); 417*ca6c8ee2SEnrico Granata sourceSize = bufferSPSize/(origin_encoding / 4); 418*ca6c8ee2SEnrico Granata } 419*ca6c8ee2SEnrico Granata 420*ca6c8ee2SEnrico Granata SourceDataType *data_ptr = (SourceDataType*)data.GetDataStart(); 421*ca6c8ee2SEnrico Granata SourceDataType *data_end_ptr = data_ptr + sourceSize; 422*ca6c8ee2SEnrico Granata 423*ca6c8ee2SEnrico Granata while (data_ptr < data_end_ptr) 424*ca6c8ee2SEnrico Granata { 425*ca6c8ee2SEnrico Granata if (!*data_ptr) 426*ca6c8ee2SEnrico Granata { 427*ca6c8ee2SEnrico Granata data_end_ptr = data_ptr; 428*ca6c8ee2SEnrico Granata break; 429*ca6c8ee2SEnrico Granata } 430*ca6c8ee2SEnrico Granata data_ptr++; 431*ca6c8ee2SEnrico Granata } 432*ca6c8ee2SEnrico Granata 433*ca6c8ee2SEnrico Granata data_ptr = (SourceDataType*)data.GetDataStart(); 434*ca6c8ee2SEnrico Granata 435*ca6c8ee2SEnrico Granata lldb::DataBufferSP utf8_data_buffer_sp; 436*ca6c8ee2SEnrico Granata UTF8* utf8_data_ptr = nullptr; 437*ca6c8ee2SEnrico Granata UTF8* utf8_data_end_ptr = nullptr; 438*ca6c8ee2SEnrico Granata 439*ca6c8ee2SEnrico Granata if (ConvertFunction) 440*ca6c8ee2SEnrico Granata { 441*ca6c8ee2SEnrico Granata utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0)); 442*ca6c8ee2SEnrico Granata utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); 443*ca6c8ee2SEnrico Granata utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); 444*ca6c8ee2SEnrico Granata ConvertFunction ( (const SourceDataType**)&data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion ); 445*ca6c8ee2SEnrico Granata utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr 446*ca6c8ee2SEnrico Granata } 447*ca6c8ee2SEnrico Granata else 448*ca6c8ee2SEnrico Granata { 449*ca6c8ee2SEnrico Granata // just copy the pointers - the cast is necessary to make the compiler happy 450*ca6c8ee2SEnrico Granata // but this should only happen if we are reading UTF8 data 451*ca6c8ee2SEnrico Granata utf8_data_ptr = (UTF8*)data_ptr; 452*ca6c8ee2SEnrico Granata utf8_data_end_ptr = (UTF8*)data_end_ptr; 453*ca6c8ee2SEnrico Granata } 454*ca6c8ee2SEnrico Granata 455*ca6c8ee2SEnrico Granata // since we tend to accept partial data (and even partially malformed data) 456*ca6c8ee2SEnrico Granata // we might end up with no NULL terminator before the end_ptr 457*ca6c8ee2SEnrico Granata // hence we need to take a slower route and ensure we stay within boundaries 458*ca6c8ee2SEnrico Granata for (;utf8_data_ptr < utf8_data_end_ptr;) 459*ca6c8ee2SEnrico Granata { 460*ca6c8ee2SEnrico Granata if (!*utf8_data_ptr) 461*ca6c8ee2SEnrico Granata break; 462*ca6c8ee2SEnrico Granata 463*ca6c8ee2SEnrico Granata if (escapeNonPrintables) 464*ca6c8ee2SEnrico Granata { 465*ca6c8ee2SEnrico Granata uint8_t* next_data = nullptr; 466*ca6c8ee2SEnrico Granata auto printable = GetPrintable(StringElementType::UTF8, utf8_data_ptr, utf8_data_end_ptr, next_data); 467*ca6c8ee2SEnrico Granata auto printable_bytes = printable.GetBytes(); 468*ca6c8ee2SEnrico Granata auto printable_size = printable.GetSize(); 469*ca6c8ee2SEnrico Granata if (!printable_bytes || !next_data) 470*ca6c8ee2SEnrico Granata { 471*ca6c8ee2SEnrico Granata // GetPrintable() failed on us - print one byte in a desperate resync attempt 472*ca6c8ee2SEnrico Granata printable_bytes = utf8_data_ptr; 473*ca6c8ee2SEnrico Granata printable_size = 1; 474*ca6c8ee2SEnrico Granata next_data = utf8_data_ptr+1; 475*ca6c8ee2SEnrico Granata } 476*ca6c8ee2SEnrico Granata for (int c = 0; c < printable_size; c++) 477*ca6c8ee2SEnrico Granata stream.Printf("%c", *(printable_bytes+c)); 478*ca6c8ee2SEnrico Granata utf8_data_ptr = (uint8_t*)next_data; 479*ca6c8ee2SEnrico Granata } 480*ca6c8ee2SEnrico Granata else 481*ca6c8ee2SEnrico Granata { 482*ca6c8ee2SEnrico Granata stream.Printf("%c",*utf8_data_ptr); 483*ca6c8ee2SEnrico Granata utf8_data_ptr++; 484*ca6c8ee2SEnrico Granata } 485*ca6c8ee2SEnrico Granata } 486*ca6c8ee2SEnrico Granata } 487*ca6c8ee2SEnrico Granata if (quote != 0) 488*ca6c8ee2SEnrico Granata stream.Printf("%c",quote); 489*ca6c8ee2SEnrico Granata return true; 490*ca6c8ee2SEnrico Granata } 491*ca6c8ee2SEnrico Granata 492*ca6c8ee2SEnrico Granata template<typename SourceDataType> 493*ca6c8ee2SEnrico Granata static bool 494*ca6c8ee2SEnrico Granata ReadUTFBufferAndDumpToStream (const ReadStringAndDumpToStreamOptions& options, 495*ca6c8ee2SEnrico Granata ConversionResult (*ConvertFunction) (const SourceDataType**, 496*ca6c8ee2SEnrico Granata const SourceDataType*, 497*ca6c8ee2SEnrico Granata UTF8**, 498*ca6c8ee2SEnrico Granata UTF8*, 499*ca6c8ee2SEnrico Granata ConversionFlags)) 500*ca6c8ee2SEnrico Granata { 501*ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 502*ca6c8ee2SEnrico Granata 503*ca6c8ee2SEnrico Granata if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS) 504*ca6c8ee2SEnrico Granata return false; 505*ca6c8ee2SEnrico Granata 506*ca6c8ee2SEnrico Granata lldb::ProcessSP process_sp(options.GetProcessSP()); 507*ca6c8ee2SEnrico Granata 508*ca6c8ee2SEnrico Granata if (!process_sp) 509*ca6c8ee2SEnrico Granata return false; 510*ca6c8ee2SEnrico Granata 511*ca6c8ee2SEnrico Granata const int type_width = sizeof(SourceDataType); 512*ca6c8ee2SEnrico Granata const int origin_encoding = 8 * type_width ; 513*ca6c8ee2SEnrico Granata if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) 514*ca6c8ee2SEnrico Granata return false; 515*ca6c8ee2SEnrico Granata // if not UTF8, I need a conversion function to return proper UTF8 516*ca6c8ee2SEnrico Granata if (origin_encoding != 8 && !ConvertFunction) 517*ca6c8ee2SEnrico Granata return false; 518*ca6c8ee2SEnrico Granata 519*ca6c8ee2SEnrico Granata if (!options.GetStream()) 520*ca6c8ee2SEnrico Granata return false; 521*ca6c8ee2SEnrico Granata 522*ca6c8ee2SEnrico Granata uint32_t sourceSize = options.GetSourceSize(); 523*ca6c8ee2SEnrico Granata bool needs_zero_terminator = options.GetNeedsZeroTermination(); 524*ca6c8ee2SEnrico Granata 525*ca6c8ee2SEnrico Granata if (!sourceSize) 526*ca6c8ee2SEnrico Granata { 527*ca6c8ee2SEnrico Granata sourceSize = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 528*ca6c8ee2SEnrico Granata needs_zero_terminator = true; 529*ca6c8ee2SEnrico Granata } 530*ca6c8ee2SEnrico Granata else 531*ca6c8ee2SEnrico Granata sourceSize = std::min(sourceSize,process_sp->GetTarget().GetMaximumSizeOfStringSummary()); 532*ca6c8ee2SEnrico Granata 533*ca6c8ee2SEnrico Granata const int bufferSPSize = sourceSize * type_width; 534*ca6c8ee2SEnrico Granata 535*ca6c8ee2SEnrico Granata lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0)); 536*ca6c8ee2SEnrico Granata 537*ca6c8ee2SEnrico Granata if (!buffer_sp->GetBytes()) 538*ca6c8ee2SEnrico Granata return false; 539*ca6c8ee2SEnrico Granata 540*ca6c8ee2SEnrico Granata Error error; 541*ca6c8ee2SEnrico Granata char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); 542*ca6c8ee2SEnrico Granata 543*ca6c8ee2SEnrico Granata size_t data_read = 0; 544*ca6c8ee2SEnrico Granata if (needs_zero_terminator) 545*ca6c8ee2SEnrico Granata data_read = process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width); 546*ca6c8ee2SEnrico Granata else 547*ca6c8ee2SEnrico Granata data_read = process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error); 548*ca6c8ee2SEnrico Granata 549*ca6c8ee2SEnrico Granata if (error.Fail() || data_read == 0) 550*ca6c8ee2SEnrico Granata { 551*ca6c8ee2SEnrico Granata options.GetStream()->Printf("unable to read data"); 552*ca6c8ee2SEnrico Granata return true; 553*ca6c8ee2SEnrico Granata } 554*ca6c8ee2SEnrico Granata 555*ca6c8ee2SEnrico Granata DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize()); 556*ca6c8ee2SEnrico Granata 557*ca6c8ee2SEnrico Granata return DumpUTFBufferToStream(ConvertFunction, data, *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), sourceSize, options.GetEscapeNonPrintables()); 558*ca6c8ee2SEnrico Granata } 559*ca6c8ee2SEnrico Granata 560*ca6c8ee2SEnrico Granata template <> 561*ca6c8ee2SEnrico Granata bool 562*ca6c8ee2SEnrico Granata lldb_private::formatters::ReadStringAndDumpToStream<StringElementType::UTF8> (ReadStringAndDumpToStreamOptions options) 563*ca6c8ee2SEnrico Granata { 564*ca6c8ee2SEnrico Granata return ReadUTFBufferAndDumpToStream<UTF8>(options, 565*ca6c8ee2SEnrico Granata nullptr); 566*ca6c8ee2SEnrico Granata } 567*ca6c8ee2SEnrico Granata 568*ca6c8ee2SEnrico Granata template <> 569*ca6c8ee2SEnrico Granata bool 570*ca6c8ee2SEnrico Granata lldb_private::formatters::ReadStringAndDumpToStream<StringElementType::UTF16> (ReadStringAndDumpToStreamOptions options) 571*ca6c8ee2SEnrico Granata { 572*ca6c8ee2SEnrico Granata return ReadUTFBufferAndDumpToStream<UTF16>(options, 573*ca6c8ee2SEnrico Granata ConvertUTF16toUTF8); 574*ca6c8ee2SEnrico Granata } 575*ca6c8ee2SEnrico Granata 576*ca6c8ee2SEnrico Granata template <> 577*ca6c8ee2SEnrico Granata bool 578*ca6c8ee2SEnrico Granata lldb_private::formatters::ReadStringAndDumpToStream<StringElementType::UTF32> (ReadStringAndDumpToStreamOptions options) 579*ca6c8ee2SEnrico Granata { 580*ca6c8ee2SEnrico Granata return ReadUTFBufferAndDumpToStream<UTF32>(options, 581*ca6c8ee2SEnrico Granata ConvertUTF32toUTF8); 582*ca6c8ee2SEnrico Granata } 583*ca6c8ee2SEnrico Granata 584*ca6c8ee2SEnrico Granata template <> 585*ca6c8ee2SEnrico Granata bool 586*ca6c8ee2SEnrico Granata lldb_private::formatters::ReadBufferAndDumpToStream<StringElementType::UTF8> (ReadBufferAndDumpToStreamOptions options) 587*ca6c8ee2SEnrico Granata { 588*ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 589*ca6c8ee2SEnrico Granata 590*ca6c8ee2SEnrico Granata return DumpUTFBufferToStream<UTF8>(nullptr, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables()); 591*ca6c8ee2SEnrico Granata } 592*ca6c8ee2SEnrico Granata 593*ca6c8ee2SEnrico Granata template <> 594*ca6c8ee2SEnrico Granata bool 595*ca6c8ee2SEnrico Granata lldb_private::formatters::ReadBufferAndDumpToStream<StringElementType::ASCII> (ReadBufferAndDumpToStreamOptions options) 596*ca6c8ee2SEnrico Granata { 597*ca6c8ee2SEnrico Granata // treat ASCII the same as UTF8 598*ca6c8ee2SEnrico Granata // FIXME: can we optimize ASCII some more? 599*ca6c8ee2SEnrico Granata return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); 600*ca6c8ee2SEnrico Granata } 601*ca6c8ee2SEnrico Granata 602*ca6c8ee2SEnrico Granata template <> 603*ca6c8ee2SEnrico Granata bool 604*ca6c8ee2SEnrico Granata lldb_private::formatters::ReadBufferAndDumpToStream<StringElementType::UTF16> (ReadBufferAndDumpToStreamOptions options) 605*ca6c8ee2SEnrico Granata { 606*ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 607*ca6c8ee2SEnrico Granata 608*ca6c8ee2SEnrico Granata return DumpUTFBufferToStream(ConvertUTF16toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables()); 609*ca6c8ee2SEnrico Granata } 610*ca6c8ee2SEnrico Granata 611*ca6c8ee2SEnrico Granata template <> 612*ca6c8ee2SEnrico Granata bool 613*ca6c8ee2SEnrico Granata lldb_private::formatters::ReadBufferAndDumpToStream<StringElementType::UTF32> (ReadBufferAndDumpToStreamOptions options) 614*ca6c8ee2SEnrico Granata { 615*ca6c8ee2SEnrico Granata assert(options.GetStream() && "need a Stream to print the string to"); 616*ca6c8ee2SEnrico Granata 617*ca6c8ee2SEnrico Granata return DumpUTFBufferToStream(ConvertUTF32toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables()); 618*ca6c8ee2SEnrico Granata } 619