1ca6c8ee2SEnrico Granata //===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===//
2ca6c8ee2SEnrico Granata //
3ca6c8ee2SEnrico Granata //                     The LLVM Compiler Infrastructure
4ca6c8ee2SEnrico Granata //
5ca6c8ee2SEnrico Granata // This file is distributed under the University of Illinois Open Source
6ca6c8ee2SEnrico Granata // License. See LICENSE.TXT for details.
7ca6c8ee2SEnrico Granata //
8ca6c8ee2SEnrico Granata //===----------------------------------------------------------------------===//
9ca6c8ee2SEnrico Granata 
10ca6c8ee2SEnrico Granata #include "lldb/DataFormatters/StringPrinter.h"
11ca6c8ee2SEnrico Granata 
12ebdc1ac0SEnrico Granata #include "lldb/Core/Debugger.h"
13ca6c8ee2SEnrico Granata #include "lldb/Core/Error.h"
14ebdc1ac0SEnrico Granata #include "lldb/Core/ValueObject.h"
15ac49453bSEnrico Granata #include "lldb/Target/Language.h"
16ca6c8ee2SEnrico Granata #include "lldb/Target/Process.h"
17ca6c8ee2SEnrico Granata #include "lldb/Target/Target.h"
18ca6c8ee2SEnrico Granata 
19ca6c8ee2SEnrico Granata #include "llvm/Support/ConvertUTF.h"
20ca6c8ee2SEnrico Granata 
21ca6c8ee2SEnrico Granata #include <ctype.h>
22ca6c8ee2SEnrico Granata #include <locale>
23ca6c8ee2SEnrico Granata 
24ca6c8ee2SEnrico Granata using namespace lldb;
25ca6c8ee2SEnrico Granata using namespace lldb_private;
26ca6c8ee2SEnrico Granata using namespace lldb_private::formatters;
27ca6c8ee2SEnrico Granata 
28ca6c8ee2SEnrico Granata // we define this for all values of type but only implement it for those we care about
29ca6c8ee2SEnrico Granata // that's good because we get linker errors for any unsupported type
30ac49453bSEnrico Granata template <lldb_private::formatters::StringPrinter::StringElementType type>
31ad650a18SEnrico Granata static StringPrinter::StringPrinterBufferPointer<>
32ca6c8ee2SEnrico Granata GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next);
33ca6c8ee2SEnrico Granata 
// mimic isprint() for Unicode codepoints
// returns false for the control/formatting ranges listed below and true for everything else
static bool
isprint(char32_t codepoint)
{
    // C0 controls, plus DEL
    const bool is_c0_control = (codepoint <= 0x1F) || (codepoint == 0x7F);
    // C1 controls
    const bool is_c1_control = (codepoint >= 0x80) && (codepoint <= 0x9F);
    // line/paragraph separators
    const bool is_separator = (codepoint == 0x2028) || (codepoint == 0x2029);
    // bidirectional text control
    const bool is_bidi_control = (codepoint == 0x200E) ||
                                 (codepoint == 0x200F) ||
                                 ((codepoint >= 0x202A) && (codepoint <= 0x202E));
    // interlinears and generally specials
    const bool is_special = (codepoint >= 0xFFF9) && (codepoint <= 0xFFFF);

    return !(is_c0_control || is_c1_control || is_separator || is_bidi_control || is_special);
}
60ca6c8ee2SEnrico Granata 
61ca6c8ee2SEnrico Granata template <>
62ad650a18SEnrico Granata StringPrinter::StringPrinterBufferPointer<>
63ac49453bSEnrico Granata GetPrintableImpl<StringPrinter::StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
64ca6c8ee2SEnrico Granata {
65ad650a18SEnrico Granata     StringPrinter::StringPrinterBufferPointer<> retval = {nullptr};
66ca6c8ee2SEnrico Granata 
67ca6c8ee2SEnrico Granata     switch (*buffer)
68ca6c8ee2SEnrico Granata     {
69da04fbb5SEnrico Granata         case 0:
70da04fbb5SEnrico Granata             retval = {"\\0",2};
71da04fbb5SEnrico Granata             break;
72ca6c8ee2SEnrico Granata         case '\a':
73ca6c8ee2SEnrico Granata             retval = {"\\a",2};
74ca6c8ee2SEnrico Granata             break;
75ca6c8ee2SEnrico Granata         case '\b':
76ca6c8ee2SEnrico Granata             retval = {"\\b",2};
77ca6c8ee2SEnrico Granata             break;
78ca6c8ee2SEnrico Granata         case '\f':
79ca6c8ee2SEnrico Granata             retval = {"\\f",2};
80ca6c8ee2SEnrico Granata             break;
81ca6c8ee2SEnrico Granata         case '\n':
82ca6c8ee2SEnrico Granata             retval = {"\\n",2};
83ca6c8ee2SEnrico Granata             break;
84ca6c8ee2SEnrico Granata         case '\r':
85ca6c8ee2SEnrico Granata             retval = {"\\r",2};
86ca6c8ee2SEnrico Granata             break;
87ca6c8ee2SEnrico Granata         case '\t':
88ca6c8ee2SEnrico Granata             retval = {"\\t",2};
89ca6c8ee2SEnrico Granata             break;
90ca6c8ee2SEnrico Granata         case '\v':
91ca6c8ee2SEnrico Granata             retval = {"\\v",2};
92ca6c8ee2SEnrico Granata             break;
93ca6c8ee2SEnrico Granata         case '\"':
94ca6c8ee2SEnrico Granata             retval = {"\\\"",2};
95ca6c8ee2SEnrico Granata             break;
96ca6c8ee2SEnrico Granata         case '\\':
97ca6c8ee2SEnrico Granata             retval = {"\\\\",2};
98ca6c8ee2SEnrico Granata             break;
99ca6c8ee2SEnrico Granata         default:
100ca6c8ee2SEnrico Granata           if (isprint(*buffer))
101ca6c8ee2SEnrico Granata               retval = {buffer,1};
102ca6c8ee2SEnrico Granata           else
103ca6c8ee2SEnrico Granata           {
104d7e6a4f2SVince Harron               uint8_t* data = new uint8_t[5];
105d7e6a4f2SVince Harron               sprintf((char*)data,"\\x%02x",*buffer);
106d7e6a4f2SVince Harron               retval = {data, 4, [] (const uint8_t* c) {delete[] c;} };
107ca6c8ee2SEnrico Granata               break;
108ca6c8ee2SEnrico Granata           }
109ca6c8ee2SEnrico Granata     }
110ca6c8ee2SEnrico Granata 
111ca6c8ee2SEnrico Granata     next = buffer + 1;
112ca6c8ee2SEnrico Granata     return retval;
113ca6c8ee2SEnrico Granata }
114ca6c8ee2SEnrico Granata 
// Decode a two-byte UTF-8 sequence (lead byte c0, continuation byte c1) into a code point.
// No validation is done on either byte
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1)
{
    const int lead_payload = c0 - 0xC0; // drop the 0xC0 offset of the lead byte
    const int tail_payload = c1 - 0x80; // drop the 0x80 offset of the continuation byte
    return lead_payload * 64 + tail_payload;
}
// Decode a three-byte UTF-8 sequence (lead byte c0, continuation bytes c1 and c2) into a
// code point. No validation is done on any of the bytes
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2)
{
    const int high_part = (c0 - 0xE0) * 4096; // lead byte payload, weight 64*64
    const int mid_part = (c1 - 0x80) * 64;    // first continuation byte, weight 64
    const int low_part = c2 - 0x80;           // second continuation byte
    return high_part + mid_part + low_part;
}
// Decode a four-byte UTF-8 sequence (lead byte c0, continuation bytes c1, c2 and c3) into a
// code point. No validation is done on any of the bytes
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3)
{
    // each continuation byte contributes 6 bits, so the weights are 64^3, 64^2, 64 and 1;
    // the 64^2 term must come from c1 (it previously reused c2, dropping c1 altogether)
    return (c0-240)*262144+(c1-128)*4096+(c2-128)*64+(c3-128);
}
130ca6c8ee2SEnrico Granata 
131ca6c8ee2SEnrico Granata template <>
132ad650a18SEnrico Granata StringPrinter::StringPrinterBufferPointer<>
133ac49453bSEnrico Granata GetPrintableImpl<StringPrinter::StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
134ca6c8ee2SEnrico Granata {
135ad650a18SEnrico Granata     StringPrinter::StringPrinterBufferPointer<> retval {nullptr};
136ca6c8ee2SEnrico Granata 
137ca6c8ee2SEnrico Granata     unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer);
138ca6c8ee2SEnrico Granata 
139ca6c8ee2SEnrico Granata     if (1+buffer_end-buffer < utf8_encoded_len)
140ca6c8ee2SEnrico Granata     {
141ca6c8ee2SEnrico Granata         // I don't have enough bytes - print whatever I have left
142ca6c8ee2SEnrico Granata         retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)};
143ca6c8ee2SEnrico Granata         next = buffer_end+1;
144ca6c8ee2SEnrico Granata         return retval;
145ca6c8ee2SEnrico Granata     }
146ca6c8ee2SEnrico Granata 
147ca6c8ee2SEnrico Granata     char32_t codepoint = 0;
148ca6c8ee2SEnrico Granata     switch (utf8_encoded_len)
149ca6c8ee2SEnrico Granata     {
150ca6c8ee2SEnrico Granata         case 1:
151ca6c8ee2SEnrico Granata             // this is just an ASCII byte - ask ASCII
152ac49453bSEnrico Granata             return GetPrintableImpl<StringPrinter::StringElementType::ASCII>(buffer, buffer_end, next);
153ca6c8ee2SEnrico Granata         case 2:
154ca6c8ee2SEnrico Granata             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1));
155ca6c8ee2SEnrico Granata             break;
156ca6c8ee2SEnrico Granata         case 3:
157ca6c8ee2SEnrico Granata             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2));
158ca6c8ee2SEnrico Granata             break;
159ca6c8ee2SEnrico Granata         case 4:
160ca6c8ee2SEnrico Granata             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3));
161ca6c8ee2SEnrico Granata             break;
162ca6c8ee2SEnrico Granata         default:
163ca6c8ee2SEnrico Granata             // this is probably some bogus non-character thing
164ca6c8ee2SEnrico Granata             // just print it as-is and hope to sync up again soon
165ca6c8ee2SEnrico Granata             retval = {buffer,1};
166ca6c8ee2SEnrico Granata             next = buffer+1;
167ca6c8ee2SEnrico Granata             return retval;
168ca6c8ee2SEnrico Granata     }
169ca6c8ee2SEnrico Granata 
170ca6c8ee2SEnrico Granata     if (codepoint)
171ca6c8ee2SEnrico Granata     {
172ca6c8ee2SEnrico Granata         switch (codepoint)
173ca6c8ee2SEnrico Granata         {
174da04fbb5SEnrico Granata             case 0:
175da04fbb5SEnrico Granata                 retval = {"\\0",2};
176da04fbb5SEnrico Granata                 break;
177ca6c8ee2SEnrico Granata             case '\a':
178ca6c8ee2SEnrico Granata                 retval = {"\\a",2};
179ca6c8ee2SEnrico Granata                 break;
180ca6c8ee2SEnrico Granata             case '\b':
181ca6c8ee2SEnrico Granata                 retval = {"\\b",2};
182ca6c8ee2SEnrico Granata                 break;
183ca6c8ee2SEnrico Granata             case '\f':
184ca6c8ee2SEnrico Granata                 retval = {"\\f",2};
185ca6c8ee2SEnrico Granata                 break;
186ca6c8ee2SEnrico Granata             case '\n':
187ca6c8ee2SEnrico Granata                 retval = {"\\n",2};
188ca6c8ee2SEnrico Granata                 break;
189ca6c8ee2SEnrico Granata             case '\r':
190ca6c8ee2SEnrico Granata                 retval = {"\\r",2};
191ca6c8ee2SEnrico Granata                 break;
192ca6c8ee2SEnrico Granata             case '\t':
193ca6c8ee2SEnrico Granata                 retval = {"\\t",2};
194ca6c8ee2SEnrico Granata                 break;
195ca6c8ee2SEnrico Granata             case '\v':
196ca6c8ee2SEnrico Granata                 retval = {"\\v",2};
197ca6c8ee2SEnrico Granata                 break;
198ca6c8ee2SEnrico Granata             case '\"':
199ca6c8ee2SEnrico Granata                 retval = {"\\\"",2};
200ca6c8ee2SEnrico Granata                 break;
201ca6c8ee2SEnrico Granata             case '\\':
202ca6c8ee2SEnrico Granata                 retval = {"\\\\",2};
203ca6c8ee2SEnrico Granata                 break;
204ca6c8ee2SEnrico Granata             default:
205ca6c8ee2SEnrico Granata                 if (isprint(codepoint))
206ca6c8ee2SEnrico Granata                     retval = {buffer,utf8_encoded_len};
207ca6c8ee2SEnrico Granata                 else
208ca6c8ee2SEnrico Granata                 {
209d7e6a4f2SVince Harron                     uint8_t* data = new uint8_t[11];
210*a505be4eSZachary Turner                     sprintf((char *)data, "\\U%08x", (unsigned)codepoint);
211d7e6a4f2SVince Harron                     retval = { data,10,[] (const uint8_t* c) {delete[] c;} };
212ca6c8ee2SEnrico Granata                     break;
213ca6c8ee2SEnrico Granata                 }
214ca6c8ee2SEnrico Granata         }
215ca6c8ee2SEnrico Granata 
216ca6c8ee2SEnrico Granata         next = buffer + utf8_encoded_len;
217ca6c8ee2SEnrico Granata         return retval;
218ca6c8ee2SEnrico Granata     }
219ca6c8ee2SEnrico Granata 
220ca6c8ee2SEnrico Granata     // this should not happen - but just in case.. try to resync at some point
221ca6c8ee2SEnrico Granata     retval = {buffer,1};
222ca6c8ee2SEnrico Granata     next = buffer+1;
223ca6c8ee2SEnrico Granata     return retval;
224ca6c8ee2SEnrico Granata }
225ca6c8ee2SEnrico Granata 
226ca6c8ee2SEnrico Granata // Given a sequence of bytes, this function returns:
227ca6c8ee2SEnrico Granata // a sequence of bytes to actually print out + a length
228ca6c8ee2SEnrico Granata // the following unscanned position of the buffer is in next
229ad650a18SEnrico Granata static StringPrinter::StringPrinterBufferPointer<>
230ac49453bSEnrico Granata GetPrintable(StringPrinter::StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
231ca6c8ee2SEnrico Granata {
232ca6c8ee2SEnrico Granata     if (!buffer)
233ca6c8ee2SEnrico Granata         return {nullptr};
234ca6c8ee2SEnrico Granata 
235ca6c8ee2SEnrico Granata     switch (type)
236ca6c8ee2SEnrico Granata     {
237ac49453bSEnrico Granata         case StringPrinter::StringElementType::ASCII:
238ac49453bSEnrico Granata             return GetPrintableImpl<StringPrinter::StringElementType::ASCII>(buffer, buffer_end, next);
239ac49453bSEnrico Granata         case StringPrinter::StringElementType::UTF8:
240ac49453bSEnrico Granata             return GetPrintableImpl<StringPrinter::StringElementType::UTF8>(buffer, buffer_end, next);
241ca6c8ee2SEnrico Granata         default:
242ca6c8ee2SEnrico Granata             return {nullptr};
243ca6c8ee2SEnrico Granata     }
244ca6c8ee2SEnrico Granata }
245ca6c8ee2SEnrico Granata 
246ac49453bSEnrico Granata StringPrinter::EscapingHelper
247ac49453bSEnrico Granata StringPrinter::GetDefaultEscapingHelper (GetPrintableElementType elem_type)
248ac49453bSEnrico Granata {
249ac49453bSEnrico Granata     switch (elem_type)
250ac49453bSEnrico Granata     {
251ac49453bSEnrico Granata         case GetPrintableElementType::UTF8:
252ac49453bSEnrico Granata             return [] (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) -> StringPrinter::StringPrinterBufferPointer<> {
253ac49453bSEnrico Granata                 return GetPrintable(StringPrinter::StringElementType::UTF8, buffer, buffer_end, next);
254ac49453bSEnrico Granata             };
255ac49453bSEnrico Granata         case GetPrintableElementType::ASCII:
256ac49453bSEnrico Granata             return [] (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) -> StringPrinter::StringPrinterBufferPointer<> {
257ac49453bSEnrico Granata                 return GetPrintable(StringPrinter::StringElementType::ASCII, buffer, buffer_end, next);
258ac49453bSEnrico Granata             };
259ac49453bSEnrico Granata     }
26043d3a7aeSSaleem Abdulrasool     llvm_unreachable("bad element type");
261ac49453bSEnrico Granata }
262ac49453bSEnrico Granata 
263ca6c8ee2SEnrico Granata // use this call if you already have an LLDB-side buffer for the data
264ca6c8ee2SEnrico Granata template<typename SourceDataType>
265ca6c8ee2SEnrico Granata static bool
266ca6c8ee2SEnrico Granata DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**,
267ca6c8ee2SEnrico Granata                                                             const SourceDataType*,
268ca6c8ee2SEnrico Granata                                                             UTF8**,
269ca6c8ee2SEnrico Granata                                                             UTF8*,
270ca6c8ee2SEnrico Granata                                                             ConversionFlags),
271ac49453bSEnrico Granata                        const StringPrinter::ReadBufferAndDumpToStreamOptions& dump_options)
272ca6c8ee2SEnrico Granata {
273d07f7550SEnrico Granata     Stream &stream(*dump_options.GetStream());
274d07f7550SEnrico Granata     if (dump_options.GetPrefixToken() != 0)
275d54f7fb8SEnrico Granata         stream.Printf("%s",dump_options.GetPrefixToken());
276d07f7550SEnrico Granata     if (dump_options.GetQuote() != 0)
277d07f7550SEnrico Granata         stream.Printf("%c",dump_options.GetQuote());
278d07f7550SEnrico Granata     auto data(dump_options.GetData());
279d07f7550SEnrico Granata     auto source_size(dump_options.GetSourceSize());
280ca6c8ee2SEnrico Granata     if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd())
281ca6c8ee2SEnrico Granata     {
282ca6c8ee2SEnrico Granata         const int bufferSPSize = data.GetByteSize();
283d07f7550SEnrico Granata         if (dump_options.GetSourceSize() == 0)
284ca6c8ee2SEnrico Granata         {
285ca6c8ee2SEnrico Granata             const int origin_encoding = 8*sizeof(SourceDataType);
286d07f7550SEnrico Granata             source_size = bufferSPSize/(origin_encoding / 4);
287ca6c8ee2SEnrico Granata         }
288ca6c8ee2SEnrico Granata 
289d7e6a4f2SVince Harron         const SourceDataType *data_ptr = (const SourceDataType*)data.GetDataStart();
290d07f7550SEnrico Granata         const SourceDataType *data_end_ptr = data_ptr + source_size;
291ca6c8ee2SEnrico Granata 
292d07f7550SEnrico Granata         const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator();
293d07f7550SEnrico Granata 
294d07f7550SEnrico Granata         if (zero_is_terminator)
295d07f7550SEnrico Granata         {
296ca6c8ee2SEnrico Granata             while (data_ptr < data_end_ptr)
297ca6c8ee2SEnrico Granata             {
298ca6c8ee2SEnrico Granata                 if (!*data_ptr)
299ca6c8ee2SEnrico Granata                 {
300ca6c8ee2SEnrico Granata                     data_end_ptr = data_ptr;
301ca6c8ee2SEnrico Granata                     break;
302ca6c8ee2SEnrico Granata                 }
303ca6c8ee2SEnrico Granata                 data_ptr++;
304ca6c8ee2SEnrico Granata             }
305ca6c8ee2SEnrico Granata 
306d7e6a4f2SVince Harron             data_ptr = (const SourceDataType*)data.GetDataStart();
307d07f7550SEnrico Granata         }
308ca6c8ee2SEnrico Granata 
309ca6c8ee2SEnrico Granata         lldb::DataBufferSP utf8_data_buffer_sp;
310ca6c8ee2SEnrico Granata         UTF8* utf8_data_ptr = nullptr;
311ca6c8ee2SEnrico Granata         UTF8* utf8_data_end_ptr = nullptr;
312ca6c8ee2SEnrico Granata 
313ca6c8ee2SEnrico Granata         if (ConvertFunction)
314ca6c8ee2SEnrico Granata         {
315ca6c8ee2SEnrico Granata             utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0));
316ca6c8ee2SEnrico Granata             utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes();
317ca6c8ee2SEnrico Granata             utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize();
318d7e6a4f2SVince Harron             ConvertFunction ( &data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion );
3198101f570SEnrico Granata             if (false == zero_is_terminator)
3208101f570SEnrico Granata                 utf8_data_end_ptr = utf8_data_ptr;
321ca6c8ee2SEnrico Granata             utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr
322ca6c8ee2SEnrico Granata         }
323ca6c8ee2SEnrico Granata         else
324ca6c8ee2SEnrico Granata         {
325ca6c8ee2SEnrico Granata             // just copy the pointers - the cast is necessary to make the compiler happy
326ca6c8ee2SEnrico Granata             // but this should only happen if we are reading UTF8 data
327ba507b04SSaleem Abdulrasool             utf8_data_ptr = const_cast<UTF8 *>(reinterpret_cast<const UTF8*>(data_ptr));
328ba507b04SSaleem Abdulrasool             utf8_data_end_ptr = const_cast<UTF8 *>(reinterpret_cast<const UTF8*>(data_end_ptr));
329ca6c8ee2SEnrico Granata         }
330ca6c8ee2SEnrico Granata 
331d07f7550SEnrico Granata         const bool escape_non_printables = dump_options.GetEscapeNonPrintables();
332ac49453bSEnrico Granata         lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback;
333ac49453bSEnrico Granata         if (escape_non_printables)
334ac49453bSEnrico Granata         {
335ac49453bSEnrico Granata             if (Language *language = Language::FindPlugin(dump_options.GetLanguage()))
336ac49453bSEnrico Granata                 escaping_callback = language->GetStringPrinterEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::UTF8);
337ac49453bSEnrico Granata             else
338ac49453bSEnrico Granata                 escaping_callback = lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::UTF8);
339ac49453bSEnrico Granata         }
340d07f7550SEnrico Granata 
341ca6c8ee2SEnrico Granata         // since we tend to accept partial data (and even partially malformed data)
342ca6c8ee2SEnrico Granata         // we might end up with no NULL terminator before the end_ptr
343ca6c8ee2SEnrico Granata         // hence we need to take a slower route and ensure we stay within boundaries
344ca6c8ee2SEnrico Granata         for (;utf8_data_ptr < utf8_data_end_ptr;)
345ca6c8ee2SEnrico Granata         {
346d07f7550SEnrico Granata             if (zero_is_terminator && !*utf8_data_ptr)
347ca6c8ee2SEnrico Granata                 break;
348ca6c8ee2SEnrico Granata 
349d07f7550SEnrico Granata             if (escape_non_printables)
350ca6c8ee2SEnrico Granata             {
351ca6c8ee2SEnrico Granata                 uint8_t* next_data = nullptr;
352ac49453bSEnrico Granata                 auto printable = escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data);
353ca6c8ee2SEnrico Granata                 auto printable_bytes = printable.GetBytes();
354ca6c8ee2SEnrico Granata                 auto printable_size = printable.GetSize();
355ca6c8ee2SEnrico Granata                 if (!printable_bytes || !next_data)
356ca6c8ee2SEnrico Granata                 {
357ca6c8ee2SEnrico Granata                     // GetPrintable() failed on us - print one byte in a desperate resync attempt
358ca6c8ee2SEnrico Granata                     printable_bytes = utf8_data_ptr;
359ca6c8ee2SEnrico Granata                     printable_size = 1;
360ca6c8ee2SEnrico Granata                     next_data = utf8_data_ptr+1;
361ca6c8ee2SEnrico Granata                 }
3623acfe1a3SAndy Gibbs                 for (unsigned c = 0; c < printable_size; c++)
363ca6c8ee2SEnrico Granata                     stream.Printf("%c", *(printable_bytes+c));
364ca6c8ee2SEnrico Granata                 utf8_data_ptr = (uint8_t*)next_data;
365ca6c8ee2SEnrico Granata             }
366ca6c8ee2SEnrico Granata             else
367ca6c8ee2SEnrico Granata             {
368ca6c8ee2SEnrico Granata                 stream.Printf("%c",*utf8_data_ptr);
369ca6c8ee2SEnrico Granata                 utf8_data_ptr++;
370ca6c8ee2SEnrico Granata             }
371ca6c8ee2SEnrico Granata         }
372ca6c8ee2SEnrico Granata     }
373d07f7550SEnrico Granata     if (dump_options.GetQuote() != 0)
374d07f7550SEnrico Granata         stream.Printf("%c",dump_options.GetQuote());
375d54f7fb8SEnrico Granata     if (dump_options.GetSuffixToken() != 0)
376d54f7fb8SEnrico Granata         stream.Printf("%s",dump_options.GetSuffixToken());
377b7662929SEnrico Granata     if (dump_options.GetIsTruncated())
378b7662929SEnrico Granata         stream.Printf("...");
379ca6c8ee2SEnrico Granata     return true;
380ca6c8ee2SEnrico Granata }
381ca6c8ee2SEnrico Granata 
382ac49453bSEnrico Granata lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions::ReadStringAndDumpToStreamOptions (ValueObject& valobj) :
383ebdc1ac0SEnrico Granata     ReadStringAndDumpToStreamOptions()
384ebdc1ac0SEnrico Granata {
385ebdc1ac0SEnrico Granata     SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
386ebdc1ac0SEnrico Granata }
387ebdc1ac0SEnrico Granata 
388ac49453bSEnrico Granata lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (ValueObject& valobj) :
389ebdc1ac0SEnrico Granata     ReadBufferAndDumpToStreamOptions()
390ebdc1ac0SEnrico Granata {
391ebdc1ac0SEnrico Granata     SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
392ebdc1ac0SEnrico Granata }
393ebdc1ac0SEnrico Granata 
394ac49453bSEnrico Granata lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (const ReadStringAndDumpToStreamOptions& options) :
395d07f7550SEnrico Granata     ReadBufferAndDumpToStreamOptions()
396d07f7550SEnrico Granata {
397d07f7550SEnrico Granata     SetStream(options.GetStream());
398d07f7550SEnrico Granata     SetPrefixToken(options.GetPrefixToken());
399d54f7fb8SEnrico Granata     SetSuffixToken(options.GetSuffixToken());
400d07f7550SEnrico Granata     SetQuote(options.GetQuote());
401d07f7550SEnrico Granata     SetEscapeNonPrintables(options.GetEscapeNonPrintables());
402d07f7550SEnrico Granata     SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator());
403ac49453bSEnrico Granata     SetLanguage(options.GetLanguage());
404d07f7550SEnrico Granata }
405d07f7550SEnrico Granata 
406ebdc1ac0SEnrico Granata 
407fd13743fSShawn Best namespace lldb_private
408fd13743fSShawn Best {
409fd13743fSShawn Best 
410fd13743fSShawn Best namespace formatters
411fd13743fSShawn Best {
412fd13743fSShawn Best 
413fd13743fSShawn Best template <>
414fd13743fSShawn Best bool
415ac49453bSEnrico Granata StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::ASCII> (const ReadStringAndDumpToStreamOptions& options)
416fd13743fSShawn Best {
417fd13743fSShawn Best     assert(options.GetStream() && "need a Stream to print the string to");
418fd13743fSShawn Best     Error my_error;
419fd13743fSShawn Best 
420fd13743fSShawn Best     ProcessSP process_sp(options.GetProcessSP());
421fd13743fSShawn Best 
422fd13743fSShawn Best     if (process_sp.get() == nullptr || options.GetLocation() == 0)
423fd13743fSShawn Best         return false;
424fd13743fSShawn Best 
425fd13743fSShawn Best     size_t size;
426b7662929SEnrico Granata     const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
427b7662929SEnrico Granata     bool is_truncated = false;
428fd13743fSShawn Best 
429fd13743fSShawn Best     if (options.GetSourceSize() == 0)
430b7662929SEnrico Granata         size = max_size;
43134042212SEnrico Granata     else if (!options.GetIgnoreMaxLength())
432b7662929SEnrico Granata     {
433b7662929SEnrico Granata         size = options.GetSourceSize();
434b7662929SEnrico Granata         if (size > max_size)
435b7662929SEnrico Granata         {
436b7662929SEnrico Granata             size = max_size;
437b7662929SEnrico Granata             is_truncated = true;
438b7662929SEnrico Granata         }
439b7662929SEnrico Granata     }
44034042212SEnrico Granata     else
44134042212SEnrico Granata         size = options.GetSourceSize();
442fd13743fSShawn Best 
443fd13743fSShawn Best     lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0));
444fd13743fSShawn Best 
445d7e6a4f2SVince Harron     process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error);
446fd13743fSShawn Best 
447fd13743fSShawn Best     if (my_error.Fail())
448fd13743fSShawn Best         return false;
449fd13743fSShawn Best 
450d54f7fb8SEnrico Granata     const char* prefix_token = options.GetPrefixToken();
451fd13743fSShawn Best     char quote = options.GetQuote();
452fd13743fSShawn Best 
453fd13743fSShawn Best     if (prefix_token != 0)
454d54f7fb8SEnrico Granata         options.GetStream()->Printf("%s%c",prefix_token,quote);
455fd13743fSShawn Best     else if (quote != 0)
456fd13743fSShawn Best         options.GetStream()->Printf("%c",quote);
457fd13743fSShawn Best 
458fd13743fSShawn Best     uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize();
459fd13743fSShawn Best 
460ac49453bSEnrico Granata     const bool escape_non_printables = options.GetEscapeNonPrintables();
461ac49453bSEnrico Granata     lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback;
462ac49453bSEnrico Granata     if (escape_non_printables)
463ac49453bSEnrico Granata     {
464ac49453bSEnrico Granata         if (Language *language = Language::FindPlugin(options.GetLanguage()))
465ac49453bSEnrico Granata             escaping_callback = language->GetStringPrinterEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::ASCII);
466ac49453bSEnrico Granata         else
467ac49453bSEnrico Granata             escaping_callback = lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::ASCII);
468ac49453bSEnrico Granata     }
469ac49453bSEnrico Granata 
470fd13743fSShawn Best     // since we tend to accept partial data (and even partially malformed data)
471fd13743fSShawn Best     // we might end up with no NULL terminator before the end_ptr
472fd13743fSShawn Best     // hence we need to take a slower route and ensure we stay within boundaries
473fd13743fSShawn Best     for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);)
474fd13743fSShawn Best     {
475ac49453bSEnrico Granata         if (escape_non_printables)
476fd13743fSShawn Best         {
477fd13743fSShawn Best             uint8_t* next_data = nullptr;
478ac49453bSEnrico Granata             auto printable = escaping_callback(data, data_end, next_data);
479fd13743fSShawn Best             auto printable_bytes = printable.GetBytes();
480fd13743fSShawn Best             auto printable_size = printable.GetSize();
481fd13743fSShawn Best             if (!printable_bytes || !next_data)
482fd13743fSShawn Best             {
483fd13743fSShawn Best                 // GetPrintable() failed on us - print one byte in a desperate resync attempt
484fd13743fSShawn Best                 printable_bytes = data;
485fd13743fSShawn Best                 printable_size = 1;
486fd13743fSShawn Best                 next_data = data+1;
487fd13743fSShawn Best             }
4883acfe1a3SAndy Gibbs             for (unsigned c = 0; c < printable_size; c++)
489fd13743fSShawn Best                 options.GetStream()->Printf("%c", *(printable_bytes+c));
490fd13743fSShawn Best             data = (uint8_t*)next_data;
491fd13743fSShawn Best         }
492fd13743fSShawn Best         else
493fd13743fSShawn Best         {
494fd13743fSShawn Best             options.GetStream()->Printf("%c",*data);
495fd13743fSShawn Best             data++;
496fd13743fSShawn Best         }
497fd13743fSShawn Best     }
498fd13743fSShawn Best 
499d54f7fb8SEnrico Granata     const char* suffix_token = options.GetSuffixToken();
500d54f7fb8SEnrico Granata 
501d54f7fb8SEnrico Granata     if (suffix_token != 0)
502d54f7fb8SEnrico Granata         options.GetStream()->Printf("%c%s",quote, suffix_token);
503d54f7fb8SEnrico Granata     else if (quote != 0)
504fd13743fSShawn Best         options.GetStream()->Printf("%c",quote);
505fd13743fSShawn Best 
506b7662929SEnrico Granata     if (is_truncated)
507b7662929SEnrico Granata         options.GetStream()->Printf("...");
508b7662929SEnrico Granata 
509fd13743fSShawn Best     return true;
510fd13743fSShawn Best }
511fd13743fSShawn Best 
512ca6c8ee2SEnrico Granata template<typename SourceDataType>
513ca6c8ee2SEnrico Granata static bool
514ac49453bSEnrico Granata ReadUTFBufferAndDumpToStream (const StringPrinter::ReadStringAndDumpToStreamOptions& options,
515ca6c8ee2SEnrico Granata                               ConversionResult (*ConvertFunction) (const SourceDataType**,
516ca6c8ee2SEnrico Granata                                                                    const SourceDataType*,
517ca6c8ee2SEnrico Granata                                                                    UTF8**,
518ca6c8ee2SEnrico Granata                                                                    UTF8*,
519ca6c8ee2SEnrico Granata                                                                    ConversionFlags))
520ca6c8ee2SEnrico Granata {
521ca6c8ee2SEnrico Granata     assert(options.GetStream() && "need a Stream to print the string to");
522ca6c8ee2SEnrico Granata 
523ca6c8ee2SEnrico Granata     if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS)
524ca6c8ee2SEnrico Granata         return false;
525ca6c8ee2SEnrico Granata 
526ca6c8ee2SEnrico Granata     lldb::ProcessSP process_sp(options.GetProcessSP());
527ca6c8ee2SEnrico Granata 
528ca6c8ee2SEnrico Granata     if (!process_sp)
529ca6c8ee2SEnrico Granata         return false;
530ca6c8ee2SEnrico Granata 
531ca6c8ee2SEnrico Granata     const int type_width = sizeof(SourceDataType);
532ca6c8ee2SEnrico Granata     const int origin_encoding = 8 * type_width ;
533ca6c8ee2SEnrico Granata     if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32)
534ca6c8ee2SEnrico Granata         return false;
535ca6c8ee2SEnrico Granata     // if not UTF8, I need a conversion function to return proper UTF8
536ca6c8ee2SEnrico Granata     if (origin_encoding != 8 && !ConvertFunction)
537ca6c8ee2SEnrico Granata         return false;
538ca6c8ee2SEnrico Granata 
539ca6c8ee2SEnrico Granata     if (!options.GetStream())
540ca6c8ee2SEnrico Granata         return false;
541ca6c8ee2SEnrico Granata 
542ca6c8ee2SEnrico Granata     uint32_t sourceSize = options.GetSourceSize();
543ca6c8ee2SEnrico Granata     bool needs_zero_terminator = options.GetNeedsZeroTermination();
544ca6c8ee2SEnrico Granata 
545b7662929SEnrico Granata     bool is_truncated = false;
546b7662929SEnrico Granata     const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
547b7662929SEnrico Granata 
548ca6c8ee2SEnrico Granata     if (!sourceSize)
549ca6c8ee2SEnrico Granata     {
550b7662929SEnrico Granata         sourceSize = max_size;
551ca6c8ee2SEnrico Granata         needs_zero_terminator = true;
552ca6c8ee2SEnrico Granata     }
553b0e8a55dSEnrico Granata     else if (!options.GetIgnoreMaxLength())
554b7662929SEnrico Granata     {
555b7662929SEnrico Granata         if (sourceSize > max_size)
556b7662929SEnrico Granata         {
557b7662929SEnrico Granata             sourceSize = max_size;
558b7662929SEnrico Granata             is_truncated = true;
559b7662929SEnrico Granata         }
560b7662929SEnrico Granata     }
561ca6c8ee2SEnrico Granata 
562ca6c8ee2SEnrico Granata     const int bufferSPSize = sourceSize * type_width;
563ca6c8ee2SEnrico Granata 
564ca6c8ee2SEnrico Granata     lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0));
565ca6c8ee2SEnrico Granata 
566ca6c8ee2SEnrico Granata     if (!buffer_sp->GetBytes())
567ca6c8ee2SEnrico Granata         return false;
568ca6c8ee2SEnrico Granata 
569ca6c8ee2SEnrico Granata     Error error;
570ca6c8ee2SEnrico Granata     char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes());
571ca6c8ee2SEnrico Granata 
572ca6c8ee2SEnrico Granata     if (needs_zero_terminator)
573d7e6a4f2SVince Harron         process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width);
574ca6c8ee2SEnrico Granata     else
575d7e6a4f2SVince Harron         process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error);
576ca6c8ee2SEnrico Granata 
577099263b4SEnrico Granata     if (error.Fail())
578ca6c8ee2SEnrico Granata     {
579ca6c8ee2SEnrico Granata         options.GetStream()->Printf("unable to read data");
580ca6c8ee2SEnrico Granata         return true;
581ca6c8ee2SEnrico Granata     }
582ca6c8ee2SEnrico Granata 
583ca6c8ee2SEnrico Granata     DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize());
584ca6c8ee2SEnrico Granata 
585ac49453bSEnrico Granata     StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options);
586d07f7550SEnrico Granata     dump_options.SetData(data);
587d07f7550SEnrico Granata     dump_options.SetSourceSize(sourceSize);
588b7662929SEnrico Granata     dump_options.SetIsTruncated(is_truncated);
589d07f7550SEnrico Granata 
590d07f7550SEnrico Granata     return DumpUTFBufferToStream(ConvertFunction, dump_options);
591ca6c8ee2SEnrico Granata }
592ca6c8ee2SEnrico Granata 
593ca6c8ee2SEnrico Granata template <>
594ca6c8ee2SEnrico Granata bool
595ac49453bSEnrico Granata StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::UTF8> (const ReadStringAndDumpToStreamOptions& options)
596ca6c8ee2SEnrico Granata {
597ca6c8ee2SEnrico Granata     return ReadUTFBufferAndDumpToStream<UTF8>(options,
598ca6c8ee2SEnrico Granata                                               nullptr);
599ca6c8ee2SEnrico Granata }
600ca6c8ee2SEnrico Granata 
601ca6c8ee2SEnrico Granata template <>
602ca6c8ee2SEnrico Granata bool
603ac49453bSEnrico Granata StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::UTF16> (const ReadStringAndDumpToStreamOptions& options)
604ca6c8ee2SEnrico Granata {
605ca6c8ee2SEnrico Granata     return ReadUTFBufferAndDumpToStream<UTF16>(options,
606ca6c8ee2SEnrico Granata                                                ConvertUTF16toUTF8);
607ca6c8ee2SEnrico Granata }
608ca6c8ee2SEnrico Granata 
609ca6c8ee2SEnrico Granata template <>
610ca6c8ee2SEnrico Granata bool
611ac49453bSEnrico Granata StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::UTF32> (const ReadStringAndDumpToStreamOptions& options)
612ca6c8ee2SEnrico Granata {
613ca6c8ee2SEnrico Granata     return ReadUTFBufferAndDumpToStream<UTF32>(options,
614ca6c8ee2SEnrico Granata                                                ConvertUTF32toUTF8);
615ca6c8ee2SEnrico Granata }
616ca6c8ee2SEnrico Granata 
617ca6c8ee2SEnrico Granata template <>
618ca6c8ee2SEnrico Granata bool
619ac49453bSEnrico Granata StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::UTF8> (const ReadBufferAndDumpToStreamOptions& options)
620ca6c8ee2SEnrico Granata {
621ca6c8ee2SEnrico Granata     assert(options.GetStream() && "need a Stream to print the string to");
622ca6c8ee2SEnrico Granata 
623d07f7550SEnrico Granata     return DumpUTFBufferToStream<UTF8>(nullptr, options);
624ca6c8ee2SEnrico Granata }
625ca6c8ee2SEnrico Granata 
626ca6c8ee2SEnrico Granata template <>
627ca6c8ee2SEnrico Granata bool
628ac49453bSEnrico Granata StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::ASCII> (const ReadBufferAndDumpToStreamOptions& options)
629ca6c8ee2SEnrico Granata {
630ca6c8ee2SEnrico Granata     // treat ASCII the same as UTF8
631ca6c8ee2SEnrico Granata     // FIXME: can we optimize ASCII some more?
632ca6c8ee2SEnrico Granata     return ReadBufferAndDumpToStream<StringElementType::UTF8>(options);
633ca6c8ee2SEnrico Granata }
634ca6c8ee2SEnrico Granata 
635ca6c8ee2SEnrico Granata template <>
636ca6c8ee2SEnrico Granata bool
637ac49453bSEnrico Granata StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::UTF16> (const ReadBufferAndDumpToStreamOptions& options)
638ca6c8ee2SEnrico Granata {
639ca6c8ee2SEnrico Granata     assert(options.GetStream() && "need a Stream to print the string to");
640ca6c8ee2SEnrico Granata 
641d07f7550SEnrico Granata     return DumpUTFBufferToStream(ConvertUTF16toUTF8, options);
642ca6c8ee2SEnrico Granata }
643ca6c8ee2SEnrico Granata 
644ca6c8ee2SEnrico Granata template <>
645ca6c8ee2SEnrico Granata bool
646ac49453bSEnrico Granata StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::UTF32> (const ReadBufferAndDumpToStreamOptions& options)
647ca6c8ee2SEnrico Granata {
648ca6c8ee2SEnrico Granata     assert(options.GetStream() && "need a Stream to print the string to");
649ca6c8ee2SEnrico Granata 
650d07f7550SEnrico Granata     return DumpUTFBufferToStream(ConvertUTF32toUTF8, options);
651ca6c8ee2SEnrico Granata }
652fd13743fSShawn Best 
653fd13743fSShawn Best } // namespace formatters
654fd13743fSShawn Best 
655fd13743fSShawn Best } // namespace lldb_private
656