1ca6c8ee2SEnrico Granata //===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===//
2ca6c8ee2SEnrico Granata //
3ca6c8ee2SEnrico Granata //                     The LLVM Compiler Infrastructure
4ca6c8ee2SEnrico Granata //
5ca6c8ee2SEnrico Granata // This file is distributed under the University of Illinois Open Source
6ca6c8ee2SEnrico Granata // License. See LICENSE.TXT for details.
7ca6c8ee2SEnrico Granata //
8ca6c8ee2SEnrico Granata //===----------------------------------------------------------------------===//
9ca6c8ee2SEnrico Granata 
10ca6c8ee2SEnrico Granata #include "lldb/DataFormatters/StringPrinter.h"
11ca6c8ee2SEnrico Granata 
12ca6c8ee2SEnrico Granata #include "lldb/Core/DataExtractor.h"
13ebdc1ac0SEnrico Granata #include "lldb/Core/Debugger.h"
14ca6c8ee2SEnrico Granata #include "lldb/Core/Error.h"
15ebdc1ac0SEnrico Granata #include "lldb/Core/ValueObject.h"
16ca6c8ee2SEnrico Granata #include "lldb/Target/Process.h"
17ca6c8ee2SEnrico Granata #include "lldb/Target/Target.h"
18ca6c8ee2SEnrico Granata 
19ca6c8ee2SEnrico Granata #include "llvm/Support/ConvertUTF.h"
20ca6c8ee2SEnrico Granata 
21ca6c8ee2SEnrico Granata #include <ctype.h>
22ca6c8ee2SEnrico Granata #include <locale>
23ca6c8ee2SEnrico Granata 
24ca6c8ee2SEnrico Granata using namespace lldb;
25ca6c8ee2SEnrico Granata using namespace lldb_private;
26ca6c8ee2SEnrico Granata using namespace lldb_private::formatters;
27ca6c8ee2SEnrico Granata 
28ca6c8ee2SEnrico Granata // we define this for all values of type but only implement it for those we care about
29ca6c8ee2SEnrico Granata // that's good because we get linker errors for any unsupported type
30ca6c8ee2SEnrico Granata template <StringElementType type>
31*ad650a18SEnrico Granata static StringPrinter::StringPrinterBufferPointer<>
32ca6c8ee2SEnrico Granata GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next);
33ca6c8ee2SEnrico Granata 
// Unicode counterpart of isprint(): returns true when the codepoint can be shown
// verbatim, false when it belongs to one of the ranges that must be escaped
static bool
isprint(char32_t codepoint)
{
    const bool c0_control = (codepoint <= 0x1F) || (codepoint == 0x7F);
    const bool c1_control = (codepoint >= 0x80) && (codepoint <= 0x9F);
    const bool line_or_paragraph_separator = (codepoint == 0x2028) || (codepoint == 0x2029);
    const bool bidi_control = (codepoint == 0x200E) ||
                              (codepoint == 0x200F) ||
                              (codepoint >= 0x202A && codepoint <= 0x202E);
    const bool interlinear_or_special = (codepoint >= 0xFFF9) && (codepoint <= 0xFFFF);

    return !(c0_control ||
             c1_control ||
             line_or_paragraph_separator ||
             bidi_control ||
             interlinear_or_special);
}
60ca6c8ee2SEnrico Granata 
61ca6c8ee2SEnrico Granata template <>
62*ad650a18SEnrico Granata StringPrinter::StringPrinterBufferPointer<>
63ca6c8ee2SEnrico Granata GetPrintableImpl<StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
64ca6c8ee2SEnrico Granata {
65*ad650a18SEnrico Granata     StringPrinter::StringPrinterBufferPointer<> retval = {nullptr};
66ca6c8ee2SEnrico Granata 
67ca6c8ee2SEnrico Granata     switch (*buffer)
68ca6c8ee2SEnrico Granata     {
69da04fbb5SEnrico Granata         case 0:
70da04fbb5SEnrico Granata             retval = {"\\0",2};
71da04fbb5SEnrico Granata             break;
72ca6c8ee2SEnrico Granata         case '\a':
73ca6c8ee2SEnrico Granata             retval = {"\\a",2};
74ca6c8ee2SEnrico Granata             break;
75ca6c8ee2SEnrico Granata         case '\b':
76ca6c8ee2SEnrico Granata             retval = {"\\b",2};
77ca6c8ee2SEnrico Granata             break;
78ca6c8ee2SEnrico Granata         case '\f':
79ca6c8ee2SEnrico Granata             retval = {"\\f",2};
80ca6c8ee2SEnrico Granata             break;
81ca6c8ee2SEnrico Granata         case '\n':
82ca6c8ee2SEnrico Granata             retval = {"\\n",2};
83ca6c8ee2SEnrico Granata             break;
84ca6c8ee2SEnrico Granata         case '\r':
85ca6c8ee2SEnrico Granata             retval = {"\\r",2};
86ca6c8ee2SEnrico Granata             break;
87ca6c8ee2SEnrico Granata         case '\t':
88ca6c8ee2SEnrico Granata             retval = {"\\t",2};
89ca6c8ee2SEnrico Granata             break;
90ca6c8ee2SEnrico Granata         case '\v':
91ca6c8ee2SEnrico Granata             retval = {"\\v",2};
92ca6c8ee2SEnrico Granata             break;
93ca6c8ee2SEnrico Granata         case '\"':
94ca6c8ee2SEnrico Granata             retval = {"\\\"",2};
95ca6c8ee2SEnrico Granata             break;
96ca6c8ee2SEnrico Granata         case '\\':
97ca6c8ee2SEnrico Granata             retval = {"\\\\",2};
98ca6c8ee2SEnrico Granata             break;
99ca6c8ee2SEnrico Granata         default:
100ca6c8ee2SEnrico Granata           if (isprint(*buffer))
101ca6c8ee2SEnrico Granata               retval = {buffer,1};
102ca6c8ee2SEnrico Granata           else
103ca6c8ee2SEnrico Granata           {
104d7e6a4f2SVince Harron               uint8_t* data = new uint8_t[5];
105d7e6a4f2SVince Harron               sprintf((char*)data,"\\x%02x",*buffer);
106d7e6a4f2SVince Harron               retval = {data, 4, [] (const uint8_t* c) {delete[] c;} };
107ca6c8ee2SEnrico Granata               break;
108ca6c8ee2SEnrico Granata           }
109ca6c8ee2SEnrico Granata     }
110ca6c8ee2SEnrico Granata 
111ca6c8ee2SEnrico Granata     next = buffer + 1;
112ca6c8ee2SEnrico Granata     return retval;
113ca6c8ee2SEnrico Granata }
114ca6c8ee2SEnrico Granata 
// Decodes a 2-byte UTF8 sequence: c0 is the lead byte (offset 192), c1 the continuation byte (offset 128)
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1)
{
    const int lead_bits = c0 - 192;
    const int trail_bits = c1 - 128;
    return lead_bits * 64 + trail_bits;
}
// Decodes a 3-byte UTF8 sequence: c0 is the lead byte (offset 224), c1 and c2 the continuation bytes (offset 128)
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2)
{
    const int lead_bits = c0 - 224;
    const int middle_bits = c1 - 128;
    const int trail_bits = c2 - 128;
    return lead_bits * 4096 + middle_bits * 64 + trail_bits;
}
// Decodes a 4-byte UTF8 sequence: c0 is the lead byte (offset 240), c1..c3 the continuation bytes (offset 128).
// Each continuation byte contributes 6 bits, so the weights are 64^3, 64^2, 64 and 1.
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3)
{
    // the 4096 term must come from c1 (the first continuation byte); using c2 twice
    // drops c1 entirely and yields the wrong codepoint for every 4-byte sequence
    return (c0-240)*262144+(c1-128)*4096+(c2-128)*64+(c3-128);
}
130ca6c8ee2SEnrico Granata 
131ca6c8ee2SEnrico Granata template <>
132*ad650a18SEnrico Granata StringPrinter::StringPrinterBufferPointer<>
133ca6c8ee2SEnrico Granata GetPrintableImpl<StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
134ca6c8ee2SEnrico Granata {
135*ad650a18SEnrico Granata     StringPrinter::StringPrinterBufferPointer<> retval {nullptr};
136ca6c8ee2SEnrico Granata 
137ca6c8ee2SEnrico Granata     unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer);
138ca6c8ee2SEnrico Granata 
139ca6c8ee2SEnrico Granata     if (1+buffer_end-buffer < utf8_encoded_len)
140ca6c8ee2SEnrico Granata     {
141ca6c8ee2SEnrico Granata         // I don't have enough bytes - print whatever I have left
142ca6c8ee2SEnrico Granata         retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)};
143ca6c8ee2SEnrico Granata         next = buffer_end+1;
144ca6c8ee2SEnrico Granata         return retval;
145ca6c8ee2SEnrico Granata     }
146ca6c8ee2SEnrico Granata 
147ca6c8ee2SEnrico Granata     char32_t codepoint = 0;
148ca6c8ee2SEnrico Granata     switch (utf8_encoded_len)
149ca6c8ee2SEnrico Granata     {
150ca6c8ee2SEnrico Granata         case 1:
151ca6c8ee2SEnrico Granata             // this is just an ASCII byte - ask ASCII
152ca6c8ee2SEnrico Granata             return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next);
153ca6c8ee2SEnrico Granata         case 2:
154ca6c8ee2SEnrico Granata             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1));
155ca6c8ee2SEnrico Granata             break;
156ca6c8ee2SEnrico Granata         case 3:
157ca6c8ee2SEnrico Granata             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2));
158ca6c8ee2SEnrico Granata             break;
159ca6c8ee2SEnrico Granata         case 4:
160ca6c8ee2SEnrico Granata             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3));
161ca6c8ee2SEnrico Granata             break;
162ca6c8ee2SEnrico Granata         default:
163ca6c8ee2SEnrico Granata             // this is probably some bogus non-character thing
164ca6c8ee2SEnrico Granata             // just print it as-is and hope to sync up again soon
165ca6c8ee2SEnrico Granata             retval = {buffer,1};
166ca6c8ee2SEnrico Granata             next = buffer+1;
167ca6c8ee2SEnrico Granata             return retval;
168ca6c8ee2SEnrico Granata     }
169ca6c8ee2SEnrico Granata 
170ca6c8ee2SEnrico Granata     if (codepoint)
171ca6c8ee2SEnrico Granata     {
172ca6c8ee2SEnrico Granata         switch (codepoint)
173ca6c8ee2SEnrico Granata         {
174da04fbb5SEnrico Granata             case 0:
175da04fbb5SEnrico Granata                 retval = {"\\0",2};
176da04fbb5SEnrico Granata                 break;
177ca6c8ee2SEnrico Granata             case '\a':
178ca6c8ee2SEnrico Granata                 retval = {"\\a",2};
179ca6c8ee2SEnrico Granata                 break;
180ca6c8ee2SEnrico Granata             case '\b':
181ca6c8ee2SEnrico Granata                 retval = {"\\b",2};
182ca6c8ee2SEnrico Granata                 break;
183ca6c8ee2SEnrico Granata             case '\f':
184ca6c8ee2SEnrico Granata                 retval = {"\\f",2};
185ca6c8ee2SEnrico Granata                 break;
186ca6c8ee2SEnrico Granata             case '\n':
187ca6c8ee2SEnrico Granata                 retval = {"\\n",2};
188ca6c8ee2SEnrico Granata                 break;
189ca6c8ee2SEnrico Granata             case '\r':
190ca6c8ee2SEnrico Granata                 retval = {"\\r",2};
191ca6c8ee2SEnrico Granata                 break;
192ca6c8ee2SEnrico Granata             case '\t':
193ca6c8ee2SEnrico Granata                 retval = {"\\t",2};
194ca6c8ee2SEnrico Granata                 break;
195ca6c8ee2SEnrico Granata             case '\v':
196ca6c8ee2SEnrico Granata                 retval = {"\\v",2};
197ca6c8ee2SEnrico Granata                 break;
198ca6c8ee2SEnrico Granata             case '\"':
199ca6c8ee2SEnrico Granata                 retval = {"\\\"",2};
200ca6c8ee2SEnrico Granata                 break;
201ca6c8ee2SEnrico Granata             case '\\':
202ca6c8ee2SEnrico Granata                 retval = {"\\\\",2};
203ca6c8ee2SEnrico Granata                 break;
204ca6c8ee2SEnrico Granata             default:
205ca6c8ee2SEnrico Granata                 if (isprint(codepoint))
206ca6c8ee2SEnrico Granata                     retval = {buffer,utf8_encoded_len};
207ca6c8ee2SEnrico Granata                 else
208ca6c8ee2SEnrico Granata                 {
209d7e6a4f2SVince Harron                     uint8_t* data = new uint8_t[11];
210d7e6a4f2SVince Harron                     sprintf((char*)data,"\\U%08x",codepoint);
211d7e6a4f2SVince Harron                     retval = { data,10,[] (const uint8_t* c) {delete[] c;} };
212ca6c8ee2SEnrico Granata                     break;
213ca6c8ee2SEnrico Granata                 }
214ca6c8ee2SEnrico Granata         }
215ca6c8ee2SEnrico Granata 
216ca6c8ee2SEnrico Granata         next = buffer + utf8_encoded_len;
217ca6c8ee2SEnrico Granata         return retval;
218ca6c8ee2SEnrico Granata     }
219ca6c8ee2SEnrico Granata 
220ca6c8ee2SEnrico Granata     // this should not happen - but just in case.. try to resync at some point
221ca6c8ee2SEnrico Granata     retval = {buffer,1};
222ca6c8ee2SEnrico Granata     next = buffer+1;
223ca6c8ee2SEnrico Granata     return retval;
224ca6c8ee2SEnrico Granata }
225ca6c8ee2SEnrico Granata 
226ca6c8ee2SEnrico Granata // Given a sequence of bytes, this function returns:
227ca6c8ee2SEnrico Granata // a sequence of bytes to actually print out + a length
228ca6c8ee2SEnrico Granata // the following unscanned position of the buffer is in next
229*ad650a18SEnrico Granata static StringPrinter::StringPrinterBufferPointer<>
230ca6c8ee2SEnrico Granata GetPrintable(StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
231ca6c8ee2SEnrico Granata {
232ca6c8ee2SEnrico Granata     if (!buffer)
233ca6c8ee2SEnrico Granata         return {nullptr};
234ca6c8ee2SEnrico Granata 
235ca6c8ee2SEnrico Granata     switch (type)
236ca6c8ee2SEnrico Granata     {
237ca6c8ee2SEnrico Granata         case StringElementType::ASCII:
238ca6c8ee2SEnrico Granata             return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next);
239ca6c8ee2SEnrico Granata         case StringElementType::UTF8:
240ca6c8ee2SEnrico Granata             return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next);
241ca6c8ee2SEnrico Granata         default:
242ca6c8ee2SEnrico Granata             return {nullptr};
243ca6c8ee2SEnrico Granata     }
244ca6c8ee2SEnrico Granata }
245ca6c8ee2SEnrico Granata 
246ca6c8ee2SEnrico Granata // use this call if you already have an LLDB-side buffer for the data
247ca6c8ee2SEnrico Granata template<typename SourceDataType>
248ca6c8ee2SEnrico Granata static bool
249ca6c8ee2SEnrico Granata DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**,
250ca6c8ee2SEnrico Granata                                                             const SourceDataType*,
251ca6c8ee2SEnrico Granata                                                             UTF8**,
252ca6c8ee2SEnrico Granata                                                             UTF8*,
253ca6c8ee2SEnrico Granata                                                             ConversionFlags),
254d07f7550SEnrico Granata                        const ReadBufferAndDumpToStreamOptions& dump_options)
255ca6c8ee2SEnrico Granata {
256d07f7550SEnrico Granata     Stream &stream(*dump_options.GetStream());
257d07f7550SEnrico Granata     if (dump_options.GetPrefixToken() != 0)
258d07f7550SEnrico Granata         stream.Printf("%c",dump_options.GetPrefixToken());
259d07f7550SEnrico Granata     if (dump_options.GetQuote() != 0)
260d07f7550SEnrico Granata         stream.Printf("%c",dump_options.GetQuote());
261d07f7550SEnrico Granata     auto data(dump_options.GetData());
262d07f7550SEnrico Granata     auto source_size(dump_options.GetSourceSize());
263ca6c8ee2SEnrico Granata     if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd())
264ca6c8ee2SEnrico Granata     {
265ca6c8ee2SEnrico Granata         const int bufferSPSize = data.GetByteSize();
266d07f7550SEnrico Granata         if (dump_options.GetSourceSize() == 0)
267ca6c8ee2SEnrico Granata         {
268ca6c8ee2SEnrico Granata             const int origin_encoding = 8*sizeof(SourceDataType);
269d07f7550SEnrico Granata             source_size = bufferSPSize/(origin_encoding / 4);
270ca6c8ee2SEnrico Granata         }
271ca6c8ee2SEnrico Granata 
272d7e6a4f2SVince Harron         const SourceDataType *data_ptr = (const SourceDataType*)data.GetDataStart();
273d07f7550SEnrico Granata         const SourceDataType *data_end_ptr = data_ptr + source_size;
274ca6c8ee2SEnrico Granata 
275d07f7550SEnrico Granata         const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator();
276d07f7550SEnrico Granata 
277d07f7550SEnrico Granata         if (zero_is_terminator)
278d07f7550SEnrico Granata         {
279ca6c8ee2SEnrico Granata             while (data_ptr < data_end_ptr)
280ca6c8ee2SEnrico Granata             {
281ca6c8ee2SEnrico Granata                 if (!*data_ptr)
282ca6c8ee2SEnrico Granata                 {
283ca6c8ee2SEnrico Granata                     data_end_ptr = data_ptr;
284ca6c8ee2SEnrico Granata                     break;
285ca6c8ee2SEnrico Granata                 }
286ca6c8ee2SEnrico Granata                 data_ptr++;
287ca6c8ee2SEnrico Granata             }
288ca6c8ee2SEnrico Granata 
289d7e6a4f2SVince Harron             data_ptr = (const SourceDataType*)data.GetDataStart();
290d07f7550SEnrico Granata         }
291ca6c8ee2SEnrico Granata 
292ca6c8ee2SEnrico Granata         lldb::DataBufferSP utf8_data_buffer_sp;
293ca6c8ee2SEnrico Granata         UTF8* utf8_data_ptr = nullptr;
294ca6c8ee2SEnrico Granata         UTF8* utf8_data_end_ptr = nullptr;
295ca6c8ee2SEnrico Granata 
296ca6c8ee2SEnrico Granata         if (ConvertFunction)
297ca6c8ee2SEnrico Granata         {
298ca6c8ee2SEnrico Granata             utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0));
299ca6c8ee2SEnrico Granata             utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes();
300ca6c8ee2SEnrico Granata             utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize();
301d7e6a4f2SVince Harron             ConvertFunction ( &data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion );
3028101f570SEnrico Granata             if (false == zero_is_terminator)
3038101f570SEnrico Granata                 utf8_data_end_ptr = utf8_data_ptr;
304ca6c8ee2SEnrico Granata             utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr
305ca6c8ee2SEnrico Granata         }
306ca6c8ee2SEnrico Granata         else
307ca6c8ee2SEnrico Granata         {
308ca6c8ee2SEnrico Granata             // just copy the pointers - the cast is necessary to make the compiler happy
309ca6c8ee2SEnrico Granata             // but this should only happen if we are reading UTF8 data
310ca6c8ee2SEnrico Granata             utf8_data_ptr = (UTF8*)data_ptr;
311ca6c8ee2SEnrico Granata             utf8_data_end_ptr = (UTF8*)data_end_ptr;
312ca6c8ee2SEnrico Granata         }
313ca6c8ee2SEnrico Granata 
314d07f7550SEnrico Granata         const bool escape_non_printables = dump_options.GetEscapeNonPrintables();
315d07f7550SEnrico Granata 
316ca6c8ee2SEnrico Granata         // since we tend to accept partial data (and even partially malformed data)
317ca6c8ee2SEnrico Granata         // we might end up with no NULL terminator before the end_ptr
318ca6c8ee2SEnrico Granata         // hence we need to take a slower route and ensure we stay within boundaries
319ca6c8ee2SEnrico Granata         for (;utf8_data_ptr < utf8_data_end_ptr;)
320ca6c8ee2SEnrico Granata         {
321d07f7550SEnrico Granata             if (zero_is_terminator && !*utf8_data_ptr)
322ca6c8ee2SEnrico Granata                 break;
323ca6c8ee2SEnrico Granata 
324d07f7550SEnrico Granata             if (escape_non_printables)
325ca6c8ee2SEnrico Granata             {
326ca6c8ee2SEnrico Granata                 uint8_t* next_data = nullptr;
327ca6c8ee2SEnrico Granata                 auto printable = GetPrintable(StringElementType::UTF8, utf8_data_ptr, utf8_data_end_ptr, next_data);
328ca6c8ee2SEnrico Granata                 auto printable_bytes = printable.GetBytes();
329ca6c8ee2SEnrico Granata                 auto printable_size = printable.GetSize();
330ca6c8ee2SEnrico Granata                 if (!printable_bytes || !next_data)
331ca6c8ee2SEnrico Granata                 {
332ca6c8ee2SEnrico Granata                     // GetPrintable() failed on us - print one byte in a desperate resync attempt
333ca6c8ee2SEnrico Granata                     printable_bytes = utf8_data_ptr;
334ca6c8ee2SEnrico Granata                     printable_size = 1;
335ca6c8ee2SEnrico Granata                     next_data = utf8_data_ptr+1;
336ca6c8ee2SEnrico Granata                 }
3373acfe1a3SAndy Gibbs                 for (unsigned c = 0; c < printable_size; c++)
338ca6c8ee2SEnrico Granata                     stream.Printf("%c", *(printable_bytes+c));
339ca6c8ee2SEnrico Granata                 utf8_data_ptr = (uint8_t*)next_data;
340ca6c8ee2SEnrico Granata             }
341ca6c8ee2SEnrico Granata             else
342ca6c8ee2SEnrico Granata             {
343ca6c8ee2SEnrico Granata                 stream.Printf("%c",*utf8_data_ptr);
344ca6c8ee2SEnrico Granata                 utf8_data_ptr++;
345ca6c8ee2SEnrico Granata             }
346ca6c8ee2SEnrico Granata         }
347ca6c8ee2SEnrico Granata     }
348d07f7550SEnrico Granata     if (dump_options.GetQuote() != 0)
349d07f7550SEnrico Granata         stream.Printf("%c",dump_options.GetQuote());
350ca6c8ee2SEnrico Granata     return true;
351ca6c8ee2SEnrico Granata }
352ca6c8ee2SEnrico Granata 
353ebdc1ac0SEnrico Granata lldb_private::formatters::ReadStringAndDumpToStreamOptions::ReadStringAndDumpToStreamOptions (ValueObject& valobj) :
354ebdc1ac0SEnrico Granata     ReadStringAndDumpToStreamOptions()
355ebdc1ac0SEnrico Granata {
356ebdc1ac0SEnrico Granata     SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
357ebdc1ac0SEnrico Granata }
358ebdc1ac0SEnrico Granata 
359ebdc1ac0SEnrico Granata lldb_private::formatters::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (ValueObject& valobj) :
360ebdc1ac0SEnrico Granata     ReadBufferAndDumpToStreamOptions()
361ebdc1ac0SEnrico Granata {
362ebdc1ac0SEnrico Granata     SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
363ebdc1ac0SEnrico Granata }
364ebdc1ac0SEnrico Granata 
365d07f7550SEnrico Granata lldb_private::formatters::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (const lldb_private::formatters::ReadStringAndDumpToStreamOptions& options) :
366d07f7550SEnrico Granata     ReadBufferAndDumpToStreamOptions()
367d07f7550SEnrico Granata {
368d07f7550SEnrico Granata     SetStream(options.GetStream());
369d07f7550SEnrico Granata     SetPrefixToken(options.GetPrefixToken());
370d07f7550SEnrico Granata     SetQuote(options.GetQuote());
371d07f7550SEnrico Granata     SetEscapeNonPrintables(options.GetEscapeNonPrintables());
372d07f7550SEnrico Granata     SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator());
373d07f7550SEnrico Granata }
374d07f7550SEnrico Granata 
375ebdc1ac0SEnrico Granata 
376fd13743fSShawn Best namespace lldb_private
377fd13743fSShawn Best {
378fd13743fSShawn Best 
379fd13743fSShawn Best namespace formatters
380fd13743fSShawn Best {
381fd13743fSShawn Best 
382fd13743fSShawn Best template <>
383fd13743fSShawn Best bool
384*ad650a18SEnrico Granata StringPrinter::ReadStringAndDumpToStream<StringElementType::ASCII> (const ReadStringAndDumpToStreamOptions& options)
385fd13743fSShawn Best {
386fd13743fSShawn Best     assert(options.GetStream() && "need a Stream to print the string to");
387fd13743fSShawn Best     Error my_error;
388fd13743fSShawn Best 
389fd13743fSShawn Best     ProcessSP process_sp(options.GetProcessSP());
390fd13743fSShawn Best 
391fd13743fSShawn Best     if (process_sp.get() == nullptr || options.GetLocation() == 0)
392fd13743fSShawn Best         return false;
393fd13743fSShawn Best 
394fd13743fSShawn Best     size_t size;
395fd13743fSShawn Best 
396fd13743fSShawn Best     if (options.GetSourceSize() == 0)
397fd13743fSShawn Best         size = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
39834042212SEnrico Granata     else if (!options.GetIgnoreMaxLength())
399fd13743fSShawn Best         size = std::min(options.GetSourceSize(),process_sp->GetTarget().GetMaximumSizeOfStringSummary());
40034042212SEnrico Granata     else
40134042212SEnrico Granata         size = options.GetSourceSize();
402fd13743fSShawn Best 
403fd13743fSShawn Best     lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0));
404fd13743fSShawn Best 
405d7e6a4f2SVince Harron     process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error);
406fd13743fSShawn Best 
407fd13743fSShawn Best     if (my_error.Fail())
408fd13743fSShawn Best         return false;
409fd13743fSShawn Best 
410fd13743fSShawn Best     char prefix_token = options.GetPrefixToken();
411fd13743fSShawn Best     char quote = options.GetQuote();
412fd13743fSShawn Best 
413fd13743fSShawn Best     if (prefix_token != 0)
414fd13743fSShawn Best         options.GetStream()->Printf("%c%c",prefix_token,quote);
415fd13743fSShawn Best     else if (quote != 0)
416fd13743fSShawn Best         options.GetStream()->Printf("%c",quote);
417fd13743fSShawn Best 
418fd13743fSShawn Best     uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize();
419fd13743fSShawn Best 
420fd13743fSShawn Best     // since we tend to accept partial data (and even partially malformed data)
421fd13743fSShawn Best     // we might end up with no NULL terminator before the end_ptr
422fd13743fSShawn Best     // hence we need to take a slower route and ensure we stay within boundaries
423fd13743fSShawn Best     for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);)
424fd13743fSShawn Best     {
425fd13743fSShawn Best         if (options.GetEscapeNonPrintables())
426fd13743fSShawn Best         {
427fd13743fSShawn Best             uint8_t* next_data = nullptr;
428fd13743fSShawn Best             auto printable = GetPrintable(StringElementType::ASCII, data, data_end, next_data);
429fd13743fSShawn Best             auto printable_bytes = printable.GetBytes();
430fd13743fSShawn Best             auto printable_size = printable.GetSize();
431fd13743fSShawn Best             if (!printable_bytes || !next_data)
432fd13743fSShawn Best             {
433fd13743fSShawn Best                 // GetPrintable() failed on us - print one byte in a desperate resync attempt
434fd13743fSShawn Best                 printable_bytes = data;
435fd13743fSShawn Best                 printable_size = 1;
436fd13743fSShawn Best                 next_data = data+1;
437fd13743fSShawn Best             }
4383acfe1a3SAndy Gibbs             for (unsigned c = 0; c < printable_size; c++)
439fd13743fSShawn Best                 options.GetStream()->Printf("%c", *(printable_bytes+c));
440fd13743fSShawn Best             data = (uint8_t*)next_data;
441fd13743fSShawn Best         }
442fd13743fSShawn Best         else
443fd13743fSShawn Best         {
444fd13743fSShawn Best             options.GetStream()->Printf("%c",*data);
445fd13743fSShawn Best             data++;
446fd13743fSShawn Best         }
447fd13743fSShawn Best     }
448fd13743fSShawn Best 
449fd13743fSShawn Best     if (quote != 0)
450fd13743fSShawn Best         options.GetStream()->Printf("%c",quote);
451fd13743fSShawn Best 
452fd13743fSShawn Best     return true;
453fd13743fSShawn Best }
454fd13743fSShawn Best 
455ca6c8ee2SEnrico Granata template<typename SourceDataType>
456ca6c8ee2SEnrico Granata static bool
457ca6c8ee2SEnrico Granata ReadUTFBufferAndDumpToStream (const ReadStringAndDumpToStreamOptions& options,
458ca6c8ee2SEnrico Granata                               ConversionResult (*ConvertFunction) (const SourceDataType**,
459ca6c8ee2SEnrico Granata                                                                    const SourceDataType*,
460ca6c8ee2SEnrico Granata                                                                    UTF8**,
461ca6c8ee2SEnrico Granata                                                                    UTF8*,
462ca6c8ee2SEnrico Granata                                                                    ConversionFlags))
463ca6c8ee2SEnrico Granata {
464ca6c8ee2SEnrico Granata     assert(options.GetStream() && "need a Stream to print the string to");
465ca6c8ee2SEnrico Granata 
466ca6c8ee2SEnrico Granata     if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS)
467ca6c8ee2SEnrico Granata         return false;
468ca6c8ee2SEnrico Granata 
469ca6c8ee2SEnrico Granata     lldb::ProcessSP process_sp(options.GetProcessSP());
470ca6c8ee2SEnrico Granata 
471ca6c8ee2SEnrico Granata     if (!process_sp)
472ca6c8ee2SEnrico Granata         return false;
473ca6c8ee2SEnrico Granata 
474ca6c8ee2SEnrico Granata     const int type_width = sizeof(SourceDataType);
475ca6c8ee2SEnrico Granata     const int origin_encoding = 8 * type_width ;
476ca6c8ee2SEnrico Granata     if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32)
477ca6c8ee2SEnrico Granata         return false;
478ca6c8ee2SEnrico Granata     // if not UTF8, I need a conversion function to return proper UTF8
479ca6c8ee2SEnrico Granata     if (origin_encoding != 8 && !ConvertFunction)
480ca6c8ee2SEnrico Granata         return false;
481ca6c8ee2SEnrico Granata 
482ca6c8ee2SEnrico Granata     if (!options.GetStream())
483ca6c8ee2SEnrico Granata         return false;
484ca6c8ee2SEnrico Granata 
485ca6c8ee2SEnrico Granata     uint32_t sourceSize = options.GetSourceSize();
486ca6c8ee2SEnrico Granata     bool needs_zero_terminator = options.GetNeedsZeroTermination();
487ca6c8ee2SEnrico Granata 
488ca6c8ee2SEnrico Granata     if (!sourceSize)
489ca6c8ee2SEnrico Granata     {
490ca6c8ee2SEnrico Granata         sourceSize = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
491ca6c8ee2SEnrico Granata         needs_zero_terminator = true;
492ca6c8ee2SEnrico Granata     }
493b0e8a55dSEnrico Granata     else if (!options.GetIgnoreMaxLength())
494ca6c8ee2SEnrico Granata         sourceSize = std::min(sourceSize,process_sp->GetTarget().GetMaximumSizeOfStringSummary());
495ca6c8ee2SEnrico Granata 
496ca6c8ee2SEnrico Granata     const int bufferSPSize = sourceSize * type_width;
497ca6c8ee2SEnrico Granata 
498ca6c8ee2SEnrico Granata     lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0));
499ca6c8ee2SEnrico Granata 
500ca6c8ee2SEnrico Granata     if (!buffer_sp->GetBytes())
501ca6c8ee2SEnrico Granata         return false;
502ca6c8ee2SEnrico Granata 
503ca6c8ee2SEnrico Granata     Error error;
504ca6c8ee2SEnrico Granata     char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes());
505ca6c8ee2SEnrico Granata 
506ca6c8ee2SEnrico Granata     if (needs_zero_terminator)
507d7e6a4f2SVince Harron         process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width);
508ca6c8ee2SEnrico Granata     else
509d7e6a4f2SVince Harron         process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error);
510ca6c8ee2SEnrico Granata 
511099263b4SEnrico Granata     if (error.Fail())
512ca6c8ee2SEnrico Granata     {
513ca6c8ee2SEnrico Granata         options.GetStream()->Printf("unable to read data");
514ca6c8ee2SEnrico Granata         return true;
515ca6c8ee2SEnrico Granata     }
516ca6c8ee2SEnrico Granata 
517ca6c8ee2SEnrico Granata     DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize());
518ca6c8ee2SEnrico Granata 
519d07f7550SEnrico Granata     ReadBufferAndDumpToStreamOptions dump_options(options);
520d07f7550SEnrico Granata     dump_options.SetData(data);
521d07f7550SEnrico Granata     dump_options.SetSourceSize(sourceSize);
522d07f7550SEnrico Granata 
523d07f7550SEnrico Granata     return DumpUTFBufferToStream(ConvertFunction, dump_options);
524ca6c8ee2SEnrico Granata }
525ca6c8ee2SEnrico Granata 
526ca6c8ee2SEnrico Granata template <>
527ca6c8ee2SEnrico Granata bool
528*ad650a18SEnrico Granata StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF8> (const ReadStringAndDumpToStreamOptions& options)
529ca6c8ee2SEnrico Granata {
530ca6c8ee2SEnrico Granata     return ReadUTFBufferAndDumpToStream<UTF8>(options,
531ca6c8ee2SEnrico Granata                                               nullptr);
532ca6c8ee2SEnrico Granata }
533ca6c8ee2SEnrico Granata 
534ca6c8ee2SEnrico Granata template <>
535ca6c8ee2SEnrico Granata bool
536*ad650a18SEnrico Granata StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF16> (const ReadStringAndDumpToStreamOptions& options)
537ca6c8ee2SEnrico Granata {
538ca6c8ee2SEnrico Granata     return ReadUTFBufferAndDumpToStream<UTF16>(options,
539ca6c8ee2SEnrico Granata                                                ConvertUTF16toUTF8);
540ca6c8ee2SEnrico Granata }
541ca6c8ee2SEnrico Granata 
542ca6c8ee2SEnrico Granata template <>
543ca6c8ee2SEnrico Granata bool
544*ad650a18SEnrico Granata StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF32> (const ReadStringAndDumpToStreamOptions& options)
545ca6c8ee2SEnrico Granata {
546ca6c8ee2SEnrico Granata     return ReadUTFBufferAndDumpToStream<UTF32>(options,
547ca6c8ee2SEnrico Granata                                                ConvertUTF32toUTF8);
548ca6c8ee2SEnrico Granata }
549ca6c8ee2SEnrico Granata 
550ca6c8ee2SEnrico Granata template <>
551ca6c8ee2SEnrico Granata bool
552*ad650a18SEnrico Granata StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF8> (const ReadBufferAndDumpToStreamOptions& options)
553ca6c8ee2SEnrico Granata {
554ca6c8ee2SEnrico Granata     assert(options.GetStream() && "need a Stream to print the string to");
555ca6c8ee2SEnrico Granata 
556d07f7550SEnrico Granata     return DumpUTFBufferToStream<UTF8>(nullptr, options);
557ca6c8ee2SEnrico Granata }
558ca6c8ee2SEnrico Granata 
559ca6c8ee2SEnrico Granata template <>
560ca6c8ee2SEnrico Granata bool
561*ad650a18SEnrico Granata StringPrinter::ReadBufferAndDumpToStream<StringElementType::ASCII> (const ReadBufferAndDumpToStreamOptions& options)
562ca6c8ee2SEnrico Granata {
563ca6c8ee2SEnrico Granata     // treat ASCII the same as UTF8
564ca6c8ee2SEnrico Granata     // FIXME: can we optimize ASCII some more?
565ca6c8ee2SEnrico Granata     return ReadBufferAndDumpToStream<StringElementType::UTF8>(options);
566ca6c8ee2SEnrico Granata }
567ca6c8ee2SEnrico Granata 
568ca6c8ee2SEnrico Granata template <>
569ca6c8ee2SEnrico Granata bool
570*ad650a18SEnrico Granata StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF16> (const ReadBufferAndDumpToStreamOptions& options)
571ca6c8ee2SEnrico Granata {
572ca6c8ee2SEnrico Granata     assert(options.GetStream() && "need a Stream to print the string to");
573ca6c8ee2SEnrico Granata 
574d07f7550SEnrico Granata     return DumpUTFBufferToStream(ConvertUTF16toUTF8, options);
575ca6c8ee2SEnrico Granata }
576ca6c8ee2SEnrico Granata 
577ca6c8ee2SEnrico Granata template <>
578ca6c8ee2SEnrico Granata bool
579*ad650a18SEnrico Granata StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF32> (const ReadBufferAndDumpToStreamOptions& options)
580ca6c8ee2SEnrico Granata {
581ca6c8ee2SEnrico Granata     assert(options.GetStream() && "need a Stream to print the string to");
582ca6c8ee2SEnrico Granata 
583d07f7550SEnrico Granata     return DumpUTFBufferToStream(ConvertUTF32toUTF8, options);
584ca6c8ee2SEnrico Granata }
585fd13743fSShawn Best 
586fd13743fSShawn Best } // namespace formatters
587fd13743fSShawn Best 
588fd13743fSShawn Best } // namespace lldb_private
589