1 //===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "lldb/DataFormatters/StringPrinter.h"
11 
12 #include "lldb/Core/Debugger.h"
13 #include "lldb/Core/Error.h"
14 #include "lldb/Core/ValueObject.h"
15 #include "lldb/Target/Language.h"
16 #include "lldb/Target/Process.h"
17 #include "lldb/Target/Target.h"
18 
19 #include "llvm/Support/ConvertUTF.h"
20 
21 #include <ctype.h>
22 #include <locale>
23 
24 using namespace lldb;
25 using namespace lldb_private;
26 using namespace lldb_private::formatters;
27 
28 // we define this for all values of type but only implement it for those we care about
29 // that's good because we get linker errors for any unsupported type
30 template <lldb_private::formatters::StringPrinter::StringElementType type>
31 static StringPrinter::StringPrinterBufferPointer<>
32 GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next);
33 
// Unicode counterpart of isprint(): tells whether a code point can be written
// to the output verbatim (true) or has to be escaped (false).
static bool
isprint(char32_t codepoint)
{
    // C0 control characters, plus DEL
    const bool is_c0_control = (codepoint <= 0x1F) || (codepoint == 0x7F);
    // C1 control characters
    const bool is_c1_control = (codepoint >= 0x80) && (codepoint <= 0x9F);
    // line and paragraph separators
    const bool is_separator = (codepoint == 0x2028) || (codepoint == 0x2029);
    // bidirectional text control
    const bool is_bidi_control = (codepoint == 0x200E) ||
                                 (codepoint == 0x200F) ||
                                 ((codepoint >= 0x202A) && (codepoint <= 0x202E));
    // interlinear annotation characters and, more generally, specials
    const bool is_special = (codepoint >= 0xFFF9) && (codepoint <= 0xFFFF);

    return !(is_c0_control || is_c1_control || is_separator || is_bidi_control || is_special);
}
60 
61 template <>
62 StringPrinter::StringPrinterBufferPointer<>
63 GetPrintableImpl<StringPrinter::StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
64 {
65     StringPrinter::StringPrinterBufferPointer<> retval = {nullptr};
66 
67     switch (*buffer)
68     {
69         case 0:
70             retval = {"\\0",2};
71             break;
72         case '\a':
73             retval = {"\\a",2};
74             break;
75         case '\b':
76             retval = {"\\b",2};
77             break;
78         case '\f':
79             retval = {"\\f",2};
80             break;
81         case '\n':
82             retval = {"\\n",2};
83             break;
84         case '\r':
85             retval = {"\\r",2};
86             break;
87         case '\t':
88             retval = {"\\t",2};
89             break;
90         case '\v':
91             retval = {"\\v",2};
92             break;
93         case '\"':
94             retval = {"\\\"",2};
95             break;
96         case '\\':
97             retval = {"\\\\",2};
98             break;
99         default:
100           if (isprint(*buffer))
101               retval = {buffer,1};
102           else
103           {
104               uint8_t* data = new uint8_t[5];
105               sprintf((char*)data,"\\x%02x",*buffer);
106               retval = {data, 4, [] (const uint8_t* c) {delete[] c;} };
107               break;
108           }
109     }
110 
111     next = buffer + 1;
112     return retval;
113 }
114 
// Decodes a two-byte UTF-8 sequence: strips the 0xC0 marker from the lead byte
// and the 0x80 marker from the continuation byte, then combines the payloads.
// The bytes are not validated.
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1)
{
    const int lead_payload = c0 - 0xC0;
    const int trail_payload = c1 - 0x80;
    return lead_payload * 64 + trail_payload;
}
// Decodes a three-byte UTF-8 sequence: strips the 0xE0 marker from the lead
// byte and the 0x80 marker from each continuation byte, then combines the
// payloads. The bytes are not validated.
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2)
{
    const int lead_payload = c0 - 0xE0;
    const int middle_payload = c1 - 0x80;
    const int last_payload = c2 - 0x80;
    return lead_payload * 4096 + middle_payload * 64 + last_payload;
}
// Decodes a four-byte UTF-8 sequence: strips the 0xF0 marker from the lead
// byte and the 0x80 marker from each of the three continuation bytes, then
// combines the payloads (3 + 6 + 6 + 6 bits). The bytes are not validated.
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3)
{
    // each continuation byte contributes its own 6 bits: c1 is weighted by
    // 4096, c2 by 64 and c3 by 1
    return (c0-240)*262144+(c1-128)*4096+(c2-128)*64+(c3-128);
}
130 
131 template <>
132 StringPrinter::StringPrinterBufferPointer<>
133 GetPrintableImpl<StringPrinter::StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
134 {
135     StringPrinter::StringPrinterBufferPointer<> retval {nullptr};
136 
137     unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer);
138 
139     if (1+buffer_end-buffer < utf8_encoded_len)
140     {
141         // I don't have enough bytes - print whatever I have left
142         retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)};
143         next = buffer_end+1;
144         return retval;
145     }
146 
147     char32_t codepoint = 0;
148     switch (utf8_encoded_len)
149     {
150         case 1:
151             // this is just an ASCII byte - ask ASCII
152             return GetPrintableImpl<StringPrinter::StringElementType::ASCII>(buffer, buffer_end, next);
153         case 2:
154             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1));
155             break;
156         case 3:
157             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2));
158             break;
159         case 4:
160             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3));
161             break;
162         default:
163             // this is probably some bogus non-character thing
164             // just print it as-is and hope to sync up again soon
165             retval = {buffer,1};
166             next = buffer+1;
167             return retval;
168     }
169 
170     if (codepoint)
171     {
172         switch (codepoint)
173         {
174             case 0:
175                 retval = {"\\0",2};
176                 break;
177             case '\a':
178                 retval = {"\\a",2};
179                 break;
180             case '\b':
181                 retval = {"\\b",2};
182                 break;
183             case '\f':
184                 retval = {"\\f",2};
185                 break;
186             case '\n':
187                 retval = {"\\n",2};
188                 break;
189             case '\r':
190                 retval = {"\\r",2};
191                 break;
192             case '\t':
193                 retval = {"\\t",2};
194                 break;
195             case '\v':
196                 retval = {"\\v",2};
197                 break;
198             case '\"':
199                 retval = {"\\\"",2};
200                 break;
201             case '\\':
202                 retval = {"\\\\",2};
203                 break;
204             default:
205                 if (isprint(codepoint))
206                     retval = {buffer,utf8_encoded_len};
207                 else
208                 {
209                     uint8_t* data = new uint8_t[11];
210                     sprintf((char*)data,"\\U%08x",codepoint);
211                     retval = { data,10,[] (const uint8_t* c) {delete[] c;} };
212                     break;
213                 }
214         }
215 
216         next = buffer + utf8_encoded_len;
217         return retval;
218     }
219 
220     // this should not happen - but just in case.. try to resync at some point
221     retval = {buffer,1};
222     next = buffer+1;
223     return retval;
224 }
225 
226 // Given a sequence of bytes, this function returns:
227 // a sequence of bytes to actually print out + a length
228 // the following unscanned position of the buffer is in next
229 static StringPrinter::StringPrinterBufferPointer<>
230 GetPrintable(StringPrinter::StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
231 {
232     if (!buffer)
233         return {nullptr};
234 
235     switch (type)
236     {
237         case StringPrinter::StringElementType::ASCII:
238             return GetPrintableImpl<StringPrinter::StringElementType::ASCII>(buffer, buffer_end, next);
239         case StringPrinter::StringElementType::UTF8:
240             return GetPrintableImpl<StringPrinter::StringElementType::UTF8>(buffer, buffer_end, next);
241         default:
242             return {nullptr};
243     }
244 }
245 
246 StringPrinter::EscapingHelper
247 StringPrinter::GetDefaultEscapingHelper (GetPrintableElementType elem_type)
248 {
249     switch (elem_type)
250     {
251         case GetPrintableElementType::UTF8:
252             return [] (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) -> StringPrinter::StringPrinterBufferPointer<> {
253                 return GetPrintable(StringPrinter::StringElementType::UTF8, buffer, buffer_end, next);
254             };
255         case GetPrintableElementType::ASCII:
256             return [] (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) -> StringPrinter::StringPrinterBufferPointer<> {
257                 return GetPrintable(StringPrinter::StringElementType::ASCII, buffer, buffer_end, next);
258             };
259     }
260     llvm_unreachable("bad element type");
261 }
262 
263 // use this call if you already have an LLDB-side buffer for the data
264 template<typename SourceDataType>
265 static bool
266 DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**,
267                                                             const SourceDataType*,
268                                                             UTF8**,
269                                                             UTF8*,
270                                                             ConversionFlags),
271                        const StringPrinter::ReadBufferAndDumpToStreamOptions& dump_options)
272 {
273     Stream &stream(*dump_options.GetStream());
274     if (dump_options.GetPrefixToken() != 0)
275         stream.Printf("%s",dump_options.GetPrefixToken());
276     if (dump_options.GetQuote() != 0)
277         stream.Printf("%c",dump_options.GetQuote());
278     auto data(dump_options.GetData());
279     auto source_size(dump_options.GetSourceSize());
280     if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd())
281     {
282         const int bufferSPSize = data.GetByteSize();
283         if (dump_options.GetSourceSize() == 0)
284         {
285             const int origin_encoding = 8*sizeof(SourceDataType);
286             source_size = bufferSPSize/(origin_encoding / 4);
287         }
288 
289         const SourceDataType *data_ptr = (const SourceDataType*)data.GetDataStart();
290         const SourceDataType *data_end_ptr = data_ptr + source_size;
291 
292         const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator();
293 
294         if (zero_is_terminator)
295         {
296             while (data_ptr < data_end_ptr)
297             {
298                 if (!*data_ptr)
299                 {
300                     data_end_ptr = data_ptr;
301                     break;
302                 }
303                 data_ptr++;
304             }
305 
306             data_ptr = (const SourceDataType*)data.GetDataStart();
307         }
308 
309         lldb::DataBufferSP utf8_data_buffer_sp;
310         UTF8* utf8_data_ptr = nullptr;
311         UTF8* utf8_data_end_ptr = nullptr;
312 
313         if (ConvertFunction)
314         {
315             utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0));
316             utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes();
317             utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize();
318             ConvertFunction ( &data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion );
319             if (false == zero_is_terminator)
320                 utf8_data_end_ptr = utf8_data_ptr;
321             utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr
322         }
323         else
324         {
325             // just copy the pointers - the cast is necessary to make the compiler happy
326             // but this should only happen if we are reading UTF8 data
327             utf8_data_ptr = const_cast<UTF8 *>(reinterpret_cast<const UTF8*>(data_ptr));
328             utf8_data_end_ptr = const_cast<UTF8 *>(reinterpret_cast<const UTF8*>(data_end_ptr));
329         }
330 
331         const bool escape_non_printables = dump_options.GetEscapeNonPrintables();
332         lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback;
333         if (escape_non_printables)
334         {
335             if (Language *language = Language::FindPlugin(dump_options.GetLanguage()))
336                 escaping_callback = language->GetStringPrinterEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::UTF8);
337             else
338                 escaping_callback = lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::UTF8);
339         }
340 
341         // since we tend to accept partial data (and even partially malformed data)
342         // we might end up with no NULL terminator before the end_ptr
343         // hence we need to take a slower route and ensure we stay within boundaries
344         for (;utf8_data_ptr < utf8_data_end_ptr;)
345         {
346             if (zero_is_terminator && !*utf8_data_ptr)
347                 break;
348 
349             if (escape_non_printables)
350             {
351                 uint8_t* next_data = nullptr;
352                 auto printable = escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data);
353                 auto printable_bytes = printable.GetBytes();
354                 auto printable_size = printable.GetSize();
355                 if (!printable_bytes || !next_data)
356                 {
357                     // GetPrintable() failed on us - print one byte in a desperate resync attempt
358                     printable_bytes = utf8_data_ptr;
359                     printable_size = 1;
360                     next_data = utf8_data_ptr+1;
361                 }
362                 for (unsigned c = 0; c < printable_size; c++)
363                     stream.Printf("%c", *(printable_bytes+c));
364                 utf8_data_ptr = (uint8_t*)next_data;
365             }
366             else
367             {
368                 stream.Printf("%c",*utf8_data_ptr);
369                 utf8_data_ptr++;
370             }
371         }
372     }
373     if (dump_options.GetQuote() != 0)
374         stream.Printf("%c",dump_options.GetQuote());
375     if (dump_options.GetSuffixToken() != 0)
376         stream.Printf("%s",dump_options.GetSuffixToken());
377     return true;
378 }
379 
380 lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions::ReadStringAndDumpToStreamOptions (ValueObject& valobj) :
381     ReadStringAndDumpToStreamOptions()
382 {
383     SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
384 }
385 
386 lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (ValueObject& valobj) :
387     ReadBufferAndDumpToStreamOptions()
388 {
389     SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
390 }
391 
392 lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (const ReadStringAndDumpToStreamOptions& options) :
393     ReadBufferAndDumpToStreamOptions()
394 {
395     SetStream(options.GetStream());
396     SetPrefixToken(options.GetPrefixToken());
397     SetSuffixToken(options.GetSuffixToken());
398     SetQuote(options.GetQuote());
399     SetEscapeNonPrintables(options.GetEscapeNonPrintables());
400     SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator());
401     SetLanguage(options.GetLanguage());
402 }
403 
404 
405 namespace lldb_private
406 {
407 
408 namespace formatters
409 {
410 
411 template <>
412 bool
413 StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::ASCII> (const ReadStringAndDumpToStreamOptions& options)
414 {
415     assert(options.GetStream() && "need a Stream to print the string to");
416     Error my_error;
417 
418     ProcessSP process_sp(options.GetProcessSP());
419 
420     if (process_sp.get() == nullptr || options.GetLocation() == 0)
421         return false;
422 
423     size_t size;
424 
425     if (options.GetSourceSize() == 0)
426         size = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
427     else if (!options.GetIgnoreMaxLength())
428         size = std::min(options.GetSourceSize(),process_sp->GetTarget().GetMaximumSizeOfStringSummary());
429     else
430         size = options.GetSourceSize();
431 
432     lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0));
433 
434     process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error);
435 
436     if (my_error.Fail())
437         return false;
438 
439     const char* prefix_token = options.GetPrefixToken();
440     char quote = options.GetQuote();
441 
442     if (prefix_token != 0)
443         options.GetStream()->Printf("%s%c",prefix_token,quote);
444     else if (quote != 0)
445         options.GetStream()->Printf("%c",quote);
446 
447     uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize();
448 
449     const bool escape_non_printables = options.GetEscapeNonPrintables();
450     lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback;
451     if (escape_non_printables)
452     {
453         if (Language *language = Language::FindPlugin(options.GetLanguage()))
454             escaping_callback = language->GetStringPrinterEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::ASCII);
455         else
456             escaping_callback = lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::ASCII);
457     }
458 
459     // since we tend to accept partial data (and even partially malformed data)
460     // we might end up with no NULL terminator before the end_ptr
461     // hence we need to take a slower route and ensure we stay within boundaries
462     for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);)
463     {
464         if (escape_non_printables)
465         {
466             uint8_t* next_data = nullptr;
467             auto printable = escaping_callback(data, data_end, next_data);
468             auto printable_bytes = printable.GetBytes();
469             auto printable_size = printable.GetSize();
470             if (!printable_bytes || !next_data)
471             {
472                 // GetPrintable() failed on us - print one byte in a desperate resync attempt
473                 printable_bytes = data;
474                 printable_size = 1;
475                 next_data = data+1;
476             }
477             for (unsigned c = 0; c < printable_size; c++)
478                 options.GetStream()->Printf("%c", *(printable_bytes+c));
479             data = (uint8_t*)next_data;
480         }
481         else
482         {
483             options.GetStream()->Printf("%c",*data);
484             data++;
485         }
486     }
487 
488     const char* suffix_token = options.GetSuffixToken();
489 
490     if (suffix_token != 0)
491         options.GetStream()->Printf("%c%s",quote, suffix_token);
492     else if (quote != 0)
493         options.GetStream()->Printf("%c",quote);
494 
495     return true;
496 }
497 
498 template<typename SourceDataType>
499 static bool
500 ReadUTFBufferAndDumpToStream (const StringPrinter::ReadStringAndDumpToStreamOptions& options,
501                               ConversionResult (*ConvertFunction) (const SourceDataType**,
502                                                                    const SourceDataType*,
503                                                                    UTF8**,
504                                                                    UTF8*,
505                                                                    ConversionFlags))
506 {
507     assert(options.GetStream() && "need a Stream to print the string to");
508 
509     if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS)
510         return false;
511 
512     lldb::ProcessSP process_sp(options.GetProcessSP());
513 
514     if (!process_sp)
515         return false;
516 
517     const int type_width = sizeof(SourceDataType);
518     const int origin_encoding = 8 * type_width ;
519     if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32)
520         return false;
521     // if not UTF8, I need a conversion function to return proper UTF8
522     if (origin_encoding != 8 && !ConvertFunction)
523         return false;
524 
525     if (!options.GetStream())
526         return false;
527 
528     uint32_t sourceSize = options.GetSourceSize();
529     bool needs_zero_terminator = options.GetNeedsZeroTermination();
530 
531     if (!sourceSize)
532     {
533         sourceSize = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
534         needs_zero_terminator = true;
535     }
536     else if (!options.GetIgnoreMaxLength())
537         sourceSize = std::min(sourceSize,process_sp->GetTarget().GetMaximumSizeOfStringSummary());
538 
539     const int bufferSPSize = sourceSize * type_width;
540 
541     lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0));
542 
543     if (!buffer_sp->GetBytes())
544         return false;
545 
546     Error error;
547     char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes());
548 
549     if (needs_zero_terminator)
550         process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width);
551     else
552         process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error);
553 
554     if (error.Fail())
555     {
556         options.GetStream()->Printf("unable to read data");
557         return true;
558     }
559 
560     DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize());
561 
562     StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options);
563     dump_options.SetData(data);
564     dump_options.SetSourceSize(sourceSize);
565 
566     return DumpUTFBufferToStream(ConvertFunction, dump_options);
567 }
568 
569 template <>
570 bool
571 StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::UTF8> (const ReadStringAndDumpToStreamOptions& options)
572 {
573     return ReadUTFBufferAndDumpToStream<UTF8>(options,
574                                               nullptr);
575 }
576 
577 template <>
578 bool
579 StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::UTF16> (const ReadStringAndDumpToStreamOptions& options)
580 {
581     return ReadUTFBufferAndDumpToStream<UTF16>(options,
582                                                ConvertUTF16toUTF8);
583 }
584 
585 template <>
586 bool
587 StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::UTF32> (const ReadStringAndDumpToStreamOptions& options)
588 {
589     return ReadUTFBufferAndDumpToStream<UTF32>(options,
590                                                ConvertUTF32toUTF8);
591 }
592 
593 template <>
594 bool
595 StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::UTF8> (const ReadBufferAndDumpToStreamOptions& options)
596 {
597     assert(options.GetStream() && "need a Stream to print the string to");
598 
599     return DumpUTFBufferToStream<UTF8>(nullptr, options);
600 }
601 
602 template <>
603 bool
604 StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::ASCII> (const ReadBufferAndDumpToStreamOptions& options)
605 {
606     // treat ASCII the same as UTF8
607     // FIXME: can we optimize ASCII some more?
608     return ReadBufferAndDumpToStream<StringElementType::UTF8>(options);
609 }
610 
611 template <>
612 bool
613 StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::UTF16> (const ReadBufferAndDumpToStreamOptions& options)
614 {
615     assert(options.GetStream() && "need a Stream to print the string to");
616 
617     return DumpUTFBufferToStream(ConvertUTF16toUTF8, options);
618 }
619 
620 template <>
621 bool
622 StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::UTF32> (const ReadBufferAndDumpToStreamOptions& options)
623 {
624     assert(options.GetStream() && "need a Stream to print the string to");
625 
626     return DumpUTFBufferToStream(ConvertUTF32toUTF8, options);
627 }
628 
629 } // namespace formatters
630 
631 } // namespace lldb_private
632