1 //===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "lldb/DataFormatters/StringPrinter.h"
11 
12 #include "lldb/Core/DataExtractor.h"
13 #include "lldb/Core/Debugger.h"
14 #include "lldb/Core/Error.h"
15 #include "lldb/Core/ValueObject.h"
16 #include "lldb/Target/Language.h"
17 #include "lldb/Target/Process.h"
18 #include "lldb/Target/Target.h"
19 
20 #include "llvm/Support/ConvertUTF.h"
21 
22 #include <ctype.h>
23 #include <locale>
24 
25 using namespace lldb;
26 using namespace lldb_private;
27 using namespace lldb_private::formatters;
28 
29 // we define this for all values of type but only implement it for those we care about
30 // that's good because we get linker errors for any unsupported type
31 template <lldb_private::formatters::StringPrinter::StringElementType type>
32 static StringPrinter::StringPrinterBufferPointer<>
33 GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next);
34 
// Unicode flavour of isprint(): tells whether a codepoint can be emitted as-is
// (true) or belongs to one of the ranges that we always want to escape (false)
static bool
isprint(char32_t codepoint)
{
    // C0 control characters, plus DEL
    const bool is_c0_control = (codepoint <= 0x1F) || (codepoint == 0x7F);
    // C1 control characters
    const bool is_c1_control = (codepoint >= 0x80) && (codepoint <= 0x9F);
    // line/paragraph separators
    const bool is_separator = (codepoint == 0x2028) || (codepoint == 0x2029);
    // bidirectional text control
    const bool is_bidi_control = (codepoint == 0x200E) ||
                                 (codepoint == 0x200F) ||
                                 ((codepoint >= 0x202A) && (codepoint <= 0x202E));
    // interlinears and generally specials
    const bool is_special = (codepoint >= 0xFFF9) && (codepoint <= 0xFFFF);

    return !(is_c0_control || is_c1_control || is_separator || is_bidi_control || is_special);
}
61 
62 template <>
63 StringPrinter::StringPrinterBufferPointer<>
64 GetPrintableImpl<StringPrinter::StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
65 {
66     StringPrinter::StringPrinterBufferPointer<> retval = {nullptr};
67 
68     switch (*buffer)
69     {
70         case 0:
71             retval = {"\\0",2};
72             break;
73         case '\a':
74             retval = {"\\a",2};
75             break;
76         case '\b':
77             retval = {"\\b",2};
78             break;
79         case '\f':
80             retval = {"\\f",2};
81             break;
82         case '\n':
83             retval = {"\\n",2};
84             break;
85         case '\r':
86             retval = {"\\r",2};
87             break;
88         case '\t':
89             retval = {"\\t",2};
90             break;
91         case '\v':
92             retval = {"\\v",2};
93             break;
94         case '\"':
95             retval = {"\\\"",2};
96             break;
97         case '\\':
98             retval = {"\\\\",2};
99             break;
100         default:
101           if (isprint(*buffer))
102               retval = {buffer,1};
103           else
104           {
105               uint8_t* data = new uint8_t[5];
106               sprintf((char*)data,"\\x%02x",*buffer);
107               retval = {data, 4, [] (const uint8_t* c) {delete[] c;} };
108               break;
109           }
110     }
111 
112     next = buffer + 1;
113     return retval;
114 }
115 
// Decodes a two-byte UTF-8 sequence into its codepoint.
// c0 is the lead byte (marker 0xC0), c1 the continuation byte (marker 0x80).
// No validation is performed on either byte.
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1)
{
    const int lead_payload = c0 - 0xC0;
    const int tail_payload = c1 - 0x80;
    return lead_payload * 64 + tail_payload;
}
// Decodes a three-byte UTF-8 sequence into its codepoint.
// c0 is the lead byte (marker 0xE0), c1 and c2 the continuation bytes (marker 0x80).
// No validation is performed on any of the bytes.
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2)
{
    const int lead_payload = c0 - 0xE0;
    const int mid_payload = c1 - 0x80;
    const int tail_payload = c2 - 0x80;
    return lead_payload * 4096 + mid_payload * 64 + tail_payload;
}
// Decodes a four-byte UTF-8 sequence into its codepoint.
// c0 is the lead byte (marker 0xF0); c1, c2 and c3 are the continuation bytes
// (marker 0x80), each contributing 6 bits, most significant first.
// No validation is performed on any of the bytes.
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3)
{
    // the second term must come from c1: using c2 twice drops 6 bits of the
    // codepoint and decodes every supplementary-plane character incorrectly
    return (c0-240)*262144+(c1-128)*4096+(c2-128)*64+(c3-128);
}
131 
132 template <>
133 StringPrinter::StringPrinterBufferPointer<>
134 GetPrintableImpl<StringPrinter::StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
135 {
136     StringPrinter::StringPrinterBufferPointer<> retval {nullptr};
137 
138     unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer);
139 
140     if (1+buffer_end-buffer < utf8_encoded_len)
141     {
142         // I don't have enough bytes - print whatever I have left
143         retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)};
144         next = buffer_end+1;
145         return retval;
146     }
147 
148     char32_t codepoint = 0;
149     switch (utf8_encoded_len)
150     {
151         case 1:
152             // this is just an ASCII byte - ask ASCII
153             return GetPrintableImpl<StringPrinter::StringElementType::ASCII>(buffer, buffer_end, next);
154         case 2:
155             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1));
156             break;
157         case 3:
158             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2));
159             break;
160         case 4:
161             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3));
162             break;
163         default:
164             // this is probably some bogus non-character thing
165             // just print it as-is and hope to sync up again soon
166             retval = {buffer,1};
167             next = buffer+1;
168             return retval;
169     }
170 
171     if (codepoint)
172     {
173         switch (codepoint)
174         {
175             case 0:
176                 retval = {"\\0",2};
177                 break;
178             case '\a':
179                 retval = {"\\a",2};
180                 break;
181             case '\b':
182                 retval = {"\\b",2};
183                 break;
184             case '\f':
185                 retval = {"\\f",2};
186                 break;
187             case '\n':
188                 retval = {"\\n",2};
189                 break;
190             case '\r':
191                 retval = {"\\r",2};
192                 break;
193             case '\t':
194                 retval = {"\\t",2};
195                 break;
196             case '\v':
197                 retval = {"\\v",2};
198                 break;
199             case '\"':
200                 retval = {"\\\"",2};
201                 break;
202             case '\\':
203                 retval = {"\\\\",2};
204                 break;
205             default:
206                 if (isprint(codepoint))
207                     retval = {buffer,utf8_encoded_len};
208                 else
209                 {
210                     uint8_t* data = new uint8_t[11];
211                     sprintf((char*)data,"\\U%08x",codepoint);
212                     retval = { data,10,[] (const uint8_t* c) {delete[] c;} };
213                     break;
214                 }
215         }
216 
217         next = buffer + utf8_encoded_len;
218         return retval;
219     }
220 
221     // this should not happen - but just in case.. try to resync at some point
222     retval = {buffer,1};
223     next = buffer+1;
224     return retval;
225 }
226 
227 // Given a sequence of bytes, this function returns:
228 // a sequence of bytes to actually print out + a length
229 // the following unscanned position of the buffer is in next
230 static StringPrinter::StringPrinterBufferPointer<>
231 GetPrintable(StringPrinter::StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
232 {
233     if (!buffer)
234         return {nullptr};
235 
236     switch (type)
237     {
238         case StringPrinter::StringElementType::ASCII:
239             return GetPrintableImpl<StringPrinter::StringElementType::ASCII>(buffer, buffer_end, next);
240         case StringPrinter::StringElementType::UTF8:
241             return GetPrintableImpl<StringPrinter::StringElementType::UTF8>(buffer, buffer_end, next);
242         default:
243             return {nullptr};
244     }
245 }
246 
247 StringPrinter::EscapingHelper
248 StringPrinter::GetDefaultEscapingHelper (GetPrintableElementType elem_type)
249 {
250     switch (elem_type)
251     {
252         case GetPrintableElementType::UTF8:
253             return [] (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) -> StringPrinter::StringPrinterBufferPointer<> {
254                 return GetPrintable(StringPrinter::StringElementType::UTF8, buffer, buffer_end, next);
255             };
256         case GetPrintableElementType::ASCII:
257             return [] (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) -> StringPrinter::StringPrinterBufferPointer<> {
258                 return GetPrintable(StringPrinter::StringElementType::ASCII, buffer, buffer_end, next);
259             };
260     }
261 }
262 
263 // use this call if you already have an LLDB-side buffer for the data
264 template<typename SourceDataType>
265 static bool
266 DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**,
267                                                             const SourceDataType*,
268                                                             UTF8**,
269                                                             UTF8*,
270                                                             ConversionFlags),
271                        const StringPrinter::ReadBufferAndDumpToStreamOptions& dump_options)
272 {
273     Stream &stream(*dump_options.GetStream());
274     if (dump_options.GetPrefixToken() != 0)
275         stream.Printf("%c",dump_options.GetPrefixToken());
276     if (dump_options.GetQuote() != 0)
277         stream.Printf("%c",dump_options.GetQuote());
278     auto data(dump_options.GetData());
279     auto source_size(dump_options.GetSourceSize());
280     if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd())
281     {
282         const int bufferSPSize = data.GetByteSize();
283         if (dump_options.GetSourceSize() == 0)
284         {
285             const int origin_encoding = 8*sizeof(SourceDataType);
286             source_size = bufferSPSize/(origin_encoding / 4);
287         }
288 
289         const SourceDataType *data_ptr = (const SourceDataType*)data.GetDataStart();
290         const SourceDataType *data_end_ptr = data_ptr + source_size;
291 
292         const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator();
293 
294         if (zero_is_terminator)
295         {
296             while (data_ptr < data_end_ptr)
297             {
298                 if (!*data_ptr)
299                 {
300                     data_end_ptr = data_ptr;
301                     break;
302                 }
303                 data_ptr++;
304             }
305 
306             data_ptr = (const SourceDataType*)data.GetDataStart();
307         }
308 
309         lldb::DataBufferSP utf8_data_buffer_sp;
310         UTF8* utf8_data_ptr = nullptr;
311         UTF8* utf8_data_end_ptr = nullptr;
312 
313         if (ConvertFunction)
314         {
315             utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0));
316             utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes();
317             utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize();
318             ConvertFunction ( &data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion );
319             if (false == zero_is_terminator)
320                 utf8_data_end_ptr = utf8_data_ptr;
321             utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr
322         }
323         else
324         {
325             // just copy the pointers - the cast is necessary to make the compiler happy
326             // but this should only happen if we are reading UTF8 data
327             utf8_data_ptr = (UTF8*)data_ptr;
328             utf8_data_end_ptr = (UTF8*)data_end_ptr;
329         }
330 
331         const bool escape_non_printables = dump_options.GetEscapeNonPrintables();
332         lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback;
333         if (escape_non_printables)
334         {
335             if (Language *language = Language::FindPlugin(dump_options.GetLanguage()))
336                 escaping_callback = language->GetStringPrinterEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::UTF8);
337             else
338                 escaping_callback = lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::UTF8);
339         }
340 
341         // since we tend to accept partial data (and even partially malformed data)
342         // we might end up with no NULL terminator before the end_ptr
343         // hence we need to take a slower route and ensure we stay within boundaries
344         for (;utf8_data_ptr < utf8_data_end_ptr;)
345         {
346             if (zero_is_terminator && !*utf8_data_ptr)
347                 break;
348 
349             if (escape_non_printables)
350             {
351                 uint8_t* next_data = nullptr;
352                 auto printable = escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data);
353                 auto printable_bytes = printable.GetBytes();
354                 auto printable_size = printable.GetSize();
355                 if (!printable_bytes || !next_data)
356                 {
357                     // GetPrintable() failed on us - print one byte in a desperate resync attempt
358                     printable_bytes = utf8_data_ptr;
359                     printable_size = 1;
360                     next_data = utf8_data_ptr+1;
361                 }
362                 for (unsigned c = 0; c < printable_size; c++)
363                     stream.Printf("%c", *(printable_bytes+c));
364                 utf8_data_ptr = (uint8_t*)next_data;
365             }
366             else
367             {
368                 stream.Printf("%c",*utf8_data_ptr);
369                 utf8_data_ptr++;
370             }
371         }
372     }
373     if (dump_options.GetQuote() != 0)
374         stream.Printf("%c",dump_options.GetQuote());
375     return true;
376 }
377 
378 lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions::ReadStringAndDumpToStreamOptions (ValueObject& valobj) :
379     ReadStringAndDumpToStreamOptions()
380 {
381     SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
382 }
383 
384 lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (ValueObject& valobj) :
385     ReadBufferAndDumpToStreamOptions()
386 {
387     SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
388 }
389 
390 lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (const ReadStringAndDumpToStreamOptions& options) :
391     ReadBufferAndDumpToStreamOptions()
392 {
393     SetStream(options.GetStream());
394     SetPrefixToken(options.GetPrefixToken());
395     SetQuote(options.GetQuote());
396     SetEscapeNonPrintables(options.GetEscapeNonPrintables());
397     SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator());
398     SetLanguage(options.GetLanguage());
399 }
400 
401 
402 namespace lldb_private
403 {
404 
405 namespace formatters
406 {
407 
408 template <>
409 bool
410 StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::ASCII> (const ReadStringAndDumpToStreamOptions& options)
411 {
412     assert(options.GetStream() && "need a Stream to print the string to");
413     Error my_error;
414 
415     ProcessSP process_sp(options.GetProcessSP());
416 
417     if (process_sp.get() == nullptr || options.GetLocation() == 0)
418         return false;
419 
420     size_t size;
421 
422     if (options.GetSourceSize() == 0)
423         size = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
424     else if (!options.GetIgnoreMaxLength())
425         size = std::min(options.GetSourceSize(),process_sp->GetTarget().GetMaximumSizeOfStringSummary());
426     else
427         size = options.GetSourceSize();
428 
429     lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0));
430 
431     process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error);
432 
433     if (my_error.Fail())
434         return false;
435 
436     char prefix_token = options.GetPrefixToken();
437     char quote = options.GetQuote();
438 
439     if (prefix_token != 0)
440         options.GetStream()->Printf("%c%c",prefix_token,quote);
441     else if (quote != 0)
442         options.GetStream()->Printf("%c",quote);
443 
444     uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize();
445 
446     const bool escape_non_printables = options.GetEscapeNonPrintables();
447     lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback;
448     if (escape_non_printables)
449     {
450         if (Language *language = Language::FindPlugin(options.GetLanguage()))
451             escaping_callback = language->GetStringPrinterEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::ASCII);
452         else
453             escaping_callback = lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::ASCII);
454     }
455 
456     // since we tend to accept partial data (and even partially malformed data)
457     // we might end up with no NULL terminator before the end_ptr
458     // hence we need to take a slower route and ensure we stay within boundaries
459     for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);)
460     {
461         if (escape_non_printables)
462         {
463             uint8_t* next_data = nullptr;
464             auto printable = escaping_callback(data, data_end, next_data);
465             auto printable_bytes = printable.GetBytes();
466             auto printable_size = printable.GetSize();
467             if (!printable_bytes || !next_data)
468             {
469                 // GetPrintable() failed on us - print one byte in a desperate resync attempt
470                 printable_bytes = data;
471                 printable_size = 1;
472                 next_data = data+1;
473             }
474             for (unsigned c = 0; c < printable_size; c++)
475                 options.GetStream()->Printf("%c", *(printable_bytes+c));
476             data = (uint8_t*)next_data;
477         }
478         else
479         {
480             options.GetStream()->Printf("%c",*data);
481             data++;
482         }
483     }
484 
485     if (quote != 0)
486         options.GetStream()->Printf("%c",quote);
487 
488     return true;
489 }
490 
491 template<typename SourceDataType>
492 static bool
493 ReadUTFBufferAndDumpToStream (const StringPrinter::ReadStringAndDumpToStreamOptions& options,
494                               ConversionResult (*ConvertFunction) (const SourceDataType**,
495                                                                    const SourceDataType*,
496                                                                    UTF8**,
497                                                                    UTF8*,
498                                                                    ConversionFlags))
499 {
500     assert(options.GetStream() && "need a Stream to print the string to");
501 
502     if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS)
503         return false;
504 
505     lldb::ProcessSP process_sp(options.GetProcessSP());
506 
507     if (!process_sp)
508         return false;
509 
510     const int type_width = sizeof(SourceDataType);
511     const int origin_encoding = 8 * type_width ;
512     if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32)
513         return false;
514     // if not UTF8, I need a conversion function to return proper UTF8
515     if (origin_encoding != 8 && !ConvertFunction)
516         return false;
517 
518     if (!options.GetStream())
519         return false;
520 
521     uint32_t sourceSize = options.GetSourceSize();
522     bool needs_zero_terminator = options.GetNeedsZeroTermination();
523 
524     if (!sourceSize)
525     {
526         sourceSize = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
527         needs_zero_terminator = true;
528     }
529     else if (!options.GetIgnoreMaxLength())
530         sourceSize = std::min(sourceSize,process_sp->GetTarget().GetMaximumSizeOfStringSummary());
531 
532     const int bufferSPSize = sourceSize * type_width;
533 
534     lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0));
535 
536     if (!buffer_sp->GetBytes())
537         return false;
538 
539     Error error;
540     char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes());
541 
542     if (needs_zero_terminator)
543         process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width);
544     else
545         process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error);
546 
547     if (error.Fail())
548     {
549         options.GetStream()->Printf("unable to read data");
550         return true;
551     }
552 
553     DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize());
554 
555     StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options);
556     dump_options.SetData(data);
557     dump_options.SetSourceSize(sourceSize);
558 
559     return DumpUTFBufferToStream(ConvertFunction, dump_options);
560 }
561 
562 template <>
563 bool
564 StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::UTF8> (const ReadStringAndDumpToStreamOptions& options)
565 {
566     return ReadUTFBufferAndDumpToStream<UTF8>(options,
567                                               nullptr);
568 }
569 
570 template <>
571 bool
572 StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::UTF16> (const ReadStringAndDumpToStreamOptions& options)
573 {
574     return ReadUTFBufferAndDumpToStream<UTF16>(options,
575                                                ConvertUTF16toUTF8);
576 }
577 
578 template <>
579 bool
580 StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::UTF32> (const ReadStringAndDumpToStreamOptions& options)
581 {
582     return ReadUTFBufferAndDumpToStream<UTF32>(options,
583                                                ConvertUTF32toUTF8);
584 }
585 
586 template <>
587 bool
588 StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::UTF8> (const ReadBufferAndDumpToStreamOptions& options)
589 {
590     assert(options.GetStream() && "need a Stream to print the string to");
591 
592     return DumpUTFBufferToStream<UTF8>(nullptr, options);
593 }
594 
595 template <>
596 bool
597 StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::ASCII> (const ReadBufferAndDumpToStreamOptions& options)
598 {
599     // treat ASCII the same as UTF8
600     // FIXME: can we optimize ASCII some more?
601     return ReadBufferAndDumpToStream<StringElementType::UTF8>(options);
602 }
603 
604 template <>
605 bool
606 StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::UTF16> (const ReadBufferAndDumpToStreamOptions& options)
607 {
608     assert(options.GetStream() && "need a Stream to print the string to");
609 
610     return DumpUTFBufferToStream(ConvertUTF16toUTF8, options);
611 }
612 
613 template <>
614 bool
615 StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::UTF32> (const ReadBufferAndDumpToStreamOptions& options)
616 {
617     assert(options.GetStream() && "need a Stream to print the string to");
618 
619     return DumpUTFBufferToStream(ConvertUTF32toUTF8, options);
620 }
621 
622 } // namespace formatters
623 
624 } // namespace lldb_private
625