1 //===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "lldb/DataFormatters/StringPrinter.h"
11 
12 #include "lldb/Core/DataExtractor.h"
13 #include "lldb/Core/Error.h"
14 #include "lldb/Target/Process.h"
15 #include "lldb/Target/Target.h"
16 
17 #include "llvm/Support/ConvertUTF.h"
18 
19 #include <ctype.h>
20 #include <functional>
21 #include <locale>
22 
23 using namespace lldb;
24 using namespace lldb_private;
25 using namespace lldb_private::formatters;
26 
// I can't use a std::unique_ptr for this because the Deleter is a template argument there
// and I want the same type to represent both pointers I want to free and pointers I don't need
// to free - which is what this class essentially is
// It's very specialized to the needs of this file, and not suggested for general use
//
// Ownership notes:
//  - an empty Deleter means the bytes are borrowed and never freed by this object
//  - "copying" actually TRANSFERS the bytes: the source object is left with a null data
//    pointer (this is why m_data is mutable), so at most one object ever runs the Deleter
//
// T is the element type exposed by GetBytes(), U is an alternate element type accepted by
// the constructors (its pointer is reinterpreted as a T pointer), S is the size type
template <typename T = uint8_t, typename U = char, typename S = size_t>
struct StringPrinterBufferPointer
{
public:

    // callback invoked on the data pointer when the owning object lets go of it
    typedef std::function<void(const T*)> Deleter;

    // an empty buffer: null data, zero size, nothing to free
    StringPrinterBufferPointer (std::nullptr_t) :
    m_data(nullptr),
    m_size(0),
    m_deleter()
    {}

    // wrap size elements starting at bytes; deleter (if any) is run on bytes at release time
    StringPrinterBufferPointer(const T* bytes, S size, Deleter deleter = nullptr) :
    m_data(bytes),
    m_size(size),
    m_deleter(deleter)
    {}

    // same as above, for callers holding a U pointer (e.g. a string literal)
    StringPrinterBufferPointer(const U* bytes, S size, Deleter deleter = nullptr) :
    m_data(reinterpret_cast<const T*>(bytes)),
    m_size(size),
    m_deleter(deleter)
    {}

    StringPrinterBufferPointer(StringPrinterBufferPointer&& rhs) :
    m_data(rhs.m_data),
    m_size(rhs.m_size),
    m_deleter(rhs.m_deleter)
    {
        rhs.m_data = nullptr;
    }

    // NB: steals the bytes from rhs even though rhs is const
    StringPrinterBufferPointer(const StringPrinterBufferPointer& rhs) :
    m_data(rhs.m_data),
    m_size(rhs.m_size),
    m_deleter(rhs.m_deleter)
    {
        rhs.m_data = nullptr; // this is why m_data has to be mutable
    }

    const T*
    GetBytes () const
    {
        return m_data;
    }

    S
    GetSize () const
    {
        return m_size;
    }

    ~StringPrinterBufferPointer ()
    {
        Release();
    }

    // NB: like the copy constructor, this steals the bytes from rhs
    StringPrinterBufferPointer&
    operator = (const StringPrinterBufferPointer& rhs)
    {
        // without this guard a self-assignment would free the bytes and then null the pointer
        if (this == &rhs)
            return *this;

        Release();
        m_data = rhs.m_data;
        m_size = rhs.m_size;
        m_deleter = rhs.m_deleter;
        rhs.m_data = nullptr;
        return *this;
    }

private:
    // run the Deleter (if there is one) on the current bytes and forget them
    void
    Release ()
    {
        if (m_data && m_deleter)
            m_deleter(m_data);
        m_data = nullptr;
    }

    mutable const T* m_data;
    S m_size;
    Deleter m_deleter;
};
108 
// Primary template for the per-encoding escaping routines; only declared here.
// buffer points at the element to examine, buffer_end bounds the readable data and
// next is an out-parameter that the specializations set to the first unconsumed byte.
// we define this for all values of type but only implement it for those we care about
// that's good because we get linker errors for any unsupported type
template <StringElementType type>
static StringPrinterBufferPointer<>
GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next);
114 
// mimic isprint() for Unicode codepoints
// returns false for every codepoint that falls in one of the ranges listed below, true otherwise
static bool
isprint(char32_t codepoint)
{
    struct CodepointRange
    {
        char32_t first; // inclusive
        char32_t last;  // inclusive
    };

    static const CodepointRange g_unprintable_ranges[] =
    {
        { 0x0000, 0x001F }, // C0
        { 0x007F, 0x009F }, // DEL followed by C1
        { 0x200E, 0x200F }, // bidirectional text control (marks)
        { 0x2028, 0x202E }, // line/paragraph separators, then more bidirectional text control
        { 0xFFF9, 0xFFFF }  // interlinears and generally specials
    };

    for (const CodepointRange& range : g_unprintable_ranges)
    {
        if (range.first <= codepoint && codepoint <= range.last)
            return false;
    }
    return true;
}
141 
142 template <>
143 StringPrinterBufferPointer<>
144 GetPrintableImpl<StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
145 {
146     StringPrinterBufferPointer<> retval = {nullptr};
147 
148     switch (*buffer)
149     {
150         case '\a':
151             retval = {"\\a",2};
152             break;
153         case '\b':
154             retval = {"\\b",2};
155             break;
156         case '\f':
157             retval = {"\\f",2};
158             break;
159         case '\n':
160             retval = {"\\n",2};
161             break;
162         case '\r':
163             retval = {"\\r",2};
164             break;
165         case '\t':
166             retval = {"\\t",2};
167             break;
168         case '\v':
169             retval = {"\\v",2};
170             break;
171         case '\"':
172             retval = {"\\\"",2};
173             break;
174         case '\\':
175             retval = {"\\\\",2};
176             break;
177         default:
178           if (isprint(*buffer))
179             retval = {buffer,1};
180           else
181           {
182             retval = { new uint8_t[5],4,[] (const uint8_t* c) {delete[] c;} };
183             sprintf((char*)retval.GetBytes(),"\\x%02x",*buffer);
184             break;
185           }
186     }
187 
188     next = buffer + 1;
189     return retval;
190 }
191 
// Combine the two bytes of a 2-byte UTF-8 sequence into a codepoint
// c0 is the lead byte (marker 0xC0), c1 the continuation byte (marker 0x80)
// no validation is performed on either byte
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1)
{
    const int lead_payload = c0 - 0xC0;
    const int tail_payload = c1 - 0x80;
    return lead_payload * 64 + tail_payload;
}
// Combine the three bytes of a 3-byte UTF-8 sequence into a codepoint
// c0 is the lead byte (marker 0xE0), c1 and c2 the continuation bytes (marker 0x80)
// no validation is performed on any byte
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2)
{
    const int lead_payload = c0 - 0xE0;
    const int mid_payload = c1 - 0x80;
    const int tail_payload = c2 - 0x80;
    return lead_payload * 4096 + mid_payload * 64 + tail_payload;
}
// Combine the four bytes of a 4-byte UTF-8 sequence into a codepoint
// c0 is the lead byte (marker 0xF0), c1..c3 the continuation bytes (marker 0x80)
// each continuation byte contributes 6 bits, hence the 64/4096/262144 weights
// no validation is performed on any byte
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3)
{
    // the second term must come from c1 (it used to read c2 twice, dropping c1 entirely)
    return (c0-240)*262144+(c1-128)*4096+(c2-128)*64+(c3-128);
}
207 
208 template <>
209 StringPrinterBufferPointer<>
210 GetPrintableImpl<StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
211 {
212     StringPrinterBufferPointer<> retval {nullptr};
213 
214     unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer);
215 
216     if (1+buffer_end-buffer < utf8_encoded_len)
217     {
218         // I don't have enough bytes - print whatever I have left
219         retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)};
220         next = buffer_end+1;
221         return retval;
222     }
223 
224     char32_t codepoint = 0;
225     switch (utf8_encoded_len)
226     {
227         case 1:
228             // this is just an ASCII byte - ask ASCII
229             return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next);
230         case 2:
231             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1));
232             break;
233         case 3:
234             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2));
235             break;
236         case 4:
237             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3));
238             break;
239         default:
240             // this is probably some bogus non-character thing
241             // just print it as-is and hope to sync up again soon
242             retval = {buffer,1};
243             next = buffer+1;
244             return retval;
245     }
246 
247     if (codepoint)
248     {
249         switch (codepoint)
250         {
251             case '\a':
252                 retval = {"\\a",2};
253                 break;
254             case '\b':
255                 retval = {"\\b",2};
256                 break;
257             case '\f':
258                 retval = {"\\f",2};
259                 break;
260             case '\n':
261                 retval = {"\\n",2};
262                 break;
263             case '\r':
264                 retval = {"\\r",2};
265                 break;
266             case '\t':
267                 retval = {"\\t",2};
268                 break;
269             case '\v':
270                 retval = {"\\v",2};
271                 break;
272             case '\"':
273                 retval = {"\\\"",2};
274                 break;
275             case '\\':
276                 retval = {"\\\\",2};
277                 break;
278             default:
279                 if (isprint(codepoint))
280                     retval = {buffer,utf8_encoded_len};
281                 else
282                 {
283                     retval = { new uint8_t[11],10,[] (const uint8_t* c) {delete[] c;} };
284                     sprintf((char*)retval.GetBytes(),"\\U%08x",codepoint);
285                     break;
286                 }
287         }
288 
289         next = buffer + utf8_encoded_len;
290         return retval;
291     }
292 
293     // this should not happen - but just in case.. try to resync at some point
294     retval = {buffer,1};
295     next = buffer+1;
296     return retval;
297 }
298 
299 // Given a sequence of bytes, this function returns:
300 // a sequence of bytes to actually print out + a length
301 // the following unscanned position of the buffer is in next
302 static StringPrinterBufferPointer<>
303 GetPrintable(StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
304 {
305     if (!buffer)
306         return {nullptr};
307 
308     switch (type)
309     {
310         case StringElementType::ASCII:
311             return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next);
312         case StringElementType::UTF8:
313             return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next);
314         default:
315             return {nullptr};
316     }
317 }
318 
319 // use this call if you already have an LLDB-side buffer for the data
320 template<typename SourceDataType>
321 static bool
322 DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**,
323                                                             const SourceDataType*,
324                                                             UTF8**,
325                                                             UTF8*,
326                                                             ConversionFlags),
327                        const DataExtractor& data,
328                        Stream& stream,
329                        char prefix_token,
330                        char quote,
331                        uint32_t sourceSize,
332                        bool escapeNonPrintables)
333 {
334     if (prefix_token != 0)
335         stream.Printf("%c",prefix_token);
336     if (quote != 0)
337         stream.Printf("%c",quote);
338     if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd())
339     {
340         const int bufferSPSize = data.GetByteSize();
341         if (sourceSize == 0)
342         {
343             const int origin_encoding = 8*sizeof(SourceDataType);
344             sourceSize = bufferSPSize/(origin_encoding / 4);
345         }
346 
347         SourceDataType *data_ptr = (SourceDataType*)data.GetDataStart();
348         SourceDataType *data_end_ptr = data_ptr + sourceSize;
349 
350         while (data_ptr < data_end_ptr)
351         {
352             if (!*data_ptr)
353             {
354                 data_end_ptr = data_ptr;
355                 break;
356             }
357             data_ptr++;
358         }
359 
360         data_ptr = (SourceDataType*)data.GetDataStart();
361 
362         lldb::DataBufferSP utf8_data_buffer_sp;
363         UTF8* utf8_data_ptr = nullptr;
364         UTF8* utf8_data_end_ptr = nullptr;
365 
366         if (ConvertFunction)
367         {
368             utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0));
369             utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes();
370             utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize();
371             ConvertFunction ( (const SourceDataType**)&data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion );
372             utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr
373         }
374         else
375         {
376             // just copy the pointers - the cast is necessary to make the compiler happy
377             // but this should only happen if we are reading UTF8 data
378             utf8_data_ptr = (UTF8*)data_ptr;
379             utf8_data_end_ptr = (UTF8*)data_end_ptr;
380         }
381 
382         // since we tend to accept partial data (and even partially malformed data)
383         // we might end up with no NULL terminator before the end_ptr
384         // hence we need to take a slower route and ensure we stay within boundaries
385         for (;utf8_data_ptr < utf8_data_end_ptr;)
386         {
387             if (!*utf8_data_ptr)
388                 break;
389 
390             if (escapeNonPrintables)
391             {
392                 uint8_t* next_data = nullptr;
393                 auto printable = GetPrintable(StringElementType::UTF8, utf8_data_ptr, utf8_data_end_ptr, next_data);
394                 auto printable_bytes = printable.GetBytes();
395                 auto printable_size = printable.GetSize();
396                 if (!printable_bytes || !next_data)
397                 {
398                     // GetPrintable() failed on us - print one byte in a desperate resync attempt
399                     printable_bytes = utf8_data_ptr;
400                     printable_size = 1;
401                     next_data = utf8_data_ptr+1;
402                 }
403                 for (int c = 0; c < printable_size; c++)
404                     stream.Printf("%c", *(printable_bytes+c));
405                 utf8_data_ptr = (uint8_t*)next_data;
406             }
407             else
408             {
409                 stream.Printf("%c",*utf8_data_ptr);
410                 utf8_data_ptr++;
411             }
412         }
413     }
414     if (quote != 0)
415         stream.Printf("%c",quote);
416     return true;
417 }
418 
419 namespace lldb_private
420 {
421 
422 namespace formatters
423 {
424 
425 template <>
426 bool
427 ReadStringAndDumpToStream<StringElementType::ASCII> (ReadStringAndDumpToStreamOptions options)
428 {
429     assert(options.GetStream() && "need a Stream to print the string to");
430     Error my_error;
431     size_t my_data_read;
432 
433     ProcessSP process_sp(options.GetProcessSP());
434 
435     if (process_sp.get() == nullptr || options.GetLocation() == 0)
436         return false;
437 
438     size_t size;
439 
440     if (options.GetSourceSize() == 0)
441         size = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
442     else
443         size = std::min(options.GetSourceSize(),process_sp->GetTarget().GetMaximumSizeOfStringSummary());
444 
445     lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0));
446 
447     my_data_read = process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error);
448 
449     if (my_error.Fail())
450         return false;
451 
452     char prefix_token = options.GetPrefixToken();
453     char quote = options.GetQuote();
454 
455     if (prefix_token != 0)
456         options.GetStream()->Printf("%c%c",prefix_token,quote);
457     else if (quote != 0)
458         options.GetStream()->Printf("%c",quote);
459 
460     uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize();
461 
462     // since we tend to accept partial data (and even partially malformed data)
463     // we might end up with no NULL terminator before the end_ptr
464     // hence we need to take a slower route and ensure we stay within boundaries
465     for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);)
466     {
467         if (options.GetEscapeNonPrintables())
468         {
469             uint8_t* next_data = nullptr;
470             auto printable = GetPrintable(StringElementType::ASCII, data, data_end, next_data);
471             auto printable_bytes = printable.GetBytes();
472             auto printable_size = printable.GetSize();
473             if (!printable_bytes || !next_data)
474             {
475                 // GetPrintable() failed on us - print one byte in a desperate resync attempt
476                 printable_bytes = data;
477                 printable_size = 1;
478                 next_data = data+1;
479             }
480             for (int c = 0; c < printable_size; c++)
481                 options.GetStream()->Printf("%c", *(printable_bytes+c));
482             data = (uint8_t*)next_data;
483         }
484         else
485         {
486             options.GetStream()->Printf("%c",*data);
487             data++;
488         }
489     }
490 
491     if (quote != 0)
492         options.GetStream()->Printf("%c",quote);
493 
494     return true;
495 }
496 
497 template<typename SourceDataType>
498 static bool
499 ReadUTFBufferAndDumpToStream (const ReadStringAndDumpToStreamOptions& options,
500                               ConversionResult (*ConvertFunction) (const SourceDataType**,
501                                                                    const SourceDataType*,
502                                                                    UTF8**,
503                                                                    UTF8*,
504                                                                    ConversionFlags))
505 {
506     assert(options.GetStream() && "need a Stream to print the string to");
507 
508     if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS)
509         return false;
510 
511     lldb::ProcessSP process_sp(options.GetProcessSP());
512 
513     if (!process_sp)
514         return false;
515 
516     const int type_width = sizeof(SourceDataType);
517     const int origin_encoding = 8 * type_width ;
518     if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32)
519         return false;
520     // if not UTF8, I need a conversion function to return proper UTF8
521     if (origin_encoding != 8 && !ConvertFunction)
522         return false;
523 
524     if (!options.GetStream())
525         return false;
526 
527     uint32_t sourceSize = options.GetSourceSize();
528     bool needs_zero_terminator = options.GetNeedsZeroTermination();
529 
530     if (!sourceSize)
531     {
532         sourceSize = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
533         needs_zero_terminator = true;
534     }
535     else
536         sourceSize = std::min(sourceSize,process_sp->GetTarget().GetMaximumSizeOfStringSummary());
537 
538     const int bufferSPSize = sourceSize * type_width;
539 
540     lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0));
541 
542     if (!buffer_sp->GetBytes())
543         return false;
544 
545     Error error;
546     char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes());
547 
548     size_t data_read = 0;
549     if (needs_zero_terminator)
550         data_read = process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width);
551     else
552         data_read = process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error);
553 
554     if (error.Fail() || data_read == 0)
555     {
556         options.GetStream()->Printf("unable to read data");
557         return true;
558     }
559 
560     DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize());
561 
562     return DumpUTFBufferToStream(ConvertFunction, data, *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), sourceSize, options.GetEscapeNonPrintables());
563 }
564 
565 template <>
566 bool
567 ReadStringAndDumpToStream<StringElementType::UTF8> (ReadStringAndDumpToStreamOptions options)
568 {
569     return ReadUTFBufferAndDumpToStream<UTF8>(options,
570                                               nullptr);
571 }
572 
573 template <>
574 bool
575 ReadStringAndDumpToStream<StringElementType::UTF16> (ReadStringAndDumpToStreamOptions options)
576 {
577     return ReadUTFBufferAndDumpToStream<UTF16>(options,
578                                                ConvertUTF16toUTF8);
579 }
580 
581 template <>
582 bool
583 ReadStringAndDumpToStream<StringElementType::UTF32> (ReadStringAndDumpToStreamOptions options)
584 {
585     return ReadUTFBufferAndDumpToStream<UTF32>(options,
586                                                ConvertUTF32toUTF8);
587 }
588 
589 template <>
590 bool
591 ReadBufferAndDumpToStream<StringElementType::UTF8> (ReadBufferAndDumpToStreamOptions options)
592 {
593     assert(options.GetStream() && "need a Stream to print the string to");
594 
595     return DumpUTFBufferToStream<UTF8>(nullptr, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables());
596 }
597 
598 template <>
599 bool
600 ReadBufferAndDumpToStream<StringElementType::ASCII> (ReadBufferAndDumpToStreamOptions options)
601 {
602     // treat ASCII the same as UTF8
603     // FIXME: can we optimize ASCII some more?
604     return ReadBufferAndDumpToStream<StringElementType::UTF8>(options);
605 }
606 
607 template <>
608 bool
609 ReadBufferAndDumpToStream<StringElementType::UTF16> (ReadBufferAndDumpToStreamOptions options)
610 {
611     assert(options.GetStream() && "need a Stream to print the string to");
612 
613     return DumpUTFBufferToStream(ConvertUTF16toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables());
614 }
615 
616 template <>
617 bool
618 ReadBufferAndDumpToStream<StringElementType::UTF32> (ReadBufferAndDumpToStreamOptions options)
619 {
620     assert(options.GetStream() && "need a Stream to print the string to");
621 
622     return DumpUTFBufferToStream(ConvertUTF32toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables());
623 }
624 
625 } // namespace formatters
626 
627 } // namespace lldb_private
628