1 //===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "lldb/DataFormatters/StringPrinter.h"
11 
12 #include "lldb/Core/DataExtractor.h"
13 #include "lldb/Core/Debugger.h"
14 #include "lldb/Core/Error.h"
15 #include "lldb/Core/ValueObject.h"
16 #include "lldb/Target/Process.h"
17 #include "lldb/Target/Target.h"
18 
19 #include "llvm/Support/ConvertUTF.h"
20 
21 #include <ctype.h>
22 #include <functional>
23 #include <locale>
24 
25 using namespace lldb;
26 using namespace lldb_private;
27 using namespace lldb_private::formatters;
28 
// A small (pointer, size, optional deleter) handle used by the escaping helpers in this file.
// std::unique_ptr cannot be used here because its Deleter is a template argument, and the
// same type has to represent both buffers that must be freed (heap-allocated escape
// sequences) and buffers that must not be freed (string literals, pointers into caller data).
// Ownership is transferred rather than shared: copy construction and copy assignment both
// take the pointer away from the source object and leave it holding nullptr.
// It's very specialized to the needs of this file, and not suggested for general use
template <typename T = uint8_t, typename U = char, typename S = size_t>
struct StringPrinterBufferPointer
{
public:

    // called with the stored pointer when a non-empty handle is destroyed or overwritten
    typedef std::function<void(const T*)> Deleter;

    // an empty handle: no bytes, no size, nothing to free
    StringPrinterBufferPointer (std::nullptr_t ptr) :
    m_data(nullptr),
    m_size(0),
    m_deleter()
    {}

    // wraps size elements at bytes; if deleter is non-empty it is run on bytes at destruction
    StringPrinterBufferPointer(const T* bytes, S size, Deleter deleter = nullptr) :
    m_data(bytes),
    m_size(size),
    m_deleter(deleter)
    {}

    // same as above for buffers expressed in the alternate character type (e.g. string literals)
    StringPrinterBufferPointer(const U* bytes, S size, Deleter deleter = nullptr) :
    m_data((T*)bytes),
    m_size(size),
    m_deleter(deleter)
    {}

    StringPrinterBufferPointer(StringPrinterBufferPointer&& rhs) :
    m_data(rhs.m_data),
    m_size(rhs.m_size),
    m_deleter(rhs.m_deleter)
    {
        rhs.m_data = nullptr;
    }

    // "copying" steals the buffer from rhs so that exactly one handle ever frees it
    StringPrinterBufferPointer(const StringPrinterBufferPointer& rhs) :
    m_data(rhs.m_data),
    m_size(rhs.m_size),
    m_deleter(rhs.m_deleter)
    {
        rhs.m_data = nullptr; // this is why m_data has to be mutable
    }

    const T*
    GetBytes () const
    {
        return m_data;
    }

    const S
    GetSize () const
    {
        return m_size;
    }

    ~StringPrinterBufferPointer ()
    {
        if (m_data && m_deleter)
            m_deleter(m_data);
        m_data = nullptr;
    }

    // releases whatever this handle currently owns, then steals the buffer from rhs
    StringPrinterBufferPointer&
    operator = (const StringPrinterBufferPointer& rhs)
    {
        // without this guard, self-assignment would free the buffer and then
        // null out the very pointer that was just "taken over"
        if (this == &rhs)
            return *this;

        if (m_data && m_deleter)
            m_deleter(m_data);
        m_data = rhs.m_data;
        m_size = rhs.m_size;
        m_deleter = rhs.m_deleter;
        rhs.m_data = nullptr;
        return *this;
    }

private:
    mutable const T* m_data;
    S m_size; // stored with the size type the class was instantiated with
    Deleter m_deleter;
};
110 
111 // we define this for all values of type but only implement it for those we care about
112 // that's good because we get linker errors for any unsupported type
113 template <StringElementType type>
114 static StringPrinterBufferPointer<>
115 GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next);
116 
// isprint() counterpart for Unicode codepoints: returns false for the codepoint
// ranges listed below and true for everything else
static bool
isprint(char32_t codepoint)
{
    // C0 controls plus DEL
    const bool in_c0 = (codepoint <= 0x1F) || (codepoint == 0x7F);
    // C1 controls
    const bool in_c1 = (codepoint >= 0x80) && (codepoint <= 0x9F);
    // line/paragraph separators
    const bool is_separator = (codepoint == 0x2028) || (codepoint == 0x2029);
    // bidirectional text control
    const bool is_bidi_control = (codepoint == 0x200E) ||
                                 (codepoint == 0x200F) ||
                                 ((codepoint >= 0x202A) && (codepoint <= 0x202E));
    // interlinears and generally specials
    const bool is_special = (codepoint >= 0xFFF9) && (codepoint <= 0xFFFF);

    return !(in_c0 || in_c1 || is_separator || is_bidi_control || is_special);
}
143 
144 template <>
145 StringPrinterBufferPointer<>
146 GetPrintableImpl<StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
147 {
148     StringPrinterBufferPointer<> retval = {nullptr};
149 
150     switch (*buffer)
151     {
152         case '\a':
153             retval = {"\\a",2};
154             break;
155         case '\b':
156             retval = {"\\b",2};
157             break;
158         case '\f':
159             retval = {"\\f",2};
160             break;
161         case '\n':
162             retval = {"\\n",2};
163             break;
164         case '\r':
165             retval = {"\\r",2};
166             break;
167         case '\t':
168             retval = {"\\t",2};
169             break;
170         case '\v':
171             retval = {"\\v",2};
172             break;
173         case '\"':
174             retval = {"\\\"",2};
175             break;
176         case '\\':
177             retval = {"\\\\",2};
178             break;
179         default:
180           if (isprint(*buffer))
181             retval = {buffer,1};
182           else
183           {
184             retval = { new uint8_t[5],4,[] (const uint8_t* c) {delete[] c;} };
185             sprintf((char*)retval.GetBytes(),"\\x%02x",*buffer);
186             break;
187           }
188     }
189 
190     next = buffer + 1;
191     return retval;
192 }
193 
// Combines the payload of a two-byte UTF-8 sequence (lead byte c0, continuation c1)
// into a codepoint; the bytes are not validated.
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1)
{
    const int lead_payload = c0 - 0xC0;   // strip the two-byte lead marker
    const int trail_payload = c1 - 0x80;  // strip the continuation marker
    return lead_payload * 64 + trail_payload;
}
// Combines the payload of a three-byte UTF-8 sequence (lead byte c0, continuations
// c1 and c2) into a codepoint; the bytes are not validated.
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2)
{
    const int lead_payload = c0 - 0xE0;    // strip the three-byte lead marker
    const int first_trail = c1 - 0x80;     // strip the continuation markers
    const int second_trail = c2 - 0x80;
    return lead_payload * 4096 + first_trail * 64 + second_trail;
}
// Combines the payload of a four-byte UTF-8 sequence (lead byte c0, continuations
// c1, c2 and c3) into a codepoint; the bytes are not validated.
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3)
{
    // every continuation byte contributes its own six payload bits:
    // c1 supplies bits 12-17, c2 bits 6-11 and c3 bits 0-5
    return (c0-240)*262144+(c1-128)*4096+(c2-128)*64+(c3-128);
}
209 
210 template <>
211 StringPrinterBufferPointer<>
212 GetPrintableImpl<StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
213 {
214     StringPrinterBufferPointer<> retval {nullptr};
215 
216     unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer);
217 
218     if (1+buffer_end-buffer < utf8_encoded_len)
219     {
220         // I don't have enough bytes - print whatever I have left
221         retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)};
222         next = buffer_end+1;
223         return retval;
224     }
225 
226     char32_t codepoint = 0;
227     switch (utf8_encoded_len)
228     {
229         case 1:
230             // this is just an ASCII byte - ask ASCII
231             return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next);
232         case 2:
233             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1));
234             break;
235         case 3:
236             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2));
237             break;
238         case 4:
239             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3));
240             break;
241         default:
242             // this is probably some bogus non-character thing
243             // just print it as-is and hope to sync up again soon
244             retval = {buffer,1};
245             next = buffer+1;
246             return retval;
247     }
248 
249     if (codepoint)
250     {
251         switch (codepoint)
252         {
253             case '\a':
254                 retval = {"\\a",2};
255                 break;
256             case '\b':
257                 retval = {"\\b",2};
258                 break;
259             case '\f':
260                 retval = {"\\f",2};
261                 break;
262             case '\n':
263                 retval = {"\\n",2};
264                 break;
265             case '\r':
266                 retval = {"\\r",2};
267                 break;
268             case '\t':
269                 retval = {"\\t",2};
270                 break;
271             case '\v':
272                 retval = {"\\v",2};
273                 break;
274             case '\"':
275                 retval = {"\\\"",2};
276                 break;
277             case '\\':
278                 retval = {"\\\\",2};
279                 break;
280             default:
281                 if (isprint(codepoint))
282                     retval = {buffer,utf8_encoded_len};
283                 else
284                 {
285                     retval = { new uint8_t[11],10,[] (const uint8_t* c) {delete[] c;} };
286                     sprintf((char*)retval.GetBytes(),"\\U%08x",codepoint);
287                     break;
288                 }
289         }
290 
291         next = buffer + utf8_encoded_len;
292         return retval;
293     }
294 
295     // this should not happen - but just in case.. try to resync at some point
296     retval = {buffer,1};
297     next = buffer+1;
298     return retval;
299 }
300 
301 // Given a sequence of bytes, this function returns:
302 // a sequence of bytes to actually print out + a length
303 // the following unscanned position of the buffer is in next
304 static StringPrinterBufferPointer<>
305 GetPrintable(StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
306 {
307     if (!buffer)
308         return {nullptr};
309 
310     switch (type)
311     {
312         case StringElementType::ASCII:
313             return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next);
314         case StringElementType::UTF8:
315             return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next);
316         default:
317             return {nullptr};
318     }
319 }
320 
321 // use this call if you already have an LLDB-side buffer for the data
322 template<typename SourceDataType>
323 static bool
324 DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**,
325                                                             const SourceDataType*,
326                                                             UTF8**,
327                                                             UTF8*,
328                                                             ConversionFlags),
329                        const DataExtractor& data,
330                        Stream& stream,
331                        char prefix_token,
332                        char quote,
333                        uint32_t sourceSize,
334                        bool escapeNonPrintables)
335 {
336     if (prefix_token != 0)
337         stream.Printf("%c",prefix_token);
338     if (quote != 0)
339         stream.Printf("%c",quote);
340     if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd())
341     {
342         const int bufferSPSize = data.GetByteSize();
343         if (sourceSize == 0)
344         {
345             const int origin_encoding = 8*sizeof(SourceDataType);
346             sourceSize = bufferSPSize/(origin_encoding / 4);
347         }
348 
349         SourceDataType *data_ptr = (SourceDataType*)data.GetDataStart();
350         SourceDataType *data_end_ptr = data_ptr + sourceSize;
351 
352         while (data_ptr < data_end_ptr)
353         {
354             if (!*data_ptr)
355             {
356                 data_end_ptr = data_ptr;
357                 break;
358             }
359             data_ptr++;
360         }
361 
362         data_ptr = (SourceDataType*)data.GetDataStart();
363 
364         lldb::DataBufferSP utf8_data_buffer_sp;
365         UTF8* utf8_data_ptr = nullptr;
366         UTF8* utf8_data_end_ptr = nullptr;
367 
368         if (ConvertFunction)
369         {
370             utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0));
371             utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes();
372             utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize();
373             ConvertFunction ( (const SourceDataType**)&data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion );
374             utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr
375         }
376         else
377         {
378             // just copy the pointers - the cast is necessary to make the compiler happy
379             // but this should only happen if we are reading UTF8 data
380             utf8_data_ptr = (UTF8*)data_ptr;
381             utf8_data_end_ptr = (UTF8*)data_end_ptr;
382         }
383 
384         // since we tend to accept partial data (and even partially malformed data)
385         // we might end up with no NULL terminator before the end_ptr
386         // hence we need to take a slower route and ensure we stay within boundaries
387         for (;utf8_data_ptr < utf8_data_end_ptr;)
388         {
389             if (!*utf8_data_ptr)
390                 break;
391 
392             if (escapeNonPrintables)
393             {
394                 uint8_t* next_data = nullptr;
395                 auto printable = GetPrintable(StringElementType::UTF8, utf8_data_ptr, utf8_data_end_ptr, next_data);
396                 auto printable_bytes = printable.GetBytes();
397                 auto printable_size = printable.GetSize();
398                 if (!printable_bytes || !next_data)
399                 {
400                     // GetPrintable() failed on us - print one byte in a desperate resync attempt
401                     printable_bytes = utf8_data_ptr;
402                     printable_size = 1;
403                     next_data = utf8_data_ptr+1;
404                 }
405                 for (int c = 0; c < printable_size; c++)
406                     stream.Printf("%c", *(printable_bytes+c));
407                 utf8_data_ptr = (uint8_t*)next_data;
408             }
409             else
410             {
411                 stream.Printf("%c",*utf8_data_ptr);
412                 utf8_data_ptr++;
413             }
414         }
415     }
416     if (quote != 0)
417         stream.Printf("%c",quote);
418     return true;
419 }
420 
421 lldb_private::formatters::ReadStringAndDumpToStreamOptions::ReadStringAndDumpToStreamOptions (ValueObject& valobj) :
422     ReadStringAndDumpToStreamOptions()
423 {
424     SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
425 }
426 
427 lldb_private::formatters::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (ValueObject& valobj) :
428     ReadBufferAndDumpToStreamOptions()
429 {
430     SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
431 }
432 
433 
434 namespace lldb_private
435 {
436 
437 namespace formatters
438 {
439 
440 template <>
441 bool
442 ReadStringAndDumpToStream<StringElementType::ASCII> (ReadStringAndDumpToStreamOptions options)
443 {
444     assert(options.GetStream() && "need a Stream to print the string to");
445     Error my_error;
446     size_t my_data_read;
447 
448     ProcessSP process_sp(options.GetProcessSP());
449 
450     if (process_sp.get() == nullptr || options.GetLocation() == 0)
451         return false;
452 
453     size_t size;
454 
455     if (options.GetSourceSize() == 0)
456         size = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
457     else if (!options.GetIgnoreMaxLength())
458         size = std::min(options.GetSourceSize(),process_sp->GetTarget().GetMaximumSizeOfStringSummary());
459     else
460         size = options.GetSourceSize();
461 
462     lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0));
463 
464     my_data_read = process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error);
465 
466     if (my_error.Fail())
467         return false;
468 
469     char prefix_token = options.GetPrefixToken();
470     char quote = options.GetQuote();
471 
472     if (prefix_token != 0)
473         options.GetStream()->Printf("%c%c",prefix_token,quote);
474     else if (quote != 0)
475         options.GetStream()->Printf("%c",quote);
476 
477     uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize();
478 
479     // since we tend to accept partial data (and even partially malformed data)
480     // we might end up with no NULL terminator before the end_ptr
481     // hence we need to take a slower route and ensure we stay within boundaries
482     for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);)
483     {
484         if (options.GetEscapeNonPrintables())
485         {
486             uint8_t* next_data = nullptr;
487             auto printable = GetPrintable(StringElementType::ASCII, data, data_end, next_data);
488             auto printable_bytes = printable.GetBytes();
489             auto printable_size = printable.GetSize();
490             if (!printable_bytes || !next_data)
491             {
492                 // GetPrintable() failed on us - print one byte in a desperate resync attempt
493                 printable_bytes = data;
494                 printable_size = 1;
495                 next_data = data+1;
496             }
497             for (int c = 0; c < printable_size; c++)
498                 options.GetStream()->Printf("%c", *(printable_bytes+c));
499             data = (uint8_t*)next_data;
500         }
501         else
502         {
503             options.GetStream()->Printf("%c",*data);
504             data++;
505         }
506     }
507 
508     if (quote != 0)
509         options.GetStream()->Printf("%c",quote);
510 
511     return true;
512 }
513 
514 template<typename SourceDataType>
515 static bool
516 ReadUTFBufferAndDumpToStream (const ReadStringAndDumpToStreamOptions& options,
517                               ConversionResult (*ConvertFunction) (const SourceDataType**,
518                                                                    const SourceDataType*,
519                                                                    UTF8**,
520                                                                    UTF8*,
521                                                                    ConversionFlags))
522 {
523     assert(options.GetStream() && "need a Stream to print the string to");
524 
525     if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS)
526         return false;
527 
528     lldb::ProcessSP process_sp(options.GetProcessSP());
529 
530     if (!process_sp)
531         return false;
532 
533     const int type_width = sizeof(SourceDataType);
534     const int origin_encoding = 8 * type_width ;
535     if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32)
536         return false;
537     // if not UTF8, I need a conversion function to return proper UTF8
538     if (origin_encoding != 8 && !ConvertFunction)
539         return false;
540 
541     if (!options.GetStream())
542         return false;
543 
544     uint32_t sourceSize = options.GetSourceSize();
545     bool needs_zero_terminator = options.GetNeedsZeroTermination();
546 
547     if (!sourceSize)
548     {
549         sourceSize = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
550         needs_zero_terminator = true;
551     }
552     else
553         sourceSize = std::min(sourceSize,process_sp->GetTarget().GetMaximumSizeOfStringSummary());
554 
555     const int bufferSPSize = sourceSize * type_width;
556 
557     lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0));
558 
559     if (!buffer_sp->GetBytes())
560         return false;
561 
562     Error error;
563     char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes());
564 
565     size_t data_read = 0;
566     if (needs_zero_terminator)
567         data_read = process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width);
568     else
569         data_read = process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error);
570 
571     if (error.Fail())
572     {
573         options.GetStream()->Printf("unable to read data");
574         return true;
575     }
576 
577     DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize());
578 
579     return DumpUTFBufferToStream(ConvertFunction, data, *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), sourceSize, options.GetEscapeNonPrintables());
580 }
581 
582 template <>
583 bool
584 ReadStringAndDumpToStream<StringElementType::UTF8> (ReadStringAndDumpToStreamOptions options)
585 {
586     return ReadUTFBufferAndDumpToStream<UTF8>(options,
587                                               nullptr);
588 }
589 
590 template <>
591 bool
592 ReadStringAndDumpToStream<StringElementType::UTF16> (ReadStringAndDumpToStreamOptions options)
593 {
594     return ReadUTFBufferAndDumpToStream<UTF16>(options,
595                                                ConvertUTF16toUTF8);
596 }
597 
598 template <>
599 bool
600 ReadStringAndDumpToStream<StringElementType::UTF32> (ReadStringAndDumpToStreamOptions options)
601 {
602     return ReadUTFBufferAndDumpToStream<UTF32>(options,
603                                                ConvertUTF32toUTF8);
604 }
605 
606 template <>
607 bool
608 ReadBufferAndDumpToStream<StringElementType::UTF8> (ReadBufferAndDumpToStreamOptions options)
609 {
610     assert(options.GetStream() && "need a Stream to print the string to");
611 
612     return DumpUTFBufferToStream<UTF8>(nullptr, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables());
613 }
614 
615 template <>
616 bool
617 ReadBufferAndDumpToStream<StringElementType::ASCII> (ReadBufferAndDumpToStreamOptions options)
618 {
619     // treat ASCII the same as UTF8
620     // FIXME: can we optimize ASCII some more?
621     return ReadBufferAndDumpToStream<StringElementType::UTF8>(options);
622 }
623 
624 template <>
625 bool
626 ReadBufferAndDumpToStream<StringElementType::UTF16> (ReadBufferAndDumpToStreamOptions options)
627 {
628     assert(options.GetStream() && "need a Stream to print the string to");
629 
630     return DumpUTFBufferToStream(ConvertUTF16toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables());
631 }
632 
633 template <>
634 bool
635 ReadBufferAndDumpToStream<StringElementType::UTF32> (ReadBufferAndDumpToStreamOptions options)
636 {
637     assert(options.GetStream() && "need a Stream to print the string to");
638 
639     return DumpUTFBufferToStream(ConvertUTF32toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables());
640 }
641 
642 } // namespace formatters
643 
644 } // namespace lldb_private
645