1ca6c8ee2SEnrico Granata //===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===//
2ca6c8ee2SEnrico Granata //
3ca6c8ee2SEnrico Granata //                     The LLVM Compiler Infrastructure
4ca6c8ee2SEnrico Granata //
5ca6c8ee2SEnrico Granata // This file is distributed under the University of Illinois Open Source
6ca6c8ee2SEnrico Granata // License. See LICENSE.TXT for details.
7ca6c8ee2SEnrico Granata //
8ca6c8ee2SEnrico Granata //===----------------------------------------------------------------------===//
9ca6c8ee2SEnrico Granata 
10ca6c8ee2SEnrico Granata #include "lldb/DataFormatters/StringPrinter.h"
11ca6c8ee2SEnrico Granata 
12ca6c8ee2SEnrico Granata #include "lldb/Core/DataExtractor.h"
13ebdc1ac0SEnrico Granata #include "lldb/Core/Debugger.h"
14ca6c8ee2SEnrico Granata #include "lldb/Core/Error.h"
15ebdc1ac0SEnrico Granata #include "lldb/Core/ValueObject.h"
16ca6c8ee2SEnrico Granata #include "lldb/Target/Process.h"
17ca6c8ee2SEnrico Granata #include "lldb/Target/Target.h"
18ca6c8ee2SEnrico Granata 
19ca6c8ee2SEnrico Granata #include "llvm/Support/ConvertUTF.h"
20ca6c8ee2SEnrico Granata 
21ca6c8ee2SEnrico Granata #include <ctype.h>
22ca6c8ee2SEnrico Granata #include <functional>
23ca6c8ee2SEnrico Granata #include <locale>
24ca6c8ee2SEnrico Granata 
25ca6c8ee2SEnrico Granata using namespace lldb;
26ca6c8ee2SEnrico Granata using namespace lldb_private;
27ca6c8ee2SEnrico Granata using namespace lldb_private::formatters;
28ca6c8ee2SEnrico Granata 
// A (pointer, size) view over a run of bytes to print, optionally paired with a deleter.
// I can't use a std::unique_ptr for this because the Deleter is a template argument there
// and I want the same type to represent both pointers I want to free and pointers I don't need
// to free - which is what this class essentially is.
// Ownership is handed over on copy as well as on move: the source object is left with a null
// data pointer (which is why m_data is mutable), so the deleter runs at most once per buffer.
// It's very specialized to the needs of this file, and not suggested for general use
//
// T is the element type exposed by GetBytes(), U is an alternate element type accepted on
// construction (its pointer is reinterpreted as T*), S is the type used for the size.
template <typename T = uint8_t, typename U = char, typename S = size_t>
struct StringPrinterBufferPointer
{
public:

    // invoked with the data pointer when an owning instance is destroyed or overwritten;
    // an empty Deleter means the bytes are not owned
    typedef std::function<void(const T*)> Deleter;

    // the empty buffer: no bytes, no size, nothing to free
    StringPrinterBufferPointer (std::nullptr_t ptr) :
    m_data(nullptr),
    m_size(0),
    m_deleter()
    {}

    // wrap size elements starting at bytes; deleter (if any) is called on bytes later on
    StringPrinterBufferPointer(const T* bytes, S size, Deleter deleter = nullptr) :
    m_data(bytes),
    m_size(size),
    m_deleter(deleter)
    {}

    // same as above for data expressed as U (e.g. string literals)
    StringPrinterBufferPointer(const U* bytes, S size, Deleter deleter = nullptr) :
    m_data((T*)bytes),
    m_size(size),
    m_deleter(deleter)
    {}

    // take over rhs' buffer; rhs no longer refers to it afterwards
    StringPrinterBufferPointer(StringPrinterBufferPointer&& rhs) :
    m_data(rhs.m_data),
    m_size(rhs.m_size),
    m_deleter(rhs.m_deleter)
    {
        rhs.m_data = nullptr;
    }

    // copying also transfers the buffer out of rhs, so that only one object ever frees it
    StringPrinterBufferPointer(const StringPrinterBufferPointer& rhs) :
    m_data(rhs.m_data),
    m_size(rhs.m_size),
    m_deleter(rhs.m_deleter)
    {
        rhs.m_data = nullptr; // this is why m_data has to be mutable
    }

    const T*
    GetBytes () const
    {
        return m_data;
    }

    const S
    GetSize () const
    {
        return m_size;
    }

    ~StringPrinterBufferPointer ()
    {
        if (m_data && m_deleter)
            m_deleter(m_data);
        m_data = nullptr;
    }

    // release whatever this object holds, then take over rhs' buffer
    StringPrinterBufferPointer&
    operator = (const StringPrinterBufferPointer& rhs)
    {
        // assigning an object to itself must neither free nor drop the buffer it holds
        if (this == &rhs)
            return *this;
        if (m_data && m_deleter)
            m_deleter(m_data);
        m_data = rhs.m_data;
        m_size = rhs.m_size;
        m_deleter = rhs.m_deleter;
        rhs.m_data = nullptr;
        return *this;
    }

private:
    mutable const T* m_data;
    S m_size; // stored as S so that GetSize() hands back exactly what was passed in
    Deleter m_deleter;
};
110ca6c8ee2SEnrico Granata 
// we declare this template for every value of StringElementType but only specialize it for those we care about
// that's good because any use with an unsupported type shows up as a linker error
113ca6c8ee2SEnrico Granata template <StringElementType type>
114ca6c8ee2SEnrico Granata static StringPrinterBufferPointer<>
115ca6c8ee2SEnrico Granata GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next);
116ca6c8ee2SEnrico Granata 
// mimic isprint() for Unicode codepoints
// a codepoint is reported as non-printable when it falls in one of the ranges listed below;
// everything else is considered printable
static bool
isprint(char32_t codepoint)
{
    const bool is_c0_control = (codepoint <= 0x1F) || (codepoint == 0x7F); // C0
    const bool is_c1_control = (codepoint >= 0x80) && (codepoint <= 0x9F); // C1
    const bool is_separator = (codepoint == 0x2028) || (codepoint == 0x2029); // line/paragraph separators
    const bool is_bidi_control = (codepoint == 0x200E) ||
                                 (codepoint == 0x200F) ||
                                 ((codepoint >= 0x202A) && (codepoint <= 0x202E)); // bidirectional text control
    const bool is_special = (codepoint >= 0xFFF9) && (codepoint <= 0xFFFF); // interlinears and generally specials

    return !(is_c0_control || is_c1_control || is_separator || is_bidi_control || is_special);
}
143ca6c8ee2SEnrico Granata 
144ca6c8ee2SEnrico Granata template <>
145ca6c8ee2SEnrico Granata StringPrinterBufferPointer<>
146ca6c8ee2SEnrico Granata GetPrintableImpl<StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
147ca6c8ee2SEnrico Granata {
148ca6c8ee2SEnrico Granata     StringPrinterBufferPointer<> retval = {nullptr};
149ca6c8ee2SEnrico Granata 
150ca6c8ee2SEnrico Granata     switch (*buffer)
151ca6c8ee2SEnrico Granata     {
152da04fbb5SEnrico Granata         case 0:
153da04fbb5SEnrico Granata             retval = {"\\0",2};
154da04fbb5SEnrico Granata             break;
155ca6c8ee2SEnrico Granata         case '\a':
156ca6c8ee2SEnrico Granata             retval = {"\\a",2};
157ca6c8ee2SEnrico Granata             break;
158ca6c8ee2SEnrico Granata         case '\b':
159ca6c8ee2SEnrico Granata             retval = {"\\b",2};
160ca6c8ee2SEnrico Granata             break;
161ca6c8ee2SEnrico Granata         case '\f':
162ca6c8ee2SEnrico Granata             retval = {"\\f",2};
163ca6c8ee2SEnrico Granata             break;
164ca6c8ee2SEnrico Granata         case '\n':
165ca6c8ee2SEnrico Granata             retval = {"\\n",2};
166ca6c8ee2SEnrico Granata             break;
167ca6c8ee2SEnrico Granata         case '\r':
168ca6c8ee2SEnrico Granata             retval = {"\\r",2};
169ca6c8ee2SEnrico Granata             break;
170ca6c8ee2SEnrico Granata         case '\t':
171ca6c8ee2SEnrico Granata             retval = {"\\t",2};
172ca6c8ee2SEnrico Granata             break;
173ca6c8ee2SEnrico Granata         case '\v':
174ca6c8ee2SEnrico Granata             retval = {"\\v",2};
175ca6c8ee2SEnrico Granata             break;
176ca6c8ee2SEnrico Granata         case '\"':
177ca6c8ee2SEnrico Granata             retval = {"\\\"",2};
178ca6c8ee2SEnrico Granata             break;
179ca6c8ee2SEnrico Granata         case '\\':
180ca6c8ee2SEnrico Granata             retval = {"\\\\",2};
181ca6c8ee2SEnrico Granata             break;
182ca6c8ee2SEnrico Granata         default:
183ca6c8ee2SEnrico Granata           if (isprint(*buffer))
184ca6c8ee2SEnrico Granata               retval = {buffer,1};
185ca6c8ee2SEnrico Granata           else
186ca6c8ee2SEnrico Granata           {
187*d7e6a4f2SVince Harron               uint8_t* data = new uint8_t[5];
188*d7e6a4f2SVince Harron               sprintf((char*)data,"\\x%02x",*buffer);
189*d7e6a4f2SVince Harron               retval = {data, 4, [] (const uint8_t* c) {delete[] c;} };
190ca6c8ee2SEnrico Granata               break;
191ca6c8ee2SEnrico Granata           }
192ca6c8ee2SEnrico Granata     }
193ca6c8ee2SEnrico Granata 
194ca6c8ee2SEnrico Granata     next = buffer + 1;
195ca6c8ee2SEnrico Granata     return retval;
196ca6c8ee2SEnrico Granata }
197ca6c8ee2SEnrico Granata 
// Combine the lead byte c0 and continuation byte c1 of a 2-byte UTF-8 sequence into a codepoint.
// The marker values (0xC0 for the lead, 0x80 for the continuation) are subtracted arithmetically;
// no validation of the bytes is performed.
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1)
{
    const int lead_payload = c0 - 192;
    const int tail_payload = c1 - 128;
    return lead_payload * 64 + tail_payload;
}
// Combine the lead byte c0 and continuation bytes c1, c2 of a 3-byte UTF-8 sequence into a codepoint.
// The marker values (0xE0 for the lead, 0x80 for each continuation) are subtracted arithmetically;
// no validation of the bytes is performed.
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2)
{
    const int lead_payload = c0 - 224;
    const int mid_payload = c1 - 128;
    const int tail_payload = c2 - 128;
    return lead_payload * 4096 + mid_payload * 64 + tail_payload;
}
// Combine the lead byte c0 and continuation bytes c1, c2, c3 of a 4-byte UTF-8 sequence into a
// codepoint. The marker values (0xF0 for the lead, 0x80 for each continuation) are subtracted
// arithmetically; no validation of the bytes is performed.
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3)
{
    // each continuation byte contributes 6 bits, so the weights are 64^3, 64^2, 64 and 1;
    // the 64^2 term belongs to c1 (the first continuation byte)
    return (c0-240)*262144+(c1-128)*4096+(c2-128)*64+(c3-128);
}
213ca6c8ee2SEnrico Granata 
214ca6c8ee2SEnrico Granata template <>
215ca6c8ee2SEnrico Granata StringPrinterBufferPointer<>
216ca6c8ee2SEnrico Granata GetPrintableImpl<StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
217ca6c8ee2SEnrico Granata {
218ca6c8ee2SEnrico Granata     StringPrinterBufferPointer<> retval {nullptr};
219ca6c8ee2SEnrico Granata 
220ca6c8ee2SEnrico Granata     unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer);
221ca6c8ee2SEnrico Granata 
222ca6c8ee2SEnrico Granata     if (1+buffer_end-buffer < utf8_encoded_len)
223ca6c8ee2SEnrico Granata     {
224ca6c8ee2SEnrico Granata         // I don't have enough bytes - print whatever I have left
225ca6c8ee2SEnrico Granata         retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)};
226ca6c8ee2SEnrico Granata         next = buffer_end+1;
227ca6c8ee2SEnrico Granata         return retval;
228ca6c8ee2SEnrico Granata     }
229ca6c8ee2SEnrico Granata 
230ca6c8ee2SEnrico Granata     char32_t codepoint = 0;
231ca6c8ee2SEnrico Granata     switch (utf8_encoded_len)
232ca6c8ee2SEnrico Granata     {
233ca6c8ee2SEnrico Granata         case 1:
234ca6c8ee2SEnrico Granata             // this is just an ASCII byte - ask ASCII
235ca6c8ee2SEnrico Granata             return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next);
236ca6c8ee2SEnrico Granata         case 2:
237ca6c8ee2SEnrico Granata             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1));
238ca6c8ee2SEnrico Granata             break;
239ca6c8ee2SEnrico Granata         case 3:
240ca6c8ee2SEnrico Granata             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2));
241ca6c8ee2SEnrico Granata             break;
242ca6c8ee2SEnrico Granata         case 4:
243ca6c8ee2SEnrico Granata             codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3));
244ca6c8ee2SEnrico Granata             break;
245ca6c8ee2SEnrico Granata         default:
246ca6c8ee2SEnrico Granata             // this is probably some bogus non-character thing
247ca6c8ee2SEnrico Granata             // just print it as-is and hope to sync up again soon
248ca6c8ee2SEnrico Granata             retval = {buffer,1};
249ca6c8ee2SEnrico Granata             next = buffer+1;
250ca6c8ee2SEnrico Granata             return retval;
251ca6c8ee2SEnrico Granata     }
252ca6c8ee2SEnrico Granata 
253ca6c8ee2SEnrico Granata     if (codepoint)
254ca6c8ee2SEnrico Granata     {
255ca6c8ee2SEnrico Granata         switch (codepoint)
256ca6c8ee2SEnrico Granata         {
257da04fbb5SEnrico Granata             case 0:
258da04fbb5SEnrico Granata                 retval = {"\\0",2};
259da04fbb5SEnrico Granata                 break;
260ca6c8ee2SEnrico Granata             case '\a':
261ca6c8ee2SEnrico Granata                 retval = {"\\a",2};
262ca6c8ee2SEnrico Granata                 break;
263ca6c8ee2SEnrico Granata             case '\b':
264ca6c8ee2SEnrico Granata                 retval = {"\\b",2};
265ca6c8ee2SEnrico Granata                 break;
266ca6c8ee2SEnrico Granata             case '\f':
267ca6c8ee2SEnrico Granata                 retval = {"\\f",2};
268ca6c8ee2SEnrico Granata                 break;
269ca6c8ee2SEnrico Granata             case '\n':
270ca6c8ee2SEnrico Granata                 retval = {"\\n",2};
271ca6c8ee2SEnrico Granata                 break;
272ca6c8ee2SEnrico Granata             case '\r':
273ca6c8ee2SEnrico Granata                 retval = {"\\r",2};
274ca6c8ee2SEnrico Granata                 break;
275ca6c8ee2SEnrico Granata             case '\t':
276ca6c8ee2SEnrico Granata                 retval = {"\\t",2};
277ca6c8ee2SEnrico Granata                 break;
278ca6c8ee2SEnrico Granata             case '\v':
279ca6c8ee2SEnrico Granata                 retval = {"\\v",2};
280ca6c8ee2SEnrico Granata                 break;
281ca6c8ee2SEnrico Granata             case '\"':
282ca6c8ee2SEnrico Granata                 retval = {"\\\"",2};
283ca6c8ee2SEnrico Granata                 break;
284ca6c8ee2SEnrico Granata             case '\\':
285ca6c8ee2SEnrico Granata                 retval = {"\\\\",2};
286ca6c8ee2SEnrico Granata                 break;
287ca6c8ee2SEnrico Granata             default:
288ca6c8ee2SEnrico Granata                 if (isprint(codepoint))
289ca6c8ee2SEnrico Granata                     retval = {buffer,utf8_encoded_len};
290ca6c8ee2SEnrico Granata                 else
291ca6c8ee2SEnrico Granata                 {
292*d7e6a4f2SVince Harron                     uint8_t* data = new uint8_t[11];
293*d7e6a4f2SVince Harron                     sprintf((char*)data,"\\U%08x",codepoint);
294*d7e6a4f2SVince Harron                     retval = { data,10,[] (const uint8_t* c) {delete[] c;} };
295ca6c8ee2SEnrico Granata                     break;
296ca6c8ee2SEnrico Granata                 }
297ca6c8ee2SEnrico Granata         }
298ca6c8ee2SEnrico Granata 
299ca6c8ee2SEnrico Granata         next = buffer + utf8_encoded_len;
300ca6c8ee2SEnrico Granata         return retval;
301ca6c8ee2SEnrico Granata     }
302ca6c8ee2SEnrico Granata 
303ca6c8ee2SEnrico Granata     // this should not happen - but just in case.. try to resync at some point
304ca6c8ee2SEnrico Granata     retval = {buffer,1};
305ca6c8ee2SEnrico Granata     next = buffer+1;
306ca6c8ee2SEnrico Granata     return retval;
307ca6c8ee2SEnrico Granata }
308ca6c8ee2SEnrico Granata 
309ca6c8ee2SEnrico Granata // Given a sequence of bytes, this function returns:
310ca6c8ee2SEnrico Granata // a sequence of bytes to actually print out + a length
311ca6c8ee2SEnrico Granata // the following unscanned position of the buffer is in next
312ca6c8ee2SEnrico Granata static StringPrinterBufferPointer<>
313ca6c8ee2SEnrico Granata GetPrintable(StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
314ca6c8ee2SEnrico Granata {
315ca6c8ee2SEnrico Granata     if (!buffer)
316ca6c8ee2SEnrico Granata         return {nullptr};
317ca6c8ee2SEnrico Granata 
318ca6c8ee2SEnrico Granata     switch (type)
319ca6c8ee2SEnrico Granata     {
320ca6c8ee2SEnrico Granata         case StringElementType::ASCII:
321ca6c8ee2SEnrico Granata             return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next);
322ca6c8ee2SEnrico Granata         case StringElementType::UTF8:
323ca6c8ee2SEnrico Granata             return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next);
324ca6c8ee2SEnrico Granata         default:
325ca6c8ee2SEnrico Granata             return {nullptr};
326ca6c8ee2SEnrico Granata     }
327ca6c8ee2SEnrico Granata }
328ca6c8ee2SEnrico Granata 
329ca6c8ee2SEnrico Granata // use this call if you already have an LLDB-side buffer for the data
330ca6c8ee2SEnrico Granata template<typename SourceDataType>
331ca6c8ee2SEnrico Granata static bool
332ca6c8ee2SEnrico Granata DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**,
333ca6c8ee2SEnrico Granata                                                             const SourceDataType*,
334ca6c8ee2SEnrico Granata                                                             UTF8**,
335ca6c8ee2SEnrico Granata                                                             UTF8*,
336ca6c8ee2SEnrico Granata                                                             ConversionFlags),
337ca6c8ee2SEnrico Granata                        const DataExtractor& data,
338ca6c8ee2SEnrico Granata                        Stream& stream,
339ca6c8ee2SEnrico Granata                        char prefix_token,
340ca6c8ee2SEnrico Granata                        char quote,
341ca6c8ee2SEnrico Granata                        uint32_t sourceSize,
342ca6c8ee2SEnrico Granata                        bool escapeNonPrintables)
343ca6c8ee2SEnrico Granata {
344ca6c8ee2SEnrico Granata     if (prefix_token != 0)
345ca6c8ee2SEnrico Granata         stream.Printf("%c",prefix_token);
346ca6c8ee2SEnrico Granata     if (quote != 0)
347ca6c8ee2SEnrico Granata         stream.Printf("%c",quote);
348ca6c8ee2SEnrico Granata     if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd())
349ca6c8ee2SEnrico Granata     {
350ca6c8ee2SEnrico Granata         const int bufferSPSize = data.GetByteSize();
351ca6c8ee2SEnrico Granata         if (sourceSize == 0)
352ca6c8ee2SEnrico Granata         {
353ca6c8ee2SEnrico Granata             const int origin_encoding = 8*sizeof(SourceDataType);
354ca6c8ee2SEnrico Granata             sourceSize = bufferSPSize/(origin_encoding / 4);
355ca6c8ee2SEnrico Granata         }
356ca6c8ee2SEnrico Granata 
357*d7e6a4f2SVince Harron         const SourceDataType *data_ptr = (const SourceDataType*)data.GetDataStart();
358*d7e6a4f2SVince Harron         const SourceDataType *data_end_ptr = data_ptr + sourceSize;
359ca6c8ee2SEnrico Granata 
360ca6c8ee2SEnrico Granata         while (data_ptr < data_end_ptr)
361ca6c8ee2SEnrico Granata         {
362ca6c8ee2SEnrico Granata             if (!*data_ptr)
363ca6c8ee2SEnrico Granata             {
364ca6c8ee2SEnrico Granata                 data_end_ptr = data_ptr;
365ca6c8ee2SEnrico Granata                 break;
366ca6c8ee2SEnrico Granata             }
367ca6c8ee2SEnrico Granata             data_ptr++;
368ca6c8ee2SEnrico Granata         }
369ca6c8ee2SEnrico Granata 
370*d7e6a4f2SVince Harron         data_ptr = (const SourceDataType*)data.GetDataStart();
371ca6c8ee2SEnrico Granata 
372ca6c8ee2SEnrico Granata         lldb::DataBufferSP utf8_data_buffer_sp;
373ca6c8ee2SEnrico Granata         UTF8* utf8_data_ptr = nullptr;
374ca6c8ee2SEnrico Granata         UTF8* utf8_data_end_ptr = nullptr;
375ca6c8ee2SEnrico Granata 
376ca6c8ee2SEnrico Granata         if (ConvertFunction)
377ca6c8ee2SEnrico Granata         {
378ca6c8ee2SEnrico Granata             utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0));
379ca6c8ee2SEnrico Granata             utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes();
380ca6c8ee2SEnrico Granata             utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize();
381*d7e6a4f2SVince Harron             ConvertFunction ( &data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion );
382ca6c8ee2SEnrico Granata             utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr
383ca6c8ee2SEnrico Granata         }
384ca6c8ee2SEnrico Granata         else
385ca6c8ee2SEnrico Granata         {
386ca6c8ee2SEnrico Granata             // just copy the pointers - the cast is necessary to make the compiler happy
387ca6c8ee2SEnrico Granata             // but this should only happen if we are reading UTF8 data
388ca6c8ee2SEnrico Granata             utf8_data_ptr = (UTF8*)data_ptr;
389ca6c8ee2SEnrico Granata             utf8_data_end_ptr = (UTF8*)data_end_ptr;
390ca6c8ee2SEnrico Granata         }
391ca6c8ee2SEnrico Granata 
392ca6c8ee2SEnrico Granata         // since we tend to accept partial data (and even partially malformed data)
393ca6c8ee2SEnrico Granata         // we might end up with no NULL terminator before the end_ptr
394ca6c8ee2SEnrico Granata         // hence we need to take a slower route and ensure we stay within boundaries
395ca6c8ee2SEnrico Granata         for (;utf8_data_ptr < utf8_data_end_ptr;)
396ca6c8ee2SEnrico Granata         {
397ca6c8ee2SEnrico Granata             if (!*utf8_data_ptr)
398ca6c8ee2SEnrico Granata                 break;
399ca6c8ee2SEnrico Granata 
400ca6c8ee2SEnrico Granata             if (escapeNonPrintables)
401ca6c8ee2SEnrico Granata             {
402ca6c8ee2SEnrico Granata                 uint8_t* next_data = nullptr;
403ca6c8ee2SEnrico Granata                 auto printable = GetPrintable(StringElementType::UTF8, utf8_data_ptr, utf8_data_end_ptr, next_data);
404ca6c8ee2SEnrico Granata                 auto printable_bytes = printable.GetBytes();
405ca6c8ee2SEnrico Granata                 auto printable_size = printable.GetSize();
406ca6c8ee2SEnrico Granata                 if (!printable_bytes || !next_data)
407ca6c8ee2SEnrico Granata                 {
408ca6c8ee2SEnrico Granata                     // GetPrintable() failed on us - print one byte in a desperate resync attempt
409ca6c8ee2SEnrico Granata                     printable_bytes = utf8_data_ptr;
410ca6c8ee2SEnrico Granata                     printable_size = 1;
411ca6c8ee2SEnrico Granata                     next_data = utf8_data_ptr+1;
412ca6c8ee2SEnrico Granata                 }
4133acfe1a3SAndy Gibbs                 for (unsigned c = 0; c < printable_size; c++)
414ca6c8ee2SEnrico Granata                     stream.Printf("%c", *(printable_bytes+c));
415ca6c8ee2SEnrico Granata                 utf8_data_ptr = (uint8_t*)next_data;
416ca6c8ee2SEnrico Granata             }
417ca6c8ee2SEnrico Granata             else
418ca6c8ee2SEnrico Granata             {
419ca6c8ee2SEnrico Granata                 stream.Printf("%c",*utf8_data_ptr);
420ca6c8ee2SEnrico Granata                 utf8_data_ptr++;
421ca6c8ee2SEnrico Granata             }
422ca6c8ee2SEnrico Granata         }
423ca6c8ee2SEnrico Granata     }
424ca6c8ee2SEnrico Granata     if (quote != 0)
425ca6c8ee2SEnrico Granata         stream.Printf("%c",quote);
426ca6c8ee2SEnrico Granata     return true;
427ca6c8ee2SEnrico Granata }
428ca6c8ee2SEnrico Granata 
429ebdc1ac0SEnrico Granata lldb_private::formatters::ReadStringAndDumpToStreamOptions::ReadStringAndDumpToStreamOptions (ValueObject& valobj) :
430ebdc1ac0SEnrico Granata     ReadStringAndDumpToStreamOptions()
431ebdc1ac0SEnrico Granata {
432ebdc1ac0SEnrico Granata     SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
433ebdc1ac0SEnrico Granata }
434ebdc1ac0SEnrico Granata 
435ebdc1ac0SEnrico Granata lldb_private::formatters::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (ValueObject& valobj) :
436ebdc1ac0SEnrico Granata     ReadBufferAndDumpToStreamOptions()
437ebdc1ac0SEnrico Granata {
438ebdc1ac0SEnrico Granata     SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
439ebdc1ac0SEnrico Granata }
440ebdc1ac0SEnrico Granata 
441ebdc1ac0SEnrico Granata 
442fd13743fSShawn Best namespace lldb_private
443fd13743fSShawn Best {
444fd13743fSShawn Best 
445fd13743fSShawn Best namespace formatters
446fd13743fSShawn Best {
447fd13743fSShawn Best 
448fd13743fSShawn Best template <>
449fd13743fSShawn Best bool
450fd13743fSShawn Best ReadStringAndDumpToStream<StringElementType::ASCII> (ReadStringAndDumpToStreamOptions options)
451fd13743fSShawn Best {
452fd13743fSShawn Best     assert(options.GetStream() && "need a Stream to print the string to");
453fd13743fSShawn Best     Error my_error;
454fd13743fSShawn Best 
455fd13743fSShawn Best     ProcessSP process_sp(options.GetProcessSP());
456fd13743fSShawn Best 
457fd13743fSShawn Best     if (process_sp.get() == nullptr || options.GetLocation() == 0)
458fd13743fSShawn Best         return false;
459fd13743fSShawn Best 
460fd13743fSShawn Best     size_t size;
461fd13743fSShawn Best 
462fd13743fSShawn Best     if (options.GetSourceSize() == 0)
463fd13743fSShawn Best         size = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
46434042212SEnrico Granata     else if (!options.GetIgnoreMaxLength())
465fd13743fSShawn Best         size = std::min(options.GetSourceSize(),process_sp->GetTarget().GetMaximumSizeOfStringSummary());
46634042212SEnrico Granata     else
46734042212SEnrico Granata         size = options.GetSourceSize();
468fd13743fSShawn Best 
469fd13743fSShawn Best     lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0));
470fd13743fSShawn Best 
471*d7e6a4f2SVince Harron     process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error);
472fd13743fSShawn Best 
473fd13743fSShawn Best     if (my_error.Fail())
474fd13743fSShawn Best         return false;
475fd13743fSShawn Best 
476fd13743fSShawn Best     char prefix_token = options.GetPrefixToken();
477fd13743fSShawn Best     char quote = options.GetQuote();
478fd13743fSShawn Best 
479fd13743fSShawn Best     if (prefix_token != 0)
480fd13743fSShawn Best         options.GetStream()->Printf("%c%c",prefix_token,quote);
481fd13743fSShawn Best     else if (quote != 0)
482fd13743fSShawn Best         options.GetStream()->Printf("%c",quote);
483fd13743fSShawn Best 
484fd13743fSShawn Best     uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize();
485fd13743fSShawn Best 
486fd13743fSShawn Best     // since we tend to accept partial data (and even partially malformed data)
487fd13743fSShawn Best     // we might end up with no NULL terminator before the end_ptr
488fd13743fSShawn Best     // hence we need to take a slower route and ensure we stay within boundaries
489fd13743fSShawn Best     for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);)
490fd13743fSShawn Best     {
491fd13743fSShawn Best         if (options.GetEscapeNonPrintables())
492fd13743fSShawn Best         {
493fd13743fSShawn Best             uint8_t* next_data = nullptr;
494fd13743fSShawn Best             auto printable = GetPrintable(StringElementType::ASCII, data, data_end, next_data);
495fd13743fSShawn Best             auto printable_bytes = printable.GetBytes();
496fd13743fSShawn Best             auto printable_size = printable.GetSize();
497fd13743fSShawn Best             if (!printable_bytes || !next_data)
498fd13743fSShawn Best             {
499fd13743fSShawn Best                 // GetPrintable() failed on us - print one byte in a desperate resync attempt
500fd13743fSShawn Best                 printable_bytes = data;
501fd13743fSShawn Best                 printable_size = 1;
502fd13743fSShawn Best                 next_data = data+1;
503fd13743fSShawn Best             }
5043acfe1a3SAndy Gibbs             for (unsigned c = 0; c < printable_size; c++)
505fd13743fSShawn Best                 options.GetStream()->Printf("%c", *(printable_bytes+c));
506fd13743fSShawn Best             data = (uint8_t*)next_data;
507fd13743fSShawn Best         }
508fd13743fSShawn Best         else
509fd13743fSShawn Best         {
510fd13743fSShawn Best             options.GetStream()->Printf("%c",*data);
511fd13743fSShawn Best             data++;
512fd13743fSShawn Best         }
513fd13743fSShawn Best     }
514fd13743fSShawn Best 
515fd13743fSShawn Best     if (quote != 0)
516fd13743fSShawn Best         options.GetStream()->Printf("%c",quote);
517fd13743fSShawn Best 
518fd13743fSShawn Best     return true;
519fd13743fSShawn Best }
520fd13743fSShawn Best 
521ca6c8ee2SEnrico Granata template<typename SourceDataType>
522ca6c8ee2SEnrico Granata static bool
523ca6c8ee2SEnrico Granata ReadUTFBufferAndDumpToStream (const ReadStringAndDumpToStreamOptions& options,
524ca6c8ee2SEnrico Granata                               ConversionResult (*ConvertFunction) (const SourceDataType**,
525ca6c8ee2SEnrico Granata                                                                    const SourceDataType*,
526ca6c8ee2SEnrico Granata                                                                    UTF8**,
527ca6c8ee2SEnrico Granata                                                                    UTF8*,
528ca6c8ee2SEnrico Granata                                                                    ConversionFlags))
529ca6c8ee2SEnrico Granata {
530ca6c8ee2SEnrico Granata     assert(options.GetStream() && "need a Stream to print the string to");
531ca6c8ee2SEnrico Granata 
532ca6c8ee2SEnrico Granata     if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS)
533ca6c8ee2SEnrico Granata         return false;
534ca6c8ee2SEnrico Granata 
535ca6c8ee2SEnrico Granata     lldb::ProcessSP process_sp(options.GetProcessSP());
536ca6c8ee2SEnrico Granata 
537ca6c8ee2SEnrico Granata     if (!process_sp)
538ca6c8ee2SEnrico Granata         return false;
539ca6c8ee2SEnrico Granata 
540ca6c8ee2SEnrico Granata     const int type_width = sizeof(SourceDataType);
541ca6c8ee2SEnrico Granata     const int origin_encoding = 8 * type_width ;
542ca6c8ee2SEnrico Granata     if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32)
543ca6c8ee2SEnrico Granata         return false;
544ca6c8ee2SEnrico Granata     // if not UTF8, I need a conversion function to return proper UTF8
545ca6c8ee2SEnrico Granata     if (origin_encoding != 8 && !ConvertFunction)
546ca6c8ee2SEnrico Granata         return false;
547ca6c8ee2SEnrico Granata 
548ca6c8ee2SEnrico Granata     if (!options.GetStream())
549ca6c8ee2SEnrico Granata         return false;
550ca6c8ee2SEnrico Granata 
551ca6c8ee2SEnrico Granata     uint32_t sourceSize = options.GetSourceSize();
552ca6c8ee2SEnrico Granata     bool needs_zero_terminator = options.GetNeedsZeroTermination();
553ca6c8ee2SEnrico Granata 
554ca6c8ee2SEnrico Granata     if (!sourceSize)
555ca6c8ee2SEnrico Granata     {
556ca6c8ee2SEnrico Granata         sourceSize = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
557ca6c8ee2SEnrico Granata         needs_zero_terminator = true;
558ca6c8ee2SEnrico Granata     }
559b0e8a55dSEnrico Granata     else if (!options.GetIgnoreMaxLength())
560ca6c8ee2SEnrico Granata         sourceSize = std::min(sourceSize,process_sp->GetTarget().GetMaximumSizeOfStringSummary());
561ca6c8ee2SEnrico Granata 
562ca6c8ee2SEnrico Granata     const int bufferSPSize = sourceSize * type_width;
563ca6c8ee2SEnrico Granata 
564ca6c8ee2SEnrico Granata     lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0));
565ca6c8ee2SEnrico Granata 
566ca6c8ee2SEnrico Granata     if (!buffer_sp->GetBytes())
567ca6c8ee2SEnrico Granata         return false;
568ca6c8ee2SEnrico Granata 
569ca6c8ee2SEnrico Granata     Error error;
570ca6c8ee2SEnrico Granata     char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes());
571ca6c8ee2SEnrico Granata 
572ca6c8ee2SEnrico Granata     if (needs_zero_terminator)
573*d7e6a4f2SVince Harron         process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width);
574ca6c8ee2SEnrico Granata     else
575*d7e6a4f2SVince Harron         process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error);
576ca6c8ee2SEnrico Granata 
577099263b4SEnrico Granata     if (error.Fail())
578ca6c8ee2SEnrico Granata     {
579ca6c8ee2SEnrico Granata         options.GetStream()->Printf("unable to read data");
580ca6c8ee2SEnrico Granata         return true;
581ca6c8ee2SEnrico Granata     }
582ca6c8ee2SEnrico Granata 
583ca6c8ee2SEnrico Granata     DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize());
584ca6c8ee2SEnrico Granata 
585ca6c8ee2SEnrico Granata     return DumpUTFBufferToStream(ConvertFunction, data, *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), sourceSize, options.GetEscapeNonPrintables());
586ca6c8ee2SEnrico Granata }
587ca6c8ee2SEnrico Granata 
588ca6c8ee2SEnrico Granata template <>
589ca6c8ee2SEnrico Granata bool
590fd13743fSShawn Best ReadStringAndDumpToStream<StringElementType::UTF8> (ReadStringAndDumpToStreamOptions options)
591ca6c8ee2SEnrico Granata {
592ca6c8ee2SEnrico Granata     return ReadUTFBufferAndDumpToStream<UTF8>(options,
593ca6c8ee2SEnrico Granata                                               nullptr);
594ca6c8ee2SEnrico Granata }
595ca6c8ee2SEnrico Granata 
596ca6c8ee2SEnrico Granata template <>
597ca6c8ee2SEnrico Granata bool
598fd13743fSShawn Best ReadStringAndDumpToStream<StringElementType::UTF16> (ReadStringAndDumpToStreamOptions options)
599ca6c8ee2SEnrico Granata {
600ca6c8ee2SEnrico Granata     return ReadUTFBufferAndDumpToStream<UTF16>(options,
601ca6c8ee2SEnrico Granata                                                ConvertUTF16toUTF8);
602ca6c8ee2SEnrico Granata }
603ca6c8ee2SEnrico Granata 
604ca6c8ee2SEnrico Granata template <>
605ca6c8ee2SEnrico Granata bool
606fd13743fSShawn Best ReadStringAndDumpToStream<StringElementType::UTF32> (ReadStringAndDumpToStreamOptions options)
607ca6c8ee2SEnrico Granata {
608ca6c8ee2SEnrico Granata     return ReadUTFBufferAndDumpToStream<UTF32>(options,
609ca6c8ee2SEnrico Granata                                                ConvertUTF32toUTF8);
610ca6c8ee2SEnrico Granata }
611ca6c8ee2SEnrico Granata 
612ca6c8ee2SEnrico Granata template <>
613ca6c8ee2SEnrico Granata bool
614fd13743fSShawn Best ReadBufferAndDumpToStream<StringElementType::UTF8> (ReadBufferAndDumpToStreamOptions options)
615ca6c8ee2SEnrico Granata {
616ca6c8ee2SEnrico Granata     assert(options.GetStream() && "need a Stream to print the string to");
617ca6c8ee2SEnrico Granata 
618ca6c8ee2SEnrico Granata     return DumpUTFBufferToStream<UTF8>(nullptr, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables());
619ca6c8ee2SEnrico Granata }
620ca6c8ee2SEnrico Granata 
621ca6c8ee2SEnrico Granata template <>
622ca6c8ee2SEnrico Granata bool
623fd13743fSShawn Best ReadBufferAndDumpToStream<StringElementType::ASCII> (ReadBufferAndDumpToStreamOptions options)
624ca6c8ee2SEnrico Granata {
625ca6c8ee2SEnrico Granata     // treat ASCII the same as UTF8
626ca6c8ee2SEnrico Granata     // FIXME: can we optimize ASCII some more?
627ca6c8ee2SEnrico Granata     return ReadBufferAndDumpToStream<StringElementType::UTF8>(options);
628ca6c8ee2SEnrico Granata }
629ca6c8ee2SEnrico Granata 
630ca6c8ee2SEnrico Granata template <>
631ca6c8ee2SEnrico Granata bool
632fd13743fSShawn Best ReadBufferAndDumpToStream<StringElementType::UTF16> (ReadBufferAndDumpToStreamOptions options)
633ca6c8ee2SEnrico Granata {
634ca6c8ee2SEnrico Granata     assert(options.GetStream() && "need a Stream to print the string to");
635ca6c8ee2SEnrico Granata 
636ca6c8ee2SEnrico Granata     return DumpUTFBufferToStream(ConvertUTF16toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables());
637ca6c8ee2SEnrico Granata }
638ca6c8ee2SEnrico Granata 
639ca6c8ee2SEnrico Granata template <>
640ca6c8ee2SEnrico Granata bool
641fd13743fSShawn Best ReadBufferAndDumpToStream<StringElementType::UTF32> (ReadBufferAndDumpToStreamOptions options)
642ca6c8ee2SEnrico Granata {
643ca6c8ee2SEnrico Granata     assert(options.GetStream() && "need a Stream to print the string to");
644ca6c8ee2SEnrico Granata 
645ca6c8ee2SEnrico Granata     return DumpUTFBufferToStream(ConvertUTF32toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables());
646ca6c8ee2SEnrico Granata }
647fd13743fSShawn Best 
648fd13743fSShawn Best } // namespace formatters
649fd13743fSShawn Best 
650fd13743fSShawn Best } // namespace lldb_private
651