180814287SRaphael Isemann //===-- StringPrinter.cpp -------------------------------------------------===//
2ca6c8ee2SEnrico Granata //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6ca6c8ee2SEnrico Granata //
7ca6c8ee2SEnrico Granata //===----------------------------------------------------------------------===//
8ca6c8ee2SEnrico Granata 
9ca6c8ee2SEnrico Granata #include "lldb/DataFormatters/StringPrinter.h"
10ca6c8ee2SEnrico Granata 
11ebdc1ac0SEnrico Granata #include "lldb/Core/Debugger.h"
12ebdc1ac0SEnrico Granata #include "lldb/Core/ValueObject.h"
13ac49453bSEnrico Granata #include "lldb/Target/Language.h"
14ca6c8ee2SEnrico Granata #include "lldb/Target/Process.h"
15ca6c8ee2SEnrico Granata #include "lldb/Target/Target.h"
1697206d57SZachary Turner #include "lldb/Utility/Status.h"
17ca6c8ee2SEnrico Granata 
18ca6c8ee2SEnrico Granata #include "llvm/Support/ConvertUTF.h"
19ca6c8ee2SEnrico Granata 
20ca6c8ee2SEnrico Granata #include <ctype.h>
21ca6c8ee2SEnrico Granata #include <locale>
22796ac80bSJonas Devlieghere #include <memory>
23ca6c8ee2SEnrico Granata 
24ca6c8ee2SEnrico Granata using namespace lldb;
25ca6c8ee2SEnrico Granata using namespace lldb_private;
26ca6c8ee2SEnrico Granata using namespace lldb_private::formatters;
27ca6c8ee2SEnrico Granata 
// We declare this for all values of `type`, but only implement it for those we
// care about. That's good, because we get linker errors for any unsupported
// type.
30ac49453bSEnrico Granata template <lldb_private::formatters::StringPrinter::StringElementType type>
3163e65082SVedant Kumar static StringPrinter::StringPrinterBufferPointer
32ca6c8ee2SEnrico Granata GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next);
33ca6c8ee2SEnrico Granata 
// Unicode counterpart of isprint(): tells whether `codepoint` may be written
// out verbatim. Control characters and a handful of invisible formatting
// characters are reported as non-printable; everything else is printable.
static bool isprint(char32_t codepoint) {
  // C0 controls, plus DEL.
  const bool is_c0_control = codepoint <= 0x1F || codepoint == 0x7F;
  // C1 controls.
  const bool is_c1_control = codepoint >= 0x80 && codepoint <= 0x9F;
  // Line separator / paragraph separator.
  const bool is_separator = codepoint == 0x2028 || codepoint == 0x2029;
  // Bidirectional text control.
  const bool is_bidi_control = codepoint == 0x200E || codepoint == 0x200F ||
                               (codepoint >= 0x202A && codepoint <= 0x202E);
  // Interlinear annotations and, generally, specials.
  const bool is_special = codepoint >= 0xFFF9 && codepoint <= 0xFFFF;

  return !(is_c0_control || is_c1_control || is_separator || is_bidi_control ||
           is_special);
}
61ca6c8ee2SEnrico Granata 
62ca6c8ee2SEnrico Granata template <>
6363e65082SVedant Kumar StringPrinter::StringPrinterBufferPointer
64b9c1b51eSKate Stone GetPrintableImpl<StringPrinter::StringElementType::ASCII>(uint8_t *buffer,
65b9c1b51eSKate Stone                                                           uint8_t *buffer_end,
66b9c1b51eSKate Stone                                                           uint8_t *&next) {
6763e65082SVedant Kumar   StringPrinter::StringPrinterBufferPointer retval = {nullptr};
68ca6c8ee2SEnrico Granata 
69b9c1b51eSKate Stone   switch (*buffer) {
70da04fbb5SEnrico Granata   case 0:
71da04fbb5SEnrico Granata     retval = {"\\0", 2};
72da04fbb5SEnrico Granata     break;
73ca6c8ee2SEnrico Granata   case '\a':
74ca6c8ee2SEnrico Granata     retval = {"\\a", 2};
75ca6c8ee2SEnrico Granata     break;
76ca6c8ee2SEnrico Granata   case '\b':
77ca6c8ee2SEnrico Granata     retval = {"\\b", 2};
78ca6c8ee2SEnrico Granata     break;
79ca6c8ee2SEnrico Granata   case '\f':
80ca6c8ee2SEnrico Granata     retval = {"\\f", 2};
81ca6c8ee2SEnrico Granata     break;
82ca6c8ee2SEnrico Granata   case '\n':
83ca6c8ee2SEnrico Granata     retval = {"\\n", 2};
84ca6c8ee2SEnrico Granata     break;
85ca6c8ee2SEnrico Granata   case '\r':
86ca6c8ee2SEnrico Granata     retval = {"\\r", 2};
87ca6c8ee2SEnrico Granata     break;
88ca6c8ee2SEnrico Granata   case '\t':
89ca6c8ee2SEnrico Granata     retval = {"\\t", 2};
90ca6c8ee2SEnrico Granata     break;
91ca6c8ee2SEnrico Granata   case '\v':
92ca6c8ee2SEnrico Granata     retval = {"\\v", 2};
93ca6c8ee2SEnrico Granata     break;
94ca6c8ee2SEnrico Granata   case '\"':
95ca6c8ee2SEnrico Granata     retval = {"\\\"", 2};
96ca6c8ee2SEnrico Granata     break;
97ca6c8ee2SEnrico Granata   case '\\':
98ca6c8ee2SEnrico Granata     retval = {"\\\\", 2};
99ca6c8ee2SEnrico Granata     break;
100ca6c8ee2SEnrico Granata   default:
101ca6c8ee2SEnrico Granata     if (isprint(*buffer))
102ca6c8ee2SEnrico Granata       retval = {buffer, 1};
103b9c1b51eSKate Stone     else {
104d7e6a4f2SVince Harron       uint8_t *data = new uint8_t[5];
105d7e6a4f2SVince Harron       sprintf((char *)data, "\\x%02x", *buffer);
106d7e6a4f2SVince Harron       retval = {data, 4, [](const uint8_t *c) { delete[] c; }};
107ca6c8ee2SEnrico Granata       break;
108ca6c8ee2SEnrico Granata     }
109ca6c8ee2SEnrico Granata   }
110ca6c8ee2SEnrico Granata 
111ca6c8ee2SEnrico Granata   next = buffer + 1;
112ca6c8ee2SEnrico Granata   return retval;
113ca6c8ee2SEnrico Granata }
114ca6c8ee2SEnrico Granata 
// Helpers that assemble a Unicode code point from the bytes of a 2-, 3- or
// 4-byte UTF-8 sequence. Each one strips the marker bits from the lead byte
// (110xxxxx / 1110xxxx / 11110xxx) and from every continuation byte
// (10xxxxxx) by subtraction, then combines the 6-bit payloads.
//
// No validation is performed: the bytes are assumed to form a well-formed
// sequence of the matching length. Malformed input produces an unspecified
// (but deterministic) value.

// Decodes the 2-byte sequence `c0 c1`.
static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1) {
  return (c0 - 192) * 64 + (c1 - 128);
}

// Decodes the 3-byte sequence `c0 c1 c2`.
static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1,
                                       unsigned char c2) {
  return (c0 - 224) * 4096 + (c1 - 128) * 64 + (c2 - 128);
}

// Decodes the 4-byte sequence `c0 c1 c2 c3`.
static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1,
                                       unsigned char c2, unsigned char c3) {
  // The second byte (c1) carries bits 12-17 of the code point.
  return (c0 - 240) * 262144 + (c1 - 128) * 4096 + (c2 - 128) * 64 + (c3 - 128);
}
126ca6c8ee2SEnrico Granata 
127ca6c8ee2SEnrico Granata template <>
12863e65082SVedant Kumar StringPrinter::StringPrinterBufferPointer
129b9c1b51eSKate Stone GetPrintableImpl<StringPrinter::StringElementType::UTF8>(uint8_t *buffer,
130b9c1b51eSKate Stone                                                          uint8_t *buffer_end,
131b9c1b51eSKate Stone                                                          uint8_t *&next) {
13263e65082SVedant Kumar   StringPrinter::StringPrinterBufferPointer retval{nullptr};
133ca6c8ee2SEnrico Granata 
1347aabad13SVedant Kumar   const unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer);
135ca6c8ee2SEnrico Granata 
1367aabad13SVedant Kumar   // If the utf8 encoded length is invalid, or if there aren't enough bytes to
1377aabad13SVedant Kumar   // print, this is some kind of corrupted string.
1387aabad13SVedant Kumar   if (utf8_encoded_len == 0 || utf8_encoded_len > 4)
139ca6c8ee2SEnrico Granata     return retval;
1407aabad13SVedant Kumar   if ((buffer_end - buffer) < utf8_encoded_len)
1417aabad13SVedant Kumar     // There's no room in the buffer for the utf8 sequence.
1427aabad13SVedant Kumar     return retval;
143ca6c8ee2SEnrico Granata 
144ca6c8ee2SEnrico Granata   char32_t codepoint = 0;
145b9c1b51eSKate Stone   switch (utf8_encoded_len) {
146ca6c8ee2SEnrico Granata   case 1:
147ca6c8ee2SEnrico Granata     // this is just an ASCII byte - ask ASCII
148b9c1b51eSKate Stone     return GetPrintableImpl<StringPrinter::StringElementType::ASCII>(
149b9c1b51eSKate Stone         buffer, buffer_end, next);
150ca6c8ee2SEnrico Granata   case 2:
151b9c1b51eSKate Stone     codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer,
152b9c1b51eSKate Stone                                        (unsigned char)*(buffer + 1));
153ca6c8ee2SEnrico Granata     break;
154ca6c8ee2SEnrico Granata   case 3:
155b9c1b51eSKate Stone     codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer,
156b9c1b51eSKate Stone                                        (unsigned char)*(buffer + 1),
157b9c1b51eSKate Stone                                        (unsigned char)*(buffer + 2));
158ca6c8ee2SEnrico Granata     break;
159ca6c8ee2SEnrico Granata   case 4:
160b9c1b51eSKate Stone     codepoint = ConvertUTF8ToCodePoint(
161b9c1b51eSKate Stone         (unsigned char)*buffer, (unsigned char)*(buffer + 1),
162b9c1b51eSKate Stone         (unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3));
163ca6c8ee2SEnrico Granata     break;
164ca6c8ee2SEnrico Granata   }
165ca6c8ee2SEnrico Granata 
166b9c1b51eSKate Stone   if (codepoint) {
167b9c1b51eSKate Stone     switch (codepoint) {
168da04fbb5SEnrico Granata     case 0:
169da04fbb5SEnrico Granata       retval = {"\\0", 2};
170da04fbb5SEnrico Granata       break;
171ca6c8ee2SEnrico Granata     case '\a':
172ca6c8ee2SEnrico Granata       retval = {"\\a", 2};
173ca6c8ee2SEnrico Granata       break;
174ca6c8ee2SEnrico Granata     case '\b':
175ca6c8ee2SEnrico Granata       retval = {"\\b", 2};
176ca6c8ee2SEnrico Granata       break;
177ca6c8ee2SEnrico Granata     case '\f':
178ca6c8ee2SEnrico Granata       retval = {"\\f", 2};
179ca6c8ee2SEnrico Granata       break;
180ca6c8ee2SEnrico Granata     case '\n':
181ca6c8ee2SEnrico Granata       retval = {"\\n", 2};
182ca6c8ee2SEnrico Granata       break;
183ca6c8ee2SEnrico Granata     case '\r':
184ca6c8ee2SEnrico Granata       retval = {"\\r", 2};
185ca6c8ee2SEnrico Granata       break;
186ca6c8ee2SEnrico Granata     case '\t':
187ca6c8ee2SEnrico Granata       retval = {"\\t", 2};
188ca6c8ee2SEnrico Granata       break;
189ca6c8ee2SEnrico Granata     case '\v':
190ca6c8ee2SEnrico Granata       retval = {"\\v", 2};
191ca6c8ee2SEnrico Granata       break;
192ca6c8ee2SEnrico Granata     case '\"':
193ca6c8ee2SEnrico Granata       retval = {"\\\"", 2};
194ca6c8ee2SEnrico Granata       break;
195ca6c8ee2SEnrico Granata     case '\\':
196ca6c8ee2SEnrico Granata       retval = {"\\\\", 2};
197ca6c8ee2SEnrico Granata       break;
198ca6c8ee2SEnrico Granata     default:
199ca6c8ee2SEnrico Granata       if (isprint(codepoint))
200ca6c8ee2SEnrico Granata         retval = {buffer, utf8_encoded_len};
201b9c1b51eSKate Stone       else {
202d7e6a4f2SVince Harron         uint8_t *data = new uint8_t[11];
203a505be4eSZachary Turner         sprintf((char *)data, "\\U%08x", (unsigned)codepoint);
204d7e6a4f2SVince Harron         retval = {data, 10, [](const uint8_t *c) { delete[] c; }};
205ca6c8ee2SEnrico Granata         break;
206ca6c8ee2SEnrico Granata       }
207ca6c8ee2SEnrico Granata     }
208ca6c8ee2SEnrico Granata 
209ca6c8ee2SEnrico Granata     next = buffer + utf8_encoded_len;
210ca6c8ee2SEnrico Granata     return retval;
211ca6c8ee2SEnrico Granata   }
212ca6c8ee2SEnrico Granata 
2137aabad13SVedant Kumar   // We couldn't figure out how to print this string.
214ca6c8ee2SEnrico Granata   return retval;
215ca6c8ee2SEnrico Granata }
216ca6c8ee2SEnrico Granata 
21705097246SAdrian Prantl // Given a sequence of bytes, this function returns: a sequence of bytes to
21805097246SAdrian Prantl // actually print out + a length the following unscanned position of the buffer
21905097246SAdrian Prantl // is in next
22063e65082SVedant Kumar static StringPrinter::StringPrinterBufferPointer
221b9c1b51eSKate Stone GetPrintable(StringPrinter::StringElementType type, uint8_t *buffer,
222b9c1b51eSKate Stone              uint8_t *buffer_end, uint8_t *&next) {
2237aabad13SVedant Kumar   if (!buffer || buffer >= buffer_end)
224ca6c8ee2SEnrico Granata     return {nullptr};
225ca6c8ee2SEnrico Granata 
226b9c1b51eSKate Stone   switch (type) {
227ac49453bSEnrico Granata   case StringPrinter::StringElementType::ASCII:
228b9c1b51eSKate Stone     return GetPrintableImpl<StringPrinter::StringElementType::ASCII>(
229b9c1b51eSKate Stone         buffer, buffer_end, next);
230ac49453bSEnrico Granata   case StringPrinter::StringElementType::UTF8:
231b9c1b51eSKate Stone     return GetPrintableImpl<StringPrinter::StringElementType::UTF8>(
232b9c1b51eSKate Stone         buffer, buffer_end, next);
233ca6c8ee2SEnrico Granata   default:
234ca6c8ee2SEnrico Granata     return {nullptr};
235ca6c8ee2SEnrico Granata   }
236ca6c8ee2SEnrico Granata }
237ca6c8ee2SEnrico Granata 
238ac49453bSEnrico Granata StringPrinter::EscapingHelper
239b9c1b51eSKate Stone StringPrinter::GetDefaultEscapingHelper(GetPrintableElementType elem_type) {
240b9c1b51eSKate Stone   switch (elem_type) {
241ac49453bSEnrico Granata   case GetPrintableElementType::UTF8:
242b9c1b51eSKate Stone     return [](uint8_t *buffer, uint8_t *buffer_end,
24363e65082SVedant Kumar               uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer {
244b9c1b51eSKate Stone       return GetPrintable(StringPrinter::StringElementType::UTF8, buffer,
245b9c1b51eSKate Stone                           buffer_end, next);
246ac49453bSEnrico Granata     };
247ac49453bSEnrico Granata   case GetPrintableElementType::ASCII:
248b9c1b51eSKate Stone     return [](uint8_t *buffer, uint8_t *buffer_end,
24963e65082SVedant Kumar               uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer {
250b9c1b51eSKate Stone       return GetPrintable(StringPrinter::StringElementType::ASCII, buffer,
251b9c1b51eSKate Stone                           buffer_end, next);
252ac49453bSEnrico Granata     };
253ac49453bSEnrico Granata   }
25443d3a7aeSSaleem Abdulrasool   llvm_unreachable("bad element type");
255ac49453bSEnrico Granata }
256ac49453bSEnrico Granata 
257ca6c8ee2SEnrico Granata // use this call if you already have an LLDB-side buffer for the data
258ca6c8ee2SEnrico Granata template <typename SourceDataType>
259b9c1b51eSKate Stone static bool DumpUTFBufferToStream(
2609091055eSJustin Lebar     llvm::ConversionResult (*ConvertFunction)(const SourceDataType **,
2619091055eSJustin Lebar                                               const SourceDataType *,
2629091055eSJustin Lebar                                               llvm::UTF8 **, llvm::UTF8 *,
2639091055eSJustin Lebar                                               llvm::ConversionFlags),
264b9c1b51eSKate Stone     const StringPrinter::ReadBufferAndDumpToStreamOptions &dump_options) {
265d07f7550SEnrico Granata   Stream &stream(*dump_options.GetStream());
266248a1305SKonrad Kleine   if (dump_options.GetPrefixToken() != nullptr)
267d54f7fb8SEnrico Granata     stream.Printf("%s", dump_options.GetPrefixToken());
268d07f7550SEnrico Granata   if (dump_options.GetQuote() != 0)
269d07f7550SEnrico Granata     stream.Printf("%c", dump_options.GetQuote());
270d07f7550SEnrico Granata   auto data(dump_options.GetData());
271d07f7550SEnrico Granata   auto source_size(dump_options.GetSourceSize());
272b9c1b51eSKate Stone   if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) {
273ca6c8ee2SEnrico Granata     const int bufferSPSize = data.GetByteSize();
274b9c1b51eSKate Stone     if (dump_options.GetSourceSize() == 0) {
275ca6c8ee2SEnrico Granata       const int origin_encoding = 8 * sizeof(SourceDataType);
276d07f7550SEnrico Granata       source_size = bufferSPSize / (origin_encoding / 4);
277ca6c8ee2SEnrico Granata     }
278ca6c8ee2SEnrico Granata 
279b9c1b51eSKate Stone     const SourceDataType *data_ptr =
280b9c1b51eSKate Stone         (const SourceDataType *)data.GetDataStart();
281d07f7550SEnrico Granata     const SourceDataType *data_end_ptr = data_ptr + source_size;
282ca6c8ee2SEnrico Granata 
283d07f7550SEnrico Granata     const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator();
284d07f7550SEnrico Granata 
285b9c1b51eSKate Stone     if (zero_is_terminator) {
286b9c1b51eSKate Stone       while (data_ptr < data_end_ptr) {
287b9c1b51eSKate Stone         if (!*data_ptr) {
288ca6c8ee2SEnrico Granata           data_end_ptr = data_ptr;
289ca6c8ee2SEnrico Granata           break;
290ca6c8ee2SEnrico Granata         }
291ca6c8ee2SEnrico Granata         data_ptr++;
292ca6c8ee2SEnrico Granata       }
293ca6c8ee2SEnrico Granata 
294d7e6a4f2SVince Harron       data_ptr = (const SourceDataType *)data.GetDataStart();
295d07f7550SEnrico Granata     }
296ca6c8ee2SEnrico Granata 
297ca6c8ee2SEnrico Granata     lldb::DataBufferSP utf8_data_buffer_sp;
2989091055eSJustin Lebar     llvm::UTF8 *utf8_data_ptr = nullptr;
2999091055eSJustin Lebar     llvm::UTF8 *utf8_data_end_ptr = nullptr;
300ca6c8ee2SEnrico Granata 
301b9c1b51eSKate Stone     if (ConvertFunction) {
302796ac80bSJonas Devlieghere       utf8_data_buffer_sp =
303796ac80bSJonas Devlieghere           std::make_shared<DataBufferHeap>(4 * bufferSPSize, 0);
3049091055eSJustin Lebar       utf8_data_ptr = (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes();
305ca6c8ee2SEnrico Granata       utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize();
306b9c1b51eSKate Stone       ConvertFunction(&data_ptr, data_end_ptr, &utf8_data_ptr,
3079091055eSJustin Lebar                       utf8_data_end_ptr, llvm::lenientConversion);
308a6682a41SJonas Devlieghere       if (!zero_is_terminator)
3098101f570SEnrico Granata         utf8_data_end_ptr = utf8_data_ptr;
3109091055eSJustin Lebar       // needed because the ConvertFunction will change the value of the
3119091055eSJustin Lebar       // data_ptr.
312b9c1b51eSKate Stone       utf8_data_ptr =
3139091055eSJustin Lebar           (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes();
314b9c1b51eSKate Stone     } else {
315b9c1b51eSKate Stone       // just copy the pointers - the cast is necessary to make the compiler
31605097246SAdrian Prantl       // happy but this should only happen if we are reading UTF8 data
3179091055eSJustin Lebar       utf8_data_ptr = const_cast<llvm::UTF8 *>(
3189091055eSJustin Lebar           reinterpret_cast<const llvm::UTF8 *>(data_ptr));
3199091055eSJustin Lebar       utf8_data_end_ptr = const_cast<llvm::UTF8 *>(
3209091055eSJustin Lebar           reinterpret_cast<const llvm::UTF8 *>(data_end_ptr));
321ca6c8ee2SEnrico Granata     }
322ca6c8ee2SEnrico Granata 
323d07f7550SEnrico Granata     const bool escape_non_printables = dump_options.GetEscapeNonPrintables();
324ac49453bSEnrico Granata     lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback;
325b9c1b51eSKate Stone     if (escape_non_printables) {
326ac49453bSEnrico Granata       if (Language *language = Language::FindPlugin(dump_options.GetLanguage()))
327b9c1b51eSKate Stone         escaping_callback = language->GetStringPrinterEscapingHelper(
328b9c1b51eSKate Stone             lldb_private::formatters::StringPrinter::GetPrintableElementType::
329b9c1b51eSKate Stone                 UTF8);
330ac49453bSEnrico Granata       else
331b9c1b51eSKate Stone         escaping_callback =
332b9c1b51eSKate Stone             lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(
333b9c1b51eSKate Stone                 lldb_private::formatters::StringPrinter::
334b9c1b51eSKate Stone                     GetPrintableElementType::UTF8);
335ac49453bSEnrico Granata     }
336d07f7550SEnrico Granata 
337ca6c8ee2SEnrico Granata     // since we tend to accept partial data (and even partially malformed data)
33805097246SAdrian Prantl     // we might end up with no NULL terminator before the end_ptr hence we need
33905097246SAdrian Prantl     // to take a slower route and ensure we stay within boundaries
340b9c1b51eSKate Stone     for (; utf8_data_ptr < utf8_data_end_ptr;) {
341d07f7550SEnrico Granata       if (zero_is_terminator && !*utf8_data_ptr)
342ca6c8ee2SEnrico Granata         break;
343ca6c8ee2SEnrico Granata 
344b9c1b51eSKate Stone       if (escape_non_printables) {
345ca6c8ee2SEnrico Granata         uint8_t *next_data = nullptr;
346b9c1b51eSKate Stone         auto printable =
347b9c1b51eSKate Stone             escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data);
348ca6c8ee2SEnrico Granata         auto printable_bytes = printable.GetBytes();
349ca6c8ee2SEnrico Granata         auto printable_size = printable.GetSize();
3507aabad13SVedant Kumar 
3517aabad13SVedant Kumar         // We failed to figure out how to print this string.
3527aabad13SVedant Kumar         if (!printable_bytes || !next_data)
3537aabad13SVedant Kumar           return false;
3547aabad13SVedant Kumar 
3553acfe1a3SAndy Gibbs         for (unsigned c = 0; c < printable_size; c++)
356ca6c8ee2SEnrico Granata           stream.Printf("%c", *(printable_bytes + c));
357ca6c8ee2SEnrico Granata         utf8_data_ptr = (uint8_t *)next_data;
358b9c1b51eSKate Stone       } else {
359ca6c8ee2SEnrico Granata         stream.Printf("%c", *utf8_data_ptr);
360ca6c8ee2SEnrico Granata         utf8_data_ptr++;
361ca6c8ee2SEnrico Granata       }
362ca6c8ee2SEnrico Granata     }
363ca6c8ee2SEnrico Granata   }
364d07f7550SEnrico Granata   if (dump_options.GetQuote() != 0)
365d07f7550SEnrico Granata     stream.Printf("%c", dump_options.GetQuote());
366248a1305SKonrad Kleine   if (dump_options.GetSuffixToken() != nullptr)
367d54f7fb8SEnrico Granata     stream.Printf("%s", dump_options.GetSuffixToken());
368b7662929SEnrico Granata   if (dump_options.GetIsTruncated())
369b7662929SEnrico Granata     stream.Printf("...");
370ca6c8ee2SEnrico Granata   return true;
371ca6c8ee2SEnrico Granata }
372ca6c8ee2SEnrico Granata 
373b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions::
374b9c1b51eSKate Stone     ReadStringAndDumpToStreamOptions(ValueObject &valobj)
375b9c1b51eSKate Stone     : ReadStringAndDumpToStreamOptions() {
376b9c1b51eSKate Stone   SetEscapeNonPrintables(
377b9c1b51eSKate Stone       valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
378ebdc1ac0SEnrico Granata }
379ebdc1ac0SEnrico Granata 
380b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::
381b9c1b51eSKate Stone     ReadBufferAndDumpToStreamOptions(ValueObject &valobj)
382b9c1b51eSKate Stone     : ReadBufferAndDumpToStreamOptions() {
383b9c1b51eSKate Stone   SetEscapeNonPrintables(
384b9c1b51eSKate Stone       valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
385ebdc1ac0SEnrico Granata }
386ebdc1ac0SEnrico Granata 
387b9c1b51eSKate Stone lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::
388b9c1b51eSKate Stone     ReadBufferAndDumpToStreamOptions(
389b9c1b51eSKate Stone         const ReadStringAndDumpToStreamOptions &options)
390b9c1b51eSKate Stone     : ReadBufferAndDumpToStreamOptions() {
391d07f7550SEnrico Granata   SetStream(options.GetStream());
392d07f7550SEnrico Granata   SetPrefixToken(options.GetPrefixToken());
393d54f7fb8SEnrico Granata   SetSuffixToken(options.GetSuffixToken());
394d07f7550SEnrico Granata   SetQuote(options.GetQuote());
395d07f7550SEnrico Granata   SetEscapeNonPrintables(options.GetEscapeNonPrintables());
396d07f7550SEnrico Granata   SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator());
397ac49453bSEnrico Granata   SetLanguage(options.GetLanguage());
398d07f7550SEnrico Granata }
399d07f7550SEnrico Granata 
400b9c1b51eSKate Stone namespace lldb_private {
401ebdc1ac0SEnrico Granata 
402b9c1b51eSKate Stone namespace formatters {
403fd13743fSShawn Best 
404fd13743fSShawn Best template <>
405b9c1b51eSKate Stone bool StringPrinter::ReadStringAndDumpToStream<
406b9c1b51eSKate Stone     StringPrinter::StringElementType::ASCII>(
407b9c1b51eSKate Stone     const ReadStringAndDumpToStreamOptions &options) {
408fd13743fSShawn Best   assert(options.GetStream() && "need a Stream to print the string to");
40997206d57SZachary Turner   Status my_error;
410fd13743fSShawn Best 
411fd13743fSShawn Best   ProcessSP process_sp(options.GetProcessSP());
412fd13743fSShawn Best 
413fd13743fSShawn Best   if (process_sp.get() == nullptr || options.GetLocation() == 0)
414fd13743fSShawn Best     return false;
415fd13743fSShawn Best 
416fd13743fSShawn Best   size_t size;
417b7662929SEnrico Granata   const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
418b7662929SEnrico Granata   bool is_truncated = false;
419fd13743fSShawn Best 
420fd13743fSShawn Best   if (options.GetSourceSize() == 0)
421b7662929SEnrico Granata     size = max_size;
422b9c1b51eSKate Stone   else if (!options.GetIgnoreMaxLength()) {
423b7662929SEnrico Granata     size = options.GetSourceSize();
424b9c1b51eSKate Stone     if (size > max_size) {
425b7662929SEnrico Granata       size = max_size;
426b7662929SEnrico Granata       is_truncated = true;
427b7662929SEnrico Granata     }
428b9c1b51eSKate Stone   } else
42934042212SEnrico Granata     size = options.GetSourceSize();
430fd13743fSShawn Best 
431fd13743fSShawn Best   lldb::DataBufferSP buffer_sp(new DataBufferHeap(size, 0));
432fd13743fSShawn Best 
433b9c1b51eSKate Stone   process_sp->ReadCStringFromMemory(
434b9c1b51eSKate Stone       options.GetLocation(), (char *)buffer_sp->GetBytes(), size, my_error);
435fd13743fSShawn Best 
436fd13743fSShawn Best   if (my_error.Fail())
437fd13743fSShawn Best     return false;
438fd13743fSShawn Best 
439d54f7fb8SEnrico Granata   const char *prefix_token = options.GetPrefixToken();
440fd13743fSShawn Best   char quote = options.GetQuote();
441fd13743fSShawn Best 
442248a1305SKonrad Kleine   if (prefix_token != nullptr)
443d54f7fb8SEnrico Granata     options.GetStream()->Printf("%s%c", prefix_token, quote);
444fd13743fSShawn Best   else if (quote != 0)
445fd13743fSShawn Best     options.GetStream()->Printf("%c", quote);
446fd13743fSShawn Best 
447fd13743fSShawn Best   uint8_t *data_end = buffer_sp->GetBytes() + buffer_sp->GetByteSize();
448fd13743fSShawn Best 
449ac49453bSEnrico Granata   const bool escape_non_printables = options.GetEscapeNonPrintables();
450ac49453bSEnrico Granata   lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback;
451b9c1b51eSKate Stone   if (escape_non_printables) {
452ac49453bSEnrico Granata     if (Language *language = Language::FindPlugin(options.GetLanguage()))
453b9c1b51eSKate Stone       escaping_callback = language->GetStringPrinterEscapingHelper(
454b9c1b51eSKate Stone           lldb_private::formatters::StringPrinter::GetPrintableElementType::
455b9c1b51eSKate Stone               ASCII);
456ac49453bSEnrico Granata     else
457b9c1b51eSKate Stone       escaping_callback =
458b9c1b51eSKate Stone           lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(
459b9c1b51eSKate Stone               lldb_private::formatters::StringPrinter::GetPrintableElementType::
460b9c1b51eSKate Stone                   ASCII);
461ac49453bSEnrico Granata   }
462ac49453bSEnrico Granata 
463fd13743fSShawn Best   // since we tend to accept partial data (and even partially malformed data)
46405097246SAdrian Prantl   // we might end up with no NULL terminator before the end_ptr hence we need
46505097246SAdrian Prantl   // to take a slower route and ensure we stay within boundaries
466b9c1b51eSKate Stone   for (uint8_t *data = buffer_sp->GetBytes(); *data && (data < data_end);) {
467b9c1b51eSKate Stone     if (escape_non_printables) {
468fd13743fSShawn Best       uint8_t *next_data = nullptr;
469ac49453bSEnrico Granata       auto printable = escaping_callback(data, data_end, next_data);
470fd13743fSShawn Best       auto printable_bytes = printable.GetBytes();
471fd13743fSShawn Best       auto printable_size = printable.GetSize();
4727aabad13SVedant Kumar 
4737aabad13SVedant Kumar       // We failed to figure out how to print this string.
4747aabad13SVedant Kumar       if (!printable_bytes || !next_data)
4757aabad13SVedant Kumar         return false;
4767aabad13SVedant Kumar 
4773acfe1a3SAndy Gibbs       for (unsigned c = 0; c < printable_size; c++)
478fd13743fSShawn Best         options.GetStream()->Printf("%c", *(printable_bytes + c));
479fd13743fSShawn Best       data = (uint8_t *)next_data;
480b9c1b51eSKate Stone     } else {
481fd13743fSShawn Best       options.GetStream()->Printf("%c", *data);
482fd13743fSShawn Best       data++;
483fd13743fSShawn Best     }
484fd13743fSShawn Best   }
485fd13743fSShawn Best 
486d54f7fb8SEnrico Granata   const char *suffix_token = options.GetSuffixToken();
487d54f7fb8SEnrico Granata 
488248a1305SKonrad Kleine   if (suffix_token != nullptr)
489d54f7fb8SEnrico Granata     options.GetStream()->Printf("%c%s", quote, suffix_token);
490d54f7fb8SEnrico Granata   else if (quote != 0)
491fd13743fSShawn Best     options.GetStream()->Printf("%c", quote);
492fd13743fSShawn Best 
493b7662929SEnrico Granata   if (is_truncated)
494b7662929SEnrico Granata     options.GetStream()->Printf("...");
495b7662929SEnrico Granata 
496fd13743fSShawn Best   return true;
497fd13743fSShawn Best }
498fd13743fSShawn Best 
499ca6c8ee2SEnrico Granata template <typename SourceDataType>
500b9c1b51eSKate Stone static bool ReadUTFBufferAndDumpToStream(
501b9c1b51eSKate Stone     const StringPrinter::ReadStringAndDumpToStreamOptions &options,
5029091055eSJustin Lebar     llvm::ConversionResult (*ConvertFunction)(const SourceDataType **,
5039091055eSJustin Lebar                                               const SourceDataType *,
5049091055eSJustin Lebar                                               llvm::UTF8 **, llvm::UTF8 *,
5059091055eSJustin Lebar                                               llvm::ConversionFlags)) {
506ca6c8ee2SEnrico Granata   assert(options.GetStream() && "need a Stream to print the string to");
507ca6c8ee2SEnrico Granata 
508b9c1b51eSKate Stone   if (options.GetLocation() == 0 ||
509b9c1b51eSKate Stone       options.GetLocation() == LLDB_INVALID_ADDRESS)
510ca6c8ee2SEnrico Granata     return false;
511ca6c8ee2SEnrico Granata 
512ca6c8ee2SEnrico Granata   lldb::ProcessSP process_sp(options.GetProcessSP());
513ca6c8ee2SEnrico Granata 
514ca6c8ee2SEnrico Granata   if (!process_sp)
515ca6c8ee2SEnrico Granata     return false;
516ca6c8ee2SEnrico Granata 
517ca6c8ee2SEnrico Granata   const int type_width = sizeof(SourceDataType);
518ca6c8ee2SEnrico Granata   const int origin_encoding = 8 * type_width;
519ca6c8ee2SEnrico Granata   if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32)
520ca6c8ee2SEnrico Granata     return false;
521ca6c8ee2SEnrico Granata   // if not UTF8, I need a conversion function to return proper UTF8
522ca6c8ee2SEnrico Granata   if (origin_encoding != 8 && !ConvertFunction)
523ca6c8ee2SEnrico Granata     return false;
524ca6c8ee2SEnrico Granata 
525ca6c8ee2SEnrico Granata   if (!options.GetStream())
526ca6c8ee2SEnrico Granata     return false;
527ca6c8ee2SEnrico Granata 
528*7b244258SRaphael Isemann   uint32_t sourceSize;
529ca6c8ee2SEnrico Granata   bool needs_zero_terminator = options.GetNeedsZeroTermination();
530ca6c8ee2SEnrico Granata 
531b7662929SEnrico Granata   bool is_truncated = false;
532b7662929SEnrico Granata   const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
533b7662929SEnrico Granata 
534*7b244258SRaphael Isemann   if (options.HasSourceSize()) {
535*7b244258SRaphael Isemann     sourceSize = options.GetSourceSize();
536*7b244258SRaphael Isemann     if (!options.GetIgnoreMaxLength()) {
537b9c1b51eSKate Stone       if (sourceSize > max_size) {
538b7662929SEnrico Granata         sourceSize = max_size;
539b7662929SEnrico Granata         is_truncated = true;
540b7662929SEnrico Granata       }
541b7662929SEnrico Granata     }
542*7b244258SRaphael Isemann   } else {
543*7b244258SRaphael Isemann     sourceSize = max_size;
544*7b244258SRaphael Isemann     needs_zero_terminator = true;
545*7b244258SRaphael Isemann   }
546ca6c8ee2SEnrico Granata 
547ca6c8ee2SEnrico Granata   const int bufferSPSize = sourceSize * type_width;
548ca6c8ee2SEnrico Granata 
549ca6c8ee2SEnrico Granata   lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize, 0));
550ca6c8ee2SEnrico Granata 
551*7b244258SRaphael Isemann   // Check if we got bytes. We never get any bytes if we have an empty
552*7b244258SRaphael Isemann   // string, but we still continue so that we end up actually printing
553*7b244258SRaphael Isemann   // an empty string ("").
554*7b244258SRaphael Isemann   if (sourceSize != 0 && !buffer_sp->GetBytes())
555ca6c8ee2SEnrico Granata     return false;
556ca6c8ee2SEnrico Granata 
55797206d57SZachary Turner   Status error;
558ca6c8ee2SEnrico Granata   char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes());
559ca6c8ee2SEnrico Granata 
560ca6c8ee2SEnrico Granata   if (needs_zero_terminator)
561b9c1b51eSKate Stone     process_sp->ReadStringFromMemory(options.GetLocation(), buffer,
562b9c1b51eSKate Stone                                      bufferSPSize, error, type_width);
563ca6c8ee2SEnrico Granata   else
564b9c1b51eSKate Stone     process_sp->ReadMemoryFromInferior(options.GetLocation(),
565b9c1b51eSKate Stone                                        (char *)buffer_sp->GetBytes(),
566b9c1b51eSKate Stone                                        bufferSPSize, error);
567ca6c8ee2SEnrico Granata 
568b9c1b51eSKate Stone   if (error.Fail()) {
569ca6c8ee2SEnrico Granata     options.GetStream()->Printf("unable to read data");
570ca6c8ee2SEnrico Granata     return true;
571ca6c8ee2SEnrico Granata   }
572ca6c8ee2SEnrico Granata 
573b9c1b51eSKate Stone   DataExtractor data(buffer_sp, process_sp->GetByteOrder(),
574b9c1b51eSKate Stone                      process_sp->GetAddressByteSize());
575ca6c8ee2SEnrico Granata 
576ac49453bSEnrico Granata   StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options);
577d07f7550SEnrico Granata   dump_options.SetData(data);
578d07f7550SEnrico Granata   dump_options.SetSourceSize(sourceSize);
579b7662929SEnrico Granata   dump_options.SetIsTruncated(is_truncated);
580d07f7550SEnrico Granata 
581d07f7550SEnrico Granata   return DumpUTFBufferToStream(ConvertFunction, dump_options);
582ca6c8ee2SEnrico Granata }
583ca6c8ee2SEnrico Granata 
584ca6c8ee2SEnrico Granata template <>
585b9c1b51eSKate Stone bool StringPrinter::ReadStringAndDumpToStream<
586b9c1b51eSKate Stone     StringPrinter::StringElementType::UTF8>(
587b9c1b51eSKate Stone     const ReadStringAndDumpToStreamOptions &options) {
5889091055eSJustin Lebar   return ReadUTFBufferAndDumpToStream<llvm::UTF8>(options, nullptr);
589ca6c8ee2SEnrico Granata }
590ca6c8ee2SEnrico Granata 
591ca6c8ee2SEnrico Granata template <>
592b9c1b51eSKate Stone bool StringPrinter::ReadStringAndDumpToStream<
593b9c1b51eSKate Stone     StringPrinter::StringElementType::UTF16>(
594b9c1b51eSKate Stone     const ReadStringAndDumpToStreamOptions &options) {
5959091055eSJustin Lebar   return ReadUTFBufferAndDumpToStream<llvm::UTF16>(options,
5969091055eSJustin Lebar                                                    llvm::ConvertUTF16toUTF8);
597ca6c8ee2SEnrico Granata }
598ca6c8ee2SEnrico Granata 
599ca6c8ee2SEnrico Granata template <>
600b9c1b51eSKate Stone bool StringPrinter::ReadStringAndDumpToStream<
601b9c1b51eSKate Stone     StringPrinter::StringElementType::UTF32>(
602b9c1b51eSKate Stone     const ReadStringAndDumpToStreamOptions &options) {
6039091055eSJustin Lebar   return ReadUTFBufferAndDumpToStream<llvm::UTF32>(options,
6049091055eSJustin Lebar                                                    llvm::ConvertUTF32toUTF8);
605ca6c8ee2SEnrico Granata }
606ca6c8ee2SEnrico Granata 
607ca6c8ee2SEnrico Granata template <>
608b9c1b51eSKate Stone bool StringPrinter::ReadBufferAndDumpToStream<
609b9c1b51eSKate Stone     StringPrinter::StringElementType::UTF8>(
610b9c1b51eSKate Stone     const ReadBufferAndDumpToStreamOptions &options) {
611ca6c8ee2SEnrico Granata   assert(options.GetStream() && "need a Stream to print the string to");
612ca6c8ee2SEnrico Granata 
6139091055eSJustin Lebar   return DumpUTFBufferToStream<llvm::UTF8>(nullptr, options);
614ca6c8ee2SEnrico Granata }
615ca6c8ee2SEnrico Granata 
616ca6c8ee2SEnrico Granata template <>
617b9c1b51eSKate Stone bool StringPrinter::ReadBufferAndDumpToStream<
618b9c1b51eSKate Stone     StringPrinter::StringElementType::ASCII>(
619b9c1b51eSKate Stone     const ReadBufferAndDumpToStreamOptions &options) {
620ca6c8ee2SEnrico Granata   // treat ASCII the same as UTF8
621ca6c8ee2SEnrico Granata   // FIXME: can we optimize ASCII some more?
622ca6c8ee2SEnrico Granata   return ReadBufferAndDumpToStream<StringElementType::UTF8>(options);
623ca6c8ee2SEnrico Granata }
624ca6c8ee2SEnrico Granata 
625ca6c8ee2SEnrico Granata template <>
626b9c1b51eSKate Stone bool StringPrinter::ReadBufferAndDumpToStream<
627b9c1b51eSKate Stone     StringPrinter::StringElementType::UTF16>(
628b9c1b51eSKate Stone     const ReadBufferAndDumpToStreamOptions &options) {
629ca6c8ee2SEnrico Granata   assert(options.GetStream() && "need a Stream to print the string to");
630ca6c8ee2SEnrico Granata 
6319091055eSJustin Lebar   return DumpUTFBufferToStream(llvm::ConvertUTF16toUTF8, options);
632ca6c8ee2SEnrico Granata }
633ca6c8ee2SEnrico Granata 
634ca6c8ee2SEnrico Granata template <>
635b9c1b51eSKate Stone bool StringPrinter::ReadBufferAndDumpToStream<
636b9c1b51eSKate Stone     StringPrinter::StringElementType::UTF32>(
637b9c1b51eSKate Stone     const ReadBufferAndDumpToStreamOptions &options) {
638ca6c8ee2SEnrico Granata   assert(options.GetStream() && "need a Stream to print the string to");
639ca6c8ee2SEnrico Granata 
6409091055eSJustin Lebar   return DumpUTFBufferToStream(llvm::ConvertUTF32toUTF8, options);
641ca6c8ee2SEnrico Granata }
642fd13743fSShawn Best 
643fd13743fSShawn Best } // namespace formatters
644fd13743fSShawn Best 
645fd13743fSShawn Best } // namespace lldb_private
646