1 //===-- StringPrinterTests.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/DataFormatters/StringPrinter.h"
10 #include "lldb/Utility/DataExtractor.h"
11 #include "lldb/Utility/Endian.h"
12 #include "lldb/Utility/StreamString.h"
13 #include "llvm/ADT/Optional.h"
14 #include "llvm/ADT/StringRef.h"
15 #include "llvm/Support/raw_ostream.h"
16 #include "gtest/gtest.h"
17 #include <string>
18 
19 using namespace lldb;
20 using namespace lldb_private;
21 using lldb_private::formatters::StringPrinter;
22 using llvm::Optional;
23 using llvm::StringRef;
24 
// Wrap the string literal x in a pair of double quotes and return the result
// as a std::string. x must itself be a string literal (ordinary or raw): it
// is joined to the surrounding quote characters by adjacent-literal
// concatenation, not by runtime string operations.
#define QUOTE(x) (std::string("\"" x "\""))
26 
27 /// Format \p input according to the specified string encoding and special char
28 /// escape style.
29 template <StringPrinter::StringElementType elem_ty>
30 static Optional<std::string> format(StringRef input,
31                                     StringPrinter::EscapeStyle escape_style) {
32   StreamString out;
33   StringPrinter::ReadBufferAndDumpToStreamOptions opts;
34   opts.SetStream(&out);
35   opts.SetSourceSize(input.size());
36   opts.SetNeedsZeroTermination(true);
37   opts.SetEscapeNonPrintables(true);
38   opts.SetIgnoreMaxLength(false);
39   opts.SetEscapeStyle(escape_style);
40   DataExtractor extractor(input.data(), input.size(),
41                           endian::InlHostByteOrder(), sizeof(void *));
42   opts.SetData(extractor);
43   const bool success = StringPrinter::ReadBufferAndDumpToStream<elem_ty>(opts);
44   if (!success)
45     return llvm::None;
46   return out.GetString().str();
47 }
48 
49 // Test ASCII formatting for C++. This behaves exactly like UTF8 formatting for
50 // C++, although that's questionable (see FIXME in StringPrinter.cpp).
51 TEST(StringPrinterTests, CxxASCII) {
52   auto fmt = [](StringRef str) {
53     return format<StringPrinter::StringElementType::ASCII>(
54         str, StringPrinter::EscapeStyle::CXX);
55   };
56 
57   // Special escapes.
58   EXPECT_EQ(fmt({"\0", 1}), QUOTE(""));
59   EXPECT_EQ(fmt("\a"), QUOTE(R"(\a)"));
60   EXPECT_EQ(fmt("\b"), QUOTE(R"(\b)"));
61   EXPECT_EQ(fmt("\f"), QUOTE(R"(\f)"));
62   EXPECT_EQ(fmt("\n"), QUOTE(R"(\n)"));
63   EXPECT_EQ(fmt("\r"), QUOTE(R"(\r)"));
64   EXPECT_EQ(fmt("\t"), QUOTE(R"(\t)"));
65   EXPECT_EQ(fmt("\v"), QUOTE(R"(\v)"));
66   EXPECT_EQ(fmt("\""), QUOTE(R"(\")"));
67   EXPECT_EQ(fmt("\'"), QUOTE(R"(')"));
68   EXPECT_EQ(fmt("\\"), QUOTE(R"(\\)"));
69 
70   // Printable characters.
71   EXPECT_EQ(fmt("'"), QUOTE("'"));
72   EXPECT_EQ(fmt("a"), QUOTE("a"));
73   EXPECT_EQ(fmt("Z"), QUOTE("Z"));
74   EXPECT_EQ(fmt("��"), QUOTE("��"));
75 
76   // Octal (\nnn), hex (\xnn), extended octal (\unnnn or \Unnnnnnnn).
77   EXPECT_EQ(fmt("\uD55C"), QUOTE("\uD55C"));
78   EXPECT_EQ(fmt("\U00010348"), QUOTE("\U00010348"));
79 
80   // FIXME: These strings are all rejected, but shouldn't be AFAICT. LLDB finds
81   // that these are not valid utf8 sequences, but that's OK, the raw values
82   // should still be printed out.
83   EXPECT_NE(fmt("\376"), QUOTE(R"(\xfe)")); // \376 is 254 in decimal.
84   EXPECT_NE(fmt("\xfe"), QUOTE(R"(\xfe)")); // \xfe is 254 in decimal.
85 }
86 
87 // Test UTF8 formatting for C++.
88 TEST(StringPrinterTests, CxxUTF8) {
89   auto fmt = [](StringRef str) {
90     return format<StringPrinter::StringElementType::UTF8>(
91         str, StringPrinter::EscapeStyle::CXX);
92   };
93 
94   // Special escapes.
95   EXPECT_EQ(fmt({"\0", 1}), QUOTE(""));
96   EXPECT_EQ(fmt("\a"), QUOTE(R"(\a)"));
97   EXPECT_EQ(fmt("\b"), QUOTE(R"(\b)"));
98   EXPECT_EQ(fmt("\f"), QUOTE(R"(\f)"));
99   EXPECT_EQ(fmt("\n"), QUOTE(R"(\n)"));
100   EXPECT_EQ(fmt("\r"), QUOTE(R"(\r)"));
101   EXPECT_EQ(fmt("\t"), QUOTE(R"(\t)"));
102   EXPECT_EQ(fmt("\v"), QUOTE(R"(\v)"));
103   EXPECT_EQ(fmt("\""), QUOTE(R"(\")"));
104   EXPECT_EQ(fmt("\'"), QUOTE(R"(')"));
105   EXPECT_EQ(fmt("\\"), QUOTE(R"(\\)"));
106 
107   // Printable characters.
108   EXPECT_EQ(fmt("'"), QUOTE("'"));
109   EXPECT_EQ(fmt("a"), QUOTE("a"));
110   EXPECT_EQ(fmt("Z"), QUOTE("Z"));
111   EXPECT_EQ(fmt("��"), QUOTE("��"));
112 
113   // Octal (\nnn), hex (\xnn), extended octal (\unnnn or \Unnnnnnnn).
114   EXPECT_EQ(fmt("\uD55C"), QUOTE("\uD55C"));
115   EXPECT_EQ(fmt("\U00010348"), QUOTE("\U00010348"));
116 
117   // FIXME: These strings are all rejected, but shouldn't be AFAICT. LLDB finds
118   // that these are not valid utf8 sequences, but that's OK, the raw values
119   // should still be printed out.
120   EXPECT_NE(fmt("\376"), QUOTE(R"(\xfe)")); // \376 is 254 in decimal.
121   EXPECT_NE(fmt("\xfe"), QUOTE(R"(\xfe)")); // \xfe is 254 in decimal.
122 }
123 
124 // Test UTF8 formatting for Swift.
125 TEST(StringPrinterTests, SwiftUTF8) {
126   auto fmt = [](StringRef str) {
127     return format<StringPrinter::StringElementType::UTF8>(
128         str, StringPrinter::EscapeStyle::Swift);
129   };
130 
131   // Special escapes.
132   EXPECT_EQ(fmt({"\0", 1}), QUOTE(""));
133   EXPECT_EQ(fmt("\a"), QUOTE(R"(\a)"));
134   EXPECT_EQ(fmt("\b"), QUOTE(R"(\u{8})"));
135   EXPECT_EQ(fmt("\f"), QUOTE(R"(\u{c})"));
136   EXPECT_EQ(fmt("\n"), QUOTE(R"(\n)"));
137   EXPECT_EQ(fmt("\r"), QUOTE(R"(\r)"));
138   EXPECT_EQ(fmt("\t"), QUOTE(R"(\t)"));
139   EXPECT_EQ(fmt("\v"), QUOTE(R"(\u{b})"));
140   EXPECT_EQ(fmt("\""), QUOTE(R"(\")"));
141   EXPECT_EQ(fmt("\'"), QUOTE(R"(\')"));
142   EXPECT_EQ(fmt("\\"), QUOTE(R"(\\)"));
143 
144   // Printable characters.
145   EXPECT_EQ(fmt("'"), QUOTE(R"(\')"));
146   EXPECT_EQ(fmt("a"), QUOTE("a"));
147   EXPECT_EQ(fmt("Z"), QUOTE("Z"));
148   EXPECT_EQ(fmt("��"), QUOTE("��"));
149 
150   // Octal (\nnn), hex (\xnn), extended octal (\unnnn or \Unnnnnnnn).
151   EXPECT_EQ(fmt("\uD55C"), QUOTE("\uD55C"));
152   EXPECT_EQ(fmt("\U00010348"), QUOTE("\U00010348"));
153 
154   // FIXME: These strings are all rejected, but shouldn't be AFAICT. LLDB finds
155   // that these are not valid utf8 sequences, but that's OK, the raw values
156   // should still be printed out.
157   EXPECT_NE(fmt("\376"), QUOTE(R"(\xfe)")); // \376 is 254 in decimal.
158   EXPECT_NE(fmt("\xfe"), QUOTE(R"(\xfe)")); // \xfe is 254 in decimal.
159 }
160