#include <stdint.h>
#include <string.h> // memcpy

#include <string>

// For more information about libc++'s std::string ABI, see:
//
//   https://joellaity.com/2020/01/31/string.html

// A corrupt string which hits the SSO code path, but has an invalid size.
//
// The object is a raw byte image that main() copies over a std::string when
// the two have the same size.
static struct {
  // Set the size of this short-mode string to 116. Note that in short mode,
  // the size is encoded as `size << 1`, so the stored byte is 232. That is
  // far more than the 23 payload bytes below can hold.
  unsigned char size = 116 << 1;

  // 23 garbage bytes for the inline string payload.
  char inline_buf[23] = {0};
} garbage_string_short_mode;

// A corrupt libcxx string in long mode with a payload that contains a utf8
// sequence that's inherently too long.
static unsigned char garbage_utf8_payload1[] = {
  250, // This means that we expect a 5-byte sequence, this is invalid. LLDB
       // should fall back to ASCII printing.
  250, 250, 250
};

// Raw image of the string object itself: capacity word, size word, then the
// pointer to the character data. main() copies it over a std::string when the
// two have the same size.
static struct {
  uint64_t cap = 5;
  uint64_t size = 4; // All four payload bytes are part of the string.
  unsigned char *data = garbage_utf8_payload1;
} garbage_string_long_mode1;

// A corrupt libcxx string in long mode with a payload that contains a utf8
// sequence that's too long to fit in the buffer.
static unsigned char garbage_utf8_payload2[] = {
  240, // This means that we expect a 4-byte sequence, but the buffer is too
       // small for this. LLDB should fall back to ASCII printing.
  240
};

// Raw image of the string object itself: capacity word, size word, then the
// pointer to the character data. main() copies it over a std::string when the
// two have the same size.
static struct {
  uint64_t cap = 3;
  uint64_t size = 2; // Only two bytes are available for the 4-byte sequence.
  unsigned char *data = garbage_utf8_payload2;
} garbage_string_long_mode2;

// A corrupt libcxx string which has an invalid size (i.e. a size greater than
// the capacity of the string).
//
// Raw image of the string object: capacity word, size word, then the pointer
// to the character data. main() copies it over a std::string when the two
// have the same size.
static struct {
  uint64_t cap = 5;
  uint64_t size = 7; // Deliberately larger than `cap`.
  const char *data = "foo";
} garbage_string_long_mode3;

// A corrupt libcxx string in long mode with a payload that would trigger a
// buffer overflow.
//
// Raw image of the string object: capacity word, size word, then the data
// pointer stored as a plain integer so that an arbitrary address can be used.
// main() copies it over a std::string when the two have the same size.
static struct {
  uint64_t cap = 5;
  uint64_t size = 2;
  // Two bytes starting at this address run past the end of a 64-bit address
  // space.
  uint64_t data = 0xfffffffffffffffeULL;
} garbage_string_long_mode4;

// Returns the length of `in_str`.
//
// The function only exists to give a debugger a frame in which `in_str` can
// be inspected. main() calls it with a reference obtained by dereferencing a
// null pointer, so reading the size there is undefined behaviour; that call
// looks deliberate. The marker comment on the return line must stay as is.
size_t touch_string(std::string &in_str)
{
  return in_str.size(); // Break here to look at bad string
}

main()65 int main()
66 {
67     std::wstring wempty(L"");
68     std::wstring s(L"hello world! מזל טוב!");
69     std::wstring S(L"!!!!");
70     const wchar_t *mazeltov = L"מזל טוב";
71     std::string empty("");
72     std::string q("hello world");
73     std::string Q("quite a long std::strin with lots of info inside it");
74     std::string TheVeryLongOne("123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345
6789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890someText12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
89012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890");
75     std::string IHaveEmbeddedZeros("a\0b\0c\0d",7);
76     std::wstring IHaveEmbeddedZerosToo(L"hello world!\0てざ ル゜䋨ミ㠧槊 きゅへ狦穤襩 じゃ馩リョ 䤦監", 38);
77     std::u16string u16_string(u"ß水氶");
78     std::u16string u16_empty(u"");
79     std::u32string u32_string(U"��������");
80     std::u32string u32_empty(U"");
81     std::basic_string<unsigned char> uchar(5, 'a');
82     std::string *null_str = nullptr;
83 
84 #if _LIBCPP_ABI_VERSION == 1
85     std::string garbage1, garbage2, garbage3, garbage4, garbage5;
86     if (sizeof(std::string) == sizeof(garbage_string_short_mode))
87       memcpy((void *)&garbage1, &garbage_string_short_mode, sizeof(std::string));
88     if (sizeof(std::string) == sizeof(garbage_string_long_mode1))
89       memcpy((void *)&garbage2, &garbage_string_long_mode1, sizeof(std::string));
90     if (sizeof(std::string) == sizeof(garbage_string_long_mode2))
91       memcpy((void *)&garbage3, &garbage_string_long_mode2, sizeof(std::string));
92     if (sizeof(std::string) == sizeof(garbage_string_long_mode3))
93       memcpy((void *)&garbage4, &garbage_string_long_mode3, sizeof(std::string));
94     if (sizeof(std::string) == sizeof(garbage_string_long_mode4))
95       memcpy((void *)&garbage5, &garbage_string_long_mode4, sizeof(std::string));
96 #else
97 #error "Test potentially needs to be updated for a new std::string ABI."
98 #endif
99 
100     S.assign(L"!!!!!"); // Set break point at this line.
101     std::string *not_a_string = (std::string *) 0x0;
102     touch_string(*not_a_string);
103     return 0;
104 }
105