1f678e45dSDimitry Andric //===-- ConstString.cpp -----------------------------------------*- C++ -*-===//
2f678e45dSDimitry Andric //
3f678e45dSDimitry Andric // The LLVM Compiler Infrastructure
4f678e45dSDimitry Andric //
5f678e45dSDimitry Andric // This file is distributed under the University of Illinois Open Source
6f678e45dSDimitry Andric // License. See LICENSE.TXT for details.
7f678e45dSDimitry Andric //
8f678e45dSDimitry Andric //===----------------------------------------------------------------------===//
9f678e45dSDimitry Andric
10f678e45dSDimitry Andric #include "lldb/Utility/ConstString.h"
11f678e45dSDimitry Andric
12f678e45dSDimitry Andric #include "lldb/Utility/Stream.h"
13f678e45dSDimitry Andric
14f678e45dSDimitry Andric #include "llvm/ADT/StringMap.h"
15*b5893f02SDimitry Andric #include "llvm/ADT/iterator.h"
16*b5893f02SDimitry Andric #include "llvm/Support/Allocator.h"
17*b5893f02SDimitry Andric #include "llvm/Support/DJB.h"
18*b5893f02SDimitry Andric #include "llvm/Support/FormatProviders.h"
19f678e45dSDimitry Andric #include "llvm/Support/RWMutex.h"
20f678e45dSDimitry Andric #include "llvm/Support/Threading.h"
21f678e45dSDimitry Andric
22*b5893f02SDimitry Andric #include <algorithm>
23f678e45dSDimitry Andric #include <array>
24*b5893f02SDimitry Andric #include <utility>
25f678e45dSDimitry Andric
26*b5893f02SDimitry Andric #include <inttypes.h>
27*b5893f02SDimitry Andric #include <stdint.h>
28*b5893f02SDimitry Andric #include <string.h>
29f678e45dSDimitry Andric
30f678e45dSDimitry Andric using namespace lldb_private;
31f678e45dSDimitry Andric
32f678e45dSDimitry Andric class Pool {
33f678e45dSDimitry Andric public:
34f678e45dSDimitry Andric typedef const char *StringPoolValueType;
35f678e45dSDimitry Andric typedef llvm::StringMap<StringPoolValueType, llvm::BumpPtrAllocator>
36f678e45dSDimitry Andric StringPool;
37f678e45dSDimitry Andric typedef llvm::StringMapEntry<StringPoolValueType> StringPoolEntryType;
38f678e45dSDimitry Andric
39f678e45dSDimitry Andric static StringPoolEntryType &
GetStringMapEntryFromKeyData(const char * keyData)40f678e45dSDimitry Andric GetStringMapEntryFromKeyData(const char *keyData) {
41f37b6182SDimitry Andric return StringPoolEntryType::GetStringMapEntryFromKeyData(keyData);
42f678e45dSDimitry Andric }
43f678e45dSDimitry Andric
GetConstCStringLength(const char * ccstr)44f37b6182SDimitry Andric static size_t GetConstCStringLength(const char *ccstr) {
45f678e45dSDimitry Andric if (ccstr != nullptr) {
464ba319b5SDimitry Andric // Since the entry is read only, and we derive the entry entirely from
474ba319b5SDimitry Andric // the pointer, we don't need the lock.
48f678e45dSDimitry Andric const StringPoolEntryType &entry = GetStringMapEntryFromKeyData(ccstr);
49f678e45dSDimitry Andric return entry.getKey().size();
50f678e45dSDimitry Andric }
51f678e45dSDimitry Andric return 0;
52f678e45dSDimitry Andric }
53f678e45dSDimitry Andric
GetMangledCounterpart(const char * ccstr) const54f678e45dSDimitry Andric StringPoolValueType GetMangledCounterpart(const char *ccstr) const {
55f678e45dSDimitry Andric if (ccstr != nullptr) {
56f678e45dSDimitry Andric const uint8_t h = hash(llvm::StringRef(ccstr));
57f678e45dSDimitry Andric llvm::sys::SmartScopedReader<false> rlock(m_string_pools[h].m_mutex);
58f678e45dSDimitry Andric return GetStringMapEntryFromKeyData(ccstr).getValue();
59f678e45dSDimitry Andric }
60f678e45dSDimitry Andric return nullptr;
61f678e45dSDimitry Andric }
62f678e45dSDimitry Andric
SetMangledCounterparts(const char * key_ccstr,const char * value_ccstr)63f678e45dSDimitry Andric bool SetMangledCounterparts(const char *key_ccstr, const char *value_ccstr) {
64f678e45dSDimitry Andric if (key_ccstr != nullptr && value_ccstr != nullptr) {
65f678e45dSDimitry Andric {
66f678e45dSDimitry Andric const uint8_t h = hash(llvm::StringRef(key_ccstr));
67f678e45dSDimitry Andric llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
68f678e45dSDimitry Andric GetStringMapEntryFromKeyData(key_ccstr).setValue(value_ccstr);
69f678e45dSDimitry Andric }
70f678e45dSDimitry Andric {
71f678e45dSDimitry Andric const uint8_t h = hash(llvm::StringRef(value_ccstr));
72f678e45dSDimitry Andric llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
73f678e45dSDimitry Andric GetStringMapEntryFromKeyData(value_ccstr).setValue(key_ccstr);
74f678e45dSDimitry Andric }
75f678e45dSDimitry Andric return true;
76f678e45dSDimitry Andric }
77f678e45dSDimitry Andric return false;
78f678e45dSDimitry Andric }
79f678e45dSDimitry Andric
GetConstCString(const char * cstr)80f678e45dSDimitry Andric const char *GetConstCString(const char *cstr) {
81f678e45dSDimitry Andric if (cstr != nullptr)
82f678e45dSDimitry Andric return GetConstCStringWithLength(cstr, strlen(cstr));
83f678e45dSDimitry Andric return nullptr;
84f678e45dSDimitry Andric }
85f678e45dSDimitry Andric
GetConstCStringWithLength(const char * cstr,size_t cstr_len)86f678e45dSDimitry Andric const char *GetConstCStringWithLength(const char *cstr, size_t cstr_len) {
87f678e45dSDimitry Andric if (cstr != nullptr)
88f678e45dSDimitry Andric return GetConstCStringWithStringRef(llvm::StringRef(cstr, cstr_len));
89f678e45dSDimitry Andric return nullptr;
90f678e45dSDimitry Andric }
91f678e45dSDimitry Andric
GetConstCStringWithStringRef(const llvm::StringRef & string_ref)92f678e45dSDimitry Andric const char *GetConstCStringWithStringRef(const llvm::StringRef &string_ref) {
93f678e45dSDimitry Andric if (string_ref.data()) {
94f678e45dSDimitry Andric const uint8_t h = hash(string_ref);
95f678e45dSDimitry Andric
96f678e45dSDimitry Andric {
97f678e45dSDimitry Andric llvm::sys::SmartScopedReader<false> rlock(m_string_pools[h].m_mutex);
98f678e45dSDimitry Andric auto it = m_string_pools[h].m_string_map.find(string_ref);
99f678e45dSDimitry Andric if (it != m_string_pools[h].m_string_map.end())
100f678e45dSDimitry Andric return it->getKeyData();
101f678e45dSDimitry Andric }
102f678e45dSDimitry Andric
103f678e45dSDimitry Andric llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
104f678e45dSDimitry Andric StringPoolEntryType &entry =
105f678e45dSDimitry Andric *m_string_pools[h]
106f678e45dSDimitry Andric .m_string_map.insert(std::make_pair(string_ref, nullptr))
107f678e45dSDimitry Andric .first;
108f678e45dSDimitry Andric return entry.getKeyData();
109f678e45dSDimitry Andric }
110f678e45dSDimitry Andric return nullptr;
111f678e45dSDimitry Andric }
112f678e45dSDimitry Andric
113f678e45dSDimitry Andric const char *
GetConstCStringAndSetMangledCounterPart(llvm::StringRef demangled,const char * mangled_ccstr)114*b5893f02SDimitry Andric GetConstCStringAndSetMangledCounterPart(llvm::StringRef demangled,
115f678e45dSDimitry Andric const char *mangled_ccstr) {
116f678e45dSDimitry Andric const char *demangled_ccstr = nullptr;
117f678e45dSDimitry Andric
118f678e45dSDimitry Andric {
119*b5893f02SDimitry Andric const uint8_t h = hash(demangled);
120f678e45dSDimitry Andric llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
121f678e45dSDimitry Andric
122*b5893f02SDimitry Andric // Make or update string pool entry with the mangled counterpart
123*b5893f02SDimitry Andric StringPool &map = m_string_pools[h].m_string_map;
124*b5893f02SDimitry Andric StringPoolEntryType &entry = *map.try_emplace(demangled).first;
125*b5893f02SDimitry Andric
126*b5893f02SDimitry Andric entry.second = mangled_ccstr;
127f678e45dSDimitry Andric
128f678e45dSDimitry Andric // Extract the const version of the demangled_cstr
129f678e45dSDimitry Andric demangled_ccstr = entry.getKeyData();
130f678e45dSDimitry Andric }
131f678e45dSDimitry Andric
132f678e45dSDimitry Andric {
133f678e45dSDimitry Andric // Now assign the demangled const string as the counterpart of the
134f678e45dSDimitry Andric // mangled const string...
135f678e45dSDimitry Andric const uint8_t h = hash(llvm::StringRef(mangled_ccstr));
136f678e45dSDimitry Andric llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
137f678e45dSDimitry Andric GetStringMapEntryFromKeyData(mangled_ccstr).setValue(demangled_ccstr);
138f678e45dSDimitry Andric }
139f678e45dSDimitry Andric
140f678e45dSDimitry Andric // Return the constant demangled C string
141f678e45dSDimitry Andric return demangled_ccstr;
142f678e45dSDimitry Andric }
143f678e45dSDimitry Andric
GetConstTrimmedCStringWithLength(const char * cstr,size_t cstr_len)144f678e45dSDimitry Andric const char *GetConstTrimmedCStringWithLength(const char *cstr,
145f678e45dSDimitry Andric size_t cstr_len) {
146f678e45dSDimitry Andric if (cstr != nullptr) {
147f678e45dSDimitry Andric const size_t trimmed_len = std::min<size_t>(strlen(cstr), cstr_len);
148f678e45dSDimitry Andric return GetConstCStringWithLength(cstr, trimmed_len);
149f678e45dSDimitry Andric }
150f678e45dSDimitry Andric return nullptr;
151f678e45dSDimitry Andric }
152f678e45dSDimitry Andric
153f678e45dSDimitry Andric //------------------------------------------------------------------
1544ba319b5SDimitry Andric // Return the size in bytes that this object and any items in its collection
1554ba319b5SDimitry Andric // of uniqued strings + data count values takes in memory.
156f678e45dSDimitry Andric //------------------------------------------------------------------
MemorySize() const157f678e45dSDimitry Andric size_t MemorySize() const {
158f678e45dSDimitry Andric size_t mem_size = sizeof(Pool);
159f678e45dSDimitry Andric for (const auto &pool : m_string_pools) {
160f678e45dSDimitry Andric llvm::sys::SmartScopedReader<false> rlock(pool.m_mutex);
161f678e45dSDimitry Andric for (const auto &entry : pool.m_string_map)
162f678e45dSDimitry Andric mem_size += sizeof(StringPoolEntryType) + entry.getKey().size();
163f678e45dSDimitry Andric }
164f678e45dSDimitry Andric return mem_size;
165f678e45dSDimitry Andric }
166f678e45dSDimitry Andric
167f678e45dSDimitry Andric protected:
hash(const llvm::StringRef & s) const168f678e45dSDimitry Andric uint8_t hash(const llvm::StringRef &s) const {
1694ba319b5SDimitry Andric uint32_t h = llvm::djbHash(s);
170f678e45dSDimitry Andric return ((h >> 24) ^ (h >> 16) ^ (h >> 8) ^ h) & 0xff;
171f678e45dSDimitry Andric }
172f678e45dSDimitry Andric
173f678e45dSDimitry Andric struct PoolEntry {
174f678e45dSDimitry Andric mutable llvm::sys::SmartRWMutex<false> m_mutex;
175f678e45dSDimitry Andric StringPool m_string_map;
176f678e45dSDimitry Andric };
177f678e45dSDimitry Andric
178f678e45dSDimitry Andric std::array<PoolEntry, 256> m_string_pools;
179f678e45dSDimitry Andric };
180f678e45dSDimitry Andric
181f678e45dSDimitry Andric //----------------------------------------------------------------------
1824ba319b5SDimitry Andric // Frameworks and dylibs aren't supposed to have global C++ initializers so we
1834ba319b5SDimitry Andric // hide the string pool in a static function so that it will get initialized on
1844ba319b5SDimitry Andric // the first call to this static function.
185f678e45dSDimitry Andric //
1864ba319b5SDimitry Andric // Note, for now we make the string pool a pointer to the pool, because we
1874ba319b5SDimitry Andric // can't guarantee that some objects won't get destroyed after the global
1884ba319b5SDimitry Andric // destructor chain is run, and trying to make sure no destructors touch
1894ba319b5SDimitry Andric // ConstStrings is difficult. So we leak the pool instead.
190f678e45dSDimitry Andric //----------------------------------------------------------------------
StringPool()191f678e45dSDimitry Andric static Pool &StringPool() {
192f678e45dSDimitry Andric static llvm::once_flag g_pool_initialization_flag;
193f678e45dSDimitry Andric static Pool *g_string_pool = nullptr;
194f678e45dSDimitry Andric
195f678e45dSDimitry Andric llvm::call_once(g_pool_initialization_flag,
196f678e45dSDimitry Andric []() { g_string_pool = new Pool(); });
197f678e45dSDimitry Andric
198f678e45dSDimitry Andric return *g_string_pool;
199f678e45dSDimitry Andric }
200f678e45dSDimitry Andric
ConstString(const char * cstr)201f678e45dSDimitry Andric ConstString::ConstString(const char *cstr)
202f678e45dSDimitry Andric : m_string(StringPool().GetConstCString(cstr)) {}
203f678e45dSDimitry Andric
ConstString(const char * cstr,size_t cstr_len)204f678e45dSDimitry Andric ConstString::ConstString(const char *cstr, size_t cstr_len)
205f678e45dSDimitry Andric : m_string(StringPool().GetConstCStringWithLength(cstr, cstr_len)) {}
206f678e45dSDimitry Andric
ConstString(const llvm::StringRef & s)207f678e45dSDimitry Andric ConstString::ConstString(const llvm::StringRef &s)
208f678e45dSDimitry Andric : m_string(StringPool().GetConstCStringWithLength(s.data(), s.size())) {}
209f678e45dSDimitry Andric
operator <(const ConstString & rhs) const210f678e45dSDimitry Andric bool ConstString::operator<(const ConstString &rhs) const {
211f678e45dSDimitry Andric if (m_string == rhs.m_string)
212f678e45dSDimitry Andric return false;
213f678e45dSDimitry Andric
214f37b6182SDimitry Andric llvm::StringRef lhs_string_ref(GetStringRef());
215f37b6182SDimitry Andric llvm::StringRef rhs_string_ref(rhs.GetStringRef());
216f678e45dSDimitry Andric
217f678e45dSDimitry Andric // If both have valid C strings, then return the comparison
218f678e45dSDimitry Andric if (lhs_string_ref.data() && rhs_string_ref.data())
219f678e45dSDimitry Andric return lhs_string_ref < rhs_string_ref;
220f678e45dSDimitry Andric
221f678e45dSDimitry Andric // Else one of them was nullptr, so if LHS is nullptr then it is less than
222f678e45dSDimitry Andric return lhs_string_ref.data() == nullptr;
223f678e45dSDimitry Andric }
224f678e45dSDimitry Andric
operator <<(Stream & s,const ConstString & str)225f678e45dSDimitry Andric Stream &lldb_private::operator<<(Stream &s, const ConstString &str) {
226f678e45dSDimitry Andric const char *cstr = str.GetCString();
227f678e45dSDimitry Andric if (cstr != nullptr)
228f678e45dSDimitry Andric s << cstr;
229f678e45dSDimitry Andric
230f678e45dSDimitry Andric return s;
231f678e45dSDimitry Andric }
232f678e45dSDimitry Andric
GetLength() const233f678e45dSDimitry Andric size_t ConstString::GetLength() const {
234f37b6182SDimitry Andric return Pool::GetConstCStringLength(m_string);
235f678e45dSDimitry Andric }
236f678e45dSDimitry Andric
Equals(const ConstString & lhs,const ConstString & rhs,const bool case_sensitive)237f678e45dSDimitry Andric bool ConstString::Equals(const ConstString &lhs, const ConstString &rhs,
238f678e45dSDimitry Andric const bool case_sensitive) {
239f678e45dSDimitry Andric if (lhs.m_string == rhs.m_string)
240f678e45dSDimitry Andric return true;
241f678e45dSDimitry Andric
242f678e45dSDimitry Andric // Since the pointers weren't equal, and identical ConstStrings always have
2434ba319b5SDimitry Andric // identical pointers, the result must be false for case sensitive equality
2444ba319b5SDimitry Andric // test.
245f678e45dSDimitry Andric if (case_sensitive)
246f678e45dSDimitry Andric return false;
247f678e45dSDimitry Andric
248f678e45dSDimitry Andric // perform case insensitive equality test
249f37b6182SDimitry Andric llvm::StringRef lhs_string_ref(lhs.GetStringRef());
250f37b6182SDimitry Andric llvm::StringRef rhs_string_ref(rhs.GetStringRef());
251f678e45dSDimitry Andric return lhs_string_ref.equals_lower(rhs_string_ref);
252f678e45dSDimitry Andric }
253f678e45dSDimitry Andric
Compare(const ConstString & lhs,const ConstString & rhs,const bool case_sensitive)254f678e45dSDimitry Andric int ConstString::Compare(const ConstString &lhs, const ConstString &rhs,
255f678e45dSDimitry Andric const bool case_sensitive) {
256f678e45dSDimitry Andric // If the iterators are the same, this is the same string
257f678e45dSDimitry Andric const char *lhs_cstr = lhs.m_string;
258f678e45dSDimitry Andric const char *rhs_cstr = rhs.m_string;
259f678e45dSDimitry Andric if (lhs_cstr == rhs_cstr)
260f678e45dSDimitry Andric return 0;
261f678e45dSDimitry Andric if (lhs_cstr && rhs_cstr) {
262f37b6182SDimitry Andric llvm::StringRef lhs_string_ref(lhs.GetStringRef());
263f37b6182SDimitry Andric llvm::StringRef rhs_string_ref(rhs.GetStringRef());
264f678e45dSDimitry Andric
265f678e45dSDimitry Andric if (case_sensitive) {
266f678e45dSDimitry Andric return lhs_string_ref.compare(rhs_string_ref);
267f678e45dSDimitry Andric } else {
268f678e45dSDimitry Andric return lhs_string_ref.compare_lower(rhs_string_ref);
269f678e45dSDimitry Andric }
270f678e45dSDimitry Andric }
271f678e45dSDimitry Andric
272f678e45dSDimitry Andric if (lhs_cstr)
273f678e45dSDimitry Andric return +1; // LHS isn't nullptr but RHS is
274f678e45dSDimitry Andric else
275f678e45dSDimitry Andric return -1; // LHS is nullptr but RHS isn't
276f678e45dSDimitry Andric }
277f678e45dSDimitry Andric
Dump(Stream * s,const char * fail_value) const278f678e45dSDimitry Andric void ConstString::Dump(Stream *s, const char *fail_value) const {
279f678e45dSDimitry Andric if (s != nullptr) {
280f678e45dSDimitry Andric const char *cstr = AsCString(fail_value);
281f678e45dSDimitry Andric if (cstr != nullptr)
282f678e45dSDimitry Andric s->PutCString(cstr);
283f678e45dSDimitry Andric }
284f678e45dSDimitry Andric }
285f678e45dSDimitry Andric
DumpDebug(Stream * s) const286f678e45dSDimitry Andric void ConstString::DumpDebug(Stream *s) const {
287f678e45dSDimitry Andric const char *cstr = GetCString();
288f678e45dSDimitry Andric size_t cstr_len = GetLength();
289f678e45dSDimitry Andric // Only print the parens if we have a non-nullptr string
290f678e45dSDimitry Andric const char *parens = cstr ? "\"" : "";
291f678e45dSDimitry Andric s->Printf("%*p: ConstString, string = %s%s%s, length = %" PRIu64,
292f678e45dSDimitry Andric static_cast<int>(sizeof(void *) * 2),
293f678e45dSDimitry Andric static_cast<const void *>(this), parens, cstr, parens,
294f678e45dSDimitry Andric static_cast<uint64_t>(cstr_len));
295f678e45dSDimitry Andric }
296f678e45dSDimitry Andric
SetCString(const char * cstr)297f678e45dSDimitry Andric void ConstString::SetCString(const char *cstr) {
298f678e45dSDimitry Andric m_string = StringPool().GetConstCString(cstr);
299f678e45dSDimitry Andric }
300f678e45dSDimitry Andric
SetString(const llvm::StringRef & s)301f678e45dSDimitry Andric void ConstString::SetString(const llvm::StringRef &s) {
302f678e45dSDimitry Andric m_string = StringPool().GetConstCStringWithLength(s.data(), s.size());
303f678e45dSDimitry Andric }
304f678e45dSDimitry Andric
SetStringWithMangledCounterpart(llvm::StringRef demangled,const ConstString & mangled)305*b5893f02SDimitry Andric void ConstString::SetStringWithMangledCounterpart(llvm::StringRef demangled,
306f678e45dSDimitry Andric const ConstString &mangled) {
307f678e45dSDimitry Andric m_string = StringPool().GetConstCStringAndSetMangledCounterPart(
308f678e45dSDimitry Andric demangled, mangled.m_string);
309f678e45dSDimitry Andric }
310f678e45dSDimitry Andric
GetMangledCounterpart(ConstString & counterpart) const311f678e45dSDimitry Andric bool ConstString::GetMangledCounterpart(ConstString &counterpart) const {
312f678e45dSDimitry Andric counterpart.m_string = StringPool().GetMangledCounterpart(m_string);
313f678e45dSDimitry Andric return (bool)counterpart;
314f678e45dSDimitry Andric }
315f678e45dSDimitry Andric
SetCStringWithLength(const char * cstr,size_t cstr_len)316f678e45dSDimitry Andric void ConstString::SetCStringWithLength(const char *cstr, size_t cstr_len) {
317f678e45dSDimitry Andric m_string = StringPool().GetConstCStringWithLength(cstr, cstr_len);
318f678e45dSDimitry Andric }
319f678e45dSDimitry Andric
SetTrimmedCStringWithLength(const char * cstr,size_t cstr_len)320f678e45dSDimitry Andric void ConstString::SetTrimmedCStringWithLength(const char *cstr,
321f678e45dSDimitry Andric size_t cstr_len) {
322f678e45dSDimitry Andric m_string = StringPool().GetConstTrimmedCStringWithLength(cstr, cstr_len);
323f678e45dSDimitry Andric }
324f678e45dSDimitry Andric
StaticMemorySize()325f678e45dSDimitry Andric size_t ConstString::StaticMemorySize() {
326f678e45dSDimitry Andric // Get the size of the static string pool
327f678e45dSDimitry Andric return StringPool().MemorySize();
328f678e45dSDimitry Andric }
329f678e45dSDimitry Andric
format(const ConstString & CS,llvm::raw_ostream & OS,llvm::StringRef Options)330f678e45dSDimitry Andric void llvm::format_provider<ConstString>::format(const ConstString &CS,
331f678e45dSDimitry Andric llvm::raw_ostream &OS,
332f678e45dSDimitry Andric llvm::StringRef Options) {
333f678e45dSDimitry Andric format_provider<StringRef>::format(CS.AsCString(), OS, Options);
334f678e45dSDimitry Andric }
335