1 //===-- ConstString.cpp -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/ConstString.h"
10 
11 #include "lldb/Utility/Stream.h"
12 
13 #include "llvm/ADT/StringMap.h"
14 #include "llvm/ADT/iterator.h"
15 #include "llvm/Support/Allocator.h"
16 #include "llvm/Support/DJB.h"
17 #include "llvm/Support/FormatProviders.h"
18 #include "llvm/Support/RWMutex.h"
19 #include "llvm/Support/Threading.h"
20 
21 #include <algorithm>
22 #include <array>
23 #include <utility>
24 
25 #include <inttypes.h>
26 #include <stdint.h>
27 #include <string.h>
28 
29 using namespace lldb_private;
30 
31 class Pool {
32 public:
33   typedef const char *StringPoolValueType;
34   // BumpPtrAllocator allocates in 4KiB chunks, any larger C++ project is going
35   // to have megabytes of symbols, so allocate in larger chunks.
36   typedef llvm::BumpPtrAllocatorImpl<llvm::MallocAllocator, 1048576> Allocator;
37   typedef llvm::StringMap<StringPoolValueType, Allocator>
38       StringPool;
39   typedef llvm::StringMapEntry<StringPoolValueType> StringPoolEntryType;
40 
41   static StringPoolEntryType &
42   GetStringMapEntryFromKeyData(const char *keyData) {
43     return StringPoolEntryType::GetStringMapEntryFromKeyData(keyData);
44   }
45 
46   static size_t GetConstCStringLength(const char *ccstr) {
47     if (ccstr != nullptr) {
48       // Since the entry is read only, and we derive the entry entirely from
49       // the pointer, we don't need the lock.
50       const StringPoolEntryType &entry = GetStringMapEntryFromKeyData(ccstr);
51       return entry.getKey().size();
52     }
53     return 0;
54   }
55 
56   StringPoolValueType GetMangledCounterpart(const char *ccstr) const {
57     if (ccstr != nullptr) {
58       const uint8_t h = hash(llvm::StringRef(ccstr));
59       llvm::sys::SmartScopedReader<false> rlock(m_string_pools[h].m_mutex);
60       return GetStringMapEntryFromKeyData(ccstr).getValue();
61     }
62     return nullptr;
63   }
64 
65   const char *GetConstCString(const char *cstr) {
66     if (cstr != nullptr)
67       return GetConstCStringWithLength(cstr, strlen(cstr));
68     return nullptr;
69   }
70 
71   const char *GetConstCStringWithLength(const char *cstr, size_t cstr_len) {
72     if (cstr != nullptr)
73       return GetConstCStringWithStringRef(llvm::StringRef(cstr, cstr_len));
74     return nullptr;
75   }
76 
77   const char *GetConstCStringWithStringRef(const llvm::StringRef &string_ref) {
78     if (string_ref.data()) {
79       const uint8_t h = hash(string_ref);
80 
81       {
82         llvm::sys::SmartScopedReader<false> rlock(m_string_pools[h].m_mutex);
83         auto it = m_string_pools[h].m_string_map.find(string_ref);
84         if (it != m_string_pools[h].m_string_map.end())
85           return it->getKeyData();
86       }
87 
88       llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
89       StringPoolEntryType &entry =
90           *m_string_pools[h]
91                .m_string_map.insert(std::make_pair(string_ref, nullptr))
92                .first;
93       return entry.getKeyData();
94     }
95     return nullptr;
96   }
97 
98   const char *
99   GetConstCStringAndSetMangledCounterPart(llvm::StringRef demangled,
100                                           const char *mangled_ccstr) {
101     const char *demangled_ccstr = nullptr;
102 
103     {
104       const uint8_t h = hash(demangled);
105       llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
106 
107       // Make or update string pool entry with the mangled counterpart
108       StringPool &map = m_string_pools[h].m_string_map;
109       StringPoolEntryType &entry = *map.try_emplace(demangled).first;
110 
111       entry.second = mangled_ccstr;
112 
113       // Extract the const version of the demangled_cstr
114       demangled_ccstr = entry.getKeyData();
115     }
116 
117     {
118       // Now assign the demangled const string as the counterpart of the
119       // mangled const string...
120       const uint8_t h = hash(llvm::StringRef(mangled_ccstr));
121       llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
122       GetStringMapEntryFromKeyData(mangled_ccstr).setValue(demangled_ccstr);
123     }
124 
125     // Return the constant demangled C string
126     return demangled_ccstr;
127   }
128 
129   const char *GetConstTrimmedCStringWithLength(const char *cstr,
130                                                size_t cstr_len) {
131     if (cstr != nullptr) {
132       const size_t trimmed_len = strnlen(cstr, cstr_len);
133       return GetConstCStringWithLength(cstr, trimmed_len);
134     }
135     return nullptr;
136   }
137 
138   // Return the size in bytes that this object and any items in its collection
139   // of uniqued strings + data count values takes in memory.
140   size_t MemorySize() const {
141     size_t mem_size = sizeof(Pool);
142     for (const auto &pool : m_string_pools) {
143       llvm::sys::SmartScopedReader<false> rlock(pool.m_mutex);
144       for (const auto &entry : pool.m_string_map)
145         mem_size += sizeof(StringPoolEntryType) + entry.getKey().size();
146     }
147     return mem_size;
148   }
149 
150 protected:
151   uint8_t hash(const llvm::StringRef &s) const {
152     uint32_t h = llvm::djbHash(s);
153     return ((h >> 24) ^ (h >> 16) ^ (h >> 8) ^ h) & 0xff;
154   }
155 
156   struct PoolEntry {
157     mutable llvm::sys::SmartRWMutex<false> m_mutex;
158     // StringMap by default starts with 16 buckets, any larger project is
159     // going to have many symbols, so start with a larger value.
160     StringPool m_string_map = StringPool( 65536 );
161   };
162 
163   std::array<PoolEntry, 256> m_string_pools;
164 };
165 
166 // Frameworks and dylibs aren't supposed to have global C++ initializers so we
167 // hide the string pool in a static function so that it will get initialized on
168 // the first call to this static function.
169 //
170 // Note, for now we make the string pool a pointer to the pool, because we
171 // can't guarantee that some objects won't get destroyed after the global
172 // destructor chain is run, and trying to make sure no destructors touch
173 // ConstStrings is difficult.  So we leak the pool instead.
174 static Pool &StringPool() {
175   static llvm::once_flag g_pool_initialization_flag;
176   static Pool *g_string_pool = nullptr;
177 
178   llvm::call_once(g_pool_initialization_flag,
179                  []() { g_string_pool = new Pool(); });
180 
181   return *g_string_pool;
182 }
183 
184 ConstString::ConstString(const char *cstr)
185     : m_string(StringPool().GetConstCString(cstr)) {}
186 
187 ConstString::ConstString(const char *cstr, size_t cstr_len)
188     : m_string(StringPool().GetConstCStringWithLength(cstr, cstr_len)) {}
189 
190 ConstString::ConstString(const llvm::StringRef &s)
191     : m_string(StringPool().GetConstCStringWithStringRef(s)) {}
192 
193 bool ConstString::operator<(ConstString rhs) const {
194   if (m_string == rhs.m_string)
195     return false;
196 
197   llvm::StringRef lhs_string_ref(GetStringRef());
198   llvm::StringRef rhs_string_ref(rhs.GetStringRef());
199 
200   // If both have valid C strings, then return the comparison
201   if (lhs_string_ref.data() && rhs_string_ref.data())
202     return lhs_string_ref < rhs_string_ref;
203 
204   // Else one of them was nullptr, so if LHS is nullptr then it is less than
205   return lhs_string_ref.data() == nullptr;
206 }
207 
208 Stream &lldb_private::operator<<(Stream &s, ConstString str) {
209   const char *cstr = str.GetCString();
210   if (cstr != nullptr)
211     s << cstr;
212 
213   return s;
214 }
215 
216 size_t ConstString::GetLength() const {
217   return Pool::GetConstCStringLength(m_string);
218 }
219 
220 bool ConstString::Equals(ConstString lhs, ConstString rhs,
221                          const bool case_sensitive) {
222   if (lhs.m_string == rhs.m_string)
223     return true;
224 
225   // Since the pointers weren't equal, and identical ConstStrings always have
226   // identical pointers, the result must be false for case sensitive equality
227   // test.
228   if (case_sensitive)
229     return false;
230 
231   // perform case insensitive equality test
232   llvm::StringRef lhs_string_ref(lhs.GetStringRef());
233   llvm::StringRef rhs_string_ref(rhs.GetStringRef());
234   return lhs_string_ref.equals_lower(rhs_string_ref);
235 }
236 
237 int ConstString::Compare(ConstString lhs, ConstString rhs,
238                          const bool case_sensitive) {
239   // If the iterators are the same, this is the same string
240   const char *lhs_cstr = lhs.m_string;
241   const char *rhs_cstr = rhs.m_string;
242   if (lhs_cstr == rhs_cstr)
243     return 0;
244   if (lhs_cstr && rhs_cstr) {
245     llvm::StringRef lhs_string_ref(lhs.GetStringRef());
246     llvm::StringRef rhs_string_ref(rhs.GetStringRef());
247 
248     if (case_sensitive) {
249       return lhs_string_ref.compare(rhs_string_ref);
250     } else {
251       return lhs_string_ref.compare_lower(rhs_string_ref);
252     }
253   }
254 
255   if (lhs_cstr)
256     return +1; // LHS isn't nullptr but RHS is
257   else
258     return -1; // LHS is nullptr but RHS isn't
259 }
260 
261 void ConstString::Dump(Stream *s, const char *fail_value) const {
262   if (s != nullptr) {
263     const char *cstr = AsCString(fail_value);
264     if (cstr != nullptr)
265       s->PutCString(cstr);
266   }
267 }
268 
269 void ConstString::DumpDebug(Stream *s) const {
270   const char *cstr = GetCString();
271   size_t cstr_len = GetLength();
272   // Only print the parens if we have a non-nullptr string
273   const char *parens = cstr ? "\"" : "";
274   s->Printf("%*p: ConstString, string = %s%s%s, length = %" PRIu64,
275             static_cast<int>(sizeof(void *) * 2),
276             static_cast<const void *>(this), parens, cstr, parens,
277             static_cast<uint64_t>(cstr_len));
278 }
279 
280 void ConstString::SetCString(const char *cstr) {
281   m_string = StringPool().GetConstCString(cstr);
282 }
283 
284 void ConstString::SetString(const llvm::StringRef &s) {
285   m_string = StringPool().GetConstCStringWithLength(s.data(), s.size());
286 }
287 
288 void ConstString::SetStringWithMangledCounterpart(llvm::StringRef demangled,
289                                                    ConstString mangled) {
290   m_string = StringPool().GetConstCStringAndSetMangledCounterPart(
291       demangled, mangled.m_string);
292 }
293 
294 bool ConstString::GetMangledCounterpart(ConstString &counterpart) const {
295   counterpart.m_string = StringPool().GetMangledCounterpart(m_string);
296   return (bool)counterpart;
297 }
298 
299 void ConstString::SetCStringWithLength(const char *cstr, size_t cstr_len) {
300   m_string = StringPool().GetConstCStringWithLength(cstr, cstr_len);
301 }
302 
303 void ConstString::SetTrimmedCStringWithLength(const char *cstr,
304                                               size_t cstr_len) {
305   m_string = StringPool().GetConstTrimmedCStringWithLength(cstr, cstr_len);
306 }
307 
308 size_t ConstString::StaticMemorySize() {
309   // Get the size of the static string pool
310   return StringPool().MemorySize();
311 }
312 
313 void llvm::format_provider<ConstString>::format(const ConstString &CS,
314                                                 llvm::raw_ostream &OS,
315                                                 llvm::StringRef Options) {
316   format_provider<StringRef>::format(CS.AsCString(), OS, Options);
317 }
318