1 //===-- ConstString.cpp ---------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/ConstString.h"
10 
11 #include "lldb/Utility/Stream.h"
12 
13 #include "llvm/ADT/StringMap.h"
14 #include "llvm/ADT/iterator.h"
15 #include "llvm/Support/Allocator.h"
16 #include "llvm/Support/DJB.h"
17 #include "llvm/Support/FormatProviders.h"
18 #include "llvm/Support/RWMutex.h"
19 #include "llvm/Support/Threading.h"
20 
21 #include <array>
22 #include <utility>
23 
24 #include <inttypes.h>
25 #include <stdint.h>
26 #include <string.h>
27 
28 using namespace lldb_private;
29 
30 class Pool {
31 public:
32   /// The default BumpPtrAllocatorImpl slab size.
33   static const size_t AllocatorSlabSize = 4096 * 4;
34   /// Every Pool has its own allocator which receives an equal share of
35   /// the ConstString allocations. This means that when allocating many
36   /// ConstStrings, every allocator sees only its small share of allocations and
37   /// assumes LLDB only allocated a small amount of memory so far. In reality
38   /// LLDB allocated a total memory that is N times as large as what the
39   /// allocator sees (where N is the number of string pools). This causes that
40   /// the BumpPtrAllocator continues a long time to allocate memory in small
41   /// chunks which only makes sense when allocating a small amount of memory
42   /// (which is true from the perspective of a single allocator). On some
43   /// systems doing all these small memory allocations causes LLDB to spend
44   /// a lot of time in malloc, so we need to force all these allocators to
45   /// behave like one allocator in terms of scaling their memory allocations
46   /// with increased demand. To do this we set the growth delay for each single
47   /// allocator to a rate so that our pool of allocators scales their memory
48   /// allocations similar to a single BumpPtrAllocatorImpl.
49   ///
50   /// Currently we have 256 string pools and the normal growth delay of the
51   /// BumpPtrAllocatorImpl is 128 (i.e., the memory allocation size increases
52   /// every 128 full chunks), so by changing the delay to 1 we get a
53   /// total growth delay in our allocator collection of 256/1 = 256. This is
54   /// still only half as fast as a normal allocator but we can't go any faster
55   /// without decreasing the number of string pools.
56   static const size_t AllocatorGrowthDelay = 1;
57   typedef llvm::BumpPtrAllocatorImpl<llvm::MallocAllocator, AllocatorSlabSize,
58                                      AllocatorSlabSize, AllocatorGrowthDelay>
59       Allocator;
60   typedef const char *StringPoolValueType;
61   typedef llvm::StringMap<StringPoolValueType, Allocator> StringPool;
62   typedef llvm::StringMapEntry<StringPoolValueType> StringPoolEntryType;
63 
64   static StringPoolEntryType &
65   GetStringMapEntryFromKeyData(const char *keyData) {
66     return StringPoolEntryType::GetStringMapEntryFromKeyData(keyData);
67   }
68 
69   static size_t GetConstCStringLength(const char *ccstr) {
70     if (ccstr != nullptr) {
71       // Since the entry is read only, and we derive the entry entirely from
72       // the pointer, we don't need the lock.
73       const StringPoolEntryType &entry = GetStringMapEntryFromKeyData(ccstr);
74       return entry.getKey().size();
75     }
76     return 0;
77   }
78 
79   StringPoolValueType GetMangledCounterpart(const char *ccstr) const {
80     if (ccstr != nullptr) {
81       const uint8_t h = hash(llvm::StringRef(ccstr));
82       llvm::sys::SmartScopedReader<false> rlock(m_string_pools[h].m_mutex);
83       return GetStringMapEntryFromKeyData(ccstr).getValue();
84     }
85     return nullptr;
86   }
87 
88   const char *GetConstCString(const char *cstr) {
89     if (cstr != nullptr)
90       return GetConstCStringWithLength(cstr, strlen(cstr));
91     return nullptr;
92   }
93 
94   const char *GetConstCStringWithLength(const char *cstr, size_t cstr_len) {
95     if (cstr != nullptr)
96       return GetConstCStringWithStringRef(llvm::StringRef(cstr, cstr_len));
97     return nullptr;
98   }
99 
100   const char *GetConstCStringWithStringRef(const llvm::StringRef &string_ref) {
101     if (string_ref.data()) {
102       const uint8_t h = hash(string_ref);
103 
104       {
105         llvm::sys::SmartScopedReader<false> rlock(m_string_pools[h].m_mutex);
106         auto it = m_string_pools[h].m_string_map.find(string_ref);
107         if (it != m_string_pools[h].m_string_map.end())
108           return it->getKeyData();
109       }
110 
111       llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
112       StringPoolEntryType &entry =
113           *m_string_pools[h]
114                .m_string_map.insert(std::make_pair(string_ref, nullptr))
115                .first;
116       return entry.getKeyData();
117     }
118     return nullptr;
119   }
120 
121   const char *
122   GetConstCStringAndSetMangledCounterPart(llvm::StringRef demangled,
123                                           const char *mangled_ccstr) {
124     const char *demangled_ccstr = nullptr;
125 
126     {
127       const uint8_t h = hash(demangled);
128       llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
129 
130       // Make or update string pool entry with the mangled counterpart
131       StringPool &map = m_string_pools[h].m_string_map;
132       StringPoolEntryType &entry = *map.try_emplace(demangled).first;
133 
134       entry.second = mangled_ccstr;
135 
136       // Extract the const version of the demangled_cstr
137       demangled_ccstr = entry.getKeyData();
138     }
139 
140     {
141       // Now assign the demangled const string as the counterpart of the
142       // mangled const string...
143       const uint8_t h = hash(llvm::StringRef(mangled_ccstr));
144       llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
145       GetStringMapEntryFromKeyData(mangled_ccstr).setValue(demangled_ccstr);
146     }
147 
148     // Return the constant demangled C string
149     return demangled_ccstr;
150   }
151 
152   const char *GetConstTrimmedCStringWithLength(const char *cstr,
153                                                size_t cstr_len) {
154     if (cstr != nullptr) {
155       const size_t trimmed_len = strnlen(cstr, cstr_len);
156       return GetConstCStringWithLength(cstr, trimmed_len);
157     }
158     return nullptr;
159   }
160 
161   // Return the size in bytes that this object and any items in its collection
162   // of uniqued strings + data count values takes in memory.
163   size_t MemorySize() const {
164     size_t mem_size = sizeof(Pool);
165     for (const auto &pool : m_string_pools) {
166       llvm::sys::SmartScopedReader<false> rlock(pool.m_mutex);
167       for (const auto &entry : pool.m_string_map)
168         mem_size += sizeof(StringPoolEntryType) + entry.getKey().size();
169     }
170     return mem_size;
171   }
172 
173 protected:
174   uint8_t hash(const llvm::StringRef &s) const {
175     uint32_t h = llvm::djbHash(s);
176     return ((h >> 24) ^ (h >> 16) ^ (h >> 8) ^ h) & 0xff;
177   }
178 
179   struct PoolEntry {
180     mutable llvm::sys::SmartRWMutex<false> m_mutex;
181     StringPool m_string_map;
182   };
183 
184   std::array<PoolEntry, 256> m_string_pools;
185 };
186 
187 // Frameworks and dylibs aren't supposed to have global C++ initializers so we
188 // hide the string pool in a static function so that it will get initialized on
189 // the first call to this static function.
190 //
191 // Note, for now we make the string pool a pointer to the pool, because we
192 // can't guarantee that some objects won't get destroyed after the global
193 // destructor chain is run, and trying to make sure no destructors touch
194 // ConstStrings is difficult.  So we leak the pool instead.
195 static Pool &StringPool() {
196   static llvm::once_flag g_pool_initialization_flag;
197   static Pool *g_string_pool = nullptr;
198 
199   llvm::call_once(g_pool_initialization_flag,
200                  []() { g_string_pool = new Pool(); });
201 
202   return *g_string_pool;
203 }
204 
205 ConstString::ConstString(const char *cstr)
206     : m_string(StringPool().GetConstCString(cstr)) {}
207 
208 ConstString::ConstString(const char *cstr, size_t cstr_len)
209     : m_string(StringPool().GetConstCStringWithLength(cstr, cstr_len)) {}
210 
211 ConstString::ConstString(const llvm::StringRef &s)
212     : m_string(StringPool().GetConstCStringWithStringRef(s)) {}
213 
214 bool ConstString::operator<(ConstString rhs) const {
215   if (m_string == rhs.m_string)
216     return false;
217 
218   llvm::StringRef lhs_string_ref(GetStringRef());
219   llvm::StringRef rhs_string_ref(rhs.GetStringRef());
220 
221   // If both have valid C strings, then return the comparison
222   if (lhs_string_ref.data() && rhs_string_ref.data())
223     return lhs_string_ref < rhs_string_ref;
224 
225   // Else one of them was nullptr, so if LHS is nullptr then it is less than
226   return lhs_string_ref.data() == nullptr;
227 }
228 
229 Stream &lldb_private::operator<<(Stream &s, ConstString str) {
230   const char *cstr = str.GetCString();
231   if (cstr != nullptr)
232     s << cstr;
233 
234   return s;
235 }
236 
237 size_t ConstString::GetLength() const {
238   return Pool::GetConstCStringLength(m_string);
239 }
240 
241 bool ConstString::Equals(ConstString lhs, ConstString rhs,
242                          const bool case_sensitive) {
243   if (lhs.m_string == rhs.m_string)
244     return true;
245 
246   // Since the pointers weren't equal, and identical ConstStrings always have
247   // identical pointers, the result must be false for case sensitive equality
248   // test.
249   if (case_sensitive)
250     return false;
251 
252   // perform case insensitive equality test
253   llvm::StringRef lhs_string_ref(lhs.GetStringRef());
254   llvm::StringRef rhs_string_ref(rhs.GetStringRef());
255   return lhs_string_ref.equals_lower(rhs_string_ref);
256 }
257 
258 int ConstString::Compare(ConstString lhs, ConstString rhs,
259                          const bool case_sensitive) {
260   // If the iterators are the same, this is the same string
261   const char *lhs_cstr = lhs.m_string;
262   const char *rhs_cstr = rhs.m_string;
263   if (lhs_cstr == rhs_cstr)
264     return 0;
265   if (lhs_cstr && rhs_cstr) {
266     llvm::StringRef lhs_string_ref(lhs.GetStringRef());
267     llvm::StringRef rhs_string_ref(rhs.GetStringRef());
268 
269     if (case_sensitive) {
270       return lhs_string_ref.compare(rhs_string_ref);
271     } else {
272       return lhs_string_ref.compare_lower(rhs_string_ref);
273     }
274   }
275 
276   if (lhs_cstr)
277     return +1; // LHS isn't nullptr but RHS is
278   else
279     return -1; // LHS is nullptr but RHS isn't
280 }
281 
282 void ConstString::Dump(Stream *s, const char *fail_value) const {
283   if (s != nullptr) {
284     const char *cstr = AsCString(fail_value);
285     if (cstr != nullptr)
286       s->PutCString(cstr);
287   }
288 }
289 
290 void ConstString::DumpDebug(Stream *s) const {
291   const char *cstr = GetCString();
292   size_t cstr_len = GetLength();
293   // Only print the parens if we have a non-nullptr string
294   const char *parens = cstr ? "\"" : "";
295   s->Printf("%*p: ConstString, string = %s%s%s, length = %" PRIu64,
296             static_cast<int>(sizeof(void *) * 2),
297             static_cast<const void *>(this), parens, cstr, parens,
298             static_cast<uint64_t>(cstr_len));
299 }
300 
301 void ConstString::SetCString(const char *cstr) {
302   m_string = StringPool().GetConstCString(cstr);
303 }
304 
305 void ConstString::SetString(const llvm::StringRef &s) {
306   m_string = StringPool().GetConstCStringWithLength(s.data(), s.size());
307 }
308 
309 void ConstString::SetStringWithMangledCounterpart(llvm::StringRef demangled,
310                                                    ConstString mangled) {
311   m_string = StringPool().GetConstCStringAndSetMangledCounterPart(
312       demangled, mangled.m_string);
313 }
314 
315 bool ConstString::GetMangledCounterpart(ConstString &counterpart) const {
316   counterpart.m_string = StringPool().GetMangledCounterpart(m_string);
317   return (bool)counterpart;
318 }
319 
320 void ConstString::SetCStringWithLength(const char *cstr, size_t cstr_len) {
321   m_string = StringPool().GetConstCStringWithLength(cstr, cstr_len);
322 }
323 
324 void ConstString::SetTrimmedCStringWithLength(const char *cstr,
325                                               size_t cstr_len) {
326   m_string = StringPool().GetConstTrimmedCStringWithLength(cstr, cstr_len);
327 }
328 
329 size_t ConstString::StaticMemorySize() {
330   // Get the size of the static string pool
331   return StringPool().MemorySize();
332 }
333 
334 void llvm::format_provider<ConstString>::format(const ConstString &CS,
335                                                 llvm::raw_ostream &OS,
336                                                 llvm::StringRef Options) {
337   format_provider<StringRef>::format(CS.AsCString(), OS, Options);
338 }
339