1 //===-- ConstString.cpp -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/ConstString.h"
10 
11 #include "lldb/Utility/Stream.h"
12 
13 #include "llvm/ADT/StringMap.h"
14 #include "llvm/ADT/iterator.h"
15 #include "llvm/Support/Allocator.h"
16 #include "llvm/Support/DJB.h"
17 #include "llvm/Support/FormatProviders.h"
18 #include "llvm/Support/RWMutex.h"
19 #include "llvm/Support/Threading.h"
20 
21 #include <algorithm>
22 #include <array>
23 #include <utility>
24 
25 #include <inttypes.h>
26 #include <stdint.h>
27 #include <string.h>
28 
29 using namespace lldb_private;
30 
31 class Pool {
32 public:
33   typedef const char *StringPoolValueType;
34   typedef llvm::StringMap<StringPoolValueType, llvm::BumpPtrAllocator>
35       StringPool;
36   typedef llvm::StringMapEntry<StringPoolValueType> StringPoolEntryType;
37 
38   static StringPoolEntryType &
39   GetStringMapEntryFromKeyData(const char *keyData) {
40     return StringPoolEntryType::GetStringMapEntryFromKeyData(keyData);
41   }
42 
43   static size_t GetConstCStringLength(const char *ccstr) {
44     if (ccstr != nullptr) {
45       // Since the entry is read only, and we derive the entry entirely from
46       // the pointer, we don't need the lock.
47       const StringPoolEntryType &entry = GetStringMapEntryFromKeyData(ccstr);
48       return entry.getKey().size();
49     }
50     return 0;
51   }
52 
53   StringPoolValueType GetMangledCounterpart(const char *ccstr) const {
54     if (ccstr != nullptr) {
55       const uint8_t h = hash(llvm::StringRef(ccstr));
56       llvm::sys::SmartScopedReader<false> rlock(m_string_pools[h].m_mutex);
57       return GetStringMapEntryFromKeyData(ccstr).getValue();
58     }
59     return nullptr;
60   }
61 
62   bool SetMangledCounterparts(const char *key_ccstr, const char *value_ccstr) {
63     if (key_ccstr != nullptr && value_ccstr != nullptr) {
64       {
65         const uint8_t h = hash(llvm::StringRef(key_ccstr));
66         llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
67         GetStringMapEntryFromKeyData(key_ccstr).setValue(value_ccstr);
68       }
69       {
70         const uint8_t h = hash(llvm::StringRef(value_ccstr));
71         llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
72         GetStringMapEntryFromKeyData(value_ccstr).setValue(key_ccstr);
73       }
74       return true;
75     }
76     return false;
77   }
78 
79   const char *GetConstCString(const char *cstr) {
80     if (cstr != nullptr)
81       return GetConstCStringWithLength(cstr, strlen(cstr));
82     return nullptr;
83   }
84 
85   const char *GetConstCStringWithLength(const char *cstr, size_t cstr_len) {
86     if (cstr != nullptr)
87       return GetConstCStringWithStringRef(llvm::StringRef(cstr, cstr_len));
88     return nullptr;
89   }
90 
91   const char *GetConstCStringWithStringRef(const llvm::StringRef &string_ref) {
92     if (string_ref.data()) {
93       const uint8_t h = hash(string_ref);
94 
95       {
96         llvm::sys::SmartScopedReader<false> rlock(m_string_pools[h].m_mutex);
97         auto it = m_string_pools[h].m_string_map.find(string_ref);
98         if (it != m_string_pools[h].m_string_map.end())
99           return it->getKeyData();
100       }
101 
102       llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
103       StringPoolEntryType &entry =
104           *m_string_pools[h]
105                .m_string_map.insert(std::make_pair(string_ref, nullptr))
106                .first;
107       return entry.getKeyData();
108     }
109     return nullptr;
110   }
111 
112   const char *
113   GetConstCStringAndSetMangledCounterPart(llvm::StringRef demangled,
114                                           const char *mangled_ccstr) {
115     const char *demangled_ccstr = nullptr;
116 
117     {
118       const uint8_t h = hash(demangled);
119       llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
120 
121       // Make or update string pool entry with the mangled counterpart
122       StringPool &map = m_string_pools[h].m_string_map;
123       StringPoolEntryType &entry = *map.try_emplace(demangled).first;
124 
125       entry.second = mangled_ccstr;
126 
127       // Extract the const version of the demangled_cstr
128       demangled_ccstr = entry.getKeyData();
129     }
130 
131     {
132       // Now assign the demangled const string as the counterpart of the
133       // mangled const string...
134       const uint8_t h = hash(llvm::StringRef(mangled_ccstr));
135       llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
136       GetStringMapEntryFromKeyData(mangled_ccstr).setValue(demangled_ccstr);
137     }
138 
139     // Return the constant demangled C string
140     return demangled_ccstr;
141   }
142 
143   const char *GetConstTrimmedCStringWithLength(const char *cstr,
144                                                size_t cstr_len) {
145     if (cstr != nullptr) {
146       const size_t trimmed_len = strnlen(cstr, cstr_len);
147       return GetConstCStringWithLength(cstr, trimmed_len);
148     }
149     return nullptr;
150   }
151 
152   //------------------------------------------------------------------
153   // Return the size in bytes that this object and any items in its collection
154   // of uniqued strings + data count values takes in memory.
155   //------------------------------------------------------------------
156   size_t MemorySize() const {
157     size_t mem_size = sizeof(Pool);
158     for (const auto &pool : m_string_pools) {
159       llvm::sys::SmartScopedReader<false> rlock(pool.m_mutex);
160       for (const auto &entry : pool.m_string_map)
161         mem_size += sizeof(StringPoolEntryType) + entry.getKey().size();
162     }
163     return mem_size;
164   }
165 
166 protected:
167   uint8_t hash(const llvm::StringRef &s) const {
168     uint32_t h = llvm::djbHash(s);
169     return ((h >> 24) ^ (h >> 16) ^ (h >> 8) ^ h) & 0xff;
170   }
171 
172   struct PoolEntry {
173     mutable llvm::sys::SmartRWMutex<false> m_mutex;
174     StringPool m_string_map;
175   };
176 
177   std::array<PoolEntry, 256> m_string_pools;
178 };
179 
180 //----------------------------------------------------------------------
181 // Frameworks and dylibs aren't supposed to have global C++ initializers so we
182 // hide the string pool in a static function so that it will get initialized on
183 // the first call to this static function.
184 //
185 // Note, for now we make the string pool a pointer to the pool, because we
186 // can't guarantee that some objects won't get destroyed after the global
187 // destructor chain is run, and trying to make sure no destructors touch
188 // ConstStrings is difficult.  So we leak the pool instead.
189 //----------------------------------------------------------------------
190 static Pool &StringPool() {
191   static llvm::once_flag g_pool_initialization_flag;
192   static Pool *g_string_pool = nullptr;
193 
194   llvm::call_once(g_pool_initialization_flag,
195                  []() { g_string_pool = new Pool(); });
196 
197   return *g_string_pool;
198 }
199 
200 ConstString::ConstString(const char *cstr)
201     : m_string(StringPool().GetConstCString(cstr)) {}
202 
203 ConstString::ConstString(const char *cstr, size_t cstr_len)
204     : m_string(StringPool().GetConstCStringWithLength(cstr, cstr_len)) {}
205 
206 ConstString::ConstString(const llvm::StringRef &s)
207     : m_string(StringPool().GetConstCStringWithLength(s.data(), s.size())) {}
208 
209 bool ConstString::operator<(const ConstString &rhs) const {
210   if (m_string == rhs.m_string)
211     return false;
212 
213   llvm::StringRef lhs_string_ref(GetStringRef());
214   llvm::StringRef rhs_string_ref(rhs.GetStringRef());
215 
216   // If both have valid C strings, then return the comparison
217   if (lhs_string_ref.data() && rhs_string_ref.data())
218     return lhs_string_ref < rhs_string_ref;
219 
220   // Else one of them was nullptr, so if LHS is nullptr then it is less than
221   return lhs_string_ref.data() == nullptr;
222 }
223 
224 Stream &lldb_private::operator<<(Stream &s, const ConstString &str) {
225   const char *cstr = str.GetCString();
226   if (cstr != nullptr)
227     s << cstr;
228 
229   return s;
230 }
231 
232 size_t ConstString::GetLength() const {
233   return Pool::GetConstCStringLength(m_string);
234 }
235 
236 bool ConstString::Equals(const ConstString &lhs, const ConstString &rhs,
237                          const bool case_sensitive) {
238   if (lhs.m_string == rhs.m_string)
239     return true;
240 
241   // Since the pointers weren't equal, and identical ConstStrings always have
242   // identical pointers, the result must be false for case sensitive equality
243   // test.
244   if (case_sensitive)
245     return false;
246 
247   // perform case insensitive equality test
248   llvm::StringRef lhs_string_ref(lhs.GetStringRef());
249   llvm::StringRef rhs_string_ref(rhs.GetStringRef());
250   return lhs_string_ref.equals_lower(rhs_string_ref);
251 }
252 
253 int ConstString::Compare(const ConstString &lhs, const ConstString &rhs,
254                          const bool case_sensitive) {
255   // If the iterators are the same, this is the same string
256   const char *lhs_cstr = lhs.m_string;
257   const char *rhs_cstr = rhs.m_string;
258   if (lhs_cstr == rhs_cstr)
259     return 0;
260   if (lhs_cstr && rhs_cstr) {
261     llvm::StringRef lhs_string_ref(lhs.GetStringRef());
262     llvm::StringRef rhs_string_ref(rhs.GetStringRef());
263 
264     if (case_sensitive) {
265       return lhs_string_ref.compare(rhs_string_ref);
266     } else {
267       return lhs_string_ref.compare_lower(rhs_string_ref);
268     }
269   }
270 
271   if (lhs_cstr)
272     return +1; // LHS isn't nullptr but RHS is
273   else
274     return -1; // LHS is nullptr but RHS isn't
275 }
276 
277 void ConstString::Dump(Stream *s, const char *fail_value) const {
278   if (s != nullptr) {
279     const char *cstr = AsCString(fail_value);
280     if (cstr != nullptr)
281       s->PutCString(cstr);
282   }
283 }
284 
285 void ConstString::DumpDebug(Stream *s) const {
286   const char *cstr = GetCString();
287   size_t cstr_len = GetLength();
288   // Only print the parens if we have a non-nullptr string
289   const char *parens = cstr ? "\"" : "";
290   s->Printf("%*p: ConstString, string = %s%s%s, length = %" PRIu64,
291             static_cast<int>(sizeof(void *) * 2),
292             static_cast<const void *>(this), parens, cstr, parens,
293             static_cast<uint64_t>(cstr_len));
294 }
295 
296 void ConstString::SetCString(const char *cstr) {
297   m_string = StringPool().GetConstCString(cstr);
298 }
299 
300 void ConstString::SetString(const llvm::StringRef &s) {
301   m_string = StringPool().GetConstCStringWithLength(s.data(), s.size());
302 }
303 
304 void ConstString::SetStringWithMangledCounterpart(llvm::StringRef demangled,
305                                                    const ConstString &mangled) {
306   m_string = StringPool().GetConstCStringAndSetMangledCounterPart(
307       demangled, mangled.m_string);
308 }
309 
310 bool ConstString::GetMangledCounterpart(ConstString &counterpart) const {
311   counterpart.m_string = StringPool().GetMangledCounterpart(m_string);
312   return (bool)counterpart;
313 }
314 
315 void ConstString::SetCStringWithLength(const char *cstr, size_t cstr_len) {
316   m_string = StringPool().GetConstCStringWithLength(cstr, cstr_len);
317 }
318 
319 void ConstString::SetTrimmedCStringWithLength(const char *cstr,
320                                               size_t cstr_len) {
321   m_string = StringPool().GetConstTrimmedCStringWithLength(cstr, cstr_len);
322 }
323 
324 size_t ConstString::StaticMemorySize() {
325   // Get the size of the static string pool
326   return StringPool().MemorySize();
327 }
328 
329 void llvm::format_provider<ConstString>::format(const ConstString &CS,
330                                                 llvm::raw_ostream &OS,
331                                                 llvm::StringRef Options) {
332   format_provider<StringRef>::format(CS.AsCString(), OS, Options);
333 }
334