1 //===-- Symtab.cpp --------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <map>
10 #include <set>
11 
12 #include "lldb/Core/Module.h"
13 #include "lldb/Core/RichManglingContext.h"
14 #include "lldb/Core/Section.h"
15 #include "lldb/Symbol/ObjectFile.h"
16 #include "lldb/Symbol/Symbol.h"
17 #include "lldb/Symbol/SymbolContext.h"
18 #include "lldb/Symbol/Symtab.h"
19 #include "lldb/Target/Language.h"
20 #include "lldb/Utility/RegularExpression.h"
21 #include "lldb/Utility/Stream.h"
22 #include "lldb/Utility/Timer.h"
23 
24 #include "llvm/ADT/StringRef.h"
25 
26 using namespace lldb;
27 using namespace lldb_private;
28 
29 Symtab::Symtab(ObjectFile *objfile)
30     : m_objfile(objfile), m_symbols(), m_file_addr_to_index(*this),
31       m_name_to_symbol_indices(), m_mutex(),
32       m_file_addr_to_index_computed(false), m_name_indexes_computed(false) {
33   m_name_to_symbol_indices.emplace(std::make_pair(
34       lldb::eFunctionNameTypeNone, UniqueCStringMap<uint32_t>()));
35   m_name_to_symbol_indices.emplace(std::make_pair(
36       lldb::eFunctionNameTypeBase, UniqueCStringMap<uint32_t>()));
37   m_name_to_symbol_indices.emplace(std::make_pair(
38       lldb::eFunctionNameTypeMethod, UniqueCStringMap<uint32_t>()));
39   m_name_to_symbol_indices.emplace(std::make_pair(
40       lldb::eFunctionNameTypeSelector, UniqueCStringMap<uint32_t>()));
41 }
42 
43 Symtab::~Symtab() {}
44 
45 void Symtab::Reserve(size_t count) {
46   // Clients should grab the mutex from this symbol table and lock it manually
47   // when calling this function to avoid performance issues.
48   m_symbols.reserve(count);
49 }
50 
51 Symbol *Symtab::Resize(size_t count) {
52   // Clients should grab the mutex from this symbol table and lock it manually
53   // when calling this function to avoid performance issues.
54   m_symbols.resize(count);
55   return m_symbols.empty() ? nullptr : &m_symbols[0];
56 }
57 
58 uint32_t Symtab::AddSymbol(const Symbol &symbol) {
59   // Clients should grab the mutex from this symbol table and lock it manually
60   // when calling this function to avoid performance issues.
61   uint32_t symbol_idx = m_symbols.size();
62   auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
63   name_to_index.Clear();
64   m_file_addr_to_index.Clear();
65   m_symbols.push_back(symbol);
66   m_file_addr_to_index_computed = false;
67   m_name_indexes_computed = false;
68   return symbol_idx;
69 }
70 
71 size_t Symtab::GetNumSymbols() const {
72   std::lock_guard<std::recursive_mutex> guard(m_mutex);
73   return m_symbols.size();
74 }
75 
76 void Symtab::SectionFileAddressesChanged() {
77   auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
78   name_to_index.Clear();
79   m_file_addr_to_index_computed = false;
80 }
81 
82 void Symtab::Dump(Stream *s, Target *target, SortOrder sort_order,
83                   Mangled::NamePreference name_preference) {
84   std::lock_guard<std::recursive_mutex> guard(m_mutex);
85 
86   //    s->Printf("%.*p: ", (int)sizeof(void*) * 2, this);
87   s->Indent();
88   const FileSpec &file_spec = m_objfile->GetFileSpec();
89   const char *object_name = nullptr;
90   if (m_objfile->GetModule())
91     object_name = m_objfile->GetModule()->GetObjectName().GetCString();
92 
93   if (file_spec)
94     s->Printf("Symtab, file = %s%s%s%s, num_symbols = %" PRIu64,
95               file_spec.GetPath().c_str(), object_name ? "(" : "",
96               object_name ? object_name : "", object_name ? ")" : "",
97               (uint64_t)m_symbols.size());
98   else
99     s->Printf("Symtab, num_symbols = %" PRIu64 "", (uint64_t)m_symbols.size());
100 
101   if (!m_symbols.empty()) {
102     switch (sort_order) {
103     case eSortOrderNone: {
104       s->PutCString(":\n");
105       DumpSymbolHeader(s);
106       const_iterator begin = m_symbols.begin();
107       const_iterator end = m_symbols.end();
108       for (const_iterator pos = m_symbols.begin(); pos != end; ++pos) {
109         s->Indent();
110         pos->Dump(s, target, std::distance(begin, pos), name_preference);
111       }
112     } break;
113 
114     case eSortOrderByName: {
115       // Although we maintain a lookup by exact name map, the table isn't
116       // sorted by name. So we must make the ordered symbol list up ourselves.
117       s->PutCString(" (sorted by name):\n");
118       DumpSymbolHeader(s);
119 
120       std::multimap<llvm::StringRef, const Symbol *> name_map;
121       for (const_iterator pos = m_symbols.begin(), end = m_symbols.end();
122            pos != end; ++pos) {
123         const char *name = pos->GetName().AsCString();
124         if (name && name[0])
125           name_map.insert(std::make_pair(name, &(*pos)));
126       }
127 
128       for (const auto &name_to_symbol : name_map) {
129         const Symbol *symbol = name_to_symbol.second;
130         s->Indent();
131         symbol->Dump(s, target, symbol - &m_symbols[0], name_preference);
132       }
133     } break;
134 
135     case eSortOrderByAddress:
136       s->PutCString(" (sorted by address):\n");
137       DumpSymbolHeader(s);
138       if (!m_file_addr_to_index_computed)
139         InitAddressIndexes();
140       const size_t num_entries = m_file_addr_to_index.GetSize();
141       for (size_t i = 0; i < num_entries; ++i) {
142         s->Indent();
143         const uint32_t symbol_idx = m_file_addr_to_index.GetEntryRef(i).data;
144         m_symbols[symbol_idx].Dump(s, target, symbol_idx, name_preference);
145       }
146       break;
147     }
148   } else {
149     s->PutCString("\n");
150   }
151 }
152 
153 void Symtab::Dump(Stream *s, Target *target, std::vector<uint32_t> &indexes,
154                   Mangled::NamePreference name_preference) const {
155   std::lock_guard<std::recursive_mutex> guard(m_mutex);
156 
157   const size_t num_symbols = GetNumSymbols();
158   // s->Printf("%.*p: ", (int)sizeof(void*) * 2, this);
159   s->Indent();
160   s->Printf("Symtab %" PRIu64 " symbol indexes (%" PRIu64 " symbols total):\n",
161             (uint64_t)indexes.size(), (uint64_t)m_symbols.size());
162   s->IndentMore();
163 
164   if (!indexes.empty()) {
165     std::vector<uint32_t>::const_iterator pos;
166     std::vector<uint32_t>::const_iterator end = indexes.end();
167     DumpSymbolHeader(s);
168     for (pos = indexes.begin(); pos != end; ++pos) {
169       size_t idx = *pos;
170       if (idx < num_symbols) {
171         s->Indent();
172         m_symbols[idx].Dump(s, target, idx, name_preference);
173       }
174     }
175   }
176   s->IndentLess();
177 }
178 
179 void Symtab::DumpSymbolHeader(Stream *s) {
180   s->Indent("               Debug symbol\n");
181   s->Indent("               |Synthetic symbol\n");
182   s->Indent("               ||Externally Visible\n");
183   s->Indent("               |||\n");
184   s->Indent("Index   UserID DSX Type            File Address/Value Load "
185             "Address       Size               Flags      Name\n");
186   s->Indent("------- ------ --- --------------- ------------------ "
187             "------------------ ------------------ ---------- "
188             "----------------------------------\n");
189 }
190 
191 static int CompareSymbolID(const void *key, const void *p) {
192   const user_id_t match_uid = *(const user_id_t *)key;
193   const user_id_t symbol_uid = ((const Symbol *)p)->GetID();
194   if (match_uid < symbol_uid)
195     return -1;
196   if (match_uid > symbol_uid)
197     return 1;
198   return 0;
199 }
200 
201 Symbol *Symtab::FindSymbolByID(lldb::user_id_t symbol_uid) const {
202   std::lock_guard<std::recursive_mutex> guard(m_mutex);
203 
204   Symbol *symbol =
205       (Symbol *)::bsearch(&symbol_uid, &m_symbols[0], m_symbols.size(),
206                           sizeof(m_symbols[0]), CompareSymbolID);
207   return symbol;
208 }
209 
210 Symbol *Symtab::SymbolAtIndex(size_t idx) {
211   // Clients should grab the mutex from this symbol table and lock it manually
212   // when calling this function to avoid performance issues.
213   if (idx < m_symbols.size())
214     return &m_symbols[idx];
215   return nullptr;
216 }
217 
218 const Symbol *Symtab::SymbolAtIndex(size_t idx) const {
219   // Clients should grab the mutex from this symbol table and lock it manually
220   // when calling this function to avoid performance issues.
221   if (idx < m_symbols.size())
222     return &m_symbols[idx];
223   return nullptr;
224 }
225 
226 static bool lldb_skip_name(llvm::StringRef mangled,
227                            Mangled::ManglingScheme scheme) {
228   switch (scheme) {
229   case Mangled::eManglingSchemeItanium: {
230     if (mangled.size() < 3 || !mangled.startswith("_Z"))
231       return true;
232 
233     // Avoid the following types of symbols in the index.
234     switch (mangled[2]) {
235     case 'G': // guard variables
236     case 'T': // virtual tables, VTT structures, typeinfo structures + names
237     case 'Z': // named local entities (if we eventually handle
238               // eSymbolTypeData, we will want this back)
239       return true;
240 
241     default:
242       break;
243     }
244 
245     // Include this name in the index.
246     return false;
247   }
248 
249   // No filters for this scheme yet. Include all names in indexing.
250   case Mangled::eManglingSchemeMSVC:
251     return false;
252 
253   // No filters for this scheme yet. Include all names in indexing.
254   case Mangled::eManglingSchemeRustV0:
255     return false;
256 
257   // Don't try and demangle things we can't categorize.
258   case Mangled::eManglingSchemeNone:
259     return true;
260   }
261   llvm_unreachable("unknown scheme!");
262 }
263 
264 void Symtab::InitNameIndexes() {
265   // Protected function, no need to lock mutex...
266   if (!m_name_indexes_computed) {
267     m_name_indexes_computed = true;
268     LLDB_SCOPED_TIMER();
269 
270     // Collect all loaded language plugins.
271     std::vector<Language *> languages;
272     Language::ForEach([&languages](Language *l) {
273       languages.push_back(l);
274       return true;
275     });
276 
277     auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
278     auto &basename_to_index =
279         GetNameToSymbolIndexMap(lldb::eFunctionNameTypeBase);
280     auto &method_to_index =
281         GetNameToSymbolIndexMap(lldb::eFunctionNameTypeMethod);
282     auto &selector_to_index =
283         GetNameToSymbolIndexMap(lldb::eFunctionNameTypeSelector);
284     // Create the name index vector to be able to quickly search by name
285     const size_t num_symbols = m_symbols.size();
286     name_to_index.Reserve(num_symbols);
287 
288     // The "const char *" in "class_contexts" and backlog::value_type::second
289     // must come from a ConstString::GetCString()
290     std::set<const char *> class_contexts;
291     std::vector<std::pair<NameToIndexMap::Entry, const char *>> backlog;
292     backlog.reserve(num_symbols / 2);
293 
294     // Instantiation of the demangler is expensive, so better use a single one
295     // for all entries during batch processing.
296     RichManglingContext rmc;
297     for (uint32_t value = 0; value < num_symbols; ++value) {
298       Symbol *symbol = &m_symbols[value];
299 
300       // Don't let trampolines get into the lookup by name map If we ever need
301       // the trampoline symbols to be searchable by name we can remove this and
302       // then possibly add a new bool to any of the Symtab functions that
303       // lookup symbols by name to indicate if they want trampolines.
304       if (symbol->IsTrampoline() || symbol->IsSynthetic())
305         continue;
306 
307       // If the symbol's name string matched a Mangled::ManglingScheme, it is
308       // stored in the mangled field.
309       Mangled &mangled = symbol->GetMangled();
310       if (ConstString name = mangled.GetMangledName()) {
311         name_to_index.Append(name, value);
312 
313         if (symbol->ContainsLinkerAnnotations()) {
314           // If the symbol has linker annotations, also add the version without
315           // the annotations.
316           ConstString stripped = ConstString(
317               m_objfile->StripLinkerSymbolAnnotations(name.GetStringRef()));
318           name_to_index.Append(stripped, value);
319         }
320 
321         const SymbolType type = symbol->GetType();
322         if (type == eSymbolTypeCode || type == eSymbolTypeResolver) {
323           if (mangled.DemangleWithRichManglingInfo(rmc, lldb_skip_name))
324             RegisterMangledNameEntry(value, class_contexts, backlog, rmc);
325         }
326       }
327 
328       // Symbol name strings that didn't match a Mangled::ManglingScheme, are
329       // stored in the demangled field.
330       if (ConstString name = mangled.GetDemangledName()) {
331         name_to_index.Append(name, value);
332 
333         if (symbol->ContainsLinkerAnnotations()) {
334           // If the symbol has linker annotations, also add the version without
335           // the annotations.
336           name = ConstString(
337               m_objfile->StripLinkerSymbolAnnotations(name.GetStringRef()));
338           name_to_index.Append(name, value);
339         }
340 
341         // If the demangled name turns out to be an ObjC name, and is a category
342         // name, add the version without categories to the index too.
343         for (Language *lang : languages) {
344           for (auto variant : lang->GetMethodNameVariants(name)) {
345             if (variant.GetType() & lldb::eFunctionNameTypeSelector)
346               selector_to_index.Append(variant.GetName(), value);
347             else if (variant.GetType() & lldb::eFunctionNameTypeFull)
348               name_to_index.Append(variant.GetName(), value);
349             else if (variant.GetType() & lldb::eFunctionNameTypeMethod)
350               method_to_index.Append(variant.GetName(), value);
351             else if (variant.GetType() & lldb::eFunctionNameTypeBase)
352               basename_to_index.Append(variant.GetName(), value);
353           }
354         }
355       }
356     }
357 
358     for (const auto &record : backlog) {
359       RegisterBacklogEntry(record.first, record.second, class_contexts);
360     }
361 
362     name_to_index.Sort();
363     name_to_index.SizeToFit();
364     selector_to_index.Sort();
365     selector_to_index.SizeToFit();
366     basename_to_index.Sort();
367     basename_to_index.SizeToFit();
368     method_to_index.Sort();
369     method_to_index.SizeToFit();
370   }
371 }
372 
373 void Symtab::RegisterMangledNameEntry(
374     uint32_t value, std::set<const char *> &class_contexts,
375     std::vector<std::pair<NameToIndexMap::Entry, const char *>> &backlog,
376     RichManglingContext &rmc) {
377   // Only register functions that have a base name.
378   rmc.ParseFunctionBaseName();
379   llvm::StringRef base_name = rmc.GetBufferRef();
380   if (base_name.empty())
381     return;
382 
383   // The base name will be our entry's name.
384   NameToIndexMap::Entry entry(ConstString(base_name), value);
385 
386   rmc.ParseFunctionDeclContextName();
387   llvm::StringRef decl_context = rmc.GetBufferRef();
388 
389   // Register functions with no context.
390   if (decl_context.empty()) {
391     // This has to be a basename
392     auto &basename_to_index =
393         GetNameToSymbolIndexMap(lldb::eFunctionNameTypeBase);
394     basename_to_index.Append(entry);
395     // If there is no context (no namespaces or class scopes that come before
396     // the function name) then this also could be a fullname.
397     auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
398     name_to_index.Append(entry);
399     return;
400   }
401 
402   // Make sure we have a pool-string pointer and see if we already know the
403   // context name.
404   const char *decl_context_ccstr = ConstString(decl_context).GetCString();
405   auto it = class_contexts.find(decl_context_ccstr);
406 
407   auto &method_to_index =
408       GetNameToSymbolIndexMap(lldb::eFunctionNameTypeMethod);
409   // Register constructors and destructors. They are methods and create
410   // declaration contexts.
411   if (rmc.IsCtorOrDtor()) {
412     method_to_index.Append(entry);
413     if (it == class_contexts.end())
414       class_contexts.insert(it, decl_context_ccstr);
415     return;
416   }
417 
418   // Register regular methods with a known declaration context.
419   if (it != class_contexts.end()) {
420     method_to_index.Append(entry);
421     return;
422   }
423 
424   // Regular methods in unknown declaration contexts are put to the backlog. We
425   // will revisit them once we processed all remaining symbols.
426   backlog.push_back(std::make_pair(entry, decl_context_ccstr));
427 }
428 
429 void Symtab::RegisterBacklogEntry(
430     const NameToIndexMap::Entry &entry, const char *decl_context,
431     const std::set<const char *> &class_contexts) {
432   auto &method_to_index =
433       GetNameToSymbolIndexMap(lldb::eFunctionNameTypeMethod);
434   auto it = class_contexts.find(decl_context);
435   if (it != class_contexts.end()) {
436     method_to_index.Append(entry);
437   } else {
438     // If we got here, we have something that had a context (was inside
439     // a namespace or class) yet we don't know the entry
440     method_to_index.Append(entry);
441     auto &basename_to_index =
442         GetNameToSymbolIndexMap(lldb::eFunctionNameTypeBase);
443     basename_to_index.Append(entry);
444   }
445 }
446 
447 void Symtab::PreloadSymbols() {
448   std::lock_guard<std::recursive_mutex> guard(m_mutex);
449   InitNameIndexes();
450 }
451 
452 void Symtab::AppendSymbolNamesToMap(const IndexCollection &indexes,
453                                     bool add_demangled, bool add_mangled,
454                                     NameToIndexMap &name_to_index_map) const {
455   LLDB_SCOPED_TIMER();
456   if (add_demangled || add_mangled) {
457     std::lock_guard<std::recursive_mutex> guard(m_mutex);
458 
459     // Create the name index vector to be able to quickly search by name
460     const size_t num_indexes = indexes.size();
461     for (size_t i = 0; i < num_indexes; ++i) {
462       uint32_t value = indexes[i];
463       assert(i < m_symbols.size());
464       const Symbol *symbol = &m_symbols[value];
465 
466       const Mangled &mangled = symbol->GetMangled();
467       if (add_demangled) {
468         if (ConstString name = mangled.GetDemangledName())
469           name_to_index_map.Append(name, value);
470       }
471 
472       if (add_mangled) {
473         if (ConstString name = mangled.GetMangledName())
474           name_to_index_map.Append(name, value);
475       }
476     }
477   }
478 }
479 
480 uint32_t Symtab::AppendSymbolIndexesWithType(SymbolType symbol_type,
481                                              std::vector<uint32_t> &indexes,
482                                              uint32_t start_idx,
483                                              uint32_t end_index) const {
484   std::lock_guard<std::recursive_mutex> guard(m_mutex);
485 
486   uint32_t prev_size = indexes.size();
487 
488   const uint32_t count = std::min<uint32_t>(m_symbols.size(), end_index);
489 
490   for (uint32_t i = start_idx; i < count; ++i) {
491     if (symbol_type == eSymbolTypeAny || m_symbols[i].GetType() == symbol_type)
492       indexes.push_back(i);
493   }
494 
495   return indexes.size() - prev_size;
496 }
497 
498 uint32_t Symtab::AppendSymbolIndexesWithTypeAndFlagsValue(
499     SymbolType symbol_type, uint32_t flags_value,
500     std::vector<uint32_t> &indexes, uint32_t start_idx,
501     uint32_t end_index) const {
502   std::lock_guard<std::recursive_mutex> guard(m_mutex);
503 
504   uint32_t prev_size = indexes.size();
505 
506   const uint32_t count = std::min<uint32_t>(m_symbols.size(), end_index);
507 
508   for (uint32_t i = start_idx; i < count; ++i) {
509     if ((symbol_type == eSymbolTypeAny ||
510          m_symbols[i].GetType() == symbol_type) &&
511         m_symbols[i].GetFlags() == flags_value)
512       indexes.push_back(i);
513   }
514 
515   return indexes.size() - prev_size;
516 }
517 
518 uint32_t Symtab::AppendSymbolIndexesWithType(SymbolType symbol_type,
519                                              Debug symbol_debug_type,
520                                              Visibility symbol_visibility,
521                                              std::vector<uint32_t> &indexes,
522                                              uint32_t start_idx,
523                                              uint32_t end_index) const {
524   std::lock_guard<std::recursive_mutex> guard(m_mutex);
525 
526   uint32_t prev_size = indexes.size();
527 
528   const uint32_t count = std::min<uint32_t>(m_symbols.size(), end_index);
529 
530   for (uint32_t i = start_idx; i < count; ++i) {
531     if (symbol_type == eSymbolTypeAny ||
532         m_symbols[i].GetType() == symbol_type) {
533       if (CheckSymbolAtIndex(i, symbol_debug_type, symbol_visibility))
534         indexes.push_back(i);
535     }
536   }
537 
538   return indexes.size() - prev_size;
539 }
540 
541 uint32_t Symtab::GetIndexForSymbol(const Symbol *symbol) const {
542   if (!m_symbols.empty()) {
543     const Symbol *first_symbol = &m_symbols[0];
544     if (symbol >= first_symbol && symbol < first_symbol + m_symbols.size())
545       return symbol - first_symbol;
546   }
547   return UINT32_MAX;
548 }
549 
550 struct SymbolSortInfo {
551   const bool sort_by_load_addr;
552   const Symbol *symbols;
553 };
554 
555 namespace {
556 struct SymbolIndexComparator {
557   const std::vector<Symbol> &symbols;
558   std::vector<lldb::addr_t> &addr_cache;
559 
560   // Getting from the symbol to the Address to the File Address involves some
561   // work. Since there are potentially many symbols here, and we're using this
562   // for sorting so we're going to be computing the address many times, cache
563   // that in addr_cache. The array passed in has to be the same size as the
564   // symbols array passed into the member variable symbols, and should be
565   // initialized with LLDB_INVALID_ADDRESS.
566   // NOTE: You have to make addr_cache externally and pass it in because
567   // std::stable_sort
568   // makes copies of the comparator it is initially passed in, and you end up
569   // spending huge amounts of time copying this array...
570 
571   SymbolIndexComparator(const std::vector<Symbol> &s,
572                         std::vector<lldb::addr_t> &a)
573       : symbols(s), addr_cache(a) {
574     assert(symbols.size() == addr_cache.size());
575   }
576   bool operator()(uint32_t index_a, uint32_t index_b) {
577     addr_t value_a = addr_cache[index_a];
578     if (value_a == LLDB_INVALID_ADDRESS) {
579       value_a = symbols[index_a].GetAddressRef().GetFileAddress();
580       addr_cache[index_a] = value_a;
581     }
582 
583     addr_t value_b = addr_cache[index_b];
584     if (value_b == LLDB_INVALID_ADDRESS) {
585       value_b = symbols[index_b].GetAddressRef().GetFileAddress();
586       addr_cache[index_b] = value_b;
587     }
588 
589     if (value_a == value_b) {
590       // The if the values are equal, use the original symbol user ID
591       lldb::user_id_t uid_a = symbols[index_a].GetID();
592       lldb::user_id_t uid_b = symbols[index_b].GetID();
593       if (uid_a < uid_b)
594         return true;
595       if (uid_a > uid_b)
596         return false;
597       return false;
598     } else if (value_a < value_b)
599       return true;
600 
601     return false;
602   }
603 };
604 }
605 
606 void Symtab::SortSymbolIndexesByValue(std::vector<uint32_t> &indexes,
607                                       bool remove_duplicates) const {
608   std::lock_guard<std::recursive_mutex> guard(m_mutex);
609   LLDB_SCOPED_TIMER();
610   // No need to sort if we have zero or one items...
611   if (indexes.size() <= 1)
612     return;
613 
614   // Sort the indexes in place using std::stable_sort.
615   // NOTE: The use of std::stable_sort instead of llvm::sort here is strictly
616   // for performance, not correctness.  The indexes vector tends to be "close"
617   // to sorted, which the stable sort handles better.
618 
619   std::vector<lldb::addr_t> addr_cache(m_symbols.size(), LLDB_INVALID_ADDRESS);
620 
621   SymbolIndexComparator comparator(m_symbols, addr_cache);
622   std::stable_sort(indexes.begin(), indexes.end(), comparator);
623 
624   // Remove any duplicates if requested
625   if (remove_duplicates) {
626     auto last = std::unique(indexes.begin(), indexes.end());
627     indexes.erase(last, indexes.end());
628   }
629 }
630 
631 uint32_t Symtab::GetNameIndexes(ConstString symbol_name,
632                                 std::vector<uint32_t> &indexes) {
633   auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
634   const uint32_t count = name_to_index.GetValues(symbol_name, indexes);
635   if (count)
636     return count;
637   // Synthetic symbol names are not added to the name indexes, but they start
638   // with a prefix and end with a the symbol UserID. This allows users to find
639   // these symbols without having to add them to the name indexes. These
640   // queries will not happen very often since the names don't mean anything, so
641   // performance is not paramount in this case.
642   llvm::StringRef name = symbol_name.GetStringRef();
643   // String the synthetic prefix if the name starts with it.
644   if (!name.consume_front(Symbol::GetSyntheticSymbolPrefix()))
645     return 0; // Not a synthetic symbol name
646 
647   // Extract the user ID from the symbol name
648   unsigned long long uid = 0;
649   if (getAsUnsignedInteger(name, /*Radix=*/10, uid))
650     return 0; // Failed to extract the user ID as an integer
651   Symbol *symbol = FindSymbolByID(uid);
652   if (symbol == nullptr)
653     return 0;
654   const uint32_t symbol_idx = GetIndexForSymbol(symbol);
655   if (symbol_idx == UINT32_MAX)
656     return 0;
657   indexes.push_back(symbol_idx);
658   return 1;
659 }
660 
661 uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name,
662                                              std::vector<uint32_t> &indexes) {
663   std::lock_guard<std::recursive_mutex> guard(m_mutex);
664 
665   LLDB_SCOPED_TIMER();
666   if (symbol_name) {
667     if (!m_name_indexes_computed)
668       InitNameIndexes();
669 
670     return GetNameIndexes(symbol_name, indexes);
671   }
672   return 0;
673 }
674 
675 uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name,
676                                              Debug symbol_debug_type,
677                                              Visibility symbol_visibility,
678                                              std::vector<uint32_t> &indexes) {
679   std::lock_guard<std::recursive_mutex> guard(m_mutex);
680 
681   LLDB_SCOPED_TIMER();
682   if (symbol_name) {
683     const size_t old_size = indexes.size();
684     if (!m_name_indexes_computed)
685       InitNameIndexes();
686 
687     std::vector<uint32_t> all_name_indexes;
688     const size_t name_match_count =
689         GetNameIndexes(symbol_name, all_name_indexes);
690     for (size_t i = 0; i < name_match_count; ++i) {
691       if (CheckSymbolAtIndex(all_name_indexes[i], symbol_debug_type,
692                              symbol_visibility))
693         indexes.push_back(all_name_indexes[i]);
694     }
695     return indexes.size() - old_size;
696   }
697   return 0;
698 }
699 
700 uint32_t
701 Symtab::AppendSymbolIndexesWithNameAndType(ConstString symbol_name,
702                                            SymbolType symbol_type,
703                                            std::vector<uint32_t> &indexes) {
704   std::lock_guard<std::recursive_mutex> guard(m_mutex);
705 
706   if (AppendSymbolIndexesWithName(symbol_name, indexes) > 0) {
707     std::vector<uint32_t>::iterator pos = indexes.begin();
708     while (pos != indexes.end()) {
709       if (symbol_type == eSymbolTypeAny ||
710           m_symbols[*pos].GetType() == symbol_type)
711         ++pos;
712       else
713         pos = indexes.erase(pos);
714     }
715   }
716   return indexes.size();
717 }
718 
719 uint32_t Symtab::AppendSymbolIndexesWithNameAndType(
720     ConstString symbol_name, SymbolType symbol_type,
721     Debug symbol_debug_type, Visibility symbol_visibility,
722     std::vector<uint32_t> &indexes) {
723   std::lock_guard<std::recursive_mutex> guard(m_mutex);
724 
725   if (AppendSymbolIndexesWithName(symbol_name, symbol_debug_type,
726                                   symbol_visibility, indexes) > 0) {
727     std::vector<uint32_t>::iterator pos = indexes.begin();
728     while (pos != indexes.end()) {
729       if (symbol_type == eSymbolTypeAny ||
730           m_symbols[*pos].GetType() == symbol_type)
731         ++pos;
732       else
733         pos = indexes.erase(pos);
734     }
735   }
736   return indexes.size();
737 }
738 
739 uint32_t Symtab::AppendSymbolIndexesMatchingRegExAndType(
740     const RegularExpression &regexp, SymbolType symbol_type,
741     std::vector<uint32_t> &indexes) {
742   std::lock_guard<std::recursive_mutex> guard(m_mutex);
743 
744   uint32_t prev_size = indexes.size();
745   uint32_t sym_end = m_symbols.size();
746 
747   for (uint32_t i = 0; i < sym_end; i++) {
748     if (symbol_type == eSymbolTypeAny ||
749         m_symbols[i].GetType() == symbol_type) {
750       const char *name = m_symbols[i].GetName().AsCString();
751       if (name) {
752         if (regexp.Execute(name))
753           indexes.push_back(i);
754       }
755     }
756   }
757   return indexes.size() - prev_size;
758 }
759 
760 uint32_t Symtab::AppendSymbolIndexesMatchingRegExAndType(
761     const RegularExpression &regexp, SymbolType symbol_type,
762     Debug symbol_debug_type, Visibility symbol_visibility,
763     std::vector<uint32_t> &indexes) {
764   std::lock_guard<std::recursive_mutex> guard(m_mutex);
765 
766   uint32_t prev_size = indexes.size();
767   uint32_t sym_end = m_symbols.size();
768 
769   for (uint32_t i = 0; i < sym_end; i++) {
770     if (symbol_type == eSymbolTypeAny ||
771         m_symbols[i].GetType() == symbol_type) {
772       if (!CheckSymbolAtIndex(i, symbol_debug_type, symbol_visibility))
773         continue;
774 
775       const char *name = m_symbols[i].GetName().AsCString();
776       if (name) {
777         if (regexp.Execute(name))
778           indexes.push_back(i);
779       }
780     }
781   }
782   return indexes.size() - prev_size;
783 }
784 
785 Symbol *Symtab::FindSymbolWithType(SymbolType symbol_type,
786                                    Debug symbol_debug_type,
787                                    Visibility symbol_visibility,
788                                    uint32_t &start_idx) {
789   std::lock_guard<std::recursive_mutex> guard(m_mutex);
790 
791   const size_t count = m_symbols.size();
792   for (size_t idx = start_idx; idx < count; ++idx) {
793     if (symbol_type == eSymbolTypeAny ||
794         m_symbols[idx].GetType() == symbol_type) {
795       if (CheckSymbolAtIndex(idx, symbol_debug_type, symbol_visibility)) {
796         start_idx = idx;
797         return &m_symbols[idx];
798       }
799     }
800   }
801   return nullptr;
802 }
803 
804 void
805 Symtab::FindAllSymbolsWithNameAndType(ConstString name,
806                                       SymbolType symbol_type,
807                                       std::vector<uint32_t> &symbol_indexes) {
808   std::lock_guard<std::recursive_mutex> guard(m_mutex);
809 
810   LLDB_SCOPED_TIMER();
811   // Initialize all of the lookup by name indexes before converting NAME to a
812   // uniqued string NAME_STR below.
813   if (!m_name_indexes_computed)
814     InitNameIndexes();
815 
816   if (name) {
817     // The string table did have a string that matched, but we need to check
818     // the symbols and match the symbol_type if any was given.
819     AppendSymbolIndexesWithNameAndType(name, symbol_type, symbol_indexes);
820   }
821 }
822 
823 void Symtab::FindAllSymbolsWithNameAndType(
824     ConstString name, SymbolType symbol_type, Debug symbol_debug_type,
825     Visibility symbol_visibility, std::vector<uint32_t> &symbol_indexes) {
826   std::lock_guard<std::recursive_mutex> guard(m_mutex);
827 
828   LLDB_SCOPED_TIMER();
829   // Initialize all of the lookup by name indexes before converting NAME to a
830   // uniqued string NAME_STR below.
831   if (!m_name_indexes_computed)
832     InitNameIndexes();
833 
834   if (name) {
835     // The string table did have a string that matched, but we need to check
836     // the symbols and match the symbol_type if any was given.
837     AppendSymbolIndexesWithNameAndType(name, symbol_type, symbol_debug_type,
838                                        symbol_visibility, symbol_indexes);
839   }
840 }
841 
842 void Symtab::FindAllSymbolsMatchingRexExAndType(
843     const RegularExpression &regex, SymbolType symbol_type,
844     Debug symbol_debug_type, Visibility symbol_visibility,
845     std::vector<uint32_t> &symbol_indexes) {
846   std::lock_guard<std::recursive_mutex> guard(m_mutex);
847 
848   AppendSymbolIndexesMatchingRegExAndType(regex, symbol_type, symbol_debug_type,
849                                           symbol_visibility, symbol_indexes);
850 }
851 
852 Symbol *Symtab::FindFirstSymbolWithNameAndType(ConstString name,
853                                                SymbolType symbol_type,
854                                                Debug symbol_debug_type,
855                                                Visibility symbol_visibility) {
856   std::lock_guard<std::recursive_mutex> guard(m_mutex);
857   LLDB_SCOPED_TIMER();
858   if (!m_name_indexes_computed)
859     InitNameIndexes();
860 
861   if (name) {
862     std::vector<uint32_t> matching_indexes;
863     // The string table did have a string that matched, but we need to check
864     // the symbols and match the symbol_type if any was given.
865     if (AppendSymbolIndexesWithNameAndType(name, symbol_type, symbol_debug_type,
866                                            symbol_visibility,
867                                            matching_indexes)) {
868       std::vector<uint32_t>::const_iterator pos, end = matching_indexes.end();
869       for (pos = matching_indexes.begin(); pos != end; ++pos) {
870         Symbol *symbol = SymbolAtIndex(*pos);
871 
872         if (symbol->Compare(name, symbol_type))
873           return symbol;
874       }
875     }
876   }
877   return nullptr;
878 }
879 
880 typedef struct {
881   const Symtab *symtab;
882   const addr_t file_addr;
883   Symbol *match_symbol;
884   const uint32_t *match_index_ptr;
885   addr_t match_offset;
886 } SymbolSearchInfo;
887 
888 // Add all the section file start address & size to the RangeVector, recusively
889 // adding any children sections.
890 static void AddSectionsToRangeMap(SectionList *sectlist,
891                                   RangeVector<addr_t, addr_t> &section_ranges) {
892   const int num_sections = sectlist->GetNumSections(0);
893   for (int i = 0; i < num_sections; i++) {
894     SectionSP sect_sp = sectlist->GetSectionAtIndex(i);
895     if (sect_sp) {
896       SectionList &child_sectlist = sect_sp->GetChildren();
897 
898       // If this section has children, add the children to the RangeVector.
899       // Else add this section to the RangeVector.
900       if (child_sectlist.GetNumSections(0) > 0) {
901         AddSectionsToRangeMap(&child_sectlist, section_ranges);
902       } else {
903         size_t size = sect_sp->GetByteSize();
904         if (size > 0) {
905           addr_t base_addr = sect_sp->GetFileAddress();
906           RangeVector<addr_t, addr_t>::Entry entry;
907           entry.SetRangeBase(base_addr);
908           entry.SetByteSize(size);
909           section_ranges.Append(entry);
910         }
911       }
912     }
913   }
914 }
915 
916 void Symtab::InitAddressIndexes() {
917   // Protected function, no need to lock mutex...
918   if (!m_file_addr_to_index_computed && !m_symbols.empty()) {
919     m_file_addr_to_index_computed = true;
920 
921     FileRangeToIndexMap::Entry entry;
922     const_iterator begin = m_symbols.begin();
923     const_iterator end = m_symbols.end();
924     for (const_iterator pos = m_symbols.begin(); pos != end; ++pos) {
925       if (pos->ValueIsAddress()) {
926         entry.SetRangeBase(pos->GetAddressRef().GetFileAddress());
927         entry.SetByteSize(pos->GetByteSize());
928         entry.data = std::distance(begin, pos);
929         m_file_addr_to_index.Append(entry);
930       }
931     }
932     const size_t num_entries = m_file_addr_to_index.GetSize();
933     if (num_entries > 0) {
934       m_file_addr_to_index.Sort();
935 
936       // Create a RangeVector with the start & size of all the sections for
937       // this objfile.  We'll need to check this for any FileRangeToIndexMap
938       // entries with an uninitialized size, which could potentially be a large
939       // number so reconstituting the weak pointer is busywork when it is
940       // invariant information.
941       SectionList *sectlist = m_objfile->GetSectionList();
942       RangeVector<addr_t, addr_t> section_ranges;
943       if (sectlist) {
944         AddSectionsToRangeMap(sectlist, section_ranges);
945         section_ranges.Sort();
946       }
947 
948       // Iterate through the FileRangeToIndexMap and fill in the size for any
949       // entries that didn't already have a size from the Symbol (e.g. if we
950       // have a plain linker symbol with an address only, instead of debug info
951       // where we get an address and a size and a type, etc.)
952       for (size_t i = 0; i < num_entries; i++) {
953         FileRangeToIndexMap::Entry *entry =
954             m_file_addr_to_index.GetMutableEntryAtIndex(i);
955         if (entry->GetByteSize() == 0) {
956           addr_t curr_base_addr = entry->GetRangeBase();
957           const RangeVector<addr_t, addr_t>::Entry *containing_section =
958               section_ranges.FindEntryThatContains(curr_base_addr);
959 
960           // Use the end of the section as the default max size of the symbol
961           addr_t sym_size = 0;
962           if (containing_section) {
963             sym_size =
964                 containing_section->GetByteSize() -
965                 (entry->GetRangeBase() - containing_section->GetRangeBase());
966           }
967 
968           for (size_t j = i; j < num_entries; j++) {
969             FileRangeToIndexMap::Entry *next_entry =
970                 m_file_addr_to_index.GetMutableEntryAtIndex(j);
971             addr_t next_base_addr = next_entry->GetRangeBase();
972             if (next_base_addr > curr_base_addr) {
973               addr_t size_to_next_symbol = next_base_addr - curr_base_addr;
974 
975               // Take the difference between this symbol and the next one as
976               // its size, if it is less than the size of the section.
977               if (sym_size == 0 || size_to_next_symbol < sym_size) {
978                 sym_size = size_to_next_symbol;
979               }
980               break;
981             }
982           }
983 
984           if (sym_size > 0) {
985             entry->SetByteSize(sym_size);
986             Symbol &symbol = m_symbols[entry->data];
987             symbol.SetByteSize(sym_size);
988             symbol.SetSizeIsSynthesized(true);
989           }
990         }
991       }
992 
993       // Sort again in case the range size changes the ordering
994       m_file_addr_to_index.Sort();
995     }
996   }
997 }
998 
999 void Symtab::CalculateSymbolSizes() {
1000   std::lock_guard<std::recursive_mutex> guard(m_mutex);
1001   // Size computation happens inside InitAddressIndexes.
1002   InitAddressIndexes();
1003 }
1004 
1005 Symbol *Symtab::FindSymbolAtFileAddress(addr_t file_addr) {
1006   std::lock_guard<std::recursive_mutex> guard(m_mutex);
1007   if (!m_file_addr_to_index_computed)
1008     InitAddressIndexes();
1009 
1010   const FileRangeToIndexMap::Entry *entry =
1011       m_file_addr_to_index.FindEntryStartsAt(file_addr);
1012   if (entry) {
1013     Symbol *symbol = SymbolAtIndex(entry->data);
1014     if (symbol->GetFileAddress() == file_addr)
1015       return symbol;
1016   }
1017   return nullptr;
1018 }
1019 
1020 Symbol *Symtab::FindSymbolContainingFileAddress(addr_t file_addr) {
1021   std::lock_guard<std::recursive_mutex> guard(m_mutex);
1022 
1023   if (!m_file_addr_to_index_computed)
1024     InitAddressIndexes();
1025 
1026   const FileRangeToIndexMap::Entry *entry =
1027       m_file_addr_to_index.FindEntryThatContains(file_addr);
1028   if (entry) {
1029     Symbol *symbol = SymbolAtIndex(entry->data);
1030     if (symbol->ContainsFileAddress(file_addr))
1031       return symbol;
1032   }
1033   return nullptr;
1034 }
1035 
1036 void Symtab::ForEachSymbolContainingFileAddress(
1037     addr_t file_addr, std::function<bool(Symbol *)> const &callback) {
1038   std::lock_guard<std::recursive_mutex> guard(m_mutex);
1039 
1040   if (!m_file_addr_to_index_computed)
1041     InitAddressIndexes();
1042 
1043   std::vector<uint32_t> all_addr_indexes;
1044 
1045   // Get all symbols with file_addr
1046   const size_t addr_match_count =
1047       m_file_addr_to_index.FindEntryIndexesThatContain(file_addr,
1048                                                        all_addr_indexes);
1049 
1050   for (size_t i = 0; i < addr_match_count; ++i) {
1051     Symbol *symbol = SymbolAtIndex(all_addr_indexes[i]);
1052     if (symbol->ContainsFileAddress(file_addr)) {
1053       if (!callback(symbol))
1054         break;
1055     }
1056   }
1057 }
1058 
1059 void Symtab::SymbolIndicesToSymbolContextList(
1060     std::vector<uint32_t> &symbol_indexes, SymbolContextList &sc_list) {
1061   // No need to protect this call using m_mutex all other method calls are
1062   // already thread safe.
1063 
1064   const bool merge_symbol_into_function = true;
1065   size_t num_indices = symbol_indexes.size();
1066   if (num_indices > 0) {
1067     SymbolContext sc;
1068     sc.module_sp = m_objfile->GetModule();
1069     for (size_t i = 0; i < num_indices; i++) {
1070       sc.symbol = SymbolAtIndex(symbol_indexes[i]);
1071       if (sc.symbol)
1072         sc_list.AppendIfUnique(sc, merge_symbol_into_function);
1073     }
1074   }
1075 }
1076 
1077 void Symtab::FindFunctionSymbols(ConstString name, uint32_t name_type_mask,
1078                                  SymbolContextList &sc_list) {
1079   std::vector<uint32_t> symbol_indexes;
1080 
1081   // eFunctionNameTypeAuto should be pre-resolved by a call to
1082   // Module::LookupInfo::LookupInfo()
1083   assert((name_type_mask & eFunctionNameTypeAuto) == 0);
1084 
1085   if (name_type_mask & (eFunctionNameTypeBase | eFunctionNameTypeFull)) {
1086     std::vector<uint32_t> temp_symbol_indexes;
1087     FindAllSymbolsWithNameAndType(name, eSymbolTypeAny, temp_symbol_indexes);
1088 
1089     unsigned temp_symbol_indexes_size = temp_symbol_indexes.size();
1090     if (temp_symbol_indexes_size > 0) {
1091       std::lock_guard<std::recursive_mutex> guard(m_mutex);
1092       for (unsigned i = 0; i < temp_symbol_indexes_size; i++) {
1093         SymbolContext sym_ctx;
1094         sym_ctx.symbol = SymbolAtIndex(temp_symbol_indexes[i]);
1095         if (sym_ctx.symbol) {
1096           switch (sym_ctx.symbol->GetType()) {
1097           case eSymbolTypeCode:
1098           case eSymbolTypeResolver:
1099           case eSymbolTypeReExported:
1100             symbol_indexes.push_back(temp_symbol_indexes[i]);
1101             break;
1102           default:
1103             break;
1104           }
1105         }
1106       }
1107     }
1108   }
1109 
1110   if (!m_name_indexes_computed)
1111     InitNameIndexes();
1112 
1113   for (lldb::FunctionNameType type :
1114        {lldb::eFunctionNameTypeBase, lldb::eFunctionNameTypeMethod,
1115         lldb::eFunctionNameTypeSelector}) {
1116     if (name_type_mask & type) {
1117       auto map = GetNameToSymbolIndexMap(type);
1118 
1119       const UniqueCStringMap<uint32_t>::Entry *match;
1120       for (match = map.FindFirstValueForName(name); match != nullptr;
1121            match = map.FindNextValueForName(match)) {
1122         symbol_indexes.push_back(match->value);
1123       }
1124     }
1125   }
1126 
1127   if (!symbol_indexes.empty()) {
1128     llvm::sort(symbol_indexes.begin(), symbol_indexes.end());
1129     symbol_indexes.erase(
1130         std::unique(symbol_indexes.begin(), symbol_indexes.end()),
1131         symbol_indexes.end());
1132     SymbolIndicesToSymbolContextList(symbol_indexes, sc_list);
1133   }
1134 }
1135 
1136 const Symbol *Symtab::GetParent(Symbol *child_symbol) const {
1137   uint32_t child_idx = GetIndexForSymbol(child_symbol);
1138   if (child_idx != UINT32_MAX && child_idx > 0) {
1139     for (uint32_t idx = child_idx - 1; idx != UINT32_MAX; --idx) {
1140       const Symbol *symbol = SymbolAtIndex(idx);
1141       const uint32_t sibling_idx = symbol->GetSiblingIndex();
1142       if (sibling_idx != UINT32_MAX && sibling_idx > child_idx)
1143         return symbol;
1144     }
1145   }
1146   return nullptr;
1147 }
1148